Merge pull request #3707 from ryanpetrello/report-ansible-runner-crashes

if runner crashes, attempt to record why

Reviewed-by: https://github.com/softwarefactory-project-zuul[bot]
This commit is contained in:
softwarefactory-project-zuul[bot] 2019-04-15 18:16:09 +00:00 committed by GitHub
commit 1d4773545e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 7 deletions

View File

@@ -210,10 +210,24 @@ class IsolatedManager(object):
if status == 'successful':
status_path = self.path_to('artifacts', self.ident, 'status')
rc_path = self.path_to('artifacts', self.ident, 'rc')
with open(status_path, 'r') as f:
status = f.readline()
with open(rc_path, 'r') as f:
rc = int(f.readline())
if os.path.exists(status_path):
with open(status_path, 'r') as f:
status = f.readline()
with open(rc_path, 'r') as f:
rc = int(f.readline())
else:
# if there's no status file, it means that runner _probably_
# exited with a traceback (which should be logged to
# daemon.log). Record it so we can see how runner failed.
daemon_path = self.path_to('daemon.log')
if os.path.exists(daemon_path):
with open(daemon_path, 'r') as f:
self.instance.result_traceback = f.read()
self.instance.save(update_fields=['result_traceback'])
else:
logger.error('Failed to rsync daemon.log (is ansible-runner installed on the isolated host?)')
status = 'failed'
rc = 1
# consume events one last time just to be sure we didn't miss anything
# in the final sync

View File

@@ -1103,7 +1103,7 @@ class BaseTask(object):
start_args='') # blank field to remove encrypted passwords
self.instance.websocket_emit_status("running")
status, rc, tb = 'error', None, ''
status, rc = 'error', None
output_replacements = []
extra_update_fields = {}
fact_modification_times = {}
@@ -1255,7 +1255,7 @@ class BaseTask(object):
except Exception:
# this could catch programming or file system errors
tb = traceback.format_exc()
extra_update_fields['result_traceback'] = traceback.format_exc()
logger.exception('%s Exception occurred while running task', self.instance.log_format)
finally:
logger.info('%s finished running, producing %s events.', self.instance.log_format, self.event_ct)
@@ -1266,7 +1266,7 @@ class BaseTask(object):
logger.exception('{} Post run hook errored.'.format(self.instance.log_format))
self.instance = self.update_model(pk)
self.instance = self.update_model(pk, status=status, result_traceback=tb,
self.instance = self.update_model(pk, status=status,
output_replacements=output_replacements,
emitted_events=self.event_ct,
**extra_update_fields)

View File

@@ -21,6 +21,12 @@
delete: yes
recursive: yes
- name: Copy daemon log from the isolated host
synchronize:
src: "{{src}}/daemon.log"
dest: "{{src}}/daemon.log"
mode: pull
- name: Fail if previous check determined that process is not alive.
fail:
msg: "isolated task is still running"