diff --git a/awx/main/isolated/manager.py b/awx/main/isolated/manager.py index 43afdac6df..7872b85d4b 100644 --- a/awx/main/isolated/manager.py +++ b/awx/main/isolated/manager.py @@ -210,10 +210,24 @@ class IsolatedManager(object): if status == 'successful': status_path = self.path_to('artifacts', self.ident, 'status') rc_path = self.path_to('artifacts', self.ident, 'rc') - with open(status_path, 'r') as f: - status = f.readline() - with open(rc_path, 'r') as f: - rc = int(f.readline()) + if os.path.exists(status_path): + with open(status_path, 'r') as f: + status = f.readline() + with open(rc_path, 'r') as f: + rc = int(f.readline()) + else: + # if there's no status file, it means that runner _probably_ + # exited with a traceback (which should be logged to + # daemon.log) Record it so we can see how runner failed. + daemon_path = self.path_to('daemon.log') + if os.path.exists(daemon_path): + with open(daemon_path, 'r') as f: + self.instance.result_traceback = f.read() + self.instance.save(update_fields=['result_traceback']) + else: + logger.error('Failed to rsync daemon.log (is ansible-runner installed on the isolated host?)') + status = 'failed' + rc = 1 # consume events one last time just to be sure we didn't miss anything # in the final sync diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 186fa466a0..ed421a8551 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -1103,7 +1103,7 @@ class BaseTask(object): start_args='') # blank field to remove encrypted passwords self.instance.websocket_emit_status("running") - status, rc, tb = 'error', None, '' + status, rc = 'error', None output_replacements = [] extra_update_fields = {} fact_modification_times = {} @@ -1255,7 +1255,7 @@ class BaseTask(object): except Exception: # this could catch programming or file system errors - tb = traceback.format_exc() + extra_update_fields['result_traceback'] = traceback.format_exc() logger.exception('%s Exception occurred while running task', self.instance.log_format) finally: logger.info('%s finished running, producing %s events.', self.instance.log_format, self.event_ct) @@ -1266,7 +1266,7 @@ class BaseTask(object): logger.exception('{} Post run hook errored.'.format(self.instance.log_format)) self.instance = self.update_model(pk) - self.instance = self.update_model(pk, status=status, result_traceback=tb, + self.instance = self.update_model(pk, status=status, output_replacements=output_replacements, emitted_events=self.event_ct, **extra_update_fields) diff --git a/awx/playbooks/check_isolated.yml b/awx/playbooks/check_isolated.yml index 5d35783bce..e3bc9e2115 100644 --- a/awx/playbooks/check_isolated.yml +++ b/awx/playbooks/check_isolated.yml @@ -21,6 +21,12 @@ delete: yes recursive: yes + - name: Copy daemon log from the isolated host + synchronize: + src: "{{src}}/daemon.log" + dest: "{{src}}/daemon.log" + mode: pull + - name: Fail if previous check determined that process is not alive. fail: msg: "isolated task is still running"