From fdae7a3a0e69e693bf374d16c0c404f50a91ba10 Mon Sep 17 00:00:00 2001 From: Jim Ladd Date: Fri, 15 Oct 2021 16:49:14 -0700 Subject: [PATCH] cancel job if receptor no longer knows about the work item --- awx/main/tasks.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 6a083f69e2..6c9ae0d9cf 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -3137,6 +3137,11 @@ class AWXReceptorJob: logger.warn(f"Could not launch pod for {log_name}. Exceeded quota.") self.task.update_model(self.task.instance.pk, status='pending') return + + # if we did not exceed the quota, continue with shutting down the job + resultsock.shutdown(socket.SHUT_RDWR) + resultfile.close() + # If ansible-runner ran, but an error occured at runtime, the traceback information # is saved via the status_handler passed in to the processor. if state_name == 'Succeeded': @@ -3216,10 +3221,21 @@ class AWXReceptorJob: @cleanup_new_process def cancel_watcher(self, processor_future): + receptor_ctl = get_receptor_ctl() while True: if processor_future.done(): return processor_future.result() + # cancel job if receptor no longer knows about work item + try: + unit_status = receptor_ctl.simple_command(f'work status {self.unit_id}') + except RuntimeError as e: + self.task.instance.result_traceback = traceback.format_exc() + self.task.instance.save(update_fields=['result_traceback']) + + result = namedtuple('result', ['status', 'rc']) + return result('error', 1) + if self.task.cancel_callback(): result = namedtuple('result', ['status', 'rc']) return result('canceled', 1)