From dfb7b083801a054bb15ce9f4fc49215dd2ed36ac Mon Sep 17 00:00:00 2001 From: Chris Meyers Date: Thu, 9 Feb 2017 15:27:08 -0500 Subject: [PATCH] check job status before marking as failed * When rectifying celery <-> db job status, re-check the job status to ensure it did not finish between the time that we pulled the list of celery tasks from celery and the time in which we rectify. --- awx/main/scheduler/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/awx/main/scheduler/__init__.py b/awx/main/scheduler/__init__.py index 7446785a7b..50a80b7116 100644 --- a/awx/main/scheduler/__init__.py +++ b/awx/main/scheduler/__init__.py @@ -346,10 +346,14 @@ class TaskManager(): for task in all_running_sorted_tasks: if (task['celery_task_id'] not in active_tasks and not hasattr(settings, 'IGNORE_CELERY_INSPECTOR')): - # NOTE: Pull status again and make sure it didn't finish in - # the meantime? # TODO: try catch the getting of the job. The job COULD have been deleted task_obj = task.get_full() + # Ensure job did not finish running between the time we get the + # list of task id's from celery and now. + # Note: This is an actual fix, not a reduction in the time + # window that this can happen. + if task_obj.status != 'running': + continue task_obj.status = 'failed' task_obj.job_explanation += ' '.join(( 'Task was marked as running in Tower but was not present in',