make the dispatcher more fault-tolerant to prolonged database outages

2026-02-19 20:20:06 -03:30 · 2018-10-17 13:36:19 -04:00
parent ce8117ef19
commit 0d29bbfdc6
5 changed files with 78 additions and 19 deletions
--- a/awx/main/dispatch/reaper.py
+++ b/awx/main/dispatch/reaper.py
@@ -26,7 +26,7 @@ def reap_job(j, status):
    )


-def reap(instance=None, status='failed'):
+def reap(instance=None, status='failed', excluded_uuids=[]):
    '''
    Reap all jobs in waiting|running for this instance.
    '''
@@ -41,6 +41,6 @@ def reap(instance=None, status='failed'):
            Q(execution_node=me.hostname) |
            Q(controller_node=me.hostname)
        ) & ~Q(polymorphic_ctype_id=workflow_ctype_id)
-    )
+    ).exclude(celery_task_id__in=excluded_uuids)
    for j in jobs:
        reap_job(j, status)