make the dispatcher more fault-tolerant to prolonged database outages

This commit is contained in:
Ryan Petrello
2018-10-17 13:36:19 -04:00
parent ce8117ef19
commit 0d29bbfdc6
5 changed files with 78 additions and 19 deletions

View File

@@ -26,7 +26,7 @@ def reap_job(j, status):
)
def reap(instance=None, status='failed'):
def reap(instance=None, status='failed', excluded_uuids=[]):
'''
Reap all jobs in waiting|running for this instance.
'''
@@ -41,6 +41,6 @@ def reap(instance=None, status='failed'):
Q(execution_node=me.hostname) |
Q(controller_node=me.hostname)
) & ~Q(polymorphic_ctype_id=workflow_ctype_id)
)
).exclude(celery_task_id__in=excluded_uuids)
for j in jobs:
reap_job(j, status)