Give specific messages if a job was killed due to SIGTERM or SIGKILL (#12435)

* Reap jobs on dispatcher startup to increase clarity, replace existing reaping logic

* Exit jobs on receiving a SIGTERM signal

* Fix unwanted reaping on shutdown, let subprocess close out

* Add some sanity tests for signal module

* Add a log for an unhandled dispatcher error

* Refine wording of error messages

Co-authored-by: Elijah DeLee <kdelee@redhat.com>
This commit is contained in:
Alan Rominger
2022-06-30 13:20:08 -04:00
committed by GitHub
parent a0d5f1fb03
commit fd671ecc9d
9 changed files with 164 additions and 10 deletions

View File

@@ -3,6 +3,8 @@ from django.db import transaction, DatabaseError, InterfaceError
import logging
import time
from awx.main.tasks.signals import signal_callback
logger = logging.getLogger('awx.main.tasks.utils')
@@ -37,7 +39,10 @@ def update_model(model, pk, _attempt=0, _max_attempts=5, select_for_update=False
# Attempt to retry the update, assuming we haven't already
# tried too many times.
if _attempt < _max_attempts:
time.sleep(5)
for i in range(5):
time.sleep(1)
if signal_callback():
raise RuntimeError(f'Could not fetch {pk} because of receiving abort signal')
return update_model(model, pk, _attempt=_attempt + 1, _max_attempts=_max_attempts, **updates)
else:
logger.error('Failed to update %s after %d retries.', model._meta.object_name, _attempt)