mirror of
https://github.com/ansible/awx.git
synced 2026-05-07 09:27:36 -02:30
Patches to make jobs robust to database restarts (#11905)
* Simple patches to make jobs robust to database restarts * Add some wait time before retrying loop due to DB error * Apply dispatcher downtime setting to job updates, fix dispatcher bug. This resolves a bug where the pg_is_down property never had the right value: the loop is normally stuck in the conn.events() iterator, so it never recognized successful database interactions. This led to serial database outages terminating jobs. The new setting for allowable PG downtime is shared with task code; any calls to update_model will use the _max_attempts parameter to make it align with the patience time that the dispatcher respects when consuming new events * To avoid restart loops, handle DB errors on startup with prejudice * If reconnect consistently fails, exit with non-zero code
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
from django.db import transaction, DatabaseError
|
||||
from django.db import transaction, DatabaseError, InterfaceError
|
||||
|
||||
import logging
|
||||
import time
|
||||
@@ -7,7 +7,7 @@ import time
|
||||
logger = logging.getLogger('awx.main.tasks.utils')
|
||||
|
||||
|
||||
def update_model(model, pk, _attempt=0, **updates):
|
||||
def update_model(model, pk, _attempt=0, _max_attempts=5, **updates):
|
||||
"""Reload the model instance from the database and update the
|
||||
given fields.
|
||||
"""
|
||||
@@ -27,14 +27,14 @@ def update_model(model, pk, _attempt=0, **updates):
|
||||
update_fields.append('failed')
|
||||
instance.save(update_fields=update_fields)
|
||||
return instance
|
||||
except DatabaseError as e:
|
||||
except (DatabaseError, InterfaceError) as e:
|
||||
# Log out the error to the debug logger.
|
||||
logger.debug('Database error updating %s, retrying in 5 seconds (retry #%d): %s', model._meta.object_name, _attempt + 1, e)
|
||||
|
||||
# Attempt to retry the update, assuming we haven't already
|
||||
# tried too many times.
|
||||
if _attempt < 5:
|
||||
if _attempt < _max_attempts:
|
||||
time.sleep(5)
|
||||
return update_model(model, pk, _attempt=_attempt + 1, **updates)
|
||||
return update_model(model, pk, _attempt=_attempt + 1, _max_attempts=_max_attempts, **updates)
|
||||
else:
|
||||
logger.error('Failed to update %s after %d retries.', model._meta.object_name, _attempt)
|
||||
|
||||
Reference in New Issue
Block a user