add additional DB retry logic to the callback receiver

Initially, I implemented this for _only_ the task worker, but it's
probably needed for callback event workers, too.
This commit is contained in:
Ryan Petrello
2018-11-29 10:44:37 -05:00
parent 273d7a83f2
commit 0391dbc292
3 changed files with 6 additions and 9 deletions

View File

@@ -7,6 +7,7 @@ import signal
from uuid import UUID from uuid import UUID
from Queue import Empty as QueueEmpty from Queue import Empty as QueueEmpty
from django import db
from kombu import Producer from kombu import Producer
from kombu.mixins import ConsumerMixin from kombu.mixins import ConsumerMixin
@@ -128,6 +129,10 @@ class BaseWorker(object):
logger.error("Exception on worker {}, restarting: ".format(idx) + str(e)) logger.error("Exception on worker {}, restarting: ".format(idx) + str(e))
continue continue
try: try:
for conn in db.connections.all():
# If the database connection had a hiccup while handling the prior message,
# close it so we can establish a new connection
conn.close_if_unusable_or_obsolete()
self.perform_work(body, *args) self.perform_work(body, *args)
finally: finally:
if 'uuid' in body: if 'uuid' in body:

View File

@@ -1,7 +1,5 @@
import logging import logging
import time import time
import os
import signal
import traceback import traceback
from django.conf import settings from django.conf import settings
@@ -110,8 +108,7 @@ class CallbackBrokerWorker(BaseWorker):
break break
except (OperationalError, InterfaceError, InternalError): except (OperationalError, InterfaceError, InternalError):
if retries >= self.MAX_RETRIES: if retries >= self.MAX_RETRIES:
logger.exception('Worker could not re-establish database connectivity, shutting down gracefully: Job {}'.format(job_identifier)) logger.exception('Worker could not re-establish database connectivity, giving up on event for Job {}'.format(job_identifier))
os.kill(os.getppid(), signal.SIGINT)
return return
delay = 60 * retries delay = 60 * retries
logger.exception('Database Error Saving Job Event, retry #{i} in {delay} seconds:'.format( logger.exception('Database Error Saving Job Event, retry #{i} in {delay} seconds:'.format(

View File

@@ -5,7 +5,6 @@ import sys
import traceback import traceback
import six import six
from django import db
from awx.main.tasks import dispatch_startup, inform_cluster_of_shutdown from awx.main.tasks import dispatch_startup, inform_cluster_of_shutdown
@@ -75,10 +74,6 @@ class TaskWorker(BaseWorker):
'task': u'awx.main.tasks.RunProjectUpdate' 'task': u'awx.main.tasks.RunProjectUpdate'
} }
''' '''
for conn in db.connections.all():
# If the database connection has a hiccup during a task, close it
# so we can establish a new connection
conn.close_if_unusable_or_obsolete()
result = None result = None
try: try:
result = self.run_callable(body) result = self.run_callable(body)