add additional DB retry logic to the callback receiver

initially, I implemented this for _only_ the task worker, but it's
probably needed for callback event workers, too
This commit is contained in:
Ryan Petrello 2018-11-29 10:44:37 -05:00
parent 273d7a83f2
commit 0391dbc292
No known key found for this signature in database
GPG Key ID: F2AA5F2122351777
3 changed files with 6 additions and 9 deletions

View File

@ -7,6 +7,7 @@ import signal
from uuid import UUID
from Queue import Empty as QueueEmpty
from django import db
from kombu import Producer
from kombu.mixins import ConsumerMixin
@ -128,6 +129,10 @@ class BaseWorker(object):
logger.error("Exception on worker {}, restarting: ".format(idx) + str(e))
continue
try:
for conn in db.connections.all():
# If the database connection has a hiccup during the prior message, close it
# so we can establish a new connection
conn.close_if_unusable_or_obsolete()
self.perform_work(body, *args)
finally:
if 'uuid' in body:

View File

@ -1,7 +1,5 @@
import logging
import time
import os
import signal
import traceback
from django.conf import settings
@ -110,8 +108,7 @@ class CallbackBrokerWorker(BaseWorker):
break
except (OperationalError, InterfaceError, InternalError):
if retries >= self.MAX_RETRIES:
logger.exception('Worker could not re-establish database connectivity, shutting down gracefully: Job {}'.format(job_identifier))
os.kill(os.getppid(), signal.SIGINT)
logger.exception('Worker could not re-establish database connectivity, giving up on event for Job {}'.format(job_identifier))
return
delay = 60 * retries
logger.exception('Database Error Saving Job Event, retry #{i} in {delay} seconds:'.format(

View File

@ -5,7 +5,6 @@ import sys
import traceback
import six
from django import db
from awx.main.tasks import dispatch_startup, inform_cluster_of_shutdown
@ -75,10 +74,6 @@ class TaskWorker(BaseWorker):
'task': u'awx.main.tasks.RunProjectUpdate'
}
'''
for conn in db.connections.all():
# If the database connection has a hiccup during at task, close it
# so we can establish a new connection
conn.close_if_unusable_or_obsolete()
result = None
try:
result = self.run_callable(body)