From 8be1bea33a25c6d6ce5da680fbc5531b4a187bba Mon Sep 17 00:00:00 2001 From: AlanCoding Date: Mon, 14 Aug 2017 16:51:19 -0400 Subject: [PATCH 1/2] import os, fixing bug that forced SIGKILL --- awx/main/management/commands/run_callback_receiver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/awx/main/management/commands/run_callback_receiver.py b/awx/main/management/commands/run_callback_receiver.py index 8c14166679..329b4e9c85 100644 --- a/awx/main/management/commands/run_callback_receiver.py +++ b/awx/main/management/commands/run_callback_receiver.py @@ -9,6 +9,7 @@ from multiprocessing import Process from multiprocessing import Queue as MPQueue from Queue import Empty as QueueEmpty from Queue import Full as QueueFull +import os from kombu import Connection, Exchange, Queue from kombu.mixins import ConsumerMixin @@ -42,8 +43,7 @@ class CallbackBrokerWorker(ConsumerMixin): signal.signal(signum, signal.SIG_DFL) os.kill(os.getpid(), signum) # Rethrow signal, this time without catching it except Exception: - # TODO: LOG - pass + logger.exception('Error in shutdown_handler') return _handler if use_workers: @@ -108,7 +108,7 @@ class CallbackBrokerWorker(ConsumerMixin): except QueueEmpty: continue except Exception as e: - logger.error("Exception on worker thread, restarting: " + str(e)) + logger.info("Exception on worker thread, restarting: " + str(e)) continue try: if 'job_id' not in body and 'ad_hoc_command_id' not in body: From 9ad34edc3857beaf2c7bd04e7478d3f6a3744e54 Mon Sep 17 00:00:00 2001 From: AlanCoding Date: Mon, 14 Aug 2017 17:13:32 -0400 Subject: [PATCH 2/2] graceful killing of receiver worker processes --- .../management/commands/run_callback_receiver.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/awx/main/management/commands/run_callback_receiver.py b/awx/main/management/commands/run_callback_receiver.py index 329b4e9c85..9539a01762 100644 --- a/awx/main/management/commands/run_callback_receiver.py +++ b/awx/main/management/commands/run_callback_receiver.py @@ -27,6 +27,17 @@ from awx.main.models import * # noqa logger = logging.getLogger('awx.main.commands.run_callback_receiver') +class WorkerSignalHandler: + + def __init__(self): + self.kill_now = False + signal.signal(signal.SIGINT, self.exit_gracefully) + signal.signal(signal.SIGTERM, self.exit_gracefully) + + def exit_gracefully(self, *args, **kwargs): + self.kill_now = True + + class CallbackBrokerWorker(ConsumerMixin): def __init__(self, connection, use_workers=True): self.connection = connection @@ -102,13 +113,14 @@ class CallbackBrokerWorker(ConsumerMixin): return None def callback_worker(self, queue_actual, idx): - while True: + signal_handler = WorkerSignalHandler() + while not signal_handler.kill_now: try: body = queue_actual.get(block=True, timeout=1) except QueueEmpty: continue except Exception as e: - logger.info("Exception on worker thread, restarting: " + str(e)) + logger.error("Exception on worker thread, restarting: " + str(e)) continue try: if 'job_id' not in body and 'ad_hoc_command_id' not in body: