From 3fb3125bc37dad6ca38ba369c55b1972ae8611c3 Mon Sep 17 00:00:00 2001 From: Hao Liu <44379968+TheRealHaoLiu@users.noreply.github.com> Date: Wed, 21 Feb 2024 16:08:43 -0500 Subject: [PATCH] Send QUIT to worker before dying (#14913) Fix a deadlock scenario where a dispatcher child process gets stuck in its read-from-queue loop after the dispatcher parent process has decided to quit Co-authored-by: Alan Rominger --- awx/main/dispatch/worker/base.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/awx/main/dispatch/worker/base.py b/awx/main/dispatch/worker/base.py index 8a8f8dc02a..264205a8ed 100644 --- a/awx/main/dispatch/worker/base.py +++ b/awx/main/dispatch/worker/base.py @@ -259,6 +259,12 @@ class AWXConsumerPG(AWXConsumerBase): current_downtime = time.time() - self.pg_down_time if current_downtime > self.pg_max_wait: logger.exception(f"Postgres event consumer has not recovered in {current_downtime} s, exiting") + # Sending QUIT to multiprocess queue to signal workers to exit + for worker in self.pool.workers: + try: + worker.quit() + except Exception: + logger.exception(f"Error sending QUIT to worker {worker}") raise # Wait for a second before next attempt, but still listen for any shutdown signals for i in range(10): @@ -270,6 +276,12 @@ class AWXConsumerPG(AWXConsumerBase): except Exception: # Log unanticipated exception in addition to writing to stderr to get timestamps and other metadata logger.exception('Encountered unhandled error in dispatcher main loop') + # Sending QUIT to multiprocess queue to signal workers to exit + for worker in self.pool.workers: + try: + worker.quit() + except Exception: + logger.exception(f"Error sending QUIT to worker {worker}") raise