diff --git a/awx/main/scheduler/task_manager.py b/awx/main/scheduler/task_manager.py index 7e72d4cb8e..272779b3e5 100644 --- a/awx/main/scheduler/task_manager.py +++ b/awx/main/scheduler/task_manager.py @@ -138,7 +138,8 @@ class TaskBase: # Lock with task_manager_bulk_reschedule(): - with advisory_lock(f"{self.prefix}_lock", wait=False) as acquired: + lock_session_timeout_milliseconds = settings.TASK_MANAGER_LOCK_TIMEOUT * 1000 # convert to milliseconds + with advisory_lock(f"{self.prefix}_lock", lock_session_timeout_milliseconds=lock_session_timeout_milliseconds, wait=False) as acquired: with transaction.atomic(): if acquired is False: logger.debug(f"Not running {self.prefix} scheduler, another task holds lock") diff --git a/awx/main/tasks/system.py b/awx/main/tasks/system.py index aa65e706c1..c0f71bbc7f 100644 --- a/awx/main/tasks/system.py +++ b/awx/main/tasks/system.py @@ -715,7 +715,8 @@ def awx_k8s_reaper(): @task(queue=get_task_queuename) def awx_periodic_scheduler(): - with advisory_lock('awx_periodic_scheduler_lock', wait=False) as acquired: + lock_session_timeout_milliseconds = settings.TASK_MANAGER_LOCK_TIMEOUT * 1000 + with advisory_lock('awx_periodic_scheduler_lock', lock_session_timeout_milliseconds=lock_session_timeout_milliseconds, wait=False) as acquired: if acquired is False: logger.debug("Not running periodic scheduler, another task holds lock") return diff --git a/awx/main/utils/pglock.py b/awx/main/utils/pglock.py index 3d8a00d20a..a47880b667 100644 --- a/awx/main/utils/pglock.py +++ b/awx/main/utils/pglock.py @@ -8,9 +8,22 @@ from django.db import connection @contextmanager -def advisory_lock(*args, **kwargs): +def advisory_lock(*args, lock_session_timeout_milliseconds=0, **kwargs): if connection.vendor == 'postgresql': + cur = None + idle_in_transaction_session_timeout = None + idle_session_timeout = None + if lock_session_timeout_milliseconds > 0: + with connection.cursor() as cur: + idle_in_transaction_session_timeout = cur.execute('SHOW idle_in_transaction_session_timeout').fetchone()[0] + idle_session_timeout = cur.execute('SHOW idle_session_timeout').fetchone()[0] + cur.execute(f"SET idle_in_transaction_session_timeout = {lock_session_timeout_milliseconds}") + cur.execute(f"SET idle_session_timeout = {lock_session_timeout_milliseconds}") with django_pglocks_advisory_lock(*args, **kwargs) as internal_lock: yield internal_lock + if lock_session_timeout_milliseconds > 0: + with connection.cursor() as cur: + cur.execute(f"SET idle_in_transaction_session_timeout = {idle_in_transaction_session_timeout}") + cur.execute(f"SET idle_session_timeout = {idle_session_timeout}") else: yield True diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index 93c6e6f60d..546f68f50d 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -262,6 +262,7 @@ START_TASK_LIMIT = 100 # We have the grace period so the task manager can bail out before the timeout. TASK_MANAGER_TIMEOUT = 300 TASK_MANAGER_TIMEOUT_GRACE_PERIOD = 60 +TASK_MANAGER_LOCK_TIMEOUT = TASK_MANAGER_TIMEOUT + TASK_MANAGER_TIMEOUT_GRACE_PERIOD # Number of seconds _in addition to_ the task manager timeout a job can stay # in waiting without being reaped