mirror of
https://github.com/ansible/awx.git
synced 2026-03-03 09:48:51 -03:30
Add grace period settings for task manager timeout, and pod / job waiting reapers
Co-authored-by: Alan Rominger <arominge@redhat.com>
This commit is contained in:
committed by
Alan Rominger
parent
c649809eb2
commit
3c51cb130f
@@ -344,6 +344,10 @@ class AutoscalePool(WorkerPool):
|
||||
# max workers can't be less than min_workers
|
||||
self.max_workers = max(self.min_workers, self.max_workers)
|
||||
|
||||
# the task manager enforces settings.TASK_MANAGER_TIMEOUT on its own
|
||||
# but if the task takes longer than the time defined here, we will force it to stop here
|
||||
self.task_manager_timeout = settings.TASK_MANAGER_TIMEOUT + settings.TASK_MANAGER_TIMEOUT_GRACE_PERIOD
|
||||
|
||||
@property
|
||||
def should_grow(self):
|
||||
if len(self.workers) < self.min_workers:
|
||||
@@ -377,7 +381,6 @@ class AutoscalePool(WorkerPool):
|
||||
if there's an outage, this method _can_ throw various
|
||||
django.db.utils.Error exceptions. Act accordingly.
|
||||
"""
|
||||
start_time = time.time()
|
||||
orphaned = []
|
||||
for w in self.workers[::]:
|
||||
if not w.alive:
|
||||
@@ -419,8 +422,8 @@ class AutoscalePool(WorkerPool):
|
||||
w.managed_tasks[current_task['uuid']]['started'] = time.time()
|
||||
age = time.time() - current_task['started']
|
||||
w.managed_tasks[current_task['uuid']]['age'] = age
|
||||
if age > (settings.TASK_MANAGER_TIMEOUT + settings.TASK_MANAGER_TIMEOUT_GRACE_PERIOD):
|
||||
logger.error(f'{current_task_name} has held the advisory lock for {age}, sending SIGTERM to {w.pid}') # noqa
|
||||
if age > self.task_manager_timeout:
|
||||
logger.error(f'{current_task_name} has held the advisory lock for {age}, sending SIGTERM to {w.pid}')
|
||||
os.kill(w.pid, signal.SIGTERM)
|
||||
|
||||
for m in orphaned:
|
||||
@@ -436,10 +439,11 @@ class AutoscalePool(WorkerPool):
|
||||
for worker in self.workers:
|
||||
worker.calculate_managed_tasks()
|
||||
running_uuids.extend(list(worker.managed_tasks.keys()))
|
||||
delta = time.time() - start_time
|
||||
if delta > 1.0:
|
||||
logger.warning(f'Took {delta} for internal part of cleanup')
|
||||
start_time = time.time()
|
||||
|
||||
# if we are not in the dangerous situation of queue backup then clear old waiting jobs
|
||||
if self.workers and max(len(w.managed_tasks) for w in self.workers) <= 1:
|
||||
reaper.reap_waiting(excluded_uuids=running_uuids)
|
||||
|
||||
reaper.reap(excluded_uuids=running_uuids)
|
||||
|
||||
def up(self):
|
||||
|
||||
Reference in New Issue
Block a user