From 4890c15eeb8c68a302efa8c67c8383c627295312 Mon Sep 17 00:00:00 2001 From: Jeff Bradberry Date: Fri, 29 Jul 2022 16:11:34 -0400 Subject: [PATCH] Update task management to only do things with ready instances --- awx/main/scheduler/task_manager_models.py | 4 +++- awx/main/tasks/system.py | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/awx/main/scheduler/task_manager_models.py b/awx/main/scheduler/task_manager_models.py index 678e545152..c0580ca646 100644 --- a/awx/main/scheduler/task_manager_models.py +++ b/awx/main/scheduler/task_manager_models.py @@ -38,7 +38,9 @@ class TaskManagerInstances: self.instances_by_hostname = dict() if instances is None: instances = ( - Instance.objects.filter(hostname__isnull=False, enabled=True).exclude(node_type='hop').only('node_type', 'capacity', 'hostname', 'enabled') + Instance.objects.filter(hostname__isnull=False, node_state=Instance.States.READY, enabled=True) + .exclude(node_type='hop') + .only('node_type', 'node_state', 'capacity', 'hostname', 'enabled') ) for instance in instances: self.instances_by_hostname[instance.hostname] = TaskManagerInstance(instance) diff --git a/awx/main/tasks/system.py b/awx/main/tasks/system.py index 0d1d049784..2f35109213 100644 --- a/awx/main/tasks/system.py +++ b/awx/main/tasks/system.py @@ -341,9 +341,13 @@ def _cleanup_images_and_files(**kwargs): logger.info(f'Performed local cleanup with kwargs {kwargs}, output:\n{stdout}') # if we are the first instance alphabetically, then run cleanup on execution nodes - checker_instance = Instance.objects.filter(node_type__in=['hybrid', 'control'], enabled=True, capacity__gt=0).order_by('-hostname').first() + checker_instance = ( + Instance.objects.filter(node_type__in=['hybrid', 'control'], node_state=Instance.States.READY, enabled=True, capacity__gt=0) + .order_by('-hostname') + .first() + ) if checker_instance and this_inst.hostname == checker_instance.hostname: - for inst in Instance.objects.filter(node_type='execution', enabled=True, capacity__gt=0): + for inst in Instance.objects.filter(node_type='execution', node_state=Instance.States.READY, enabled=True, capacity__gt=0): runner_cleanup_kwargs = inst.get_cleanup_task_kwargs(**kwargs) if not runner_cleanup_kwargs: continue