AAP-43117 Additional dispatcher removal simplifications and waiting reaper updates (#16243)

* Additional dispatcher removal simplifications and waiting repear updates

* Fix double call and logging message

* Implement bugbot comment, should reap running on lost instances

* Add test case for new pending behavior
This commit is contained in:
Alan Rominger
2026-01-26 13:55:37 -05:00
committed by GitHub
parent 12a7229ee9
commit f80bbc57d8
11 changed files with 63 additions and 130 deletions

View File

@@ -5,6 +5,7 @@ import pytest
from awx.main.models import Job, WorkflowJob, Instance
from awx.main.dispatch import reaper
from awx.main.tasks import system
from dispatcherd.publish import task
'''
@@ -61,11 +62,6 @@ class TestJobReaper(object):
('running', '', '', None, False), # running, not assigned to the instance
('running', 'awx', '', None, True), # running, has the instance as its execution_node
('running', '', 'awx', None, True), # running, has the instance as its controller_node
('waiting', '', '', None, False), # waiting, not assigned to the instance
('waiting', 'awx', '', None, False), # waiting, was edited less than a minute ago
('waiting', '', 'awx', None, False), # waiting, was edited less than a minute ago
('waiting', 'awx', '', yesterday, False), # waiting, managed by another node, ignore
('waiting', '', 'awx', yesterday, True), # waiting, assigned to the controller_node, stale
],
)
def test_should_reap(self, status, fail, execution_node, controller_node, modified):
@@ -83,7 +79,6 @@ class TestJobReaper(object):
# (because .save() overwrites it to _now_)
Job.objects.filter(id=j.id).update(modified=modified)
reaper.reap(i)
reaper.reap_waiting(i)
job = Job.objects.first()
if fail:
assert job.status == 'failed'
@@ -92,6 +87,20 @@ class TestJobReaper(object):
else:
assert job.status == status
def test_waiting_job_sent_back_to_pending(self):
this_inst = Instance(hostname='awx')
this_inst.save()
lost_inst = Instance(hostname='lost', node_type=Instance.Types.EXECUTION, node_state=Instance.States.UNAVAILABLE)
lost_inst.save()
job = Job.objects.create(status='waiting', controller_node=lost_inst.hostname, execution_node='lost')
system._heartbeat_handle_lost_instances([lost_inst], this_inst)
job.refresh_from_db()
assert job.status == 'pending'
assert job.controller_node == ''
assert job.execution_node == ''
@pytest.mark.parametrize(
'excluded_uuids, fail, started',
[