AAP-43117 Additional dispatcher removal simplifications and waiting reaper updates (#16243)

* Additional dispatcher removal simplifications and waiting reaper updates * Fix double call and logging message * Implement bugbot comment, should reap running on lost instances * Add test case for new pending behavior
2026-06-23 15:47:49 -02:30 · 2026-01-26 13:55:37 -05:00
parent 12a7229ee9
commit f80bbc57d8
11 changed files with 63 additions and 130 deletions
--- a/awx/main/tests/functional/test_dispatch.py
+++ b/awx/main/tests/functional/test_dispatch.py
@@ -5,6 +5,7 @@ import pytest

 from awx.main.models import Job, WorkflowJob, Instance
 from awx.main.dispatch import reaper
+from awx.main.tasks import system
 from dispatcherd.publish import task

 '''
@@ -61,11 +62,6 @@ class TestJobReaper(object):
            ('running', '', '', None, False),  # running, not assigned to the instance
            ('running', 'awx', '', None, True),  # running, has the instance as its execution_node
            ('running', '', 'awx', None, True),  # running, has the instance as its controller_node
-            ('waiting', '', '', None, False),  # waiting, not assigned to the instance
-            ('waiting', 'awx', '', None, False),  # waiting, was edited less than a minute ago
-            ('waiting', '', 'awx', None, False),  # waiting, was edited less than a minute ago
-            ('waiting', 'awx', '', yesterday, False),  # waiting, managed by another node, ignore
-            ('waiting', '', 'awx', yesterday, True),  # waiting, assigned to the controller_node, stale
        ],
    )
    def test_should_reap(self, status, fail, execution_node, controller_node, modified):
@@ -83,7 +79,6 @@ class TestJobReaper(object):
            # (because .save() overwrites it to _now_)
            Job.objects.filter(id=j.id).update(modified=modified)
        reaper.reap(i)
-        reaper.reap_waiting(i)
        job = Job.objects.first()
        if fail:
            assert job.status == 'failed'
@@ -92,6 +87,20 @@ class TestJobReaper(object):
        else:
            assert job.status == status

+    def test_waiting_job_sent_back_to_pending(self):
+        this_inst = Instance(hostname='awx')
+        this_inst.save()
+        lost_inst = Instance(hostname='lost', node_type=Instance.Types.EXECUTION, node_state=Instance.States.UNAVAILABLE)
+        lost_inst.save()
+        job = Job.objects.create(status='waiting', controller_node=lost_inst.hostname, execution_node='lost')
+
+        system._heartbeat_handle_lost_instances([lost_inst], this_inst)
+        job.refresh_from_db()
+
+        assert job.status == 'pending'
+        assert job.controller_node == ''
+        assert job.execution_node == ''
+
    @pytest.mark.parametrize(
        'excluded_uuids, fail, started',
        [