Fix race with heartbeat and reaper logic (#13713)

* Fix race with heartbeat and reaper logic

* Fix tests to fail when over drift over heartbeat time

* replaced modified with started time for reap() code and added test

* fixed logic bug and cleaned up tests

* Added comments to tests to call out reasoning
This commit is contained in:
Gabriel Muniz
2023-03-17 14:24:31 -04:00
committed by GitHub
parent 3e6e0463b9
commit e15f4de0dd
3 changed files with 36 additions and 10 deletions

View File

@@ -581,7 +581,7 @@ def cluster_node_heartbeat(dispatch_time=None, worker_tasks=None):
active_task_ids = []
for task_list in worker_tasks.values():
active_task_ids.extend(task_list)
reaper.reap(instance=this_inst, excluded_uuids=active_task_ids)
reaper.reap(instance=this_inst, excluded_uuids=active_task_ids, ref_time=datetime.fromisoformat(dispatch_time))
if max(len(task_list) for task_list in worker_tasks.values()) <= 1:
reaper.reap_waiting(instance=this_inst, excluded_uuids=active_task_ids, ref_time=datetime.fromisoformat(dispatch_time))