mirror of
https://github.com/ansible/awx.git
synced 2026-03-03 17:51:06 -03:30
periodically run orphaned task cleanup as part of the scheduler
Running orphaned task cleanup as its own scheduled task via celery-beat causes racy lock contention between the cleanup task and the task scheduler. Unfortunately, the scheduler and the cleanup task both run at similar intervals, so this race condition is fairly easy to hit. At best, the scheduler is regularly delayed by 20s; depending on timing, task execution can be needlessly delayed by a minute or more. At worst, the scheduler is never able to schedule tasks. This change implements the cleanup as a periodic block of code in the scheduler itself that tracks its "last run" time in memcached (by default, it performs a cleanup every 60 seconds). See: #6534
This commit is contained in:
@@ -1,6 +1,9 @@
|
||||
import pytest
|
||||
import mock
|
||||
from datetime import timedelta
|
||||
from datetime import timedelta, datetime
|
||||
|
||||
from django.core.cache import cache
|
||||
|
||||
from awx.main.scheduler import TaskManager
|
||||
|
||||
|
||||
@@ -198,3 +201,32 @@ def test_shared_dependencies_launch(default_instance_group, job_template_factory
|
||||
iu = [x for x in ii.inventory_updates.all()]
|
||||
assert len(pu) == 1
|
||||
assert len(iu) == 1
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_cleanup_interval():
    """Back-to-back cleanup calls must leave the cached timestamp untouched.

    The first call to ``cleanup_inconsistent_celery_tasks`` is expected to
    store a ``datetime`` under the ``last_celery_task_cleanup`` cache key;
    an immediate second call must not overwrite it.
    """
    cache_key = 'last_celery_task_cleanup'

    # Precondition: nothing is stored under the key before any cleanup ran.
    assert cache.get(cache_key) is None

    # One cleanup pass records a datetime in the cache.
    TaskManager().cleanup_inconsistent_celery_tasks()
    first_stamp = cache.get(cache_key)
    assert isinstance(first_stamp, datetime)

    # A second pass on a fresh TaskManager keeps the stored value as-is.
    # NOTE(review): presumably because the cleanup interval has not yet
    # elapsed - confirm against the scheduler implementation.
    TaskManager().cleanup_inconsistent_celery_tasks()
    second_stamp = cache.get(cache_key)
    assert second_stamp == first_stamp
|
||||
|
||||
|
||||
@pytest.mark.django_db
@mock.patch('awx.main.tasks._send_notification_templates')
@mock.patch.object(TaskManager, 'get_active_tasks', lambda self: [[], []])
@mock.patch.object(TaskManager, 'get_running_tasks')
def test_cleanup_inconsistent_task(get_running_tasks, notify):
    """A running task that the active-task lookup does not report is failed.

    ``get_active_tasks`` is patched to return two empty lists and
    ``get_running_tasks`` to return a single mock task. After the cleanup
    runs, that task must have been marked failed, announced over the
    websocket, and passed to the notification hook.

    Parameters are injected by the ``mock.patch`` decorators, innermost
    first: ``get_running_tasks`` is the patched ``TaskManager`` method and
    ``notify`` is the patched ``_send_notification_templates``.
    """
    expected_explanation = (
        'Task was marked as running in Tower but was not present in Celery, so it has been marked as failed.'
    )

    # The only "running" task the manager will see.
    stale_job = mock.Mock(job_explanation='')
    get_running_tasks.return_value = [stale_job]

    manager = TaskManager()
    manager.cleanup_inconsistent_celery_tasks()

    # Notification and websocket status are each sent exactly once.
    notify.assert_called_once_with(stale_job, 'failed')
    stale_job.websocket_emit_status.assert_called_once_with('failed')

    # The task itself carries the failure state and the explanation text.
    assert stale_job.status == 'failed'
    assert stale_job.job_explanation == expected_explanation
|
||||
|
||||
Reference in New Issue
Block a user