fix scheduled jobs race condition

* The periodic scheduler that runs and spawns jobs from Schedule() objects
can end up spawning more jobs than intended for a single Schedule,
specifically when Tower clustering is involved. This change adds a
"global" database lock around this critical code. If another process is
already doing the scheduling, the task short-circuits and returns.
This commit is contained in:
chris meyers
2019-02-07 16:57:06 -05:00
parent 1328fb80a0
commit d4c3c089df

View File

@@ -435,6 +435,12 @@ def awx_isolated_heartbeat():
@task() @task()
def awx_periodic_scheduler(): def awx_periodic_scheduler():
with advisory_lock('awx_periodic_scheduler_lock', wait=False) as acquired:
if acquired is False:
logger.debug("Not running periodic scheduler, another task holds lock")
return
logger.debug("Starting periodic scheduler")
run_now = now() run_now = now()
state = TowerScheduleState.get_solo() state = TowerScheduleState.get_solo()
last_run = state.schedule_last_run last_run = state.schedule_last_run
@@ -462,7 +468,7 @@ def awx_periodic_scheduler():
try: try:
job_kwargs = schedule.get_job_kwargs() job_kwargs = schedule.get_job_kwargs()
new_unified_job = schedule.unified_job_template.create_unified_job(**job_kwargs) new_unified_job = schedule.unified_job_template.create_unified_job(**job_kwargs)
logger.info('Spawned {} from schedule {}-{}.'.format( logger.info(six.text_type('Spawned {} from schedule {}-{}.').format(
new_unified_job.log_format, schedule.name, schedule.pk)) new_unified_job.log_format, schedule.name, schedule.pk))
if invalid_license: if invalid_license: