Integrate scheduler into dispatcher main loop (#14067)

Dispatcher refactoring to get pg_notify publish payload
  as separate method

Refactor periodic module under dispatcher entirely
  Use real numbers for schedule reference time
  Run based on due_to_run method

Review comments about naming and code comments
This commit is contained in:
Alan Rominger
2023-08-10 14:43:07 -04:00
committed by GitHub
parent 14992cee17
commit 284bd8377a
10 changed files with 308 additions and 126 deletions

View File

@@ -40,8 +40,12 @@ def get_task_queuename():
class PubSub(object): class PubSub(object):
def __init__(self, conn): def __init__(self, conn, select_timeout=None):
self.conn = conn self.conn = conn
if select_timeout is None:
self.select_timeout = 5
else:
self.select_timeout = select_timeout
def listen(self, channel): def listen(self, channel):
with self.conn.cursor() as cur: with self.conn.cursor() as cur:
@@ -72,12 +76,12 @@ class PubSub(object):
n = psycopg.connection.Notify(pgn.relname.decode(enc), pgn.extra.decode(enc), pgn.be_pid) n = psycopg.connection.Notify(pgn.relname.decode(enc), pgn.extra.decode(enc), pgn.be_pid)
yield n yield n
def events(self, select_timeout=5, yield_timeouts=False): def events(self, yield_timeouts=False):
if not self.conn.autocommit: if not self.conn.autocommit:
raise RuntimeError('Listening for events can only be done in autocommit mode') raise RuntimeError('Listening for events can only be done in autocommit mode')
while True: while True:
if select.select([self.conn], [], [], select_timeout) == NOT_READY: if select.select([self.conn], [], [], self.select_timeout) == NOT_READY:
if yield_timeouts: if yield_timeouts:
yield None yield None
else: else:
@@ -90,7 +94,7 @@ class PubSub(object):
@contextmanager @contextmanager
def pg_bus_conn(new_connection=False): def pg_bus_conn(new_connection=False, select_timeout=None):
''' '''
Any listeners probably want to establish a new database connection, Any listeners probably want to establish a new database connection,
separate from the Django connection used for queries, because that will prevent separate from the Django connection used for queries, because that will prevent
@@ -115,7 +119,7 @@ def pg_bus_conn(new_connection=False):
raise RuntimeError('Unexpectedly could not connect to postgres for pg_notify actions') raise RuntimeError('Unexpectedly could not connect to postgres for pg_notify actions')
conn = pg_connection.connection conn = pg_connection.connection
pubsub = PubSub(conn) pubsub = PubSub(conn, select_timeout=select_timeout)
yield pubsub yield pubsub
if new_connection: if new_connection:
conn.close() conn.close()

View File

@@ -40,6 +40,9 @@ class Control(object):
def cancel(self, task_ids, *args, **kwargs): def cancel(self, task_ids, *args, **kwargs):
return self.control_with_reply('cancel', *args, extra_data={'task_ids': task_ids}, **kwargs) return self.control_with_reply('cancel', *args, extra_data={'task_ids': task_ids}, **kwargs)
def schedule(self, *args, **kwargs):
    """Send the 'schedule' control command and return the reply.

    All positional and keyword arguments are forwarded unchanged to
    ``control_with_reply``.
    """
    return self.control_with_reply('schedule', *args, **kwargs)
@classmethod @classmethod
def generate_reply_queue_name(cls): def generate_reply_queue_name(cls):
return f"reply_to_{str(uuid.uuid4()).replace('-','_')}" return f"reply_to_{str(uuid.uuid4()).replace('-','_')}"
@@ -52,14 +55,14 @@ class Control(object):
if not connection.get_autocommit(): if not connection.get_autocommit():
raise RuntimeError('Control-with-reply messages can only be done in autocommit mode') raise RuntimeError('Control-with-reply messages can only be done in autocommit mode')
with pg_bus_conn() as conn: with pg_bus_conn(select_timeout=timeout) as conn:
conn.listen(reply_queue) conn.listen(reply_queue)
send_data = {'control': command, 'reply_to': reply_queue} send_data = {'control': command, 'reply_to': reply_queue}
if extra_data: if extra_data:
send_data.update(extra_data) send_data.update(extra_data)
conn.notify(self.queuename, json.dumps(send_data)) conn.notify(self.queuename, json.dumps(send_data))
for reply in conn.events(select_timeout=timeout, yield_timeouts=True): for reply in conn.events(yield_timeouts=True):
if reply is None: if reply is None:
logger.error(f'{self.service} did not reply within {timeout}s') logger.error(f'{self.service} did not reply within {timeout}s')
raise RuntimeError(f"{self.service} did not reply within {timeout}s") raise RuntimeError(f"{self.service} did not reply within {timeout}s")

View File

@@ -1,57 +1,142 @@
import logging import logging
import os
import time import time
from multiprocessing import Process import yaml
from datetime import datetime
from django.conf import settings
from django.db import connections
from schedule import Scheduler
from django_guid import set_guid
from django_guid.utils import generate_guid
from awx.main.dispatch.worker import TaskWorker
from awx.main.utils.db import set_connection_name
logger = logging.getLogger('awx.main.dispatch.periodic') logger = logging.getLogger('awx.main.dispatch.periodic')
class Scheduler(Scheduler): class ScheduledTask:
def run_continuously(self): """
idle_seconds = max(1, min(self.jobs).period.total_seconds() / 2) Class representing schedules, very loosely modeled after python schedule library Job
the idea of this class is to:
- only deal in relative times (time since the scheduler global start)
- only deal in integer math for target runtimes, but float for current relative time
def run(): Missed schedule policy:
ppid = os.getppid() Invariant target times are maintained, meaning that if interval=10s offset=0
logger.warning('periodic beat started') and it runs at t=7s, then it calls for next run in 3s.
However, if a complete interval has passed, that is counted as a missed run,
and missed runs are abandoned (no catch-up runs).
"""
set_connection_name('periodic') # set application_name to distinguish from other dispatcher processes def __init__(self, name: str, data: dict):
# parameters needed for schedule computation
self.interval = int(data['schedule'].total_seconds())
self.offset = 0 # offset relative to start time this schedule begins
self.index = 0 # number of periods of the schedule that has passed
while True: # parameters that do not affect scheduling logic
if os.getppid() != ppid: self.last_run = None # time of last run, only used for debug
# if the parent PID changes, this process has been orphaned self.completed_runs = 0 # number of times schedule is known to run
# via e.g., segfault or sigkill, we should exit too self.name = name
pid = os.getpid() self.data = data # used by caller to know what to run
logger.warning(f'periodic beat exiting gracefully pid:{pid}')
raise SystemExit()
try:
for conn in connections.all():
# If the database connection has a hiccup, re-establish a new
# connection
conn.close_if_unusable_or_obsolete()
set_guid(generate_guid())
self.run_pending()
except Exception:
logger.exception('encountered an error while scheduling periodic tasks')
time.sleep(idle_seconds)
process = Process(target=run) @property
process.daemon = True def next_run(self):
process.start() "Time until the next run with t=0 being the global_start of the scheduler class"
return (self.index + 1) * self.interval + self.offset
def due_to_run(self, relative_time):
    """Report whether the next target time has been reached.

    Returns True when ``self.next_run`` is at or before ``relative_time``,
    otherwise False.
    """
    if self.next_run <= relative_time:
        return True
    return False
def expected_runs(self, relative_time):
    """Count the whole intervals that fit between the offset and ``relative_time``.

    The quotient is passed through ``int``, so any fractional part is dropped.
    """
    elapsed_since_offset = relative_time - self.offset
    return int(elapsed_since_offset / self.interval)
def mark_run(self, relative_time):
    """Record that the schedule ran at ``relative_time`` and advance its index.

    Updates ``last_run`` and ``completed_runs``, then moves ``index`` to the
    value given by ``expected_runs``. When the index jumps by more than one
    period, the skipped periods are reported with a warning.
    """
    self.last_run = relative_time
    self.completed_runs += 1
    new_index = self.expected_runs(relative_time)
    # advancing by exactly one period is the normal case, anything beyond that was skipped
    skipped_any = new_index > self.index + 1
    if skipped_any:
        logger.warning(f'Missed {new_index - self.index - 1} schedules of {self.name}')
    self.index = new_index
def missed_runs(self, relative_time):
    """Return how many expected runs have not been completed, only used for debug.

    The count is ``expected_runs(relative_time)`` minus ``completed_runs``.
    A run that is due right now is pending rather than missed, so it is
    excluded from a non-zero count.
    """
    shortfall = self.expected_runs(relative_time) - self.completed_runs
    if not shortfall:
        return shortfall
    if self.due_to_run(relative_time):
        # the currently pending run still has a chance to happen
        return shortfall - 1
    return shortfall
def run_continuously(): class Scheduler:
scheduler = Scheduler() def __init__(self, schedule):
for task in settings.CELERYBEAT_SCHEDULE.values(): """
apply_async = TaskWorker.resolve_callable(task['task']).apply_async Expects schedule in the form of a dictionary like
total_seconds = task['schedule'].total_seconds() {
scheduler.every(total_seconds).seconds.do(apply_async) 'job1': {'schedule': timedelta(seconds=50), 'other': 'stuff'}
scheduler.run_continuously() }
Only the schedule nearest-second value is used for scheduling,
the rest of the data is for use by the caller to know what to run.
"""
self.jobs = [ScheduledTask(name, data) for name, data in schedule.items()]
min_interval = min(job.interval for job in self.jobs)
num_jobs = len(self.jobs)
# this is intentionally opinionated against spammy schedules
# a core goal is to spread out the scheduled tasks (for worker management)
# and high-frequency schedules just do not work with that
if num_jobs > min_interval:
raise RuntimeError(f'Number of schedules ({num_jobs}) is more than the shortest schedule interval ({min_interval} seconds).')
# evenly space out jobs over the base interval
for i, job in enumerate(self.jobs):
job.offset = (i * min_interval) // num_jobs
# internally times are all referenced relative to startup time, add grace period
self.global_start = time.time() + 2.0
def get_and_mark_pending(self):
    """Collect every job that is due right now and mark each one as run.

    The current time is taken relative to ``global_start``. Jobs are
    visited in ``self.jobs`` order, and the returned list keeps that order.
    """
    relative_time = time.time() - self.global_start
    pending = []
    for job in self.jobs:
        if not job.due_to_run(relative_time):
            continue
        # log the lateness before mark_run advances the job's next_run
        logger.debug(f'scheduler found {job.name} to run, {relative_time - job.next_run} seconds after target')
        job.mark_run(relative_time)
        pending.append(job)
    return pending
def time_until_next_run(self):
    """Return the seconds to wait before the soonest job is due, clamped to [0.1, 20.0].

    The job with the smallest ``next_run`` is chosen. Waits at or below
    0.1 seconds and waits above 20 seconds are both logged as warnings
    and replaced by the respective bound.
    """
    relative_time = time.time() - self.global_start
    next_job = min(self.jobs, key=lambda j: j.next_run)
    delta = next_job.next_run - relative_time

    if delta <= 0.1:
        # careful not to give 0 or negative values to the select timeout, which has unclear interpretation
        logger.warning(f'Scheduler next run of {next_job.name} is {-delta} seconds in the past')
        return 0.1

    if delta > 20.0:
        logger.warning(f'Scheduler next run unexpectedly over 20 seconds in future: {delta}')
        return 20.0

    logger.debug(f'Scheduler next run is {next_job.name} in {delta} seconds')
    return delta
def debug(self, *args, **kwargs):
    """Return a YAML-formatted status report of the scheduler and its jobs.

    The report holds the scheduler start time and the current time (both
    formatted in UTC), the current time relative to ``global_start``, the
    number of schedules, and per-job details ordered by job interval.
    Positional and keyword arguments are accepted but not used.
    """
    # read the clock once so that absolute and relative values describe the same instant
    current_time = time.time()
    relative_time = current_time - self.global_start

    # the label says UTC, so format with gmtime rather than the host's local timezone
    utc_format = '%Y-%m-%d %H:%M:%S UTC'
    data = {
        'title': 'Scheduler status',
        'started_time': time.strftime(utc_format, time.gmtime(self.global_start)),
        'current_time': time.strftime(utc_format, time.gmtime(current_time)),
        'current_time_relative': round(relative_time, 3),
        'total_schedules': len(self.jobs),
        'schedule_list': {
            job.name: dict(
                # last_run stays None until the first run; 0.0 is a valid recorded time
                last_run_seconds_ago=round(relative_time - job.last_run, 3) if job.last_run is not None else None,
                next_run_in_seconds=round(job.next_run - relative_time, 3),
                offset_in_seconds=job.offset,
                completed_runs=job.completed_runs,
                missed_runs=job.missed_runs(relative_time),
            )
            for job in sorted(self.jobs, key=lambda job: job.interval)
        },
    }
    # sort_keys=False keeps the insertion order above in the emitted document
    return yaml.safe_dump(data, default_flow_style=False, sort_keys=False)

View File

@@ -73,15 +73,15 @@ class task:
return cls.apply_async(args, kwargs) return cls.apply_async(args, kwargs)
@classmethod @classmethod
def apply_async(cls, args=None, kwargs=None, queue=None, uuid=None, **kw): def get_async_body(cls, args=None, kwargs=None, uuid=None, **kw):
"""
Get the python dict to become JSON data in the pg_notify message
This same message gets passed over the dispatcher IPC queue to workers
If a task is submitted to a multiprocessing pool, skipping pg_notify, this might be used directly
"""
task_id = uuid or str(uuid4()) task_id = uuid or str(uuid4())
args = args or [] args = args or []
kwargs = kwargs or {} kwargs = kwargs or {}
queue = queue or getattr(cls.queue, 'im_func', cls.queue)
if not queue:
msg = f'{cls.name}: Queue value required and may not be None'
logger.error(msg)
raise ValueError(msg)
obj = {'uuid': task_id, 'args': args, 'kwargs': kwargs, 'task': cls.name, 'time_pub': time.time()} obj = {'uuid': task_id, 'args': args, 'kwargs': kwargs, 'task': cls.name, 'time_pub': time.time()}
guid = get_guid() guid = get_guid()
if guid: if guid:
@@ -89,6 +89,16 @@ class task:
if bind_kwargs: if bind_kwargs:
obj['bind_kwargs'] = bind_kwargs obj['bind_kwargs'] = bind_kwargs
obj.update(**kw) obj.update(**kw)
return obj
@classmethod
def apply_async(cls, args=None, kwargs=None, queue=None, uuid=None, **kw):
queue = queue or getattr(cls.queue, 'im_func', cls.queue)
if not queue:
msg = f'{cls.name}: Queue value required and may not be None'
logger.error(msg)
raise ValueError(msg)
obj = cls.get_async_body(args=args, kwargs=kwargs, uuid=uuid, **kw)
if callable(queue): if callable(queue):
queue = queue() queue = queue()
if not is_testing(): if not is_testing():
@@ -116,4 +126,5 @@ class task:
setattr(fn, 'name', cls.name) setattr(fn, 'name', cls.name)
setattr(fn, 'apply_async', cls.apply_async) setattr(fn, 'apply_async', cls.apply_async)
setattr(fn, 'delay', cls.delay) setattr(fn, 'delay', cls.delay)
setattr(fn, 'get_async_body', cls.get_async_body)
return fn return fn

View File

@@ -11,11 +11,13 @@ import psycopg
import time import time
from uuid import UUID from uuid import UUID
from queue import Empty as QueueEmpty from queue import Empty as QueueEmpty
from datetime import timedelta
from django import db from django import db
from django.conf import settings from django.conf import settings
from awx.main.dispatch.pool import WorkerPool from awx.main.dispatch.pool import WorkerPool
from awx.main.dispatch.periodic import Scheduler
from awx.main.dispatch import pg_bus_conn from awx.main.dispatch import pg_bus_conn
from awx.main.utils.common import log_excess_runtime from awx.main.utils.common import log_excess_runtime
from awx.main.utils.db import set_connection_name from awx.main.utils.db import set_connection_name
@@ -64,10 +66,12 @@ class AWXConsumerBase(object):
def control(self, body): def control(self, body):
logger.warning(f'Received control signal:\n{body}') logger.warning(f'Received control signal:\n{body}')
control = body.get('control') control = body.get('control')
if control in ('status', 'running', 'cancel'): if control in ('status', 'schedule', 'running', 'cancel'):
reply_queue = body['reply_to'] reply_queue = body['reply_to']
if control == 'status': if control == 'status':
msg = '\n'.join([self.listening_on, self.pool.debug()]) msg = '\n'.join([self.listening_on, self.pool.debug()])
if control == 'schedule':
msg = self.scheduler.debug()
elif control == 'running': elif control == 'running':
msg = [] msg = []
for worker in self.pool.workers: for worker in self.pool.workers:
@@ -93,16 +97,11 @@ class AWXConsumerBase(object):
else: else:
logger.error('unrecognized control message: {}'.format(control)) logger.error('unrecognized control message: {}'.format(control))
def process_task(self, body): def dispatch_task(self, body):
"""This will place the given body into a worker queue to run method decorated as a task"""
if isinstance(body, dict): if isinstance(body, dict):
body['time_ack'] = time.time() body['time_ack'] = time.time()
if 'control' in body:
try:
return self.control(body)
except Exception:
logger.exception(f"Exception handling control message: {body}")
return
if len(self.pool): if len(self.pool):
if "uuid" in body and body['uuid']: if "uuid" in body and body['uuid']:
try: try:
@@ -116,6 +115,16 @@ class AWXConsumerBase(object):
self.pool.write(queue, body) self.pool.write(queue, body)
self.total_messages += 1 self.total_messages += 1
def process_task(self, body):
    """Hand ``body`` to the control handler when it has a 'control' key, otherwise dispatch it as a task.

    Returns whatever ``self.control`` returns for control messages. An
    exception raised by the control handler is logged and None is returned.
    Dispatched tasks always yield None.
    """
    if 'control' not in body:
        self.dispatch_task(body)
        return None

    try:
        return self.control(body)
    except Exception:
        logger.exception(f"Exception handling control message: {body}")
        return None
@log_excess_runtime(logger) @log_excess_runtime(logger)
def record_statistics(self): def record_statistics(self):
if time.time() - self.last_stats > 1: # buffer stat recording to once per second if time.time() - self.last_stats > 1: # buffer stat recording to once per second
@@ -150,7 +159,7 @@ class AWXConsumerRedis(AWXConsumerBase):
class AWXConsumerPG(AWXConsumerBase): class AWXConsumerPG(AWXConsumerBase):
def __init__(self, *args, **kwargs): def __init__(self, *args, schedule=None, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.pg_max_wait = settings.DISPATCHER_DB_DOWNTOWN_TOLLERANCE self.pg_max_wait = settings.DISPATCHER_DB_DOWNTOWN_TOLLERANCE
# if no successful loops have ran since startup, then we should fail right away # if no successful loops have ran since startup, then we should fail right away
@@ -161,27 +170,53 @@ class AWXConsumerPG(AWXConsumerBase):
self.subsystem_metrics = s_metrics.Metrics(auto_pipe_execute=False) self.subsystem_metrics = s_metrics.Metrics(auto_pipe_execute=False)
self.last_metrics_gather = init_time self.last_metrics_gather = init_time
self.listen_cumulative_time = 0.0 self.listen_cumulative_time = 0.0
if schedule:
schedule = schedule.copy()
else:
schedule = {}
# add control tasks to be ran at regular schedules
# NOTE: if we run out of database connections, it is important to still run cleanup
# so that we scale down workers and free up connections
schedule['pool_cleanup'] = {'control': self.pool.cleanup, 'schedule': timedelta(seconds=60)}
# record subsystem metrics for the dispatcher
schedule['metrics_gather'] = {'control': self.record_metrics, 'schedule': timedelta(seconds=20)}
self.scheduler = Scheduler(schedule)
def record_metrics(self):
    """Publish dispatcher subsystem metrics and start a new measurement window.

    'dispatcher_availability' is the accumulated listen time divided by
    the time elapsed since the previous gather. Afterwards the listen
    time accumulator is zeroed and the gather timestamp is updated.
    """
    current_time = time.time()
    metrics = self.subsystem_metrics
    self.pool.produce_subsystem_metrics(metrics)

    window = current_time - self.last_metrics_gather
    metrics.set('dispatcher_availability', self.listen_cumulative_time / window)
    metrics.pipe_execute()

    # reset the accumulators only after the metrics were written out
    self.listen_cumulative_time = 0.0
    self.last_metrics_gather = current_time
def run_periodic_tasks(self): def run_periodic_tasks(self):
self.record_statistics() # maintains time buffer in method """
Run general periodic logic, and return maximum time in seconds before
the next requested run
This may be called more often than that when events are consumed
so this should be very efficient in that
"""
try:
self.record_statistics() # maintains time buffer in method
except Exception as exc:
logger.warning(f'Failed to save dispatcher statistics {exc}')
current_time = time.time() for job in self.scheduler.get_and_mark_pending():
if current_time - self.last_cleanup > 60: # same as cluster_node_heartbeat if 'control' in job.data:
# NOTE: if we run out of database connections, it is important to still run cleanup try:
# so that we scale down workers and free up connections job.data['control']()
self.pool.cleanup() except Exception:
self.last_cleanup = current_time logger.exception(f'Error running control task {job.data}')
elif 'task' in job.data:
body = self.worker.resolve_callable(job.data['task']).get_async_body()
# bypasses pg_notify for scheduled tasks
self.dispatch_task(body)
# record subsystem metrics for the dispatcher self.pg_is_down = False
if current_time - self.last_metrics_gather > 20: self.listen_start = time.time()
try:
self.pool.produce_subsystem_metrics(self.subsystem_metrics) return self.scheduler.time_until_next_run()
self.subsystem_metrics.set('dispatcher_availability', self.listen_cumulative_time / (current_time - self.last_metrics_gather))
self.subsystem_metrics.pipe_execute()
except Exception:
logger.exception(f"encountered an error trying to store {self.name} metrics")
self.listen_cumulative_time = 0.0
self.last_metrics_gather = current_time
def run(self, *args, **kwargs): def run(self, *args, **kwargs):
super(AWXConsumerPG, self).run(*args, **kwargs) super(AWXConsumerPG, self).run(*args, **kwargs)
@@ -197,14 +232,15 @@ class AWXConsumerPG(AWXConsumerBase):
if init is False: if init is False:
self.worker.on_start() self.worker.on_start()
init = True init = True
self.listen_start = time.time() # run_periodic_tasks run scheduled actions and gives time until next scheduled action
# this is saved to the conn (PubSub) object in order to modify read timeout in-loop
conn.select_timeout = self.run_periodic_tasks()
# this is the main operational loop for awx-manage run_dispatcher
for e in conn.events(yield_timeouts=True): for e in conn.events(yield_timeouts=True):
self.listen_cumulative_time += time.time() - self.listen_start self.listen_cumulative_time += time.time() - self.listen_start # for metrics
if e is not None: if e is not None:
self.process_task(json.loads(e.payload)) self.process_task(json.loads(e.payload))
self.run_periodic_tasks() conn.select_timeout = self.run_periodic_tasks()
self.pg_is_down = False
self.listen_start = time.time()
if self.should_stop: if self.should_stop:
return return
except psycopg.InterfaceError: except psycopg.InterfaceError:

View File

@@ -3,15 +3,13 @@
import logging import logging
import yaml import yaml
from django.core.cache import cache as django_cache from django.conf import settings
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.db import connection as django_connection
from awx.main.dispatch import get_task_queuename from awx.main.dispatch import get_task_queuename
from awx.main.dispatch.control import Control from awx.main.dispatch.control import Control
from awx.main.dispatch.pool import AutoscalePool from awx.main.dispatch.pool import AutoscalePool
from awx.main.dispatch.worker import AWXConsumerPG, TaskWorker from awx.main.dispatch.worker import AWXConsumerPG, TaskWorker
from awx.main.dispatch import periodic
logger = logging.getLogger('awx.main.dispatch') logger = logging.getLogger('awx.main.dispatch')
@@ -21,6 +19,7 @@ class Command(BaseCommand):
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument('--status', dest='status', action='store_true', help='print the internal state of any running dispatchers') parser.add_argument('--status', dest='status', action='store_true', help='print the internal state of any running dispatchers')
parser.add_argument('--schedule', dest='schedule', action='store_true', help='print the current status of schedules being ran by dispatcher')
parser.add_argument('--running', dest='running', action='store_true', help='print the UUIDs of any tasked managed by this dispatcher') parser.add_argument('--running', dest='running', action='store_true', help='print the UUIDs of any tasked managed by this dispatcher')
parser.add_argument( parser.add_argument(
'--reload', '--reload',
@@ -42,6 +41,9 @@ class Command(BaseCommand):
if options.get('status'): if options.get('status'):
print(Control('dispatcher').status()) print(Control('dispatcher').status())
return return
if options.get('schedule'):
print(Control('dispatcher').schedule())
return
if options.get('running'): if options.get('running'):
print(Control('dispatcher').running()) print(Control('dispatcher').running())
return return
@@ -58,21 +60,11 @@ class Command(BaseCommand):
print(Control('dispatcher').cancel(cancel_data)) print(Control('dispatcher').cancel(cancel_data))
return return
# It's important to close these because we're _about_ to fork, and we
# don't want the forked processes to inherit the open sockets
# for the DB and cache connections (that way lies race conditions)
django_connection.close()
django_cache.close()
# spawn a daemon thread to periodically enqueues scheduled tasks
# (like the node heartbeat)
periodic.run_continuously()
consumer = None consumer = None
try: try:
queues = ['tower_broadcast_all', 'tower_settings_change', get_task_queuename()] queues = ['tower_broadcast_all', 'tower_settings_change', get_task_queuename()]
consumer = AWXConsumerPG('dispatcher', TaskWorker(), queues, AutoscalePool(min_workers=4)) consumer = AWXConsumerPG('dispatcher', TaskWorker(), queues, AutoscalePool(min_workers=4), schedule=settings.CELERYBEAT_SCHEDULE)
consumer.run() consumer.run()
except KeyboardInterrupt: except KeyboardInterrupt:
logger.debug('Terminating Task Dispatcher') logger.debug('Terminating Task Dispatcher')

View File

@@ -3,6 +3,7 @@ import multiprocessing
import random import random
import signal import signal
import time import time
import yaml
from unittest import mock from unittest import mock
from django.utils.timezone import now as tz_now from django.utils.timezone import now as tz_now
@@ -13,6 +14,7 @@ from awx.main.dispatch import reaper
from awx.main.dispatch.pool import StatefulPoolWorker, WorkerPool, AutoscalePool from awx.main.dispatch.pool import StatefulPoolWorker, WorkerPool, AutoscalePool
from awx.main.dispatch.publish import task from awx.main.dispatch.publish import task
from awx.main.dispatch.worker import BaseWorker, TaskWorker from awx.main.dispatch.worker import BaseWorker, TaskWorker
from awx.main.dispatch.periodic import Scheduler
''' '''
@@ -439,3 +441,76 @@ class TestJobReaper(object):
assert job.started > ref_time assert job.started > ref_time
assert job.status == 'running' assert job.status == 'running'
assert job.job_explanation == '' assert job.job_explanation == ''
@pytest.mark.django_db
class TestScheduler:
    """Tests for the dispatcher's periodic Scheduler: validation, offsets, due/missed accounting and debug output."""

    def test_too_many_schedules_freak_out(self):
        """Two schedules with a 1-second interval outnumber the shortest interval and must raise RuntimeError."""
        with pytest.raises(RuntimeError):
            Scheduler({'job1': {'schedule': datetime.timedelta(seconds=1)}, 'job2': {'schedule': datetime.timedelta(seconds=1)}})

    def test_spread_out(self):
        """Four schedules sharing a 16-second interval get offsets spaced 4 seconds apart."""
        scheduler = Scheduler(
            {
                'job1': {'schedule': datetime.timedelta(seconds=16)},
                'job2': {'schedule': datetime.timedelta(seconds=16)},
                'job3': {'schedule': datetime.timedelta(seconds=16)},
                'job4': {'schedule': datetime.timedelta(seconds=16)},
            }
        )
        assert [job.offset for job in scheduler.jobs] == [0, 4, 8, 12]

    def test_missed_schedule(self, mocker):
        """Jumping several intervals ahead yields one run and reports the skipped intervals as missed."""
        scheduler = Scheduler({'job1': {'schedule': datetime.timedelta(seconds=10)}})
        # right after construction nothing can have been missed yet
        assert scheduler.jobs[0].missed_runs(time.time() - scheduler.global_start) == 0
        # pretend 50 seconds (five 10-second intervals) have passed since the scheduler's start
        mocker.patch('awx.main.dispatch.periodic.time.time', return_value=scheduler.global_start + 50)
        scheduler.get_and_mark_pending()
        assert scheduler.jobs[0].missed_runs(50) > 1

    def test_advance_schedule(self, mocker):
        """Only jobs whose interval has elapsed are returned, and a job is not returned twice within one interval."""
        scheduler = Scheduler(
            {
                'job1': {'schedule': datetime.timedelta(seconds=30)},
                'joba': {'schedule': datetime.timedelta(seconds=20)},
                'jobb': {'schedule': datetime.timedelta(seconds=20)},
            }
        )
        for job in scheduler.jobs:
            # HACK: the offsets automatically added make this a hard test to write... so remove offsets
            job.offset = 0.0
        # at t=29 only the 20-second schedules have reached their first target
        mocker.patch('awx.main.dispatch.periodic.time.time', return_value=scheduler.global_start + 29)
        to_run = scheduler.get_and_mark_pending()
        assert set(job.name for job in to_run) == set(['joba', 'jobb'])
        # at t=39 the 30-second schedule is due, the 20-second ones wait for t=40
        mocker.patch('awx.main.dispatch.periodic.time.time', return_value=scheduler.global_start + 39)
        to_run = scheduler.get_and_mark_pending()
        assert len(to_run) == 1
        assert to_run[0].name == 'job1'

    @staticmethod
    def get_job(scheduler, name):
        """Return the job in ``scheduler.jobs`` with the given name; falls through (None) when no job matches."""
        for job in scheduler.jobs:
            if job.name == name:
                return job

    def test_scheduler_debug(self, mocker):
        """The YAML from ``debug()`` reflects last-run, missed-run, completed-run and next-run values per job."""
        scheduler = Scheduler(
            {
                'joba': {'schedule': datetime.timedelta(seconds=20)},
                'jobb': {'schedule': datetime.timedelta(seconds=50)},
                'jobc': {'schedule': datetime.timedelta(seconds=500)},
                'jobd': {'schedule': datetime.timedelta(seconds=20)},
            }
        )
        rel_time = 119.9  # slightly under the 6th 20-second bin, to avoid offset problems
        current_time = scheduler.global_start + rel_time
        mocker.patch('awx.main.dispatch.periodic.time.time', return_value=current_time - 1.0e-8)
        # jobb ran just now, jobd ran once 20 seconds earlier, joba and jobc never ran
        self.get_job(scheduler, 'jobb').mark_run(rel_time)
        self.get_job(scheduler, 'jobd').mark_run(rel_time - 20.0)

        output = scheduler.debug()
        data = yaml.safe_load(output)
        assert data['schedule_list']['jobc']['last_run_seconds_ago'] is None
        assert data['schedule_list']['joba']['missed_runs'] == 4
        assert data['schedule_list']['jobd']['missed_runs'] == 3
        assert data['schedule_list']['jobd']['completed_runs'] == 1
        assert data['schedule_list']['jobb']['next_run_in_seconds'] > 25.0

View File

@@ -1,21 +0,0 @@
The MIT License (MIT)
Copyright (c) 2013 Daniel Bader (http://dbader.org)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@@ -46,7 +46,6 @@ python-tss-sdk==1.0.0
python-ldap python-ldap
pyyaml>=6.0.1 pyyaml>=6.0.1
receptorctl==1.3.0 receptorctl==1.3.0
schedule==0.6.0
social-auth-core[openidconnect]==4.3.0 # see UPGRADE BLOCKERs social-auth-core[openidconnect]==4.3.0 # see UPGRADE BLOCKERs
social-auth-app-django==5.0.0 # see UPGRADE BLOCKERs social-auth-app-django==5.0.0 # see UPGRADE BLOCKERs
sqlparse >= 0.4.4 # Required by django https://github.com/ansible/awx/security/dependabot/96 sqlparse >= 0.4.4 # Required by django https://github.com/ansible/awx/security/dependabot/96

View File

@@ -380,8 +380,6 @@ rsa==4.9
# python-jose # python-jose
s3transfer==0.6.0 s3transfer==0.6.0
# via boto3 # via boto3
schedule==0.6.0
# via -r /awx_devel/requirements/requirements.in
semantic-version==2.10.0 semantic-version==2.10.0
# via setuptools-rust # via setuptools-rust
service-identity==21.1.0 service-identity==21.1.0