Integrate scheduler into dispatcher main loop (#14067)

Dispatcher refactoring to get pg_notify publish payload as separate method Refactor periodic module under dispatcher entirely Use real numbers for schedule reference time Run based on due_to_run method Review comments about naming and code comments
2026-03-03 01:38:50 -03:30 · 2023-08-10 14:43:07 -04:00
parent 14992cee17
commit 284bd8377a
10 changed files with 308 additions and 126 deletions
--- a/awx/main/dispatch/init.py
+++ b/awx/main/dispatch/init.py
@@ -40,8 +40,12 @@ def get_task_queuename():


 class PubSub(object):
-    def __init__(self, conn):
+    def __init__(self, conn, select_timeout=None):
        self.conn = conn
+        if select_timeout is None:
+            self.select_timeout = 5
+        else:
+            self.select_timeout = select_timeout

    def listen(self, channel):
        with self.conn.cursor() as cur:
@@ -72,12 +76,12 @@ class PubSub(object):
            n = psycopg.connection.Notify(pgn.relname.decode(enc), pgn.extra.decode(enc), pgn.be_pid)
            yield n

-    def events(self, select_timeout=5, yield_timeouts=False):
+    def events(self, yield_timeouts=False):
        if not self.conn.autocommit:
            raise RuntimeError('Listening for events can only be done in autocommit mode')

        while True:
-            if select.select([self.conn], [], [], select_timeout) == NOT_READY:
+            if select.select([self.conn], [], [], self.select_timeout) == NOT_READY:
                if yield_timeouts:
                    yield None
            else:
@@ -90,7 +94,7 @@ class PubSub(object):


@contextmanager
-def pg_bus_conn(new_connection=False):
+def pg_bus_conn(new_connection=False, select_timeout=None):
    '''
    Any listeners probably want to establish a new database connection,
    separate from the Django connection used for queries, because that will prevent
@@ -115,7 +119,7 @@ def pg_bus_conn(new_connection=False):
            raise RuntimeError('Unexpectedly could not connect to postgres for pg_notify actions')
        conn = pg_connection.connection

-    pubsub = PubSub(conn)
+    pubsub = PubSub(conn, select_timeout=select_timeout)
    yield pubsub
    if new_connection:
        conn.close()
--- a/awx/main/dispatch/control.py
+++ b/awx/main/dispatch/control.py
@@ -40,6 +40,9 @@ class Control(object):
    def cancel(self, task_ids, *args, **kwargs):
        return self.control_with_reply('cancel', *args, extra_data={'task_ids': task_ids}, **kwargs)

+    def schedule(self, *args, **kwargs):
+        return self.control_with_reply('schedule', *args, **kwargs)
+
    @classmethod
    def generate_reply_queue_name(cls):
        return f"reply_to_{str(uuid.uuid4()).replace('-','_')}"
@@ -52,14 +55,14 @@ class Control(object):
        if not connection.get_autocommit():
            raise RuntimeError('Control-with-reply messages can only be done in autocommit mode')

-        with pg_bus_conn() as conn:
+        with pg_bus_conn(select_timeout=timeout) as conn:
            conn.listen(reply_queue)
            send_data = {'control': command, 'reply_to': reply_queue}
            if extra_data:
                send_data.update(extra_data)
            conn.notify(self.queuename, json.dumps(send_data))

-            for reply in conn.events(select_timeout=timeout, yield_timeouts=True):
+            for reply in conn.events(yield_timeouts=True):
                if reply is None:
                    logger.error(f'{self.service} did not reply within {timeout}s')
                    raise RuntimeError(f"{self.service} did not reply within {timeout}s")
--- a/awx/main/dispatch/periodic.py
+++ b/awx/main/dispatch/periodic.py
@@ -1,57 +1,142 @@
 import logging
-import os
 import time
-from multiprocessing import Process
+import yaml
+from datetime import datetime

-from django.conf import settings
-from django.db import connections
-from schedule import Scheduler
-from django_guid import set_guid
-from django_guid.utils import generate_guid
-
-from awx.main.dispatch.worker import TaskWorker
-from awx.main.utils.db import set_connection_name

 logger = logging.getLogger('awx.main.dispatch.periodic')


-class Scheduler(Scheduler):
-    def run_continuously(self):
-        idle_seconds = max(1, min(self.jobs).period.total_seconds() / 2)
+class ScheduledTask:
+    """
+    Class representing schedules, very loosely modeled after python schedule library Job
+    the idea of this class is to:
+     - only deal in relative times (time since the scheduler global start)
+     - only deal in integer math for target runtimes, but float for current relative time

-        def run():
-            ppid = os.getppid()
-            logger.warning('periodic beat started')
+    Missed schedule policy:
+    Invariant target times are maintained, meaning that if interval=10s offset=0
+    and it runs at t=7s, then it calls for next run in 3s.
+    However, if a complete interval has passed, that is counted as a missed run,
+    and missed runs are abandoned (no catch-up runs).
+    """

-            set_connection_name('periodic')  # set application_name to distinguish from other dispatcher processes
+    def __init__(self, name: str, data: dict):
+        # parameters need for schedule computation
+        self.interval = int(data['schedule'].total_seconds())
+        self.offset = 0  # offset relative to start time this schedule begins
+        self.index = 0  # number of periods of the schedule that has passed

-            while True:
-                if os.getppid() != ppid:
-                    # if the parent PID changes, this process has been orphaned
-                    # via e.g., segfault or sigkill, we should exit too
-                    pid = os.getpid()
-                    logger.warning(f'periodic beat exiting gracefully pid:{pid}')
-                    raise SystemExit()
-                try:
-                    for conn in connections.all():
-                        # If the database connection has a hiccup, re-establish a new
-                        # connection
-                        conn.close_if_unusable_or_obsolete()
-                    set_guid(generate_guid())
-                    self.run_pending()
-                except Exception:
-                    logger.exception('encountered an error while scheduling periodic tasks')
-                time.sleep(idle_seconds)
+        # parameters that do not affect scheduling logic
+        self.last_run = None  # time of last run, only used for debug
+        self.completed_runs = 0  # number of times schedule is known to run
+        self.name = name
+        self.data = data  # used by caller to know what to run

-        process = Process(target=run)
-        process.daemon = True
-        process.start()
+    @property
+    def next_run(self):
+        "Time until the next run with t=0 being the global_start of the scheduler class"
+        return (self.index + 1) * self.interval + self.offset
+
+    def due_to_run(self, relative_time):
+        return bool(self.next_run <= relative_time)
+
+    def expected_runs(self, relative_time):
+        return int((relative_time - self.offset) / self.interval)
+
+    def mark_run(self, relative_time):
+        self.last_run = relative_time
+        self.completed_runs += 1
+        new_index = self.expected_runs(relative_time)
+        if new_index > self.index + 1:
+            logger.warning(f'Missed {new_index - self.index - 1} schedules of {self.name}')
+        self.index = new_index
+
+    def missed_runs(self, relative_time):
+        "Number of times job was supposed to ran but failed to, only used for debug"
+        missed_ct = self.expected_runs(relative_time) - self.completed_runs
+        # if this is currently due to run do not count that as a missed run
+        if missed_ct and self.due_to_run(relative_time):
+            missed_ct -= 1
+        return missed_ct


-def run_continuously():
-    scheduler = Scheduler()
-    for task in settings.CELERYBEAT_SCHEDULE.values():
-        apply_async = TaskWorker.resolve_callable(task['task']).apply_async
-        total_seconds = task['schedule'].total_seconds()
-        scheduler.every(total_seconds).seconds.do(apply_async)
-    scheduler.run_continuously()
+class Scheduler:
+    def __init__(self, schedule):
+        """
+        Expects schedule in the form of a dictionary like
+        {
+            'job1': {'schedule': timedelta(seconds=50), 'other': 'stuff'}
+        }
+        Only the schedule nearest-second value is used for scheduling,
+        the rest of the data is for use by the caller to know what to run.
+        """
+        self.jobs = [ScheduledTask(name, data) for name, data in schedule.items()]
+        min_interval = min(job.interval for job in self.jobs)
+        num_jobs = len(self.jobs)
+
+        # this is intentionally oppioniated against spammy schedules
+        # a core goal is to spread out the scheduled tasks (for worker management)
+        # and high-frequency schedules just do not work with that
+        if num_jobs > min_interval:
+            raise RuntimeError(f'Number of schedules ({num_jobs}) is more than the shortest schedule interval ({min_interval} seconds).')
+
+        # even space out jobs over the base interval
+        for i, job in enumerate(self.jobs):
+            job.offset = (i * min_interval) // num_jobs
+
+        # internally times are all referenced relative to startup time, add grace period
+        self.global_start = time.time() + 2.0
+
+    def get_and_mark_pending(self):
+        relative_time = time.time() - self.global_start
+        to_run = []
+        for job in self.jobs:
+            if job.due_to_run(relative_time):
+                to_run.append(job)
+                logger.debug(f'scheduler found {job.name} to run, {relative_time - job.next_run} seconds after target')
+                job.mark_run(relative_time)
+        return to_run
+
+    def time_until_next_run(self):
+        relative_time = time.time() - self.global_start
+        next_job = min(self.jobs, key=lambda j: j.next_run)
+        delta = next_job.next_run - relative_time
+        if delta <= 0.1:
+            # careful not to give 0 or negative values to the select timeout, which has unclear interpretation
+            logger.warning(f'Scheduler next run of {next_job.name} is {-delta} seconds in the past')
+            return 0.1
+        elif delta > 20.0:
+            logger.warning(f'Scheduler next run unexpectedly over 20 seconds in future: {delta}')
+            return 20.0
+        logger.debug(f'Scheduler next run is {next_job.name} in {delta} seconds')
+        return delta
+
+    def debug(self, *args, **kwargs):
+        data = dict()
+        data['title'] = 'Scheduler status'
+
+        now = datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S UTC')
+        start_time = datetime.fromtimestamp(self.global_start).strftime('%Y-%m-%d %H:%M:%S UTC')
+        relative_time = time.time() - self.global_start
+        data['started_time'] = start_time
+        data['current_time'] = now
+        data['current_time_relative'] = round(relative_time, 3)
+        data['total_schedules'] = len(self.jobs)
+
+        data['schedule_list'] = dict(
+            [
+                (
+                    job.name,
+                    dict(
+                        last_run_seconds_ago=round(relative_time - job.last_run, 3) if job.last_run else None,
+                        next_run_in_seconds=round(job.next_run - relative_time, 3),
+                        offset_in_seconds=job.offset,
+                        completed_runs=job.completed_runs,
+                        missed_runs=job.missed_runs(relative_time),
+                    ),
+                )
+                for job in sorted(self.jobs, key=lambda job: job.interval)
+            ]
+        )
+        return yaml.safe_dump(data, default_flow_style=False, sort_keys=False)
--- a/awx/main/dispatch/publish.py
+++ b/awx/main/dispatch/publish.py
@@ -73,15 +73,15 @@ class task:
                return cls.apply_async(args, kwargs)

            @classmethod
-            def apply_async(cls, args=None, kwargs=None, queue=None, uuid=None, **kw):
+            def get_async_body(cls, args=None, kwargs=None, uuid=None, **kw):
+                """
+                Get the python dict to become JSON data in the pg_notify message
+                This same message gets passed over the dispatcher IPC queue to workers
+                If a task is submitted to a multiprocessing pool, skipping pg_notify, this might be used directly
+                """
                task_id = uuid or str(uuid4())
                args = args or []
                kwargs = kwargs or {}
-                queue = queue or getattr(cls.queue, 'im_func', cls.queue)
-                if not queue:
-                    msg = f'{cls.name}: Queue value required and may not be None'
-                    logger.error(msg)
-                    raise ValueError(msg)
                obj = {'uuid': task_id, 'args': args, 'kwargs': kwargs, 'task': cls.name, 'time_pub': time.time()}
                guid = get_guid()
                if guid:
@@ -89,6 +89,16 @@ class task:
                if bind_kwargs:
                    obj['bind_kwargs'] = bind_kwargs
                obj.update(**kw)
+                return obj
+
+            @classmethod
+            def apply_async(cls, args=None, kwargs=None, queue=None, uuid=None, **kw):
+                queue = queue or getattr(cls.queue, 'im_func', cls.queue)
+                if not queue:
+                    msg = f'{cls.name}: Queue value required and may not be None'
+                    logger.error(msg)
+                    raise ValueError(msg)
+                obj = cls.get_async_body(args=args, kwargs=kwargs, uuid=uuid, **kw)
                if callable(queue):
                    queue = queue()
                if not is_testing():
@@ -116,4 +126,5 @@ class task:
        setattr(fn, 'name', cls.name)
        setattr(fn, 'apply_async', cls.apply_async)
        setattr(fn, 'delay', cls.delay)
+        setattr(fn, 'get_async_body', cls.get_async_body)
        return fn
--- a/awx/main/dispatch/worker/base.py
+++ b/awx/main/dispatch/worker/base.py
@@ -11,11 +11,13 @@ import psycopg
 import time
 from uuid import UUID
 from queue import Empty as QueueEmpty
+from datetime import timedelta

 from django import db
 from django.conf import settings

 from awx.main.dispatch.pool import WorkerPool
+from awx.main.dispatch.periodic import Scheduler
 from awx.main.dispatch import pg_bus_conn
 from awx.main.utils.common import log_excess_runtime
 from awx.main.utils.db import set_connection_name
@@ -64,10 +66,12 @@ class AWXConsumerBase(object):
    def control(self, body):
        logger.warning(f'Received control signal:\n{body}')
        control = body.get('control')
-        if control in ('status', 'running', 'cancel'):
+        if control in ('status', 'schedule', 'running', 'cancel'):
            reply_queue = body['reply_to']
            if control == 'status':
                msg = '\n'.join([self.listening_on, self.pool.debug()])
+            if control == 'schedule':
+                msg = self.scheduler.debug()
            elif control == 'running':
                msg = []
                for worker in self.pool.workers:
@@ -93,16 +97,11 @@ class AWXConsumerBase(object):
        else:
            logger.error('unrecognized control message: {}'.format(control))

-    def process_task(self, body):
+    def dispatch_task(self, body):
+        """This will place the given body into a worker queue to run method decorated as a task"""
        if isinstance(body, dict):
            body['time_ack'] = time.time()

-        if 'control' in body:
-            try:
-                return self.control(body)
-            except Exception:
-                logger.exception(f"Exception handling control message: {body}")
-                return
        if len(self.pool):
            if "uuid" in body and body['uuid']:
                try:
@@ -116,6 +115,16 @@ class AWXConsumerBase(object):
        self.pool.write(queue, body)
        self.total_messages += 1

+    def process_task(self, body):
+        """Routes the task details in body as either a control task or a task-task"""
+        if 'control' in body:
+            try:
+                return self.control(body)
+            except Exception:
+                logger.exception(f"Exception handling control message: {body}")
+                return
+        self.dispatch_task(body)
+
    @log_excess_runtime(logger)
    def record_statistics(self):
        if time.time() - self.last_stats > 1:  # buffer stat recording to once per second
@@ -150,7 +159,7 @@ class AWXConsumerRedis(AWXConsumerBase):


 class AWXConsumerPG(AWXConsumerBase):
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, schedule=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.pg_max_wait = settings.DISPATCHER_DB_DOWNTOWN_TOLLERANCE
        # if no successful loops have ran since startup, then we should fail right away
@@ -161,27 +170,53 @@ class AWXConsumerPG(AWXConsumerBase):
        self.subsystem_metrics = s_metrics.Metrics(auto_pipe_execute=False)
        self.last_metrics_gather = init_time
        self.listen_cumulative_time = 0.0
+        if schedule:
+            schedule = schedule.copy()
+        else:
+            schedule = {}
+        # add control tasks to be ran at regular schedules
+        # NOTE: if we run out of database connections, it is important to still run cleanup
+        # so that we scale down workers and free up connections
+        schedule['pool_cleanup'] = {'control': self.pool.cleanup, 'schedule': timedelta(seconds=60)}
+        # record subsystem metrics for the dispatcher
+        schedule['metrics_gather'] = {'control': self.record_metrics, 'schedule': timedelta(seconds=20)}
+        self.scheduler = Scheduler(schedule)
+
+    def record_metrics(self):
+        current_time = time.time()
+        self.pool.produce_subsystem_metrics(self.subsystem_metrics)
+        self.subsystem_metrics.set('dispatcher_availability', self.listen_cumulative_time / (current_time - self.last_metrics_gather))
+        self.subsystem_metrics.pipe_execute()
+        self.listen_cumulative_time = 0.0
+        self.last_metrics_gather = current_time

    def run_periodic_tasks(self):
-        self.record_statistics()  # maintains time buffer in method
+        """
+        Run general periodic logic, and return maximum time in seconds before
+        the next requested run
+        This may be called more often than that when events are consumed
+        so this should be very efficient in that
+        """
+        try:
+            self.record_statistics()  # maintains time buffer in method
+        except Exception as exc:
+            logger.warning(f'Failed to save dispatcher statistics {exc}')

-        current_time = time.time()
-        if current_time - self.last_cleanup > 60:  # same as cluster_node_heartbeat
-            # NOTE: if we run out of database connections, it is important to still run cleanup
-            # so that we scale down workers and free up connections
-            self.pool.cleanup()
-            self.last_cleanup = current_time
+        for job in self.scheduler.get_and_mark_pending():
+            if 'control' in job.data:
+                try:
+                    job.data['control']()
+                except Exception:
+                    logger.exception(f'Error running control task {job.data}')
+            elif 'task' in job.data:
+                body = self.worker.resolve_callable(job.data['task']).get_async_body()
+                # bypasses pg_notify for scheduled tasks
+                self.dispatch_task(body)

-        # record subsystem metrics for the dispatcher
-        if current_time - self.last_metrics_gather > 20:
-            try:
-                self.pool.produce_subsystem_metrics(self.subsystem_metrics)
-                self.subsystem_metrics.set('dispatcher_availability', self.listen_cumulative_time / (current_time - self.last_metrics_gather))
-                self.subsystem_metrics.pipe_execute()
-            except Exception:
-                logger.exception(f"encountered an error trying to store {self.name} metrics")
-            self.listen_cumulative_time = 0.0
-            self.last_metrics_gather = current_time
+        self.pg_is_down = False
+        self.listen_start = time.time()
+
+        return self.scheduler.time_until_next_run()

    def run(self, *args, **kwargs):
        super(AWXConsumerPG, self).run(*args, **kwargs)
@@ -197,14 +232,15 @@ class AWXConsumerPG(AWXConsumerBase):
                    if init is False:
                        self.worker.on_start()
                        init = True
-                    self.listen_start = time.time()
+                    # run_periodic_tasks run scheduled actions and gives time until next scheduled action
+                    # this is saved to the conn (PubSub) object in order to modify read timeout in-loop
+                    conn.select_timeout = self.run_periodic_tasks()
+                    # this is the main operational loop for awx-manage run_dispatcher
                    for e in conn.events(yield_timeouts=True):
-                        self.listen_cumulative_time += time.time() - self.listen_start
+                        self.listen_cumulative_time += time.time() - self.listen_start  # for metrics
                        if e is not None:
                            self.process_task(json.loads(e.payload))
-                        self.run_periodic_tasks()
-                        self.pg_is_down = False
-                        self.listen_start = time.time()
+                        conn.select_timeout = self.run_periodic_tasks()
                    if self.should_stop:
                        return
            except psycopg.InterfaceError: