Implement cluster health checks

* Add a local node queue to execute targeted jobs
* Add a setting for the active cluster node id (per-node)
* Base the heartbeat time on the `modified` time in the Instance table
* Add a periodic task that calls save() on the instance to update the heartbeat time if services are up
* Purge/update any HA/instance management commands
* Fix up the CELERY_ROUTES settings data structure
2026-05-12 20:07:37 -02:30 · 2016-10-06 16:05:39 -04:00
parent aabbd48d17
commit babe29ebfa
11 changed files with 43 additions and 243 deletions
--- a/awx/main/tasks.py
+++ b/awx/main/tasks.py
@@ -125,6 +125,15 @@ def run_administrative_checks(self):
 def cleanup_authtokens(self):
    AuthToken.objects.filter(expires__lt=now()).delete()

+@task(bind=True)
+def cluster_node_heartbeat(self):
+    """Re-save this node's Instance row (heartbeat); RuntimeError if no row matches."""
+    # Single query via first(); None means no Instance has this hostname.
+    inst = Instance.objects.filter(hostname=settings.CLUSTER_HOST_ID).first()
+    if inst is None:
+        raise RuntimeError("Cluster Host Not Found: {}".format(settings.CLUSTER_HOST_ID))
+    inst.save()
+
@task(bind=True, queue='default')
 def tower_periodic_scheduler(self):
    def get_last_run():
@@ -154,6 +163,7 @@ def tower_periodic_scheduler(self):

    # Sanity check: If this is a secondary machine, there is nothing
    # on the schedule.
+    # TODO: Fix for clustering/ha
    if Instance.objects.my_role() == 'secondary':
        return