Implement cluster health checks

* Add a local node queue to execute targeted jobs
* Add a setting for active cluster node id (per-node)
* Base the heartbeat time on the `modified` time on the Instance table
* Add a periodic task that calls save() on the instance to update the
  heartbeat time if services are up
* Purge/update any HA/instance management commands
* Fix up CELERY_ROUTES settings data structure
This commit is contained in:
Matthew Jones
2016-10-06 16:05:39 -04:00
parent aabbd48d17
commit babe29ebfa
11 changed files with 43 additions and 243 deletions

View File

@@ -125,6 +125,15 @@ def run_administrative_checks(self):
def cleanup_authtokens(self):
    """Delete every AuthToken whose `expires` value is earlier than now()."""
    # Select the tokens with an expiry before the value returned by now(),
    # then remove them with a single delete() on that queryset.
    expired_tokens = AuthToken.objects.filter(expires__lt=now())
    expired_tokens.delete()
@task(bind=True)
def cluster_node_heartbeat(self):
    """Re-save the Instance row for this cluster node.

    Looks up the Instance whose hostname equals settings.CLUSTER_HOST_ID and
    calls save() on it. Presumably the save refreshes the row's `modified`
    timestamp, which is used as the heartbeat time -- confirm against the
    Instance model.

    Raises:
        RuntimeError: if no Instance matches settings.CLUSTER_HOST_ID.
    """
    # first() fetches the row in a single query and returns None when nothing
    # matches. The previous exists() + [0] pair cost two queries and could
    # raise IndexError if the row was deleted between them.
    inst = Instance.objects.filter(hostname=settings.CLUSTER_HOST_ID).first()
    if inst is None:
        raise RuntimeError("Cluster Host Not Found: {}".format(settings.CLUSTER_HOST_ID))
    inst.save()
@task(bind=True, queue='default')
def tower_periodic_scheduler(self):
def get_last_run():
@@ -154,6 +163,7 @@ def tower_periodic_scheduler(self):
# Sanity check: If this is a secondary machine, there is nothing
# on the schedule.
# TODO: Fix for clustering/ha
if Instance.objects.my_role() == 'secondary':
return