Implement cluster health checks

* Add a local node queue to execute targeted jobs
* Add a setting for active cluster node id (per-node)
* Base the heartbeat time on the `modified` time on the Instance table
* Add periodic task that calls save() on the instance to update the
  heartbeat time if services are up
* Purge/update any HA/instance management commands
* Fix up CELERY_ROUTES settings data structure
This commit is contained in:
Matthew Jones
2016-10-06 16:05:39 -04:00
parent aabbd48d17
commit babe29ebfa
11 changed files with 43 additions and 243 deletions

View File

@@ -359,7 +359,7 @@ CELERY_QUEUES = (
# Projects use a fanout queue, this isn't super well supported
Broadcast('projects'),
)
CELERY_ROUTES = ({'awx.main.tasks.run_job': {'queue': 'jobs',
CELERY_ROUTES = {'awx.main.tasks.run_job': {'queue': 'jobs',
'routing_key': 'jobs'},
'awx.main.tasks.run_project_update': {'queue': 'projects'},
'awx.main.tasks.run_inventory_update': {'queue': 'jobs',
@@ -371,7 +371,10 @@ CELERY_ROUTES = ({'awx.main.tasks.run_job': {'queue': 'jobs',
'awx.main.scheduler.tasks.run_job_launch': {'queue': 'scheduler',
'routing_key': 'scheduler.job.launch'},
'awx.main.scheduler.tasks.run_job_complete': {'queue': 'scheduler',
'routing_key': 'scheduler.job.complete'},})
'routing_key': 'scheduler.job.complete'},
'awx.main.tasks.cluster_node_heartbeat': {'queue': 'default',
'routing_key': 'cluster.heartbeat'},
}
CELERYBEAT_SCHEDULE = {
'tower_scheduler': {
@@ -386,6 +389,10 @@ CELERYBEAT_SCHEDULE = {
'task': 'awx.main.tasks.cleanup_authtokens',
'schedule': timedelta(days=30)
},
'cluster_heartbeat': {
'task': 'awx.main.tasks.cluster_node_heartbeat',
'schedule': timedelta(seconds=60)
},
}
# Django Caching Configuration

View File

@@ -4,6 +4,7 @@
# Development settings for AWX project.
# Python
import socket
import copy
import sys
import traceback
@@ -106,3 +107,7 @@ try:
except ImportError:
traceback.print_exc()
sys.exit(1)
# Use this machine's hostname as the identifier for the cluster node.
CLUSTER_HOST_ID = socket.gethostname()
# Route the heartbeat task to a queue (and routing key) named after this
# node's host ID, replacing any route already registered for that task.
CELERY_ROUTES['awx.main.tasks.cluster_node_heartbeat'] = {'queue': CLUSTER_HOST_ID, 'routing_key': CLUSTER_HOST_ID}