celeryd attach to queues dynamically

* Based on the tower topology (Instance and InstanceGroup
relationships), have celery dyamically listen to queues on boot
* Add celery task capable of "refreshing" what queues each celeryd
worker listens to. This will be used to support changes in the topology.
* Cleaned up some celery task definitions.
* Converged wrongly targeted job launch/finish messages to 'tower'
queue, rather than a 1-off queue.
* Dynamically route celery tasks destined for the local node
* separate beat process

add support for separate beat process
This commit is contained in:
Chris Meyers
2017-09-26 10:28:41 -04:00
committed by Matthew Jones
parent 7bc3d85913
commit c9ff3e99b8
11 changed files with 237 additions and 42 deletions

View File

@@ -392,6 +392,18 @@ EMAIL_HOST_USER = ''
EMAIL_HOST_PASSWORD = ''
EMAIL_USE_TLS = False
# The number of seconds to sleep between status checks for jobs running on isolated nodes
AWX_ISOLATED_CHECK_INTERVAL = 30
# The timeout (in seconds) for launching jobs on isolated nodes
AWX_ISOLATED_LAUNCH_TIMEOUT = 600
# Ansible connection timeout (in seconds) for communicating with isolated instances
AWX_ISOLATED_CONNECTION_TIMEOUT = 10
# The time (in seconds) between the periodic isolated heartbeat status check
AWX_ISOLATED_PERIODIC_CHECK = 600
# Memcached django cache configuration
# CACHES = {
# 'default': {
@@ -435,20 +447,12 @@ CELERY_BEAT_MAX_LOOP_INTERVAL = 60
CELERY_RESULT_BACKEND = 'django-db'
CELERY_IMPORTS = ('awx.main.scheduler.tasks',)
CELERY_TASK_QUEUES = (
Queue('default', Exchange('default'), routing_key='default'),
Queue('tower', Exchange('tower'), routing_key='tower'),
Queue('tower_scheduler', Exchange('scheduler', type='topic'), routing_key='tower_scheduler.job.#', durable=False),
Broadcast('tower_broadcast_all')
)
CELERY_TASK_ROUTES = {
'awx.main.scheduler.tasks.run_task_manager': {'queue': 'tower', 'routing_key': 'tower'},
'awx.main.scheduler.tasks.run_job_launch': {'queue': 'tower_scheduler', 'routing_key': 'tower_scheduler.job.launch'},
'awx.main.scheduler.tasks.run_job_complete': {'queue': 'tower_scheduler', 'routing_key': 'tower_scheduler.job.complete'},
'awx.main.tasks.cluster_node_heartbeat': {'queue': 'default', 'routing_key': 'cluster.heartbeat'},
'awx.main.tasks.purge_old_stdout_files': {'queue': 'default', 'routing_key': 'cluster.heartbeat'},
}
CELERY_TASK_ROUTES = {}
CELERY_BEAT_SCHEDULE = {
CELERYBEAT_SCHEDULE = {
'tower_scheduler': {
'task': 'awx.main.tasks.awx_periodic_scheduler',
'schedule': timedelta(seconds=30),
@@ -474,11 +478,21 @@ CELERY_BEAT_SCHEDULE = {
'task_manager': {
'task': 'awx.main.scheduler.tasks.run_task_manager',
'schedule': timedelta(seconds=20),
'options': {'expires': 20,}
'options': {'expires': 20}
},
'isolated_heartbeat': {
'task': 'awx.main.tasks.awx_isolated_heartbeat',
'schedule': timedelta(seconds=AWX_ISOLATED_PERIODIC_CHECK),
'options': {'expires': AWX_ISOLATED_PERIODIC_CHECK * 2},
}
}
AWX_INCONSISTENT_TASK_INTERVAL = 60 * 3
# Celery queues that will always be listened to by celery workers
# Note: Broadcast queues have unique, auto-generated names, with the alias
# property value of the original queue name.
AWX_CELERY_QUEUES_STATIC = ['tower_broadcast_all',]
# Django Caching Configuration
if is_testing():
CACHES = {
@@ -627,18 +641,6 @@ AWX_ANSIBLE_CALLBACK_PLUGINS = ""
# Time at which an HA node is considered active
AWX_ACTIVE_NODE_TIME = 7200
# The number of seconds to sleep between status checks for jobs running on isolated nodes
AWX_ISOLATED_CHECK_INTERVAL = 30
# The timeout (in seconds) for launching jobs on isolated nodes
AWX_ISOLATED_LAUNCH_TIMEOUT = 600
# Ansible connection timeout (in seconds) for communicating with isolated instances
AWX_ISOLATED_CONNECTION_TIMEOUT = 10
# The time (in seconds) between the periodic isolated heartbeat status check
AWX_ISOLATED_PERIODIC_CHECK = 600
# Enable Pendo on the UI, possible values are 'off', 'anonymous', and 'detailed'
# Note: This setting may be overridden by database settings.
PENDO_TRACKING_STATE = "off"

View File

@@ -138,15 +138,6 @@ except ImportError:
sys.exit(1)
CLUSTER_HOST_ID = socket.gethostname()
CELERY_TASK_ROUTES['awx.main.tasks.cluster_node_heartbeat'] = {'queue': CLUSTER_HOST_ID, 'routing_key': CLUSTER_HOST_ID}
# Production only runs this schedule on controlling nodes
# but development will just run it on all nodes
CELERY_TASK_ROUTES['awx.main.tasks.awx_isolated_heartbeat'] = {'queue': CLUSTER_HOST_ID, 'routing_key': CLUSTER_HOST_ID}
CELERY_BEAT_SCHEDULE['isolated_heartbeat'] = {
'task': 'awx.main.tasks.awx_isolated_heartbeat',
'schedule': timedelta(seconds = AWX_ISOLATED_PERIODIC_CHECK),
'options': {'expires': AWX_ISOLATED_PERIODIC_CHECK * 2,}
}
# Supervisor service name dictionary used for programatic restart
SERVICE_NAME_DICT = {