mirror of
https://github.com/ansible/awx.git
synced 2026-03-14 23:47:28 -02:30
replace celery task decorators with a kombu-based publisher
this commit implements the bulk of `awx-manage run_dispatcher`, a new command that binds to RabbitMQ via kombu and balances messages across a pool of workers that are similar to celeryd workers in spirit. Specifically, this includes: - a new decorator, `awx.main.dispatch.task`, which can be used to decorate functions or classes so that they can be designated as "Tasks" - support for fanout/broadcast tasks (at this point in time, only `conf.Setting` memcached flushes use this functionality) - support for job reaping - support for success/failure hooks for job runs (i.e., `handle_work_success` and `handle_work_error`) - support for auto scaling worker pool that scale processes up and down on demand - minimal support for RPC, such as status checks and pool recycle/reload
This commit is contained in:
146
awx/main/dispatch/worker/base.py
Normal file
146
awx/main/dispatch/worker/base.py
Normal file
@@ -0,0 +1,146 @@
|
||||
# Copyright (c) 2018 Ansible by Red Hat
|
||||
# All Rights Reserved.
|
||||
|
||||
import os
|
||||
import logging
|
||||
import signal
|
||||
from uuid import UUID
|
||||
from Queue import Empty as QueueEmpty
|
||||
|
||||
from kombu import Producer
|
||||
from kombu.mixins import ConsumerMixin
|
||||
|
||||
from awx.main.dispatch.pool import WorkerPool
|
||||
|
||||
logger = logging.getLogger('awx.main.dispatch')
|
||||
|
||||
|
||||
def signame(sig):
|
||||
return dict(
|
||||
(k, v) for v, k in signal.__dict__.items()
|
||||
if v.startswith('SIG') and not v.startswith('SIG_')
|
||||
)[sig]
|
||||
|
||||
|
||||
class WorkerSignalHandler:
|
||||
|
||||
def __init__(self):
|
||||
self.kill_now = False
|
||||
signal.signal(signal.SIGINT, self.exit_gracefully)
|
||||
|
||||
def exit_gracefully(self, *args, **kwargs):
|
||||
self.kill_now = True
|
||||
|
||||
|
||||
class AWXConsumer(ConsumerMixin):
|
||||
|
||||
def __init__(self, name, connection, worker, queues=[], pool=None):
|
||||
self.connection = connection
|
||||
self.total_messages = 0
|
||||
self.queues = queues
|
||||
self.worker = worker
|
||||
self.pool = pool
|
||||
if pool is None:
|
||||
self.pool = WorkerPool()
|
||||
self.pool.init_workers(self.worker.work_loop)
|
||||
|
||||
def get_consumers(self, Consumer, channel):
|
||||
logger.debug(self.listening_on)
|
||||
return [Consumer(queues=self.queues, accept=['json'],
|
||||
callbacks=[self.process_task])]
|
||||
|
||||
@property
|
||||
def listening_on(self):
|
||||
return 'listening on {}'.format([
|
||||
'{} [{}]'.format(q.name, q.exchange.type) for q in self.queues
|
||||
])
|
||||
|
||||
def control(self, body, message):
|
||||
logger.warn(body)
|
||||
control = body.get('control')
|
||||
if control in ('status', 'running'):
|
||||
producer = Producer(
|
||||
channel=self.connection,
|
||||
routing_key=message.properties['reply_to']
|
||||
)
|
||||
if control == 'status':
|
||||
msg = '\n'.join([self.listening_on, self.pool.debug()])
|
||||
elif control == 'running':
|
||||
msg = []
|
||||
for worker in self.pool.workers:
|
||||
worker.calculate_managed_tasks()
|
||||
msg.extend(worker.managed_tasks.keys())
|
||||
producer.publish(msg)
|
||||
elif control == 'reload':
|
||||
for worker in self.pool.workers:
|
||||
worker.quit()
|
||||
else:
|
||||
logger.error('unrecognized control message: {}'.format(control))
|
||||
message.ack()
|
||||
|
||||
def process_task(self, body, message):
|
||||
if 'control' in body:
|
||||
return self.control(body, message)
|
||||
if len(self.pool):
|
||||
if "uuid" in body and body['uuid']:
|
||||
try:
|
||||
queue = UUID(body['uuid']).int % len(self.pool)
|
||||
except Exception:
|
||||
queue = self.total_messages % len(self.pool)
|
||||
else:
|
||||
queue = self.total_messages % len(self.pool)
|
||||
else:
|
||||
queue = 0
|
||||
self.pool.write(queue, body)
|
||||
self.total_messages += 1
|
||||
message.ack()
|
||||
|
||||
def run(self, *args, **kwargs):
|
||||
signal.signal(signal.SIGINT, self.stop)
|
||||
signal.signal(signal.SIGTERM, self.stop)
|
||||
self.worker.on_start()
|
||||
super(AWXConsumer, self).run(*args, **kwargs)
|
||||
|
||||
def stop(self, signum, frame):
|
||||
self.should_stop = True # this makes the kombu mixin stop consuming
|
||||
logger.debug('received {}, stopping'.format(signame(signum)))
|
||||
self.worker.on_stop()
|
||||
raise SystemExit()
|
||||
|
||||
|
||||
class BaseWorker(object):
|
||||
|
||||
def work_loop(self, queue, finished, idx, *args):
|
||||
ppid = os.getppid()
|
||||
signal_handler = WorkerSignalHandler()
|
||||
while not signal_handler.kill_now:
|
||||
# if the parent PID changes, this process has been orphaned
|
||||
# via e.g., segfault or sigkill, we should exit too
|
||||
if os.getppid() != ppid:
|
||||
break
|
||||
try:
|
||||
body = queue.get(block=True, timeout=1)
|
||||
if body == 'QUIT':
|
||||
break
|
||||
except QueueEmpty:
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.error("Exception on worker, restarting: " + str(e))
|
||||
continue
|
||||
try:
|
||||
self.perform_work(body, *args)
|
||||
finally:
|
||||
if 'uuid' in body:
|
||||
uuid = body['uuid']
|
||||
logger.debug('task {} is finished'.format(uuid))
|
||||
finished.put(uuid)
|
||||
logger.warn('worker exiting gracefully pid:{}'.format(os.getpid()))
|
||||
|
||||
def perform_work(self, body):
|
||||
raise NotImplementedError()
|
||||
|
||||
def on_start(self):
|
||||
pass
|
||||
|
||||
def on_stop(self):
|
||||
pass
|
||||
Reference in New Issue
Block a user