Mirror of https://github.com/ansible/awx.git

replace celery task decorators with a kombu-based publisher

This commit implements the bulk of `awx-manage run_dispatcher`, a new command that binds to RabbitMQ via kombu and balances messages across a pool of workers similar in spirit to celeryd workers. Specifically, this includes:

- a new decorator, `awx.main.dispatch.task`, which can be used to decorate functions or classes so that they can be designated as "Tasks"
- support for fanout/broadcast tasks (at this point in time, only `conf.Setting` memcached flushes use this functionality)
- support for job reaping
- support for success/failure hooks for job runs (i.e., `handle_work_success` and `handle_work_error`)
- support for an auto-scaling worker pool that scales processes up and down on demand
- minimal support for RPC, such as status checks and pool recycle/reload
@@ -12,14 +12,6 @@ __version__ = get_distribution('awx').version

 __all__ = ['__version__']

-# Isolated nodes do not have celery installed
-try:
-    from .celery import app as celery_app  # noqa
-    __all__.append('celery_app')
-except ImportError:
-    pass

 # Check for the presence/absence of "devonly" module to determine if running
 # from a source code checkout or release package.
 try:
@@ -3318,7 +3318,7 @@ class JobTemplateCallback(GenericAPIView):
         with transaction.atomic():
             job = job_template.create_job(**kv)

-        # Send a signal to celery that the job should be started.
+        # Send a signal to signify that the job should be started.
         result = job.signal_start(inventory_sources_already_updated=inventory_sources_already_updated)
         if not result:
             data = dict(msg=_('Error starting job!'))
@@ -101,7 +101,9 @@ class UnifiedJobDeletionMixin(object):

 class InstanceGroupMembershipMixin(object):
     '''
-    Manages signaling celery to reload its queue configuration on Instance Group membership changes
+    This mixin overloads attach/detach so that it calls InstanceGroup.save(),
+    triggering a background recalculation of policy-based instance group
+    membership.
     '''
     def attach(self, request, *args, **kwargs):
         response = super(InstanceGroupMembershipMixin, self).attach(request, *args, **kwargs)
(deleted file)
@@ -1,25 +0,0 @@
# Copyright (c) 2017 Ansible, Inc.
# All Rights Reserved.

from __future__ import absolute_import, unicode_literals

import os
from celery import Celery
from django.conf import settings  # noqa


try:
    import awx.devonly  # noqa
    MODE = 'development'
except ImportError:  # pragma: no cover
    MODE = 'production'

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'awx.settings.%s' % MODE)

app = Celery('awx')
app.config_from_object('django.conf:settings')
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)

if __name__ == '__main__':
    app.start()

(new file)
@@ -0,0 +1,5 @@
from django.conf import settings


def get_local_queuename():
    return settings.CLUSTER_HOST_ID.encode('utf-8')
awx/main/dispatch/control.py (new file, 60 lines)
@@ -0,0 +1,60 @@
import logging
import socket

from django.conf import settings

from awx.main.dispatch import get_local_queuename
from kombu import Connection, Queue, Exchange, Producer, Consumer

logger = logging.getLogger('awx.main.dispatch')


class Control(object):

    services = ('dispatcher', 'callback_receiver')
    result = None

    def __init__(self, service):
        if service not in self.services:
            raise RuntimeError('{} must be in {}'.format(service, self.services))
        self.service = service
        queuename = get_local_queuename()
        self.queue = Queue(queuename, Exchange(queuename), routing_key=queuename)

    def publish(self, msg, conn, host, **kwargs):
        producer = Producer(
            exchange=self.queue.exchange,
            channel=conn,
            routing_key=get_local_queuename()
        )
        producer.publish(msg, expiration=5, **kwargs)

    def status(self, *args, **kwargs):
        return self.control_with_reply('status', *args, **kwargs)

    def running(self, *args, **kwargs):
        return self.control_with_reply('running', *args, **kwargs)

    def control_with_reply(self, command, host=None, timeout=5):
        host = host or settings.CLUSTER_HOST_ID
        logger.warn('checking {} {} for {}'.format(self.service, command, host))
        reply_queue = Queue(name="amq.rabbitmq.reply-to")
        self.result = None
        with Connection(settings.BROKER_URL) as conn:
            with Consumer(conn, reply_queue, callbacks=[self.process_message], no_ack=True):
                self.publish({'control': command}, conn, host, reply_to='amq.rabbitmq.reply-to')
                try:
                    conn.drain_events(timeout=timeout)
                except socket.timeout:
                    logger.error('{} did not reply within {}s'.format(self.service, timeout))
                    raise
        return self.result

    def control(self, msg, host=None, **kwargs):
        host = host or settings.CLUSTER_HOST_ID
        with Connection(settings.BROKER_URL) as conn:
            self.publish(msg, conn, host)

    def process_message(self, body, message):
        self.result = body
        message.ack()
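The status/running/reload RPC mentioned in the commit message rides on this class. A minimal usage sketch (assumes a reachable RabbitMQ broker at settings.BROKER_URL and a dispatcher consuming the local node's queue; `awx-manage run_dispatcher --status` below does exactly this):

    from awx.main.dispatch.control import Control

    ctl = Control('dispatcher')
    print(ctl.status())                  # request/reply via amq.rabbitmq.reply-to
    print(ctl.running())                 # UUIDs of tasks managed by the local pool
    ctl.control({'control': 'reload'})   # fire-and-forget: recycle worker processes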
awx/main/dispatch/pool.py
@@ -1,81 +1,260 @@
import errno
import logging
import os
import signal
import random
import traceback
from uuid import uuid4

import collections
from multiprocessing import Process
from multiprocessing import Queue as MPQueue
-from Queue import Full as QueueFull
+from Queue import Full as QueueFull, Empty as QueueEmpty

from django.conf import settings
from django.db import connection as django_connection
from django.core.cache import cache as django_cache
from jinja2 import Template
import psutil

from awx.main.models import UnifiedJob
from awx.main.dispatch import reaper

logger = logging.getLogger('awx.main.dispatch')


-def signame(sig):
-    return dict(
-        (k, v) for v, k in signal.__dict__.items()
-        if v.startswith('SIG') and not v.startswith('SIG_')
-    )[sig]


class PoolWorker(object):
    '''
    Used to track a worker child process and its pending and finished messages.

    This class makes use of two distinct multiprocessing.Queues to track state:

    - self.queue: this is a queue which represents pending messages that should
                  be handled by this worker process; as new AMQP messages come
                  in, a pool will put() them into this queue; the child
                  process that is forked will get() from this queue and handle
                  received messages in an endless loop
    - self.finished: this is a queue which the worker process uses to signal
                     that it has finished processing a message

    When a message is put() onto this worker, it is tracked in
    self.managed_tasks.

    Periodically, the worker will call .calculate_managed_tasks(), which will
    cause messages in self.finished to be removed from self.managed_tasks.

    In this way, self.managed_tasks represents a view of the messages assigned
    to a specific process. The message at [0] is the least-recently inserted
    message, and it represents what the worker is running _right now_
    (self.current_task).

    A worker is "busy" when it has at least one message in self.managed_tasks.
    It is "idle" when self.managed_tasks is empty.
    '''

    def __init__(self, queue_size, target, args):
        self.messages_sent = 0
        self.messages_finished = 0
        self.managed_tasks = collections.OrderedDict()
        self.finished = MPQueue(queue_size)
        self.queue = MPQueue(queue_size)
        self.process = Process(target=target, args=(self.queue, self.finished) + args)
        self.process.daemon = True

    def start(self):
        self.process.start()

    def put(self, body):
        uuid = '?'
        if isinstance(body, dict):
            if not body.get('uuid'):
                body['uuid'] = str(uuid4())
            uuid = body['uuid']
        logger.debug('delivered {} to worker[{}] qsize {}'.format(
            uuid, self.pid, self.qsize
        ))
        self.managed_tasks[uuid] = body
        self.queue.put(body, block=True, timeout=5)
        self.messages_sent += 1
        self.calculate_managed_tasks()

    def quit(self):
        '''
        Send a special control message to the worker that tells it to exit
        gracefully.
        '''
        self.queue.put('QUIT')

    @property
    def pid(self):
        return self.process.pid

    @property
    def qsize(self):
        return self.queue.qsize()

    @property
    def alive(self):
        return self.process.is_alive()

    @property
    def mb(self):
        if self.alive:
            return '{:0.3f}'.format(
                psutil.Process(self.pid).memory_info().rss / 1024.0 / 1024.0
            )
        return '0'

    @property
    def exitcode(self):
        return str(self.process.exitcode)

    def calculate_managed_tasks(self):
        # look to see if any tasks were finished
        finished = []
        for _ in range(self.finished.qsize()):
            try:
                finished.append(self.finished.get(block=False))
            except QueueEmpty:
                break  # qsize is not always _totally_ up to date

        # if any tasks were finished, remove them from the managed tasks for
        # this worker
        for uuid in finished:
            self.messages_finished += 1
            del self.managed_tasks[uuid]

    @property
    def current_task(self):
        self.calculate_managed_tasks()
        # the task at [0] is the one that's running right now (or is about to
        # be running)
        if len(self.managed_tasks):
            return self.managed_tasks[self.managed_tasks.keys()[0]]

        return None

    @property
    def orphaned_tasks(self):
        orphaned = []
        if not self.alive:
            # if this process had a running task that never finished,
            # requeue its error callbacks
            current_task = self.current_task
            if isinstance(current_task, dict):
                orphaned.extend(current_task.get('errbacks', []))

            # if this process has any pending messages, requeue them
            for _ in range(self.qsize):
                try:
                    orphaned.append(self.queue.get(block=False))
                except QueueEmpty:
                    break  # qsize is not always _totally_ up to date
            if len(orphaned):
                logger.error(
                    'requeuing {} messages from gone worker pid:{}'.format(
                        len(orphaned), self.pid
                    )
                )
        return orphaned

    @property
    def busy(self):
        self.calculate_managed_tasks()
        return len(self.managed_tasks) > 0

    @property
    def idle(self):
        return not self.busy


class WorkerPool(object):
    '''
    Creates a pool of forked PoolWorkers.

    As WorkerPool.write(...) is called (generally, by a kombu consumer
    implementation when it receives an AMQP message), messages are passed to
    one of the multiprocessing Queues where some work can be done on them.

    class MessagePrinter(awx.main.dispatch.worker.BaseWorker):

        def perform_work(self, body):
            print body

    pool = WorkerPool(min_workers=4)  # spawn four worker processes
    pool.init_workers(MessagePrinter().work_loop)
    pool.write(
        0,  # preferred worker 0
        'Hello, World!'
    )
    '''

    debug_meta = ''

    def __init__(self, min_workers=None, queue_size=None):
        self.name = settings.CLUSTER_HOST_ID
        self.pid = os.getpid()
        self.min_workers = min_workers or settings.JOB_EVENT_WORKERS
        self.queue_size = queue_size or settings.JOB_EVENT_MAX_QUEUE_SIZE

-       # self.workers tracks the state of running worker processes:
-       # [
-       #     (total_messages_consumed, multiprocessing.Queue, multiprocessing.Process),
-       #     (total_messages_consumed, multiprocessing.Queue, multiprocessing.Process),
-       #     (total_messages_consumed, multiprocessing.Queue, multiprocessing.Process),
-       #     (total_messages_consumed, multiprocessing.Queue, multiprocessing.Process)
-       # ]
        self.workers = []

    def __len__(self):
        return len(self.workers)

    def init_workers(self, target, *target_args):
-       def shutdown_handler(active_workers):
-           def _handler(signum, frame):
-               logger.debug('received shutdown {}'.format(signame(signum)))
-               try:
-                   for active_worker in active_workers:
-                       logger.debug('terminating worker')
-                   signal.signal(signum, signal.SIG_DFL)
-                   os.kill(os.getpid(), signum)  # Rethrow signal, this time without catching it
-               except Exception:
-                   logger.exception('error in shutdown_handler')
-           return _handler
+       self.target = target
+       self.target_args = target_args
+       for idx in range(self.min_workers):
+           self.up()

+   def up(self):
+       idx = len(self.workers)
        # It's important to close these because we're _about_ to fork, and we
        # don't want the forked processes to inherit the open sockets
        # for the DB and memcached connections (that way lies race conditions)
        django_connection.close()
        django_cache.close()
-       for idx in range(self.min_workers):
-           queue_actual = MPQueue(self.queue_size)
-           w = Process(target=target, args=(queue_actual, idx,) + target_args)
-           w.start()
-           logger.debug('started {}[{}]'.format(target.im_self.__class__.__name__, idx))
-           self.workers.append([0, queue_actual, w])
+       worker = PoolWorker(self.queue_size, self.target, (idx,) + self.target_args)
+       self.workers.append(worker)
+       try:
+           worker.start()
+       except Exception:
+           logger.exception('could not fork')
+       else:
+           logger.warn('scaling up worker pid:{}'.format(worker.pid))
+       return idx, worker

-       signal.signal(signal.SIGINT, shutdown_handler([p[2] for p in self.workers]))
-       signal.signal(signal.SIGTERM, shutdown_handler([p[2] for p in self.workers]))

    def debug(self, *args, **kwargs):
        self.cleanup()
        tmpl = Template(
            '{{ pool.name }}[pid:{{ pool.pid }}] workers total={{ workers|length }} {{ meta }} \n'
            '{% for w in workers %}'
            '. worker[pid:{{ w.pid }}]{% if not w.alive %} GONE exit={{ w.exitcode }}{% endif %}'
            ' sent={{ w.messages_sent }}'
            ' finished={{ w.messages_finished }}'
            ' qsize={{ w.managed_tasks|length }}'
            ' rss={{ w.mb }}MB'
            '{% for task in w.managed_tasks.values() %}'
            '\n - {% if loop.index0 == 0 %}running {% else %}queued {% endif %}'
            '{{ task["uuid"] }} '
            '{% if "task" in task %}'
            '{{ task["task"].rsplit(".", 1)[-1] }}'
            # don't print kwargs, they often contain launch-time secrets
            '(*{{ task.get("args", []) }})'
            '{% endif %}'
            '{% endfor %}'
            '{% if not w.managed_tasks|length %}'
            ' [IDLE]'
            '{% endif %}'
            '\n'
            '{% endfor %}'
        )
        return tmpl.render(pool=self, workers=self.workers, meta=self.debug_meta)

    def write(self, preferred_queue, body):
-       queue_order = sorted(range(self.min_workers), cmp=lambda x, y: -1 if x==preferred_queue else 0)
+       queue_order = sorted(range(len(self.workers)), cmp=lambda x, y: -1 if x==preferred_queue else 0)
        write_attempt_order = []
        for queue_actual in queue_order:
            try:
-               worker_actual = self.workers[queue_actual]
-               worker_actual[1].put(body, block=True, timeout=5)
-               logger.debug('delivered to Worker[{}] qsize {}'.format(
-                   queue_actual, worker_actual[1].qsize()
-               ))
-               worker_actual[0] += 1
+               self.workers[queue_actual].put(body)
                return queue_actual
            except QueueFull:
                pass
@@ -87,11 +266,113 @@ class WorkerPool(object):
        logger.warn("could not write payload to any queue, attempted order: {}".format(write_attempt_order))
        return None

-   def stop(self):
-       for worker in self.workers:
-           messages, queue, process = worker
-           try:
-               os.kill(process.pid, signal.SIGTERM)
-           except OSError as e:
-               if e.errno != errno.ESRCH:
-                   raise
+   def stop(self, signum):
+       try:
+           for worker in self.workers:
+               os.kill(worker.pid, signum)
+       except Exception:
+           logger.exception('could not kill {}'.format(worker.pid))


class AutoscalePool(WorkerPool):
    '''
    An extended pool implementation that automatically scales workers up and
    down based on demand.
    '''

    def __init__(self, *args, **kwargs):
        self.max_workers = kwargs.pop('max_workers', None)
        super(AutoscalePool, self).__init__(*args, **kwargs)

        if self.max_workers is None:
            settings_absmem = getattr(settings, 'SYSTEM_TASK_ABS_MEM', None)
            if settings_absmem is not None:
                total_memory_gb = int(settings_absmem)
            else:
                total_memory_gb = (psutil.virtual_memory().total >> 30) + 1  # noqa: round up
            # 5 workers per GB of total memory
            self.max_workers = (total_memory_gb * 5)

    @property
    def should_grow(self):
        if len(self.workers) < self.min_workers:
            # If we don't have at least min_workers, add more
            return True
        # If every worker is busy doing something, add more
        return all([w.busy for w in self.workers])

    @property
    def full(self):
        return len(self.workers) == self.max_workers

    @property
    def debug_meta(self):
        return 'min={} max={}'.format(self.min_workers, self.max_workers)

    def cleanup(self):
        """
        Perform some internal accounting and cleanup. This is run on
        every cluster node heartbeat:

        1.  Discover worker processes that exited, and recover messages they
            were handling.
        2.  Clean up unnecessary, idle workers.
        """
        orphaned = []
        for w in self.workers[::]:
            if not w.alive:
                # the worker process has exited
                # 1. take the task it was running and enqueue the error
                #    callbacks
                # 2. take any pending tasks delivered to its queue and
                #    send them to another worker
                logger.error('worker pid:{} is gone (exit={})'.format(w.pid, w.exitcode))
                if w.current_task:
                    try:
                        for j in UnifiedJob.objects.filter(celery_task_id=w.current_task['uuid']):
                            reaper.reap_job(j, 'failed')
                    except Exception:
                        logger.exception('failed to reap job UUID {}'.format(w.current_task['uuid']))
                orphaned.extend(w.orphaned_tasks)
                self.workers.remove(w)
            elif w.idle and len(self.workers) > self.min_workers:
                # the process has an empty queue (it's idle) and we have
                # more processes in the pool than we need (> min)
                # send this process a message so it will exit gracefully
                # at the next opportunity
                logger.warn('scaling down worker pid:{}'.format(w.pid))
                w.quit()
                self.workers.remove(w)

        for m in orphaned:
            # if all the workers are dead, spawn at least one
            if not len(self.workers):
                self.up()
            idx = random.choice(range(len(self.workers)))
            self.write(idx, m)

    def up(self):
        if self.full:
            # if we can't spawn more workers, just toss this message into a
            # random worker's backlog
            idx = random.choice(range(len(self.workers)))
            return idx, self.workers[idx]
        else:
            return super(AutoscalePool, self).up()

    def write(self, preferred_queue, body):
        # when the cluster heartbeat occurs, clean up internally
        if isinstance(body, dict) and 'cluster_node_heartbeat' in body['task']:
            self.cleanup()
        if self.should_grow:
            self.up()
        # we don't care about "preferred queue" round robin distribution, just
        # find the first non-busy worker and claim it
        workers = self.workers[:]
        random.shuffle(workers)
        for w in workers:
            if not w.busy:
                w.put(body)
                break
        else:
            return super(AutoscalePool, self).write(preferred_queue, body)
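The max_workers heuristic above is easiest to see with concrete numbers; this is just the arithmetic from AutoscalePool.__init__, assuming psutil reports 4 GB of RAM and no SYSTEM_TASK_ABS_MEM override:

    total_bytes = 4 * 1024 ** 3                # what psutil.virtual_memory().total might report
    total_memory_gb = (total_bytes >> 30) + 1  # integer GB, plus one to "round up" -> 5
    max_workers = total_memory_gb * 5          # 5 workers per GB of total memory -> 25
    assert max_workers == 25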
awx/main/dispatch/publish.py (new file, 128 lines)
@@ -0,0 +1,128 @@
import inspect
import logging
import sys
from uuid import uuid4

from django.conf import settings
from kombu import Connection, Exchange, Producer

logger = logging.getLogger('awx.main.dispatch')


def serialize_task(f):
    return '.'.join([f.__module__, f.__name__])


class task:
    """
    Used to decorate a function or class so that it can be run asynchronously
    via the task dispatcher. Tasks can be simple functions:

    @task()
    def add(a, b):
        return a + b

    ...or classes that define a `run` method:

    @task()
    class Adder:
        def run(self, a, b):
            return a + b

    # Tasks can be run synchronously...
    assert add(1, 1) == 2
    assert Adder().run(1, 1) == 2

    # ...or published to a queue:
    add.apply_async([1, 1])
    Adder.apply_async([1, 1])

    # Tasks can also define a specific target queue or exchange type:

    @task(queue='slow-tasks')
    def snooze():
        time.sleep(10)

    @task(queue='tower_broadcast', exchange_type='fanout')
    def announce():
        print "Run this everywhere!"
    """

    def __init__(self, queue=None, exchange_type=None):
        self.queue = queue
        self.exchange_type = exchange_type

    def __call__(self, fn=None):
        queue = self.queue
        exchange_type = self.exchange_type

        class PublisherMixin(object):

            queue = None

            @classmethod
            def delay(cls, *args, **kwargs):
                return cls.apply_async(args, kwargs)

            @classmethod
            def apply_async(cls, args=None, kwargs=None, queue=None, uuid=None, **kw):
                task_id = uuid or str(uuid4())
                args = args or []
                kwargs = kwargs or {}
                queue = (
                    queue or
                    getattr(cls.queue, 'im_func', cls.queue) or
                    settings.CELERY_DEFAULT_QUEUE
                )
                obj = {
                    'uuid': task_id,
                    'args': args,
                    'kwargs': kwargs,
                    'task': cls.name
                }
                obj.update(**kw)
                if callable(queue):
                    queue = queue()
                if not settings.IS_TESTING(sys.argv):
                    with Connection(settings.BROKER_URL) as conn:
                        exchange = Exchange(queue, type=exchange_type or 'direct')
                        producer = Producer(conn)
                        logger.debug('publish {}({}, queue={})'.format(
                            cls.name,
                            task_id,
                            queue
                        ))
                        producer.publish(obj,
                                         serializer='json',
                                         compression='bzip2',
                                         exchange=exchange,
                                         declare=[exchange],
                                         delivery_mode="persistent",
                                         routing_key=queue)
                return (obj, queue)

        # If the object we're wrapping *is* a class (e.g., RunJob), return
        # a *new* class that inherits from the wrapped class *and* PublisherMixin.
        # In this way, the new class returned by our decorator is the class
        # being decorated *plus* PublisherMixin, so cls.apply_async() and
        # cls.delay() work
        bases = []
        ns = {'name': serialize_task(fn), 'queue': queue}
        if inspect.isclass(fn):
            bases = list(fn.__bases__)
            ns.update(fn.__dict__)
        cls = type(
            fn.__name__,
            tuple(bases + [PublisherMixin]),
            ns
        )
        if inspect.isclass(fn):
            return cls

        # if the object being decorated is *not* a class (it's a Python
        # function), make fn.apply_async and fn.delay proxy through to the
        # PublisherMixin we dynamically created above
        setattr(fn, 'name', cls.name)
        setattr(fn, 'apply_async', cls.apply_async)
        setattr(fn, 'delay', cls.delay)
        return fn
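Putting the decorator and the publisher together, a hedged sketch (the queue name 'example' and the module path in the comment are illustrative; it assumes a broker at settings.BROKER_URL and a dispatcher consuming that queue):

    from awx.main.dispatch.publish import task

    @task(queue='example')
    def add(a, b):
        return a + b

    add(1, 1)                          # runs inline, returns 2
    body, queue = add.apply_async([1, 1])
    # body is the JSON-serializable message a TaskWorker will receive, roughly:
    # {'uuid': '...', 'args': [1, 1], 'kwargs': {}, 'task': 'mymodule.add'}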
awx/main/dispatch/reaper.py (new file, 46 lines)
@@ -0,0 +1,46 @@
from datetime import timedelta
import logging

from django.db.models import Q
from django.utils.timezone import now as tz_now
from django.contrib.contenttypes.models import ContentType

from awx.main.models import Instance, UnifiedJob, WorkflowJob

logger = logging.getLogger('awx.main.dispatch')


def reap_job(j, status):
    j.status = status
    j.start_args = ''  # blank field to remove encrypted passwords
    j.job_explanation += ' '.join((
        'Task was marked as running in Tower but was not present in',
        'the job queue, so it has been marked as failed.',
    ))
    j.save(update_fields=['status', 'start_args', 'job_explanation'])
    if hasattr(j, 'send_notification_templates'):
        j.send_notification_templates('failed')
    j.websocket_emit_status(status)
    logger.error(
        '{} is no longer running; reaping'.format(j.log_format)
    )


def reap(instance=None, status='failed'):
    '''
    Reap all jobs in waiting|running for this instance.
    '''
    me = instance or Instance.objects.me()
    now = tz_now()
    workflow_ctype_id = ContentType.objects.get_for_model(WorkflowJob).id
    jobs = UnifiedJob.objects.filter(
        (
            Q(status='running') |
            Q(status='waiting', modified__lte=now - timedelta(seconds=60))
        ) & (
            Q(execution_node=me.hostname) |
            Q(controller_node=me.hostname)
        ) & ~Q(polymorphic_ctype_id=workflow_ctype_id)
    )
    for j in jobs:
        reap_job(j, status)
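The dispatcher calls this on startup (see run_dispatcher below): anything the database claims this node is running cannot actually be running at that moment, so it is failed out. A minimal sketch of that call:

    from awx.main.dispatch import reaper

    # fail running jobs (and 'waiting' jobs stale for 60+ seconds) on this node
    reaper.reap()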
awx/main/dispatch/worker/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from .base import AWXConsumer, BaseWorker  # noqa
from .callback import CallbackBrokerWorker  # noqa
from .task import TaskWorker  # noqa
awx/main/dispatch/worker/base.py (new file, 146 lines)
@@ -0,0 +1,146 @@
# Copyright (c) 2018 Ansible by Red Hat
# All Rights Reserved.

import os
import logging
import signal
from uuid import UUID
from Queue import Empty as QueueEmpty

from kombu import Producer
from kombu.mixins import ConsumerMixin

from awx.main.dispatch.pool import WorkerPool

logger = logging.getLogger('awx.main.dispatch')


def signame(sig):
    return dict(
        (k, v) for v, k in signal.__dict__.items()
        if v.startswith('SIG') and not v.startswith('SIG_')
    )[sig]


class WorkerSignalHandler:

    def __init__(self):
        self.kill_now = False
        signal.signal(signal.SIGINT, self.exit_gracefully)

    def exit_gracefully(self, *args, **kwargs):
        self.kill_now = True


class AWXConsumer(ConsumerMixin):

    def __init__(self, name, connection, worker, queues=[], pool=None):
        self.connection = connection
        self.total_messages = 0
        self.queues = queues
        self.worker = worker
        self.pool = pool
        if pool is None:
            self.pool = WorkerPool()
        self.pool.init_workers(self.worker.work_loop)

    def get_consumers(self, Consumer, channel):
        logger.debug(self.listening_on)
        return [Consumer(queues=self.queues, accept=['json'],
                         callbacks=[self.process_task])]

    @property
    def listening_on(self):
        return 'listening on {}'.format([
            '{} [{}]'.format(q.name, q.exchange.type) for q in self.queues
        ])

    def control(self, body, message):
        logger.warn(body)
        control = body.get('control')
        if control in ('status', 'running'):
            producer = Producer(
                channel=self.connection,
                routing_key=message.properties['reply_to']
            )
            if control == 'status':
                msg = '\n'.join([self.listening_on, self.pool.debug()])
            elif control == 'running':
                msg = []
                for worker in self.pool.workers:
                    worker.calculate_managed_tasks()
                    msg.extend(worker.managed_tasks.keys())
            producer.publish(msg)
        elif control == 'reload':
            for worker in self.pool.workers:
                worker.quit()
        else:
            logger.error('unrecognized control message: {}'.format(control))
        message.ack()

    def process_task(self, body, message):
        if 'control' in body:
            return self.control(body, message)
        if len(self.pool):
            if "uuid" in body and body['uuid']:
                try:
                    queue = UUID(body['uuid']).int % len(self.pool)
                except Exception:
                    queue = self.total_messages % len(self.pool)
            else:
                queue = self.total_messages % len(self.pool)
        else:
            queue = 0
        self.pool.write(queue, body)
        self.total_messages += 1
        message.ack()

    def run(self, *args, **kwargs):
        signal.signal(signal.SIGINT, self.stop)
        signal.signal(signal.SIGTERM, self.stop)
        self.worker.on_start()
        super(AWXConsumer, self).run(*args, **kwargs)

    def stop(self, signum, frame):
        self.should_stop = True  # this makes the kombu mixin stop consuming
        logger.debug('received {}, stopping'.format(signame(signum)))
        self.worker.on_stop()
        raise SystemExit()


class BaseWorker(object):

    def work_loop(self, queue, finished, idx, *args):
        ppid = os.getppid()
        signal_handler = WorkerSignalHandler()
        while not signal_handler.kill_now:
            # if the parent PID changes, this process has been orphaned
            # via e.g., segfault or sigkill, we should exit too
            if os.getppid() != ppid:
                break
            try:
                body = queue.get(block=True, timeout=1)
                if body == 'QUIT':
                    break
            except QueueEmpty:
                continue
            except Exception as e:
                logger.error("Exception on worker, restarting: " + str(e))
                continue
            try:
                self.perform_work(body, *args)
            finally:
                if 'uuid' in body:
                    uuid = body['uuid']
                    logger.debug('task {} is finished'.format(uuid))
                    finished.put(uuid)
        logger.warn('worker exiting gracefully pid:{}'.format(os.getpid()))

    def perform_work(self, body):
        raise NotImplementedError()

    def on_start(self):
        pass

    def on_stop(self):
        pass
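A BaseWorker subclass only needs perform_work; here is a sketch of wiring one into AWXConsumer by hand, mirroring the MessagePrinter example from the pool docstring (the broker URL and queue name are illustrative):

    from kombu import Connection, Exchange, Queue
    from awx.main.dispatch.worker import AWXConsumer, BaseWorker

    class MessagePrinter(BaseWorker):

        def perform_work(self, body):
            print body  # Python 2 era codebase

    with Connection('amqp://guest:guest@localhost//') as conn:
        queue = Queue('example', Exchange('example'), routing_key='example')
        AWXConsumer('example', conn, MessagePrinter(), [queue]).run()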
awx/main/dispatch/worker/callback.py
@@ -1,83 +1,30 @@
 # Copyright (c) 2018 Ansible by Red Hat
 # All Rights Reserved.

 import logging
-import time
 import os
 import signal
+import time
 import traceback
-from uuid import UUID
-from Queue import Empty as QueueEmpty

-from kombu.mixins import ConsumerMixin
 from django.conf import settings
 from django.db import DatabaseError, OperationalError, connection as django_connection
 from django.db.utils import InterfaceError, InternalError

-from awx.main.consumers import emit_channel_notification
 from awx.main.models import (JobEvent, AdHocCommandEvent, ProjectUpdateEvent,
                              InventoryUpdateEvent, SystemJobEvent, UnifiedJob)
+from awx.main.consumers import emit_channel_notification
-from awx.main.dispatch.pool import WorkerPool

+from .base import BaseWorker

 logger = logging.getLogger('awx.main.dispatch')


-class WorkerSignalHandler:
-
-    def __init__(self):
-        self.kill_now = False
-        signal.signal(signal.SIGINT, self.exit_gracefully)
-        signal.signal(signal.SIGTERM, self.exit_gracefully)
-
-    def exit_gracefully(self, *args, **kwargs):
-        self.kill_now = True
-
-
-class AWXConsumer(ConsumerMixin):
-
-    def __init__(self, connection, worker, queues=[]):
-        self.connection = connection
-        self.total_messages = 0
-        self.queues = queues
-        self.pool = WorkerPool()
-        self.pool.init_workers(worker.work_loop)
-
-    def get_consumers(self, Consumer, channel):
-        return [Consumer(queues=self.queues, accept=['json'],
-                         callbacks=[self.process_task])]
-
-    def process_task(self, body, message):
-        if "uuid" in body and body['uuid']:
-            try:
-                queue = UUID(body['uuid']).int % len(self.pool)
-            except Exception:
-                queue = self.total_messages % len(self.pool)
-        else:
-            queue = self.total_messages % len(self.pool)
-        self.pool.write(queue, body)
-        self.total_messages += 1
-        message.ack()
-
-
-class BaseWorker(object):
-
-    def work_loop(self, queue, idx, *args):
-        signal_handler = WorkerSignalHandler()
-        while not signal_handler.kill_now:
-            try:
-                body = queue.get(block=True, timeout=1)
-            except QueueEmpty:
-                continue
-            except Exception as e:
-                logger.error("Exception on worker, restarting: " + str(e))
-                continue
-            self.perform_work(body, *args)
-
-    def perform_work(self, body):
-        raise NotImplemented()


 class CallbackBrokerWorker(BaseWorker):
     '''
     A worker implementation that deserializes callback event data and persists
     it into the database.

     The code that *builds* these types of messages is found in the AWX display
     callback (`awx.lib.awx_display_callback`).
     '''

     MAX_RETRIES = 2

@@ -151,7 +98,7 @@ class CallbackBrokerWorker(BaseWorker):
             try:
                 _save_event_data()
                 break
-            except (OperationalError, InterfaceError, InternalError) as e:
+            except (OperationalError, InterfaceError, InternalError):
                 if retries >= self.MAX_RETRIES:
                     logger.exception('Worker could not re-establish database connectivity, shutting down gracefully: Job {}'.format(job_identifier))
                     os.kill(os.getppid(), signal.SIGINT)

@@ -164,7 +111,7 @@ class CallbackBrokerWorker(BaseWorker):
                 django_connection.close()
                 time.sleep(delay)
                 retries += 1
-            except DatabaseError as e:
+            except DatabaseError:
                 logger.exception('Database Error Saving Job Event for Job {}'.format(job_identifier))
                 break
             except Exception as exc:
awx/main/dispatch/worker/task.py (new file, 113 lines)
@@ -0,0 +1,113 @@
import inspect
import logging
import importlib
import sys
import traceback

import six

from awx.main.tasks import dispatch_startup, inform_cluster_of_shutdown

from .base import BaseWorker

logger = logging.getLogger('awx.main.dispatch')


class TaskWorker(BaseWorker):
    '''
    A worker implementation that deserializes task messages and runs native
    Python code.

    The code that *builds* these types of messages is found in
    `awx.main.dispatch.publish`.
    '''

    @classmethod
    def resolve_callable(cls, task):
        '''
        Transform a dotted notation task into an imported, callable function, e.g.,

        awx.main.tasks.delete_inventory
        awx.main.tasks.RunProjectUpdate
        '''
        module, target = task.rsplit('.', 1)
        module = importlib.import_module(module)
        _call = None
        if hasattr(module, target):
            _call = getattr(module, target, None)
        return _call

    def run_callable(self, body):
        '''
        Given some AMQP message, import the correct Python code and run it.
        '''
        task = body['task']
        uuid = body.get('uuid', '<unknown>')
        args = body.get('args', [])
        kwargs = body.get('kwargs', {})
        _call = TaskWorker.resolve_callable(task)
        if inspect.isclass(_call):
            # the callable is a class, e.g., RunJob; instantiate and
            # return its `run()` method
            _call = _call().run
        # don't print kwargs, they often contain launch-time secrets
        logger.debug('task {} starting {}(*{})'.format(uuid, task, args))
        return _call(*args, **kwargs)

    def perform_work(self, body):
        '''
        Import and run code for a task, e.g.,

        body = {
            'args': [8],
            'callbacks': [{
                'args': [],
                'kwargs': {},
                'task': u'awx.main.tasks.handle_work_success'
            }],
            'errbacks': [{
                'args': [],
                'kwargs': {},
                'task': 'awx.main.tasks.handle_work_error'
            }],
            'kwargs': {},
            'task': u'awx.main.tasks.RunProjectUpdate'
        }
        '''
        result = None
        try:
            result = self.run_callable(body)
        except Exception as exc:

            try:
                if getattr(exc, 'is_awx_task_error', False):
                    # Error caused by user / tracked in job output
                    logger.warning(six.text_type("{}").format(exc))
                else:
                    task = body['task']
                    args = body.get('args', [])
                    kwargs = body.get('kwargs', {})
                    logger.exception('Worker failed to run task {}(*{}, **{})'.format(
                        task, args, kwargs
                    ))
            except Exception:
                # It's fairly critical that this code _not_ raise exceptions on logging.
                # If you configure external logging in a way that _it_ fails, there's
                # not a lot we can do here; sys.stderr.write is a final hail mary
                _, _, tb = sys.exc_info()
                traceback.print_tb(tb)

            for callback in body.get('errbacks', []) or []:
                callback['uuid'] = body['uuid']
                self.perform_work(callback)

        for callback in body.get('callbacks', []) or []:
            callback['uuid'] = body['uuid']
            self.perform_work(callback)
        return result

    def on_start(self):
        dispatch_startup()

    def on_stop(self):
        inform_cluster_of_shutdown()
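The perform_work contract above in one concrete round trip (the task name, args, and uuid are purely illustrative; this is the same message shape the publish.task decorator emits):

    worker = TaskWorker()
    result = worker.perform_work({
        'uuid': 'abc123',
        'task': 'awx.main.tasks.delete_inventory',  # resolved via importlib
        'args': [42],
        'kwargs': {},
        # each callback dict is itself fed back through perform_work afterward:
        'callbacks': [{'task': 'awx.main.tasks.handle_work_success',
                       'args': [], 'kwargs': {}}],
    })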
@@ -4,11 +4,6 @@
 import six


-# Celery does not respect exception type when using a serializer different than pickle;
-# and awx uses the json serializer
-# https://github.com/celery/celery/issues/3586


 class _AwxTaskError():
     def build_exception(self, task, message=None):
         if message is None:

@@ -36,5 +31,3 @@ class _AwxTaskError():

 AwxTaskError = _AwxTaskError()
@@ -938,7 +938,7 @@ class Command(BaseCommand):
         self.exclude_empty_groups = bool(options.get('exclude_empty_groups', False))
         self.instance_id_var = options.get('instance_id_var', None)

-        self.celery_invoked = False if os.getenv('INVENTORY_SOURCE_ID', None) is None else True
+        self.invoked_from_dispatcher = False if os.getenv('INVENTORY_SOURCE_ID', None) is None else True

         # Load inventory and related objects from database.
         if self.inventory_name and self.inventory_id:

@@ -1062,7 +1062,7 @@ class Command(BaseCommand):
             exc = e
             transaction.rollback()

-        if self.celery_invoked is False:
+        if self.invoked_from_dispatcher is False:
             with ignore_inventory_computed_fields():
                 self.inventory_update = InventoryUpdate.objects.get(pk=self.inventory_update.pk)
                 self.inventory_update.result_traceback = tb
awx/main/management/commands/run_dispatcher.py (new file, 124 lines)
@@ -0,0 +1,124 @@
# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved.
import os
import logging
from multiprocessing import Process

from django.conf import settings
from django.core.cache import cache as django_cache
from django.core.management.base import BaseCommand
from django.db import connection as django_connection
from kombu import Connection, Exchange, Queue

from awx.main.dispatch import get_local_queuename, reaper
from awx.main.dispatch.control import Control
from awx.main.dispatch.pool import AutoscalePool
from awx.main.dispatch.worker import AWXConsumer, TaskWorker

logger = logging.getLogger('awx.main.dispatch')


def construct_bcast_queue_name(common_name):
    return common_name.encode('utf8') + '_' + settings.CLUSTER_HOST_ID


class Command(BaseCommand):
    help = 'Launch the task dispatcher'

    def add_arguments(self, parser):
        parser.add_argument('--status', dest='status', action='store_true',
                            help='print the internal state of any running dispatchers')
        parser.add_argument('--running', dest='running', action='store_true',
                            help='print the UUIDs of any tasks managed by this dispatcher')
        parser.add_argument('--reload', dest='reload', action='store_true',
                            help=('cause the dispatcher to recycle all of its worker processes; '
                                  'running jobs will run to completion first'))

    def beat(self):
        from celery import app
        from celery.beat import PersistentScheduler
        from celery.apps import beat

        class AWXScheduler(PersistentScheduler):

            def __init__(self, *args, **kwargs):
                self.ppid = os.getppid()
                super(AWXScheduler, self).__init__(*args, **kwargs)

            def setup_schedule(self):
                super(AWXScheduler, self).setup_schedule()
                self.update_from_dict(settings.CELERYBEAT_SCHEDULE)

            def tick(self, *args, **kwargs):
                if os.getppid() != self.ppid:
                    # if the parent PID changes, this process has been orphaned
                    # via e.g., segfault or sigkill, we should exit too
                    raise SystemExit()
                return super(AWXScheduler, self).tick(*args, **kwargs)

            def apply_async(self, entry, publisher, **kwargs):
                task = TaskWorker.resolve_callable(entry.task)
                result, queue = task.apply_async()

                class TaskResult(object):
                    id = result['uuid']

                return TaskResult()

        app = app.App()
        app.conf.BROKER_URL = settings.BROKER_URL
        app.conf.CELERY_TASK_RESULT_EXPIRES = False
        beat.Beat(
            30,
            app,
            schedule='/var/lib/awx/beat.db', scheduler_cls=AWXScheduler
        ).run()

    def handle(self, *arg, **options):
        if options.get('status'):
            print Control('dispatcher').status()
            return
        if options.get('running'):
            print Control('dispatcher').running()
            return
        if options.get('reload'):
            return Control('dispatcher').control({'control': 'reload'})

        # It's important to close these because we're _about_ to fork, and we
        # don't want the forked processes to inherit the open sockets
        # for the DB and memcached connections (that way lies race conditions)
        django_connection.close()
        django_cache.close()
        beat = Process(target=self.beat)
        beat.daemon = True
        beat.start()

        reaper.reap()
        consumer = None
        with Connection(settings.BROKER_URL) as conn:
            try:
                bcast = 'tower_broadcast_all'
                queues = [
                    Queue(q, Exchange(q), routing_key=q)
                    for q in (settings.AWX_CELERY_QUEUES_STATIC + [get_local_queuename()])
                ]
                queues.append(
                    Queue(
                        construct_bcast_queue_name(bcast),
                        exchange=Exchange(bcast, type='fanout'),
                        routing_key=bcast,
                        reply=True
                    )
                )
                consumer = AWXConsumer(
                    'dispatcher',
                    conn,
                    TaskWorker(),
                    queues,
                    AutoscalePool(min_workers=4)
                )
                consumer.run()
            except KeyboardInterrupt:
                logger.debug('Terminating Task Dispatcher')
                if consumer:
                    consumer.stop()
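Worth spelling out how the fanout/broadcast support from the commit message works here: every node binds its own uniquely named queue to the shared fanout exchange, so each node receives its own copy of a broadcast message. Illustratively, with CLUSTER_HOST_ID = 'awx-node-1':

    construct_bcast_queue_name('tower_broadcast_all')
    # -> 'tower_broadcast_all_awx-node-1', bound to the fanout exchange
    #    'tower_broadcast_all' alongside every other node's private copy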
(deleted file)
@@ -1,66 +0,0 @@
import datetime
import os
import signal
import subprocess
import sys
import time

from celery import Celery
from django.core.management.base import BaseCommand
from django.conf import settings


class Command(BaseCommand):
    """Watch local celery workers"""
    help = ("Sends a periodic ping to the local celery process over AMQP to ensure "
            "it's responsive; this command is only intended to run in an environment "
            "where celeryd is running")

    #
    # Just because celery is _running_ doesn't mean it's _working_; it's
    # imperative that celery workers are _actually_ handling AMQP messages on
    # their appropriate queues for awx to function. Unfortunately, we've been
    # plagued by a variety of bugs in celery that cause it to hang and become
    # an unresponsive zombie, such as:
    #
    # https://github.com/celery/celery/issues/4185
    # https://github.com/celery/celery/issues/4457
    #
    # The goal of this code is to periodically send a broadcast AMQP message to
    # the celery process on the local host via celery.app.control.ping;
    # if that _fails_, we attempt to determine the pid of the celery process
    # and send SIGHUP (which tends to resolve these sorts of issues for us).
    #

    INTERVAL = 60

    def _log(self, msg):
        sys.stderr.write(datetime.datetime.utcnow().isoformat())
        sys.stderr.write(' ')
        sys.stderr.write(msg)
        sys.stderr.write('\n')

    def handle(self, **options):
        app = Celery('awx')
        app.config_from_object('django.conf:settings')
        while True:
            try:
                pongs = app.control.ping(['celery@{}'.format(settings.CLUSTER_HOST_ID)], timeout=30)
            except Exception:
                pongs = []
            if not pongs:
                self._log('celery is not responsive to ping over local AMQP')
                pid = self.getpid()
                if pid:
                    self._log('sending SIGHUP to {}'.format(pid))
                    os.kill(pid, signal.SIGHUP)
            time.sleep(self.INTERVAL)

    def getpid(self):
        cmd = 'supervisorctl pid tower-processes:awx-celeryd'
        if os.path.exists('/supervisor_task.conf'):
            cmd = 'supervisorctl -c /supervisor_task.conf pid tower-processes:celery'
        try:
            return int(subprocess.check_output(cmd, shell=True))
        except Exception:
            self._log('could not detect celery pid')
@@ -32,7 +32,7 @@ __all__ = ('Instance', 'InstanceGroup', 'JobOrigin', 'TowerScheduleState',)


 def validate_queuename(v):
-    # celery and kombu don't play nice with unicode in queue names
+    # kombu doesn't play nice with unicode in queue names
     if v:
         try:
             '{}'.format(v.decode('utf-8'))
@@ -27,9 +27,6 @@ from rest_framework.exceptions import ParseError
 # Django-Polymorphic
 from polymorphic.models import PolymorphicModel

-# Django-Celery
-from djcelery.models import TaskMeta

 # AWX
 from awx.main.models.base import *  # noqa
 from awx.main.models.mixins import ResourceMixin, TaskManagerUnifiedJobMixin

@@ -1112,14 +1109,6 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique
                 pass
         return None

-    @property
-    def celery_task(self):
-        try:
-            if self.celery_task_id:
-                return TaskMeta.objects.get(task_id=self.celery_task_id)
-        except TaskMeta.DoesNotExist:
-            pass

     def get_passwords_needed_to_start(self):
         return []

@@ -1224,29 +1213,6 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique

         return (True, opts)

-    def start_celery_task(self, opts, error_callback, success_callback, queue):
-        kwargs = {
-            'link_error': error_callback,
-            'link': success_callback,
-            'queue': None,
-            'task_id': None,
-        }
-        if not self.celery_task_id:
-            raise RuntimeError("Expected celery_task_id to be set on model.")
-        kwargs['task_id'] = self.celery_task_id
-        task_class = self._get_task_class()
-        kwargs['queue'] = queue
-        task_class().apply_async([self.pk], opts, **kwargs)
-
-    def start(self, error_callback, success_callback, **kwargs):
-        '''
-        Start the task running via Celery.
-        '''
-        (res, opts) = self.pre_start(**kwargs)
-        if res:
-            self.start_celery_task(opts, error_callback, success_callback)
-        return res

     def signal_start(self, **kwargs):
         """Notify the task runner system to begin work on this task."""

@@ -1286,42 +1252,6 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique
     def can_cancel(self):
         return bool(self.status in CAN_CANCEL)

-    def _force_cancel(self):
-        # Update the status to 'canceled' if we can detect that the job
-        # really isn't running (i.e. celery has crashed or forcefully
-        # killed the worker).
-        task_statuses = ('STARTED', 'SUCCESS', 'FAILED', 'RETRY', 'REVOKED')
-        try:
-            taskmeta = self.celery_task
-            if not taskmeta or taskmeta.status not in task_statuses:
-                return
-            from celery import current_app
-            i = current_app.control.inspect()
-            for v in (i.active() or {}).values():
-                if taskmeta.task_id in [x['id'] for x in v]:
-                    return
-            for v in (i.reserved() or {}).values():
-                if taskmeta.task_id in [x['id'] for x in v]:
-                    return
-            for v in (i.revoked() or {}).values():
-                if taskmeta.task_id in [x['id'] for x in v]:
-                    return
-            for v in (i.scheduled() or {}).values():
-                if taskmeta.task_id in [x['id'] for x in v]:
-                    return
-            instance = self.__class__.objects.get(pk=self.pk)
-            if instance.can_cancel:
-                instance.status = 'canceled'
-                update_fields = ['status']
-                if not instance.job_explanation:
-                    instance.job_explanation = 'Forced cancel'
-                    update_fields.append('job_explanation')
-                instance.save(update_fields=update_fields)
-                self.websocket_emit_status("canceled")
-        except Exception:  # FIXME: Log this exception!
-            if settings.DEBUG:
-                raise

     def _build_job_explanation(self):
         if not self.job_explanation:
             return 'Previous Task Canceled: {"job_type": "%s", "job_name": "%s", "job_id": "%s"}' % \

@@ -1345,8 +1275,6 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique
             cancel_fields.append('job_explanation')
         self.save(update_fields=cancel_fields)
         self.websocket_emit_status("canceled")
-        if settings.BROKER_URL.startswith('amqp://'):
-            self._force_cancel()
         return self.cancel_flag

     @property

@@ -1402,7 +1330,7 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique
             r['{}_user_last_name'.format(name)] = created_by.last_name
         return r

-    def get_celery_queue_name(self):
+    def get_queue_name(self):
         return self.controller_node or self.execution_node or settings.CELERY_DEFAULT_QUEUE

     def is_isolated(self):
@@ -481,9 +481,3 @@ class WorkflowJob(UnifiedJob, WorkflowJobOptions, SurveyJobMixin, JobNotificatio
     @property
     def preferred_instance_groups(self):
         return []

-    '''
-    A WorkflowJob is a virtual job. It doesn't result in a celery task.
-    '''
-    def start_celery_task(self, opts, error_callback, success_callback, queue):
-        return None
@@ -2,7 +2,7 @@
 # All Rights Reserved

 # Python
-from datetime import datetime, timedelta
+from datetime import timedelta
 import logging
 import uuid
 import json

@@ -11,18 +11,13 @@ import random
 from sets import Set

 # Django
 from django.conf import settings
-from django.core.cache import cache
-from django.db import transaction, connection, DatabaseError
+from django.db import transaction, connection
 from django.utils.translation import ugettext_lazy as _
-from django.utils.timezone import now as tz_now, utc
-from django.db.models import Q
-from django.contrib.contenttypes.models import ContentType
+from django.utils.timezone import now as tz_now

 # AWX
 from awx.main.models import (
     AdHocCommand,
     Instance,
     InstanceGroup,
     InventorySource,
     InventoryUpdate,

@@ -30,21 +25,15 @@ from awx.main.models import (
     Project,
     ProjectUpdate,
     SystemJob,
     UnifiedJob,
     WorkflowJob,
 )
 from awx.main.scheduler.dag_workflow import WorkflowDAG
 from awx.main.utils.pglock import advisory_lock
 from awx.main.utils import get_type_for_model
 from awx.main.signals import disable_activity_stream

 from awx.main.scheduler.dependency_graph import DependencyGraph
 from awx.main.utils import decrypt_field

-# Celery
-from celery import Celery
-from celery.app.control import Inspect


 logger = logging.getLogger('awx.main.scheduler')

@@ -85,79 +74,6 @@ class TaskManager():
                           key=lambda task: task.created)
         return all_tasks

-    '''
-    Tasks that are running and SHOULD have a celery task.
-    {
-        'execution_node': [j1, j2,...],
-        'execution_node': [j3],
-        ...
-    }
-    '''
-    def get_running_tasks(self):
-        execution_nodes = {}
-        waiting_jobs = []
-        now = tz_now()
-        workflow_ctype_id = ContentType.objects.get_for_model(WorkflowJob).id
-        jobs = UnifiedJob.objects.filter((Q(status='running') |
-                                          Q(status='waiting', modified__lte=now - timedelta(seconds=60))) &
-                                         ~Q(polymorphic_ctype_id=workflow_ctype_id))
-        for j in jobs:
-            if j.execution_node:
-                execution_nodes.setdefault(j.execution_node, []).append(j)
-            else:
-                waiting_jobs.append(j)
-        return (execution_nodes, waiting_jobs)
-
-    '''
-    Tasks that are currently running in celery
-
-    Transform:
-    {
-        "celery@ec2-54-204-222-62.compute-1.amazonaws.com": [],
-        "celery@ec2-54-163-144-168.compute-1.amazonaws.com": [{
-            ...
-            "id": "5238466a-f8c7-43b3-9180-5b78e9da8304",
-            ...
-        }, {
-            ...,
-        }, ...]
-    }
-
-    to:
-    {
-        "ec2-54-204-222-62.compute-1.amazonaws.com": [
-            "5238466a-f8c7-43b3-9180-5b78e9da8304",
-            "5238466a-f8c7-43b3-9180-5b78e9da8306",
-            ...
-        ]
-    }
-    '''
-    def get_active_tasks(self):
-        if not hasattr(settings, 'IGNORE_CELERY_INSPECTOR'):
-            app = Celery('awx')
-            app.config_from_object('django.conf:settings')
-            inspector = Inspect(app=app)
-            active_task_queues = inspector.active()
-        else:
-            logger.warn("Ignoring celery task inspector")
-            active_task_queues = None
-
-        queues = None
-
-        if active_task_queues is not None:
-            queues = {}
-            for queue in active_task_queues:
-                active_tasks = set()
-                map(lambda at: active_tasks.add(at['id']), active_task_queues[queue])
-
-                # celery worker name is of the form celery@myhost.com
-                queue_name = queue.split('@')
-                queue_name = queue_name[1 if len(queue_name) > 1 else 0]
-                queues[queue_name] = active_tasks
-        else:
-            return (None, None)
-
-        return (active_task_queues, queues)

     def get_latest_project_update_tasks(self, all_sorted_tasks):
         project_ids = Set()

@@ -256,9 +172,6 @@ class TaskManager():
                               rampart_group.name, task.log_format))
             return

-        error_handler = handle_work_error.s(subtasks=[task_actual] + dependencies)
-        success_handler = handle_work_success.s(task_actual=task_actual)

         task.status = 'waiting'

         (start_status, opts) = task.pre_start()

@@ -300,11 +213,23 @@ class TaskManager():

         def post_commit():
             task.websocket_emit_status(task.status)
-            if task.status != 'failed':
-                task.start_celery_task(opts,
-                                       error_callback=error_handler,
-                                       success_callback=success_handler,
-                                       queue=task.get_celery_queue_name())
+            if task.status != 'failed' and type(task) is not WorkflowJob:
+                task_cls = task._get_task_class()
+                task_cls.apply_async(
+                    [task.pk],
+                    opts,
+                    queue=task.get_queue_name(),
+                    uuid=task.celery_task_id,
+                    callbacks=[{
+                        'task': handle_work_success.name,
+                        'kwargs': {'task_actual': task_actual}
+                    }],
+                    errbacks=[{
+                        'task': handle_work_error.name,
+                        'args': [task.celery_task_id],
+                        'kwargs': {'subtasks': [task_actual] + dependencies}
+                    }],
+                )

         connection.on_commit(post_commit)
@@ -529,105 +454,6 @@ class TaskManager():
|
||||
if not found_acceptable_queue:
|
||||
logger.debug(six.text_type("{} couldn't be scheduled on graph, waiting for next cycle").format(task.log_format))
|
||||
|
||||
def fail_jobs_if_not_in_celery(self, node_jobs, active_tasks, celery_task_start_time,
|
||||
isolated=False):
|
||||
for task in node_jobs:
|
||||
if (task.celery_task_id not in active_tasks and not hasattr(settings, 'IGNORE_CELERY_INSPECTOR')):
|
||||
if isinstance(task, WorkflowJob):
|
||||
continue
|
||||
if task.modified > celery_task_start_time:
|
||||
continue
|
||||
new_status = 'failed'
|
||||
if isolated:
|
||||
new_status = 'error'
|
||||
task.status = new_status
|
||||
task.start_args = '' # blank field to remove encrypted passwords
|
||||
if isolated:
|
||||
# TODO: cancel and reap artifacts of lost jobs from heartbeat
|
||||
task.job_explanation += ' '.join((
|
||||
'Task was marked as running in Tower but its ',
|
||||
'controller management daemon was not present in',
|
||||
'the job queue, so it has been marked as failed.',
|
||||
'Task may still be running, but contactability is unknown.'
|
||||
))
|
||||
else:
|
||||
task.job_explanation += ' '.join((
|
||||
'Task was marked as running in Tower but was not present in',
|
||||
'the job queue, so it has been marked as failed.',
|
||||
))
|
||||
try:
|
||||
task.save(update_fields=['status', 'start_args', 'job_explanation'])
|
||||
except DatabaseError:
|
||||
logger.error("Task {} DB error in marking failed. Job possibly deleted.".format(task.log_format))
|
||||
continue
|
||||
if hasattr(task, 'send_notification_templates'):
|
||||
task.send_notification_templates('failed')
|
||||
task.websocket_emit_status(new_status)
|
||||
logger.error("{}Task {} has no record in celery. Marking as failed".format(
|
||||
'Isolated ' if isolated else '', task.log_format))
|
||||
|
||||
def cleanup_inconsistent_celery_tasks(self):
|
||||
'''
|
||||
Rectify tower db <-> celery inconsistent view of jobs state
|
||||
'''
|
||||
last_cleanup = cache.get('last_celery_task_cleanup') or datetime.min.replace(tzinfo=utc)
|
||||
if (tz_now() - last_cleanup).seconds < settings.AWX_INCONSISTENT_TASK_INTERVAL:
|
||||
return
|
||||
|
||||
logger.debug("Failing inconsistent running jobs.")
|
||||
celery_task_start_time = tz_now()
|
||||
active_task_queues, active_queues = self.get_active_tasks()
|
||||
cache.set('last_celery_task_cleanup', tz_now())
|
||||
|
||||
if active_queues is None:
|
||||
logger.error('Failed to retrieve active tasks from celery')
|
||||
return None
|
||||
|
||||
'''
|
||||
Only consider failing tasks on instances for which we obtained a task
|
||||
list from celery for.
|
||||
'''
|
||||
running_tasks, waiting_tasks = self.get_running_tasks()
|
||||
all_celery_task_ids = []
|
||||
for node, node_jobs in active_queues.iteritems():
|
||||
all_celery_task_ids.extend(node_jobs)
|
||||
|
||||
self.fail_jobs_if_not_in_celery(waiting_tasks, all_celery_task_ids, celery_task_start_time)
|
||||
|
||||
for node, node_jobs in running_tasks.iteritems():
|
||||
isolated = False
|
||||
if node in active_queues:
|
||||
active_tasks = active_queues[node]
|
||||
else:
|
||||
'''
|
||||
Node task list not found in celery. We may branch into cases:
|
||||
- instance is unknown to tower, system is improperly configured
|
||||
- instance is reported as down, then fail all jobs on the node
|
||||
- instance is an isolated node, then check running tasks
|
||||
among all allowed controller nodes for management process
|
||||
- valid healthy instance not included in celery task list
|
||||
probably a netsplit case, leave it alone
|
||||
'''
|
||||
instance = Instance.objects.filter(hostname=node).first()
|
||||
|
||||
if instance is None:
|
||||
logger.error("Execution node Instance {} not found in database. "
|
||||
"The node is currently executing jobs {}".format(
|
||||
node, [j.log_format for j in node_jobs]))
|
||||
active_tasks = []
|
||||
elif instance.capacity == 0:
|
||||
active_tasks = []
|
||||
elif instance.rampart_groups.filter(controller__isnull=False).exists():
|
||||
active_tasks = all_celery_task_ids
|
||||
isolated = True
|
||||
else:
|
||||
continue
|
||||
|
||||
self.fail_jobs_if_not_in_celery(
|
||||
node_jobs, active_tasks, celery_task_start_time,
|
||||
isolated=isolated
|
||||
)
|
||||
|
||||
def calculate_capacity_consumed(self, tasks):
|
||||
self.graph = InstanceGroup.objects.capacity_values(tasks=tasks, graph=self.graph)
|
||||
|
||||
@@ -687,7 +513,6 @@ class TaskManager():
|
||||
return
|
||||
logger.debug("Starting Scheduler")
|
||||
|
||||
self.cleanup_inconsistent_celery_tasks()
|
||||
finished_wfjs = self._schedule()
|
||||
|
||||
# Operations whose queries rely on modifications made during the atomic scheduling session
|
||||
|
@@ -2,30 +2,24 @@
# Python
import logging

# Celery
from celery import shared_task

# AWX
from awx.main.scheduler import TaskManager
from awx.main.dispatch.publish import task

logger = logging.getLogger('awx.main.scheduler')

# TODO: move logic to UnifiedJob model and use bind=True feature of celery.
# Would we need the request loop then? I think so. Even if we get the in-memory
# updated model, the call to schedule() may get stale data.


@shared_task()
@task()
def run_job_launch(job_id):
    TaskManager().schedule()


@shared_task()
@task()
def run_job_complete(job_id):
    TaskManager().schedule()


@shared_task()
@task()
def run_task_manager():
    logger.debug("Running Tower task manager.")
    TaskManager().schedule()
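Each `@shared_task()` above is swapped one-for-one for the new `@task()` decorator from `awx.main.dispatch.publish`. A minimal sketch of how a function registered this way might be published, assuming only the interfaces that appear elsewhere in this diff (`apply_async`, `delay`) and a hypothetical queue name:

    from awx.main.dispatch.publish import task

    @task(queue='example_queue')   # 'example_queue' is a hypothetical name
    def noop(job_id):
        pass

    # Per the functional tests later in this diff, apply_async() returns
    # the serialized message and the queue it was routed to.
    message, queue = noop.apply_async([1])
    noop.delay(1)   # shorthand form, as used by run_job_complete callers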
@@ -13,12 +13,11 @@ import logging
import os
import re
import shutil
import six
import stat
import sys
import tempfile
import time
import traceback
import six
import urlparse
from distutils.version import LooseVersion as Version
import yaml
@@ -28,12 +27,6 @@ try:
except Exception:
    psutil = None

# Celery
from kombu import Queue, Exchange
from kombu.common import Broadcast
from celery import Task, shared_task
from celery.signals import celeryd_init, worker_shutdown

# Django
from django.conf import settings
from django.db import transaction, DatabaseError, IntegrityError
@@ -58,10 +51,12 @@ from awx.main.constants import ACTIVE_STATES
from awx.main.exceptions import AwxTaskError
from awx.main.queue import CallbackQueueDispatcher
from awx.main.expect import run, isolated_manager
from awx.main.dispatch.publish import task
from awx.main.dispatch import get_local_queuename, reaper
from awx.main.utils import (get_ansible_version, get_ssh_version, decrypt_field, update_scm_url,
                            check_proot_installed, build_proot_temp_dir, get_licenser,
                            wrap_args_with_proot, OutputEventFilter, OutputVerboseFilter, ignore_inventory_computed_fields,
                            ignore_inventory_group_removal, get_type_for_model, extract_ansible_vars)
                            ignore_inventory_group_removal, extract_ansible_vars)
from awx.main.utils.safe_yaml import safe_dump, sanitize_jinja
from awx.main.utils.reload import stop_local_services
from awx.main.utils.pglock import advisory_lock
@@ -87,52 +82,7 @@ Try upgrading OpenSSH or providing your private key in a different format. \
logger = logging.getLogger('awx.main.tasks')
def log_celery_failure(self, exc, task_id, args, kwargs, einfo):
    try:
        if getattr(exc, 'is_awx_task_error', False):
            # Error caused by user / tracked in job output
            logger.warning(six.text_type("{}").format(exc))
        elif isinstance(self, BaseTask):
            logger.exception(six.text_type(
                '{!s} {!s} execution encountered exception.')
                .format(get_type_for_model(self.model), args[0]))
        else:
            logger.exception(six.text_type('Task {} encountered exception.').format(self.name), exc_info=exc)
    except Exception:
        # It's fairly critical that this code _not_ raise exceptions on logging
        # If you configure external logging in a way that _it_ fails, there's
        # not a lot we can do here; sys.stderr.write is a final hail mary
        _, _, tb = sys.exc_info()
        traceback.print_tb(tb)


@celeryd_init.connect
def celery_startup(conf=None, **kwargs):
    #
    # When celeryd starts, if the instance cannot be found in the database,
    # automatically register it. This is mostly useful for openshift-based
    # deployments where:
    #
    # 2 Instances come online
    # Instance B encounters a network blip, Instance A notices, and
    # deprovisions it
    # Instance B's connectivity is restored, celeryd starts, and it
    # re-registers itself
    #
    # In traditional container-less deployments, instances don't get
    # deprovisioned when they miss their heartbeat, so this code is mostly a
    # no-op.
    #
    if kwargs['instance'].hostname != 'celery@{}'.format(settings.CLUSTER_HOST_ID):
        error = six.text_type('celery -n {} does not match settings.CLUSTER_HOST_ID={}').format(
            instance.hostname, settings.CLUSTER_HOST_ID
        )
        logger.error(error)
        raise RuntimeError(error)
    (changed, tower_instance) = Instance.objects.get_or_register()
    if changed:
        logger.info(six.text_type("Registered tower node '{}'").format(tower_instance.hostname))

def dispatch_startup():
    startup_logger = logging.getLogger('awx.main.tasks')
    startup_logger.info("Syncing Schedules")
    for sch in Schedule.objects.all():
@@ -144,34 +94,44 @@ def celery_startup(conf=None, **kwargs):
        except Exception:
            logger.exception(six.text_type("Failed to rebuild schedule {}.").format(sch))

    # set the queues we want to bind to dynamically at startup
    queues = []
    me = Instance.objects.me()
    for q in [me.hostname] + settings.AWX_CELERY_QUEUES_STATIC:
        q = q.encode('utf-8')
        queues.append(Queue(q, Exchange(q), routing_key=q))
    for q in settings.AWX_CELERY_BCAST_QUEUES_STATIC:
        queues.append(Broadcast(q.encode('utf-8')))
    conf.CELERY_QUEUES = list(set(queues))

    # Expedite the first heartbeat run so a node comes online quickly.
    cluster_node_heartbeat.apply([])
    #
    # When the dispatcher starts, if the instance cannot be found in the database,
    # automatically register it. This is mostly useful for openshift-based
    # deployments where:
    #
    # 2 Instances come online
    # Instance B encounters a network blip, Instance A notices, and
    # deprovisions it
    # Instance B's connectivity is restored, the dispatcher starts, and it
    # re-registers itself
    #
    # In traditional container-less deployments, instances don't get
    # deprovisioned when they miss their heartbeat, so this code is mostly a
    # no-op.
    #
    apply_cluster_membership_policies()
    cluster_node_heartbeat()
    if Instance.objects.me().is_controller():
        awx_isolated_heartbeat()


@worker_shutdown.connect
def inform_cluster_of_shutdown(*args, **kwargs):
def inform_cluster_of_shutdown():
    try:
        this_inst = Instance.objects.get(hostname=settings.CLUSTER_HOST_ID)
        this_inst.capacity = 0  # No thank you to new jobs while shut down
        this_inst.save(update_fields=['capacity', 'modified'])
        try:
            reaper.reap(this_inst)
        except Exception:
            logger.exception('failed to reap jobs for {}'.format(this_inst.hostname))
        logger.warning(six.text_type('Normal shutdown signal for instance {}, '
                                     'removed self from capacity pool.').format(this_inst.hostname))
    except Exception:
        logger.exception('Encountered problem with normal shutdown signal.')


@shared_task(bind=True, queue=settings.CELERY_DEFAULT_QUEUE)
def apply_cluster_membership_policies(self):
@task()
def apply_cluster_membership_policies():
    started_waiting = time.time()
    with advisory_lock('cluster_policy_lock', wait=True):
        lock_time = time.time() - started_waiting
@@ -280,20 +240,18 @@ def apply_cluster_membership_policies(self):
    logger.info('Cluster policy computation finished in {} seconds'.format(time.time() - started_compute))


@shared_task(exchange='tower_broadcast_all', bind=True)
def handle_setting_changes(self, setting_keys):
@task(queue='tower_broadcast_all', exchange_type='fanout')
def handle_setting_changes(setting_keys):
    orig_len = len(setting_keys)
    for i in range(orig_len):
        for dependent_key in settings_registry.get_dependent_settings(setting_keys[i]):
            setting_keys.append(dependent_key)
    logger.warn('Processing cache changes, task args: {0.args!r} kwargs: {0.kwargs!r}'.format(
        self.request))
    cache_keys = set(setting_keys)
    logger.debug('cache delete_many(%r)', cache_keys)
    cache.delete_many(cache_keys)
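Per the commit message, fanout/broadcast delivery is currently used only for `conf.Setting` memcached flushes, and `handle_setting_changes` above is that task. A sketch of declaring and publishing a fanout task, assuming the decorator arguments behave as shown here (every dispatcher bound to the broadcast queue receives a copy of the message):

    from awx.main.dispatch.publish import task

    @task(queue='tower_broadcast_all', exchange_type='fanout')
    def expire_cache(keys):                        # hypothetical example task
        from django.core.cache import cache
        cache.delete_many(set(keys))

    expire_cache.delay(['AUTH_LDAP_SERVER_URI'])   # runs on every node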
@shared_task(queue=settings.CELERY_DEFAULT_QUEUE)
@task()
def send_notifications(notification_list, job_id=None):
    if not isinstance(notification_list, list):
        raise TypeError("notification_list should be of type list")
@@ -322,8 +280,8 @@ def send_notifications(notification_list, job_id=None):
        logger.exception(six.text_type('Error saving notification {} result.').format(notification.id))


@shared_task(bind=True, queue=settings.CELERY_DEFAULT_QUEUE)
def run_administrative_checks(self):
@task()
def run_administrative_checks():
    logger.warn("Running administrative checks.")
    if not settings.TOWER_ADMIN_ALERTS:
        return
@@ -344,8 +302,8 @@ def run_administrative_checks(self):
        fail_silently=True)


@shared_task(bind=True)
def purge_old_stdout_files(self):
@task(queue=get_local_queuename)
def purge_old_stdout_files():
    nowtime = time.time()
    for f in os.listdir(settings.JOBOUTPUT_ROOT):
        if os.path.getctime(os.path.join(settings.JOBOUTPUT_ROOT,f)) < nowtime - settings.LOCAL_STDOUT_EXPIRE_TIME:
@@ -353,8 +311,8 @@ def purge_old_stdout_files(self):
            logger.info(six.text_type("Removing {}").format(os.path.join(settings.JOBOUTPUT_ROOT,f)))


@shared_task(bind=True)
def cluster_node_heartbeat(self):
@task(queue=get_local_queuename)
def cluster_node_heartbeat():
    logger.debug("Cluster node heartbeat task.")
    nowtime = now()
    instance_list = list(Instance.objects.all_non_isolated())
@@ -397,9 +355,13 @@ def cluster_node_heartbeat(self):
            this_inst.version))
        # Shutdown signal will set the capacity to zero to ensure no Jobs get added to this instance.
        # The heartbeat task will reset the capacity to the system capacity after upgrade.
        stop_local_services(['uwsgi', 'celery', 'beat', 'callback'], communicate=False)
        stop_local_services(communicate=False)
        raise RuntimeError("Shutting down.")
    for other_inst in lost_instances:
        try:
            reaper.reap(other_inst)
        except Exception:
            logger.exception('failed to reap jobs for {}'.format(other_inst.hostname))
        try:
            # Capacity could already be 0 because:
            # * It's a new node and it never had a heartbeat
@@ -424,8 +386,8 @@ def cluster_node_heartbeat(self):
            logger.exception(six.text_type('Error marking {} as lost').format(other_inst.hostname))
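Note that the `queue` argument to `@task()` above is the callable `get_local_queuename` rather than a string; `test_apply_with_callable_queuename` later in this diff shows callables being resolved when a message is published, so each node routes these heartbeat-style tasks to its own queue. A sketch under that assumption:

    from awx.main.dispatch import get_local_queuename
    from awx.main.dispatch.publish import task

    @task(queue=get_local_queuename)
    def local_housekeeping():                  # hypothetical example task
        pass

    message, queue = local_housekeeping.apply_async([])
    # queue resolves to this node's own queue (settings.CLUSTER_HOST_ID)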
@shared_task(bind=True)
def awx_isolated_heartbeat(self):
@task(queue=get_local_queuename)
def awx_isolated_heartbeat():
    local_hostname = settings.CLUSTER_HOST_ID
    logger.debug("Controlling node checking for any isolated management tasks.")
    poll_interval = settings.AWX_ISOLATED_PERIODIC_CHECK
@@ -452,8 +414,8 @@ def awx_isolated_heartbeat(self):
    isolated_manager.IsolatedManager.health_check(isolated_instance_qs, awx_application_version)


@shared_task(bind=True, queue=settings.CELERY_DEFAULT_QUEUE)
def awx_periodic_scheduler(self):
@task()
def awx_periodic_scheduler():
    run_now = now()
    state = TowerScheduleState.get_solo()
    last_run = state.schedule_last_run
@@ -503,8 +465,8 @@ def awx_periodic_scheduler(self):
    state.save()


@shared_task(bind=True, queue=settings.CELERY_DEFAULT_QUEUE)
def handle_work_success(self, result, task_actual):
@task()
def handle_work_success(task_actual):
    try:
        instance = UnifiedJob.get_instance_by_type(task_actual['type'], task_actual['id'])
    except ObjectDoesNotExist:
@@ -517,7 +479,7 @@ def handle_work_success(self, result, task_actual):
    run_job_complete.delay(instance.id)


@shared_task(queue=settings.CELERY_DEFAULT_QUEUE)
@task()
def handle_work_error(task_id, *args, **kwargs):
    subtasks = kwargs.get('subtasks', None)
    logger.debug('Executing error task id %s, subtasks: %s' % (task_id, str(subtasks)))
@@ -558,7 +520,7 @@ def handle_work_error(task_id, *args, **kwargs):
        pass
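The success/error hooks lose their celery-specific arguments here: `handle_work_success` now takes only `task_actual`, matching the `callbacks`/`errbacks` message format the task manager publishes earlier in this diff. A sketch of the shape of those messages, with hypothetical values:

    # On job completion the dispatcher presumably invokes each callback
    # entry with its stored kwargs; values below are assumed for illustration.
    callback = {
        'task': 'awx.main.tasks.handle_work_success',   # handle_work_success.name
        'kwargs': {'task_actual': {'type': 'job', 'id': 42}},
    }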
@shared_task(queue=settings.CELERY_DEFAULT_QUEUE)
@task()
def update_inventory_computed_fields(inventory_id, should_update_hosts=True):
    '''
    Signal handler and wrapper around inventory.update_computed_fields to
@@ -578,7 +540,7 @@ def update_inventory_computed_fields(inventory_id, should_update_hosts=True):
        raise


@shared_task(queue=settings.CELERY_DEFAULT_QUEUE)
@task()
def update_host_smart_inventory_memberships():
    try:
        with transaction.atomic():
@@ -603,8 +565,8 @@ def update_host_smart_inventory_memberships():
            smart_inventory.update_computed_fields(update_groups=False, update_hosts=False)


@shared_task(bind=True, queue=settings.CELERY_DEFAULT_QUEUE, max_retries=5)
def delete_inventory(self, inventory_id, user_id):
@task()
def delete_inventory(inventory_id, user_id):
    # Delete inventory as user
    if user_id is None:
        user = None
@@ -629,7 +591,7 @@ def delete_inventory(self, inventory_id, user_id):
        return
    except DatabaseError:
        logger.exception('Database error deleting inventory {}, but will retry.'.format(inventory_id))
        self.retry(countdown=10)
        # TODO: self.retry(countdown=10)


def with_path_cleanup(f):
@@ -650,8 +612,7 @@ def with_path_cleanup(f):
    return _wrapped
class BaseTask(Task):
    name = None
class BaseTask(object):
    model = None
    event_model = None
    abstract = True
@@ -945,14 +906,11 @@ class BaseTask(Task):
        if instance.cancel_flag:
            instance = self.update_model(instance.pk, status='canceled')
        if instance.status != 'running':
            if hasattr(settings, 'CELERY_UNIT_TEST'):
                return
            else:
                # Stop the task chain and prevent starting the job if it has
                # already been canceled.
                instance = self.update_model(pk)
                status = instance.status
                raise RuntimeError('not starting %s task' % instance.status)
            # Stop the task chain and prevent starting the job if it has
            # already been canceled.
            instance = self.update_model(pk)
            status = instance.status
            raise RuntimeError('not starting %s task' % instance.status)

        if not os.path.exists(settings.AWX_PROOT_BASE_PATH):
            raise RuntimeError('AWX_PROOT_BASE_PATH=%s does not exist' % settings.AWX_PROOT_BASE_PATH)
@@ -1085,8 +1043,6 @@ class BaseTask(Task):
            logger.exception(six.text_type('{} Final run hook errored.').format(instance.log_format))
        instance.websocket_emit_status(status)
        if status != 'successful':
            # Raising an exception will mark the job as 'failed' in celery
            # and will stop a task chain from continuing to execute
            if status == 'canceled':
                raise AwxTaskError.TaskCancel(instance, rc)
            else:
@@ -1109,12 +1065,12 @@ class BaseTask(Task):
        return ''


@task()
class RunJob(BaseTask):
    '''
    Celery task to run a job using ansible-playbook.
    Run a job using ansible-playbook.
    '''

    name = 'awx.main.tasks.run_job'
    model = Job
    event_model = JobEvent
    event_data_key = 'job_id'
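`@task()` is applied to the `Run*` classes directly; per the commit message the decorator accepts classes as well as functions, and the functional tests later in this diff dispatch a decorated class by its dotted name and call `run()` on a fresh instance. A sketch of the pattern, with hypothetical names:

    from awx.main.dispatch.publish import task

    @task()
    class RunExample(object):                  # hypothetical task class
        name = 'awx.main.tasks.run_example'

        def run(self, pk):
            print('running %s' % pk)

    # Publishing by class: a dispatcher worker imports the class by name,
    # instantiates it, and invokes run(pk).
    RunExample.apply_async([42])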
@@ -1404,7 +1360,6 @@ class RunJob(BaseTask):
            self.update_model(job.pk, status='failed', job_explanation=error)
            raise RuntimeError(error)
        if job.project and job.project.scm_type:
            job_request_id = '' if self.request.id is None else self.request.id
            pu_ig = job.instance_group
            pu_en = job.execution_node
            if job.is_isolated() is True:
@@ -1417,16 +1372,14 @@ class RunJob(BaseTask):
                    status='running',
                    instance_group = pu_ig,
                    execution_node=pu_en,
                    celery_task_id=job_request_id))
                    celery_task_id=job.celery_task_id))
            # save the associated job before calling run() so that a
            # cancel() call on the job can cancel the project update
            job = self.update_model(job.pk, project_update=local_project_sync)

            project_update_task = local_project_sync._get_task_class()
            try:
                task_instance = project_update_task()
                task_instance.request.id = job_request_id
                task_instance.run(local_project_sync.id)
                project_update_task().run(local_project_sync.id)
                job = self.update_model(job.pk, scm_revision=job.project.scm_revision)
            except Exception:
                local_project_sync.refresh_from_db()
@@ -1436,7 +1389,6 @@ class RunJob(BaseTask):
                                           ('project_update', local_project_sync.name, local_project_sync.id)))
                raise

    def final_run_hook(self, job, status, **kwargs):
        super(RunJob, self).final_run_hook(job, status, **kwargs)
        if job.use_fact_cache:
@@ -1467,9 +1419,9 @@ class RunJob(BaseTask):
            update_inventory_computed_fields.delay(inventory.id, True)


@task()
class RunProjectUpdate(BaseTask):

    name = 'awx.main.tasks.run_project_update'
    model = ProjectUpdate
    event_model = ProjectUpdateEvent
    event_data_key = 'project_update_id'
@@ -1670,7 +1622,6 @@ class RunProjectUpdate(BaseTask):
        return getattr(settings, 'PROJECT_UPDATE_IDLE_TIMEOUT', None)

    def _update_dependent_inventories(self, project_update, dependent_inventory_sources):
        project_request_id = '' if self.request.id is None else self.request.id
        scm_revision = project_update.project.scm_revision
        inv_update_class = InventoryUpdate._get_task_class()
        for inv_src in dependent_inventory_sources:
@@ -1693,13 +1644,10 @@ class RunProjectUpdate(BaseTask):
                    status='running',
                    instance_group=project_update.instance_group,
                    execution_node=project_update.execution_node,
                    celery_task_id=str(project_request_id),
                    source_project_update=project_update))
                    source_project_update=project_update,
                    celery_task_id=project_update.celery_task_id))
            try:
                task_instance = inv_update_class()
                # Runs in the same Celery task as project update
                task_instance.request.id = project_request_id
                task_instance.run(local_inv_update.id)
                inv_update_class().run(local_inv_update.id)
            except Exception:
                logger.exception(six.text_type('{} Unhandled exception updating dependent SCM inventory sources.')
                                 .format(project_update.log_format))
@@ -1804,9 +1752,9 @@ class RunProjectUpdate(BaseTask):
        return getattr(settings, 'AWX_PROOT_ENABLED', False)


@task()
class RunInventoryUpdate(BaseTask):

    name = 'awx.main.tasks.run_inventory_update'
    model = InventoryUpdate
    event_model = InventoryUpdateEvent
    event_data_key = 'inventory_update_id'
@@ -2024,8 +1972,7 @@ class RunInventoryUpdate(BaseTask):
        This dictionary is used by `build_env`, below.
        """
        # Run the superclass implementation.
        super_ = super(RunInventoryUpdate, self).build_passwords
        passwords = super_(inventory_update, **kwargs)
        passwords = super(RunInventoryUpdate, self).build_passwords(inventory_update, **kwargs)

        # Take key fields from the credential in use and add them to the
        # passwords dictionary.
@@ -2188,7 +2135,6 @@ class RunInventoryUpdate(BaseTask):
        if inventory_update.inventory_source:
            source_project = inventory_update.inventory_source.source_project
            if (inventory_update.source=='scm' and inventory_update.launch_type!='scm' and source_project):
                request_id = '' if self.request.id is None else self.request.id
                local_project_sync = source_project.create_project_update(
                    _eager_fields=dict(
                        launch_type="sync",
@@ -2196,16 +2142,14 @@ class RunInventoryUpdate(BaseTask):
                        status='running',
                        execution_node=inventory_update.execution_node,
                        instance_group = inventory_update.instance_group,
                        celery_task_id=request_id))
                        celery_task_id=inventory_update.celery_task_id))
                # associate the inventory update before calling run() so that a
                # cancel() call on the inventory update can cancel the project update
                local_project_sync.scm_inventory_updates.add(inventory_update)

                project_update_task = local_project_sync._get_task_class()
                try:
                    task_instance = project_update_task()
                    task_instance.request.id = request_id
                    task_instance.run(local_project_sync.id)
                    project_update_task().run(local_project_sync.id)
                    inventory_update.inventory_source.scm_last_revision = local_project_sync.project.scm_revision
                    inventory_update.inventory_source.save(update_fields=['scm_last_revision'])
                except Exception:
@@ -2216,12 +2160,12 @@ class RunInventoryUpdate(BaseTask):
                    raise


@task()
class RunAdHocCommand(BaseTask):
    '''
    Celery task to run an ad hoc command using ansible.
    Run an ad hoc command using ansible.
    '''

    name = 'awx.main.tasks.run_ad_hoc_command'
    model = AdHocCommand
    event_model = AdHocCommandEvent
    event_data_key = 'ad_hoc_command_id'
@@ -2382,9 +2326,9 @@ class RunAdHocCommand(BaseTask):
        return getattr(settings, 'AWX_PROOT_ENABLED', False)


@task()
class RunSystemJob(BaseTask):

    name = 'awx.main.tasks.run_system_job'
    model = SystemJob
    event_model = SystemJobEvent
    event_data_key = 'system_job_id'
@@ -2439,9 +2383,9 @@ def _reconstruct_relationships(copy_mapping):
    new_obj.save()


@shared_task(bind=True, queue=settings.CELERY_DEFAULT_QUEUE)
@task()
def deep_copy_model_obj(
    self, model_module, model_name, obj_pk, new_obj_pk,
    model_module, model_name, obj_pk, new_obj_pk,
    user_pk, sub_obj_list, permission_check_func=None
):
    logger.info(six.text_type('Deep copy {} from {} to {}.').format(model_name, obj_pk, new_obj_pk))
@@ -14,7 +14,6 @@ from django.core.urlresolvers import resolve
from django.utils.six.moves.urllib.parse import urlparse
from django.utils import timezone
from django.contrib.auth.models import User
from django.conf import settings
from django.core.serializers.json import DjangoJSONEncoder
from django.db.backends.sqlite3.base import SQLiteCursorWrapper
from jsonbfield.fields import JSONField
@@ -66,17 +65,6 @@ def swagger_autogen(requests=__SWAGGER_REQUESTS__):
    return requests


@pytest.fixture(scope="session", autouse=True)
def celery_memory_broker():
    '''
    FIXME: Not sure how "far" just setting the BROKER_URL will get us.
    We may need to influence CELERY's configuration like we do in the old unit tests (see base.py)

    Allows django signal code to execute without the need for redis
    '''
    settings.BROKER_URL='memory://localhost/'


@pytest.fixture
def user():
    def u(name, is_superuser=False):

@@ -1,13 +1,11 @@
import itertools
import pytest
import mock

# Django
from django.contrib.contenttypes.models import ContentType

# AWX
from awx.main.models import UnifiedJobTemplate, Job, JobTemplate, WorkflowJobTemplate, Project, WorkflowJob, Schedule
from awx.main.models.ha import InstanceGroup


@pytest.mark.django_db
@@ -66,48 +64,6 @@ class TestCreateUnifiedJob:
        assert net_credential in second_job.credentials.all()


@pytest.mark.django_db
class TestIsolatedRuns:

    def test_low_capacity_isolated_instance_selected(self):
        ig = InstanceGroup.objects.create(name='tower')
        iso_ig = InstanceGroup.objects.create(name='thepentagon', controller=ig)
        iso_ig.instances.create(hostname='iso1', capacity=50)
        i2 = iso_ig.instances.create(hostname='iso2', capacity=200)
        job = Job.objects.create(
            instance_group=iso_ig,
            celery_task_id='something',
        )

        mock_async = mock.MagicMock()
        success_callback = mock.MagicMock()
        error_callback = mock.MagicMock()

        class MockTaskClass:
            apply_async = mock_async

        with mock.patch.object(job, '_get_task_class') as task_class:
            task_class.return_value = MockTaskClass
            job.start_celery_task([], error_callback, success_callback, 'thepentagon')
            mock_async.assert_called_with([job.id], [],
                                          link_error=error_callback,
                                          link=success_callback,
                                          queue='thepentagon',
                                          task_id='something')

        i2.capacity = 20
        i2.save()

        with mock.patch.object(job, '_get_task_class') as task_class:
            task_class.return_value = MockTaskClass
            job.start_celery_task([], error_callback, success_callback, 'thepentagon')
            mock_async.assert_called_with([job.id], [],
                                          link_error=error_callback,
                                          link=success_callback,
                                          queue='thepentagon',
                                          task_id='something')


@pytest.mark.django_db
class TestMetaVars:
    '''
@@ -1,19 +1,10 @@
import pytest
import mock
import json
from datetime import timedelta, datetime

from django.core.cache import cache
from django.utils.timezone import now as tz_now
from datetime import timedelta

from awx.main.scheduler import TaskManager
from awx.main.utils import encrypt_field
from awx.main.models import (
    Job,
    Instance,
    WorkflowJob,
)
from awx.main.models.notifications import JobNotificationMixin


@pytest.mark.django_db
@@ -245,140 +236,3 @@ def test_shared_dependencies_launch(default_instance_group, job_template_factory
    iu = [x for x in ii.inventory_updates.all()]
    assert len(pu) == 1
    assert len(iu) == 1


@pytest.mark.django_db
def test_cleanup_interval(mock_cache):
    with mock.patch.multiple('awx.main.scheduler.task_manager.cache', get=mock_cache.get, set=mock_cache.set):
        assert mock_cache.get('last_celery_task_cleanup') is None

        TaskManager().cleanup_inconsistent_celery_tasks()
        last_cleanup = mock_cache.get('last_celery_task_cleanup')
        assert isinstance(last_cleanup, datetime)

        TaskManager().cleanup_inconsistent_celery_tasks()
        assert cache.get('last_celery_task_cleanup') == last_cleanup


class TestReaper():
    @pytest.fixture
    def all_jobs(self, mocker):
        now = tz_now()

        Instance.objects.create(hostname='host1', capacity=100)
        Instance.objects.create(hostname='host2', capacity=100)
        Instance.objects.create(hostname='host3_split', capacity=100)
        Instance.objects.create(hostname='host4_offline', capacity=0)

        j1 = Job.objects.create(status='pending', execution_node='host1')
        j2 = Job.objects.create(status='waiting', celery_task_id='considered_j2')
        j3 = Job.objects.create(status='waiting', celery_task_id='considered_j3')
        j3.modified = now - timedelta(seconds=60)
        j3.save(update_fields=['modified'])
        j4 = Job.objects.create(status='running', celery_task_id='considered_j4', execution_node='host1')
        j5 = Job.objects.create(status='waiting', celery_task_id='reapable_j5')
        j5.modified = now - timedelta(seconds=60)
        j5.save(update_fields=['modified'])
        j6 = Job.objects.create(status='waiting', celery_task_id='considered_j6')
        j6.modified = now - timedelta(seconds=60)
        j6.save(update_fields=['modified'])
        j7 = Job.objects.create(status='running', celery_task_id='considered_j7', execution_node='host2')
        j8 = Job.objects.create(status='running', celery_task_id='reapable_j7', execution_node='host2')
        j9 = Job.objects.create(status='waiting', celery_task_id='reapable_j8')
        j9.modified = now - timedelta(seconds=60)
        j9.save(update_fields=['modified'])
        j10 = Job.objects.create(status='running', celery_task_id='host3_j10', execution_node='host3_split')

        j11 = Job.objects.create(status='running', celery_task_id='host4_j11', execution_node='host4_offline')

        j12 = WorkflowJob.objects.create(status='running', celery_task_id='workflow_job', execution_node='host1')

        js = [j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12]
        for j in js:
            j.save = mocker.Mock(wraps=j.save)
            j.websocket_emit_status = mocker.Mock()
        return js

    @pytest.fixture
    def considered_jobs(self, all_jobs):
        return all_jobs[2:7] + [all_jobs[10]]

    @pytest.fixture
    def running_tasks(self, all_jobs):
        return {
            'host1': [all_jobs[3]],
            'host2': [all_jobs[7], all_jobs[8]],
            'host3_split': [all_jobs[9]],
            'host4_offline': [all_jobs[10]],
        }

    @pytest.fixture
    def waiting_tasks(self, all_jobs):
        return [all_jobs[2], all_jobs[4], all_jobs[5], all_jobs[8]]

    @pytest.fixture
    def reapable_jobs(self, all_jobs):
        return [all_jobs[4], all_jobs[7], all_jobs[10]]

    @pytest.fixture
    def unconsidered_jobs(self, all_jobs):
        return all_jobs[0:1] + all_jobs[5:7]

    @pytest.fixture
    def active_tasks(self):
        return ([], {
            'host1': ['considered_j2', 'considered_j3', 'considered_j4',],
            'host2': ['considered_j6', 'considered_j7'],
        })

    @pytest.mark.django_db
    @mock.patch.object(JobNotificationMixin, 'send_notification_templates')
    @mock.patch.object(TaskManager, 'get_active_tasks', lambda self: ([], []))
    def test_cleanup_inconsistent_task(self, notify, active_tasks, considered_jobs, reapable_jobs, running_tasks, waiting_tasks, mocker, settings):
        settings.AWX_INCONSISTENT_TASK_INTERVAL = 0
        tm = TaskManager()

        tm.get_running_tasks = mocker.Mock(return_value=(running_tasks, waiting_tasks))
        tm.get_active_tasks = mocker.Mock(return_value=active_tasks)

        tm.cleanup_inconsistent_celery_tasks()

        for j in considered_jobs:
            if j not in reapable_jobs:
                j.save.assert_not_called()

        assert notify.call_count == 4
        notify.assert_has_calls([mock.call('failed') for j in reapable_jobs], any_order=True)

        for j in reapable_jobs:
            j.websocket_emit_status.assert_called_once_with('failed')
            assert j.status == 'failed'
            assert j.job_explanation == (
                'Task was marked as running in Tower but was not present in the job queue, so it has been marked as failed.'
            )

    @pytest.mark.django_db
    def test_get_running_tasks(self, all_jobs):
        tm = TaskManager()

        # Ensure the query grabs the expected jobs
        execution_nodes_jobs, waiting_jobs = tm.get_running_tasks()
        assert 'host1' in execution_nodes_jobs
        assert 'host2' in execution_nodes_jobs
        assert 'host3_split' in execution_nodes_jobs

        assert all_jobs[3] in execution_nodes_jobs['host1']

        assert all_jobs[6] in execution_nodes_jobs['host2']
        assert all_jobs[7] in execution_nodes_jobs['host2']

        assert all_jobs[9] in execution_nodes_jobs['host3_split']

        assert all_jobs[10] in execution_nodes_jobs['host4_offline']

        assert all_jobs[11] not in execution_nodes_jobs['host1']

        assert all_jobs[2] in waiting_jobs
        assert all_jobs[4] in waiting_jobs
        assert all_jobs[5] in waiting_jobs
        assert all_jobs[8] in waiting_jobs
@@ -1,12 +1,39 @@
import datetime
import multiprocessing
import random
import sys
from uuid import uuid4
import signal
import time

from django.utils.timezone import now as tz_now
import pytest

from awx.main.dispatch.worker import BaseWorker
from awx.main.dispatch.pool import WorkerPool
from awx.main.models import Job, WorkflowJob, Instance
from awx.main.dispatch import reaper
from awx.main.dispatch.pool import PoolWorker, WorkerPool, AutoscalePool
from awx.main.dispatch.publish import task
from awx.main.dispatch.worker import BaseWorker, TaskWorker


@task()
def add(a, b):
    return a + b


class BaseTask(object):

    def add(self, a, b):
        return add(a, b)


@task()
class Adder(BaseTask):
    def run(self, a, b):
        return super(Adder, self).add(a, b)


@task(queue='hard-math')
def multiply(a, b):
    return a * b


class SimpleWorker(BaseWorker):
@@ -21,6 +48,61 @@ class ResultWriter(BaseWorker):
        result_queue.put(body + '!!!')


class SlowResultWriter(BaseWorker):

    def perform_work(self, body, result_queue):
        time.sleep(3)
        super(SlowResultWriter, self).perform_work(body, result_queue)


class TestPoolWorker:

    def setup_method(self, test_method):
        self.worker = PoolWorker(1000, self.tick, tuple())

    def tick(self):
        self.worker.finished.put(self.worker.queue.get()['uuid'])
        time.sleep(.5)

    def test_qsize(self):
        assert self.worker.qsize == 0
        for i in range(3):
            self.worker.put({'task': 'abc123'})
        assert self.worker.qsize == 3

    def test_put(self):
        assert len(self.worker.managed_tasks) == 0
        assert self.worker.messages_finished == 0
        self.worker.put({'task': 'abc123'})

        assert len(self.worker.managed_tasks) == 1
        assert self.worker.messages_sent == 1

    def test_managed_tasks(self):
        self.worker.put({'task': 'abc123'})
        self.worker.calculate_managed_tasks()
        assert len(self.worker.managed_tasks) == 1

        self.tick()
        self.worker.calculate_managed_tasks()
        assert len(self.worker.managed_tasks) == 0

    def test_current_task(self):
        self.worker.put({'task': 'abc123'})
        assert self.worker.current_task['task'] == 'abc123'

    def test_quit(self):
        self.worker.quit()
        assert self.worker.queue.get() == 'QUIT'

    def test_idle_busy(self):
        assert self.worker.idle is True
        assert self.worker.busy is False
        self.worker.put({'task': 'abc123'})
        assert self.worker.busy is True
        assert self.worker.idle is False


@pytest.mark.django_db
class TestWorkerPool:
@@ -28,37 +110,35 @@ class TestWorkerPool:
        self.pool = WorkerPool(min_workers=3)

    def teardown_method(self, test_method):
        self.pool.stop()
        self.pool.stop(signal.SIGTERM)

    def test_worker(self):
        self.pool.init_workers(SimpleWorker().work_loop)
        assert len(self.pool) == 3
        for worker in self.pool.workers:
            total, _, process = worker
            assert total == 0
            assert process.is_alive() is True
            assert worker.messages_sent == 0
            assert worker.alive is True

    def test_single_task(self):
        self.pool.init_workers(SimpleWorker().work_loop)
        self.pool.write(0, 'xyz')
        assert self.pool.workers[0][0] == 1  # worker at index 0 handled one task
        assert self.pool.workers[1][0] == 0
        assert self.pool.workers[2][0] == 0
        assert self.pool.workers[0].messages_sent == 1  # worker at index 0 handled one task
        assert self.pool.workers[1].messages_sent == 0
        assert self.pool.workers[2].messages_sent == 0

    def test_queue_preference(self):
        self.pool.init_workers(SimpleWorker().work_loop)
        self.pool.write(2, 'xyz')
        assert self.pool.workers[0][0] == 0
        assert self.pool.workers[1][0] == 0
        assert self.pool.workers[2][0] == 1  # worker at index 2 handled one task
        assert self.pool.workers[0].messages_sent == 0
        assert self.pool.workers[1].messages_sent == 0
        assert self.pool.workers[2].messages_sent == 1  # worker at index 2 handled one task

    def test_worker_processing(self):
        result_queue = multiprocessing.Queue()
        self.pool.init_workers(ResultWriter().work_loop, result_queue)
        uuids = []
        for i in range(10):
            self.pool.write(
                random.choice(self.pool.workers)[0],
                random.choice(range(len(self.pool))),
                'Hello, Worker {}'.format(i)
            )
        all_messages = [result_queue.get(timeout=1) for i in range(10)]
@@ -68,5 +148,212 @@ class TestWorkerPool:
            for i in range(10)
        ]

        total_handled = sum([worker[0] for worker in self.pool.workers])
        total_handled = sum([worker.messages_sent for worker in self.pool.workers])
        assert total_handled == 10


@pytest.mark.django_db
class TestAutoScaling:

    def setup_method(self, test_method):
        self.pool = AutoscalePool(min_workers=2, max_workers=10)

    def teardown_method(self, test_method):
        self.pool.stop(signal.SIGTERM)

    def test_scale_up(self):
        result_queue = multiprocessing.Queue()
        self.pool.init_workers(SlowResultWriter().work_loop, result_queue)

        # start with two workers, write an event to each worker and make it busy
        assert len(self.pool) == 2
        for i, w in enumerate(self.pool.workers):
            w.put('Hello, Worker {}'.format(0))
        assert len(self.pool) == 2

        # wait for the subprocesses to start working on their tasks and be marked busy
        time.sleep(1)
        assert self.pool.should_grow

        # write a third message, expect a new worker to spawn because all
        # workers are busy
        self.pool.write(0, 'Hello, Worker {}'.format(2))
        assert len(self.pool) == 3

    def test_scale_down(self):
        self.pool.init_workers(ResultWriter().work_loop, multiprocessing.Queue())

        # start with two workers, and scale up to 10 workers
        assert len(self.pool) == 2
        for i in range(8):
            self.pool.up()
        assert len(self.pool) == 10

        # cleanup should scale back down to the two-worker minimum
        self.pool.cleanup()
        assert len(self.pool) == 2

    def test_max_scale_up(self):
        self.pool.init_workers(ResultWriter().work_loop, multiprocessing.Queue())

        assert len(self.pool) == 2
        for i in range(25):
            self.pool.up()
        assert self.pool.max_workers == 10
        assert self.pool.full is True
        assert len(self.pool) == 10

    def test_equal_worker_distribution(self):
        # if all workers are busy, spawn new workers *before* adding messages
        # to an existing queue
        self.pool.init_workers(SlowResultWriter().work_loop, multiprocessing.Queue)

        # start with two workers, write an event to each worker and make it busy
        assert len(self.pool) == 2
        for i in range(10):
            self.pool.write(0, 'Hello, World!')
        assert len(self.pool) == 10
        for w in self.pool.workers:
            assert w.busy
            assert len(w.managed_tasks) == 1

        # the queue is full at 10, the _next_ write should put the message into
        # a worker's backlog
        assert len(self.pool) == 10
        for w in self.pool.workers:
            assert w.messages_sent == 1
        self.pool.write(0, 'Hello, World!')
        assert len(self.pool) == 10
        assert self.pool.workers[0].messages_sent == 2

    def test_lost_worker_autoscale(self):
        # if a worker exits, it should be replaced automatically up to min_workers
        self.pool.init_workers(ResultWriter().work_loop, multiprocessing.Queue())

        # start with two workers, kill one of them
        assert len(self.pool) == 2
        assert not self.pool.should_grow
        alive_pid = self.pool.workers[1].pid
        self.pool.workers[0].process.terminate()
        time.sleep(1)  # wait a moment for sigterm

        # clean up the dead worker
        self.pool.cleanup()
        assert len(self.pool) == 1
        assert self.pool.workers[0].pid == alive_pid

        # the next queue write should replace the lost worker
        self.pool.write(0, 'Hello, Worker')
        assert len(self.pool) == 2
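The pool behavior exercised above — grow when every worker is busy, shrink back to `min_workers` on `cleanup()`, replace lost workers on the next write — suggests the following standalone usage. This is a sketch assuming only the constructor and methods these tests call:

    import multiprocessing

    from awx.main.dispatch.pool import AutoscalePool
    from awx.main.dispatch.worker import BaseWorker

    class EchoWorker(BaseWorker):              # hypothetical worker
        def perform_work(self, body, result_queue):
            result_queue.put(body)

    queue = multiprocessing.Queue()
    pool = AutoscalePool(min_workers=2, max_workers=10)
    pool.init_workers(EchoWorker().work_loop, queue)
    pool.write(0, 'hello')   # busy pools grow; idle ones shrink on cleanup()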
class TestTaskDispatcher:

    @property
    def tm(self):
        return TaskWorker()

    def test_function_dispatch(self):
        result = self.tm.perform_work({
            'task': 'awx.main.tests.functional.test_dispatch.add',
            'args': [2, 2]
        })
        assert result == 4

    def test_method_dispatch(self):
        result = self.tm.perform_work({
            'task': 'awx.main.tests.functional.test_dispatch.Adder',
            'args': [2, 2]
        })
        assert result == 4


class TestTaskPublisher:

    def test_function_callable(self):
        assert add(2, 2) == 4

    def test_method_callable(self):
        assert Adder().run(2, 2) == 4

    def test_function_apply_async(self):
        message, queue = add.apply_async([2, 2])
        assert message['args'] == [2, 2]
        assert message['kwargs'] == {}
        assert message['task'] == 'awx.main.tests.functional.test_dispatch.add'
        assert queue == 'awx_private_queue'

    def test_method_apply_async(self):
        message, queue = Adder.apply_async([2, 2])
        assert message['args'] == [2, 2]
        assert message['kwargs'] == {}
        assert message['task'] == 'awx.main.tests.functional.test_dispatch.Adder'
        assert queue == 'awx_private_queue'

    def test_apply_with_queue(self):
        message, queue = add.apply_async([2, 2], queue='abc123')
        assert queue == 'abc123'

    def test_queue_defined_in_task_decorator(self):
        message, queue = multiply.apply_async([2, 2])
        assert queue == 'hard-math'

    def test_queue_overridden_from_task_decorator(self):
        message, queue = multiply.apply_async([2, 2], queue='not-so-hard')
        assert queue == 'not-so-hard'

    def test_apply_with_callable_queuename(self):
        message, queue = add.apply_async([2, 2], queue=lambda: 'called')
        assert queue == 'called'


yesterday = tz_now() - datetime.timedelta(days=1)


@pytest.mark.django_db
class TestJobReaper(object):

    @pytest.mark.parametrize('status, execution_node, controller_node, modified, fail', [
        ('running', '', '', None, False),         # running, not assigned to the instance
        ('running', 'awx', '', None, True),       # running, has the instance as its execution_node
        ('running', '', 'awx', None, True),       # running, has the instance as its controller_node
        ('waiting', '', '', None, False),         # waiting, not assigned to the instance
        ('waiting', 'awx', '', None, False),      # waiting, was edited less than a minute ago
        ('waiting', '', 'awx', None, False),      # waiting, was edited less than a minute ago
        ('waiting', 'awx', '', yesterday, True),  # waiting, assigned to the execution_node, stale
        ('waiting', '', 'awx', yesterday, True),  # waiting, assigned to the controller_node, stale
    ])
    def test_should_reap(self, status, fail, execution_node, controller_node, modified):
        i = Instance(hostname='awx')
        i.save()
        j = Job(
            status=status,
            execution_node=execution_node,
            controller_node=controller_node,
            start_args='SENSITIVE',
        )
        j.save()
        if modified:
            # we have to edit the modification time _without_ calling save()
            # (because .save() overwrites it to _now_)
            Job.objects.filter(id=j.id).update(modified=modified)
        reaper.reap(i)
        job = Job.objects.first()
        if fail:
            assert job.status == 'failed'
            assert 'marked as failed' in job.job_explanation
            assert job.start_args == ''
        else:
            assert job.status == status

    def test_workflow_does_not_reap(self):
        i = Instance(hostname='awx')
        i.save()
        j = WorkflowJob(
            status='running',
            execution_node='awx'
        )
        j.save()
        reaper.reap(i)

        assert WorkflowJob.objects.first().status == 'running'
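These tests summarize the reaping rules: `reaper.reap(instance)` fails running jobs tied to an instance and stale waiting jobs, scrubs `start_args`, and leaves workflow jobs alone. A sketch of the call itself, assuming the signature used above:

    from awx.main.dispatch import reaper
    from awx.main.models import Instance

    this_inst = Instance.objects.get(hostname='awx')  # hostname assumed
    reaper.reap(this_inst)  # orphaned jobs -> 'failed', start_args blanked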
@@ -1,69 +0,0 @@
# Copyright (c) 2017 Ansible by Red Hat
# All Rights Reserved.

import mock
import pytest

from django.utils.timezone import now as tz_now
from django.db import DatabaseError

from awx.main.scheduler import TaskManager
from awx.main.models import (
    Job,
    Instance,
    InstanceGroup,
)
from django.core.cache import cache


class TestCleanupInconsistentCeleryTasks():
    @mock.patch.object(cache, 'get', return_value=None)
    @mock.patch.object(TaskManager, 'get_active_tasks', return_value=([], {}))
    @mock.patch.object(TaskManager, 'get_running_tasks', return_value=({'host1': [Job(id=2), Job(id=3),]}, []))
    @mock.patch.object(InstanceGroup.objects, 'prefetch_related', return_value=[])
    @mock.patch.object(Instance.objects, 'filter', return_value=mock.MagicMock(first=lambda: None))
    @mock.patch('awx.main.scheduler.task_manager.logger')
    def test_instance_does_not_exist(self, logger_mock, *args):
        logger_mock.error = mock.MagicMock(side_effect=RuntimeError("mocked"))
        tm = TaskManager()
        with pytest.raises(RuntimeError) as excinfo:
            tm.cleanup_inconsistent_celery_tasks()

        assert "mocked" in str(excinfo.value)
        logger_mock.error.assert_called_once_with("Execution node Instance host1 not found in database. "
                                                  "The node is currently executing jobs ['job 2 (new)', "
                                                  "'job 3 (new)']")

    @mock.patch.object(cache, 'get', return_value=None)
    @mock.patch.object(TaskManager, 'get_active_tasks', return_value=([], {'host1': []}))
    @mock.patch.object(InstanceGroup.objects, 'prefetch_related', return_value=[])
    @mock.patch.object(TaskManager, 'get_running_tasks')
    @mock.patch('awx.main.scheduler.task_manager.logger')
    def test_save_failed(self, logger_mock, get_running_tasks, *args):
        logger_mock.error = mock.MagicMock()
        job = Job(id=2, modified=tz_now(), status='running', celery_task_id='blah', execution_node='host1')
        job.websocket_emit_status = mock.MagicMock()
        get_running_tasks.return_value = ({'host1': [job]}, [])
        tm = TaskManager()

        with mock.patch.object(job, 'save', side_effect=DatabaseError):
            tm.cleanup_inconsistent_celery_tasks()
        job.save.assert_called_once()
        logger_mock.error.assert_called_once_with("Task job 2 (failed) DB error in marking failed. Job possibly deleted.")

    @mock.patch.object(InstanceGroup.objects, 'prefetch_related', return_value=[])
    @mock.patch('awx.main.scheduler.task_manager.Inspect')
    def test_multiple_active_instances_sanity_check(self, inspect_mock, *args):
        class MockInspector:
            pass

        mock_inspector = MockInspector()
        mock_inspector.active = lambda: {
            'celery@host1': [],
            'celery@host2': []
        }
        inspect_mock.return_value = mock_inspector
        tm = TaskManager()
        active_task_queues, queues = tm.get_active_tasks()
        assert 'host1' in queues
        assert 'host2' in queues
@@ -67,7 +67,7 @@ def test_work_success_callback_missing_job():
    task_data = {'type': 'project_update', 'id': 9999}
    with mock.patch('django.db.models.query.QuerySet.get') as get_mock:
        get_mock.side_effect = ProjectUpdate.DoesNotExist()
        assert tasks.handle_work_success(None, task_data) is None
        assert tasks.handle_work_success(task_data) is None


def test_send_notifications_list(mocker):

@@ -8,8 +8,8 @@ def test_produce_supervisor_command(mocker):
    mock_process.communicate = communicate_mock
    Popen_mock = mocker.MagicMock(return_value=mock_process)
    with mocker.patch.object(reload.subprocess, 'Popen', Popen_mock):
        reload._supervisor_service_command(['beat', 'callback', 'fact'], "restart")
        reload._supervisor_service_command("restart")
        reload.subprocess.Popen.assert_called_once_with(
            ['supervisorctl', 'restart', 'tower-processes:receiver',],
            ['supervisorctl', 'restart', 'tower-processes:*',],
            stderr=-1, stdin=-1, stdout=-1)


@@ -1,27 +0,0 @@
from celery.utils.log import get_logger
from celery.worker.autoscale import Autoscaler, AUTOSCALE_KEEPALIVE
from django.conf import settings
import psutil

logger = get_logger('awx.main.tasks')


class DynamicAutoScaler(Autoscaler):

    def __init__(self, pool, max_concurrency, min_concurrency=0, worker=None,
                 keepalive=AUTOSCALE_KEEPALIVE, mutex=None):
        super(DynamicAutoScaler, self).__init__(pool, max_concurrency,
                                                min_concurrency, worker,
                                                keepalive, mutex)
        settings_absmem = getattr(settings, 'SYSTEM_TASK_ABS_MEM', None)
        if settings_absmem is not None:
            total_memory_gb = int(settings_absmem)
        else:
            total_memory_gb = (psutil.virtual_memory().total >> 30) + 1  # noqa: round up

        # 5 workers per GB of total memory
        self.max_concurrency = min(max_concurrency, (total_memory_gb * 5))
        logger.warn('celery worker dynamic --autoscale={},{}'.format(
            self.max_concurrency,
            self.min_concurrency
        ))
||||
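The deleted DynamicAutoScaler capped celery's worker count at five processes per GB of memory. If the same heuristic is wanted for sizing a replacement worker pool, a standalone sketch could look like this (the function name and hard_cap default are illustrative assumptions):

    import psutil

    def pool_ceiling(settings_absmem=None, hard_cap=50):
        # Prefer an explicitly configured memory size (SYSTEM_TASK_ABS_MEM);
        # otherwise round total physical memory up to the nearest GB.
        if settings_absmem is not None:
            total_memory_gb = int(settings_absmem)
        else:
            total_memory_gb = (psutil.virtual_memory().total >> 30) + 1
        # 5 workers per GB of total memory, bounded above.
        return min(hard_cap, total_memory_gb * 5)
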
@@ -1,17 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2017 Ansible Tower by Red Hat
-# All Rights Reserved.
-
-from django.conf import settings
-
-
-class AWXCeleryRouter(object):
-    def route_for_task(self, task, args=None, kwargs=None):
-        tasks = [
-            'awx.main.tasks.cluster_node_heartbeat',
-            'awx.main.tasks.purge_old_stdout_files',
-            'awx.main.tasks.awx_isolated_heartbeat',
-        ]
-        if task in tasks:
-            return {'queue': settings.CLUSTER_HOST_ID, 'routing_key': settings.CLUSTER_HOST_ID}
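The deleted router pinned a handful of housekeeping tasks (heartbeats, stdout cleanup) to the queue named after this node's CLUSTER_HOST_ID. Without it, a publisher has to target that queue explicitly; a hedged kombu sketch follows (the function and payload shape are assumptions, not the new publisher's actual API):

    from django.conf import settings
    from kombu import Connection, Exchange, Producer, Queue

    def publish_to_this_node(body):
        # Direct exchange and queue keyed on the node's hostname, mirroring
        # the routing the removed AWXCeleryRouter performed.
        name = settings.CLUSTER_HOST_ID
        queue = Queue(name, Exchange(name, type='direct'), routing_key=name)
        with Connection(settings.BROKER_URL) as conn:
            producer = Producer(conn)
            producer.publish(body, serializer='json', exchange=queue.exchange,
                             routing_key=queue.routing_key, declare=[queue])
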
@@ -11,11 +8,8 @@ from django.conf import settings
 logger = logging.getLogger('awx.main.utils.reload')


-def _supervisor_service_command(service_internal_names, command, communicate=True):
+def _supervisor_service_command(command, communicate=True):
     '''
-    Service internal name options:
-     - beat - celery - callback - channels - uwsgi - daphne
-     - fact - nginx
     example use pattern of supervisorctl:
     # supervisorctl restart tower-processes:receiver tower-processes:factcacher
     '''
@@ -25,13 +22,7 @@ def _supervisor_service_command(service_internal_names, command, communicate=True):
     args = ['supervisorctl']
     if settings.DEBUG:
         args.extend(['-c', '/supervisor.conf'])
-    programs = []
-    name_translation_dict = settings.SERVICE_NAME_DICT
-    for n in service_internal_names:
-        if n in name_translation_dict:
-            programs.append('{}:{}'.format(group_name, name_translation_dict[n]))
-    args.extend([command])
-    args.extend(programs)
+    args.extend([command, '{}:*'.format(group_name)])
     logger.debug('Issuing command to {} services, args={}'.format(command, args))
     supervisor_process = subprocess.Popen(args, stdin=subprocess.PIPE,
                                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -48,6 +39,6 @@ def _supervisor_service_command(service_internal_names, command, communicate=True):
     logger.info('Submitted supervisorctl {} command, not waiting for result'.format(command))


-def stop_local_services(service_internal_names, communicate=True):
-    logger.warn('Stopping services {} on this node in response to user action'.format(service_internal_names))
-    _supervisor_service_command(service_internal_names, command='stop', communicate=communicate)
+def stop_local_services(communicate=True):
+    logger.warn('Stopping services on this node in response to user action')
+    _supervisor_service_command(command='stop', communicate=communicate)

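With the per-service name translation gone, restarts and stops now address the whole supervisor process group with a wildcard. The equivalent shell invocation, sketched as a one-off helper (illustrative only, not the module itself):

    import subprocess

    def restart_group(group_name='tower-processes'):
        # Equivalent to: supervisorctl restart tower-processes:*
        args = ['supervisorctl', 'restart', '{}:*'.format(group_name)]
        return subprocess.Popen(args, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
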
@@ -4,7 +4,6 @@
 import os
 import re  # noqa
 import sys
-import djcelery
 import six
 from datetime import timedelta

@@ -26,6 +25,8 @@ BASE_DIR = os.path.dirname(os.path.dirname(__file__))
 def is_testing(argv=None):
     import sys
     '''Return True if running django or py.test unit tests.'''
+    if 'PYTEST_CURRENT_TEST' in os.environ.keys():
+        return True
     argv = sys.argv if argv is None else argv
     if len(argv) >= 1 and ('py.test' in argv[0] or 'py/test.py' in argv[0]):
         return True
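pytest exports PYTEST_CURRENT_TEST into the environment while each test runs, so the added check short-circuits before any argv inspection; roughly (assuming is_testing is in scope, and with an illustrative test id):

    import os

    os.environ['PYTEST_CURRENT_TEST'] = 'awx/main/tests/unit/test_x.py::test_y (call)'
    assert is_testing([]) is True
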
@@ -60,7 +61,7 @@ DATABASES = {
         'NAME': os.path.join(BASE_DIR, 'awx.sqlite3'),
         'ATOMIC_REQUESTS': True,
         'TEST': {
-            # Test database cannot be :memory: for celery/inventory tests.
+            # Test database cannot be :memory: for inventory tests.
             'NAME': os.path.join(BASE_DIR, 'awx_test.sqlite3'),
         },
     }
@@ -280,7 +281,6 @@ INSTALLED_APPS = (
     'oauth2_provider',
     'rest_framework',
     'django_extensions',
-    'djcelery',
     'channels',
     'polymorphic',
     'taggit',
@@ -459,40 +459,9 @@ DEVSERVER_DEFAULT_PORT = '8013'
 # Set default ports for live server tests.
 os.environ.setdefault('DJANGO_LIVE_TEST_SERVER_ADDRESS', 'localhost:9013-9199')

-djcelery.setup_loader()
-
-BROKER_POOL_LIMIT = None
 BROKER_URL = 'amqp://guest:guest@localhost:5672//'
 CELERY_EVENT_QUEUE_TTL = 5
-CELERY_DEFAULT_QUEUE = 'awx_private_queue'
-CELERY_DEFAULT_EXCHANGE = 'awx_private_queue'
-CELERY_DEFAULT_ROUTING_KEY = 'awx_private_queue'
-CELERY_DEFAULT_EXCHANGE_TYPE = 'direct'
-CELERY_TASK_SERIALIZER = 'json'
-CELERY_RESULT_SERIALIZER = 'json'
-CELERY_ACCEPT_CONTENT = ['json']
-CELERY_TRACK_STARTED = True
-CELERYD_TASK_TIME_LIMIT = None
-CELERYD_TASK_SOFT_TIME_LIMIT = None
-CELERYD_POOL_RESTARTS = True
-CELERYD_AUTOSCALER = 'awx.main.utils.autoscale:DynamicAutoScaler'
-CELERY_RESULT_BACKEND = 'djcelery.backends.database:DatabaseBackend'
-CELERY_IMPORTS = ('awx.main.scheduler.tasks',)
-CELERY_QUEUES = ()
-CELERY_ROUTES = ('awx.main.utils.ha.AWXCeleryRouter',)
-
-
-def log_celery_failure(*args):
-    # Import annotations lazily to avoid polluting the `awx.settings` namespace
-    # and causing circular imports
-    from awx.main.tasks import log_celery_failure
-    return log_celery_failure(*args)
-
-
-CELERY_ANNOTATIONS = {'*': {'on_failure': log_celery_failure}}
-
-CELERYBEAT_SCHEDULER = 'celery.beat.PersistentScheduler'
-CELERYBEAT_MAX_LOOP_INTERVAL = 60
 CELERYBEAT_SCHEDULE = {
     'tower_scheduler': {
         'task': 'awx.main.tasks.awx_periodic_scheduler',
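Of the block above, only BROKER_URL and CELERY_EVENT_QUEUE_TTL survive; every celery knob, the failure annotation, and the beat tuning go away. For context, a minimal kombu consumer bound to that broker might look like this (queue name and callback are hypothetical, not the new worker's implementation):

    from kombu import Connection, Queue
    from kombu.mixins import ConsumerMixin

    class EchoWorker(ConsumerMixin):
        def __init__(self, connection, queue_name):
            self.connection = connection
            self.queue = Queue(queue_name)

        def get_consumers(self, Consumer, channel):
            return [Consumer(queues=[self.queue], callbacks=[self.on_message])]

        def on_message(self, body, message):
            # Acknowledge after handling so the broker can redeliver on crash.
            print('received: {}'.format(body))
            message.ack()

    # EchoWorker(Connection('amqp://guest:guest@localhost:5672//'), 'example_queue').run()
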
@@ -525,9 +494,6 @@ CELERYBEAT_SCHEDULE = {
 }
 AWX_INCONSISTENT_TASK_INTERVAL = 60 * 3

-# Celery queues that will always be listened to by celery workers
-# Note: Broadcast queues have unique, auto-generated names, with the alias
-# property value of the original queue name.
-AWX_CELERY_QUEUES_STATIC = [
-    six.text_type(CELERY_DEFAULT_QUEUE),
-]
@@ -626,8 +592,8 @@ SOCIAL_AUTH_SAML_ENABLED_IDPS = {}
 SOCIAL_AUTH_SAML_ORGANIZATION_ATTR = {}
 SOCIAL_AUTH_SAML_TEAM_ATTR = {}

-# Any ANSIBLE_* settings will be passed to the subprocess environment by the
-# celery task.
+# Any ANSIBLE_* settings will be passed to the task runner subprocess
+# environment

 # Do not want AWX to ask interactive questions and want it to be friendly with
 # reprovisioning
@@ -641,8 +607,7 @@ ANSIBLE_PARAMIKO_RECORD_HOST_KEYS = False
 # output
 ANSIBLE_FORCE_COLOR = True

-# Additional environment variables to be passed to the subprocess started by
-# the celery task.
+# Additional environment variables to be passed to the ansible subprocesses
 AWX_TASK_ENV = {}

 # Flag to enable/disable updating hosts M2M when saving job events.
@@ -1071,6 +1036,15 @@ LOGGING = {
            'backupCount': 5,
            'formatter':'simple',
        },
        'callback_receiver': {
            'level': 'WARNING',
            'class':'logging.handlers.RotatingFileHandler',
            'filters': ['require_debug_false'],
            'filename': os.path.join(LOG_ROOT, 'callback_receiver.log'),
            'maxBytes': 1024 * 1024 * 5, # 5 MB
            'backupCount': 5,
            'formatter':'simple',
        },
        'dispatcher': {
            'level': 'WARNING',
            'class':'logging.handlers.RotatingFileHandler',
@@ -1080,6 +1054,10 @@ LOGGING = {
            'backupCount': 5,
            'formatter':'dispatcher',
        },
        'celery.beat': {
            'class':'logging.StreamHandler',
            'level': 'ERROR'
        },  # don't log every celerybeat wakeup
        'inventory_import': {
            'level': 'DEBUG',
            'class':'logging.StreamHandler',
@@ -1162,6 +1140,9 @@ LOGGING = {
         'awx.main': {
             'handlers': ['null']
         },
         'awx.main.commands.run_callback_receiver': {
             'handlers': ['callback_receiver'],
         },
+        'awx.main.dispatch': {
+            'handlers': ['dispatcher'],
+        },

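With the new handler and logger registered, dispatcher code can log through the standard hierarchy, e.g. (illustrative):

    import logging

    logger = logging.getLogger('awx.main.dispatch')
    logger.warning('worker pool scaled up')  # routed to dispatcher.log by the handler above
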
@@ -68,13 +68,6 @@ template['OPTIONS']['loaders'] = (
     'django.template.loaders.app_directories.Loader',
 )

-# Disable capturing all SQL queries when running celeryd in development.
-if 'celery' in sys.argv:
-    SQL_DEBUG = False
-
-CELERYD_HIJACK_ROOT_LOGGER = False
-CELERYD_LOG_COLOR = True
-
 CALLBACK_QUEUE = "callback_tasks"

 # Enable dynamically pulling roles from a requirement.yml file
@@ -149,15 +142,6 @@ except ImportError:

 CLUSTER_HOST_ID = socket.gethostname()

-# Supervisor service name dictionary used for programatic restart
-SERVICE_NAME_DICT = {
-    "celery": "celery",
-    "callback": "receiver",
-    "runworker": "channels",
-    "uwsgi": "uwsgi",
-    "daphne": "daphne",
-    "nginx": "nginx"}
-
 try:
     socket.gethostbyname('docker.for.mac.host.internal')
     os.environ['SDB_NOTIFY_HOST'] = 'docker.for.mac.host.internal'

@@ -73,13 +73,13 @@ if "pytest" in sys.modules:
         'ENGINE': 'django.db.backends.sqlite3',
         'NAME': os.path.join(BASE_DIR, 'awx.sqlite3'),
         'TEST': {
-            # Test database cannot be :memory: for celery/inventory tests.
+            # Test database cannot be :memory: for inventory tests.
             'NAME': os.path.join(BASE_DIR, 'awx_test.sqlite3'),
         },
     }
 }

-# Celery AMQP configuration.
+# AMQP configuration.
 BROKER_URL = "amqp://{}:{}@{}/{}".format(os.environ.get("RABBITMQ_USER"),
                                          os.environ.get("RABBITMQ_PASS"),
                                          os.environ.get("RABBITMQ_HOST"),
@@ -138,8 +138,7 @@ REMOTE_HOST_HEADERS = ['REMOTE_ADDR', 'REMOTE_HOST']
 # REMOTE_HOST_HEADERS will be trusted unconditionally')
 PROXY_IP_WHITELIST = []

-# Define additional environment variables to be passed to subprocess started by
-# the celery task.
+# Define additional environment variables to be passed to ansible subprocesses
 #AWX_TASK_ENV['FOO'] = 'BAR'

 # If set, use -vvv for project updates instead of -v for more output.

@@ -39,13 +39,13 @@ if is_testing(sys.argv):
         'ENGINE': 'django.db.backends.sqlite3',
         'NAME': os.path.join(BASE_DIR, 'awx.sqlite3'),
         'TEST': {
-            # Test database cannot be :memory: for celery/inventory tests.
+            # Test database cannot be :memory: for tests.
             'NAME': os.path.join(BASE_DIR, 'awx_test.sqlite3'),
         },
     }
 }

-# Celery AMQP configuration.
+# AMQP configuration.
 BROKER_URL = 'amqp://guest:guest@localhost:5672'

 # Set True to enable additional logging from the job_event_callback plugin
@@ -94,8 +94,7 @@ REMOTE_HOST_HEADERS = ['REMOTE_ADDR', 'REMOTE_HOST']
 # REMOTE_HOST_HEADERS will be trusted unconditionally')
 PROXY_IP_WHITELIST = []

-# Define additional environment variables to be passed to subprocess started by
-# the celery task.
+# Define additional environment variables to be passed to ansible subprocesses
 #AWX_TASK_ENV['FOO'] = 'BAR'

 # If set, use -vvv for project updates instead of -v for more output.

@@ -54,21 +54,13 @@ AWX_ISOLATED_USERNAME = 'awx'

 LOGGING['handlers']['tower_warnings']['filename'] = '/var/log/tower/tower.log'
 LOGGING['handlers']['callback_receiver']['filename'] = '/var/log/tower/callback_receiver.log'
+LOGGING['handlers']['dispatcher']['filename'] = '/var/log/tower/dispatcher.log'
 LOGGING['handlers']['task_system']['filename'] = '/var/log/tower/task_system.log'
 LOGGING['handlers']['fact_receiver']['filename'] = '/var/log/tower/fact_receiver.log'
 LOGGING['handlers']['management_playbooks']['filename'] = '/var/log/tower/management_playbooks.log'
 LOGGING['handlers']['system_tracking_migrations']['filename'] = '/var/log/tower/tower_system_tracking_migrations.log'
 LOGGING['handlers']['rbac_migrations']['filename'] = '/var/log/tower/tower_rbac_migrations.log'

-# Supervisor service name dictionary used for programatic restart
-SERVICE_NAME_DICT = {
-    "beat": "awx-celery-beat",
-    "celery": "awx-celery",
-    "callback": "awx-callback-receiver",
-    "channels": "awx-channels-worker",
-    "uwsgi": "awx-uwsgi",
-    "daphne": "awx-daphne"}
-
 # Store a snapshot of default settings at this point before loading any
 # customizable config files.
 DEFAULTS_SNAPSHOT = {}