Merge pull request #4404 from cchurch/split-job-event-data
Split job event data between callback queue and stdout.
commit a38ab75086
@@ -26,7 +26,7 @@ import uuid
 from ansible.utils.display import Display
 
 # Tower Display Callback
-from tower_display_callback.events import event_context
+from .events import event_context
 
 __all__ = []
 
@@ -22,14 +22,75 @@ import base64
 import contextlib
 import datetime
 import json
+import logging
 import multiprocessing
 import os
 import threading
 import uuid
 
+# Kombu
+from kombu import Connection, Exchange, Producer
+
 __all__ = ['event_context']
 
 
+class CallbackQueueEventDispatcher(object):
+
+    def __init__(self):
+        self.callback_connection = os.getenv('CALLBACK_CONNECTION', None)
+        self.connection_queue = os.getenv('CALLBACK_QUEUE', '')
+        self.connection = None
+        self.exchange = None
+        self._init_logging()
+
+    def _init_logging(self):
+        try:
+            self.job_callback_debug = int(os.getenv('JOB_CALLBACK_DEBUG', '0'))
+        except ValueError:
+            self.job_callback_debug = 0
+        self.logger = logging.getLogger('awx.plugins.callback.job_event_callback')
+        if self.job_callback_debug >= 2:
+            self.logger.setLevel(logging.DEBUG)
+        elif self.job_callback_debug >= 1:
+            self.logger.setLevel(logging.INFO)
+        else:
+            self.logger.setLevel(logging.WARNING)
+        handler = logging.StreamHandler()
+        formatter = logging.Formatter('%(levelname)-8s %(process)-8d %(message)s')
+        handler.setFormatter(formatter)
+        self.logger.addHandler(handler)
+        self.logger.propagate = False
+
+    def dispatch(self, obj):
+        if not self.callback_connection or not self.connection_queue:
+            return
+        active_pid = os.getpid()
+        for retry_count in xrange(4):
+            try:
+                if not hasattr(self, 'connection_pid'):
+                    self.connection_pid = active_pid
+                if self.connection_pid != active_pid:
+                    self.connection = None
+                if self.connection is None:
+                    self.connection = Connection(self.callback_connection)
+                    self.exchange = Exchange(self.connection_queue, type='direct')
+
+                producer = Producer(self.connection)
+                producer.publish(obj,
+                                 serializer='json',
+                                 compression='bzip2',
+                                 exchange=self.exchange,
+                                 declare=[self.exchange],
+                                 routing_key=self.connection_queue)
+                return
+            except Exception, e:
+                self.logger.info('Publish Job Event Exception: %r, retry=%d', e,
+                                 retry_count, exc_info=True)
+                retry_count += 1
+                if retry_count >= 3:
+                    break
+
+
 class EventContext(object):
     '''
     Store global and local (per thread/process) data associated with callback
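The new CallbackQueueEventDispatcher is the producer half of the split: each callback process publishes the full event payload to an AMQP exchange instead of serializing everything to stdout. A minimal usage sketch, not part of the commit — broker URL, queue name, job id, and the import path are illustrative, and it assumes the Python 2 runtime this plugin code targets:

import os
import uuid

# Hypothetical broker/queue values; in Tower these are injected by RunJob.
os.environ['CALLBACK_CONNECTION'] = 'amqp://guest:guest@localhost:5672//'
os.environ['CALLBACK_QUEUE'] = 'callback_tasks'

from events import CallbackQueueEventDispatcher  # assumed import path

dispatcher = CallbackQueueEventDispatcher()
dispatcher.dispatch({
    'event': 'runner_on_ok',
    'uuid': str(uuid.uuid4()),
    'job_id': 123,                              # hypothetical job id
    'event_data': {'res': {'changed': False}},
})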
@@ -38,6 +99,7 @@ class EventContext(object):
 
     def __init__(self):
         self.display_lock = multiprocessing.RLock()
+        self.dispatcher = CallbackQueueEventDispatcher()
 
     def add_local(self, **kwargs):
         if not hasattr(self, '_local'):
@@ -111,7 +173,9 @@ class EventContext(object):
             if event_data.get(key, False):
                 event = key
                 break
+        max_res = int(os.getenv("MAX_EVENT_RES", 700000))
+        if event not in ('playbook_on_stats',) and "res" in event_data and len(str(event_data['res'])) > max_res:
+            event_data['res'] = {}
         event_dict = dict(event=event, event_data=event_data)
         for key in event_data.keys():
             if key in ('job_id', 'ad_hoc_command_id', 'uuid', 'parent_uuid', 'created', 'artifact_data'):
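The size guard added above behaves like this worked example (a standalone sketch, not commit code): with the default MAX_EVENT_RES of 700000 characters, an oversized module result is emptied before the event is dispatched, while playbook_on_stats events are always kept intact.

import os

max_res = int(os.getenv('MAX_EVENT_RES', 700000))

event = 'runner_on_ok'
event_data = {'res': {'stdout': 'x' * (max_res + 1)}}  # oversized result

# Same condition as the diff above: drop any non-stats result whose
# stringified size exceeds the limit.
if event not in ('playbook_on_stats',) and 'res' in event_data and len(str(event_data['res'])) > max_res:
    event_data['res'] = {}

assert event_data['res'] == {}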
@@ -136,7 +200,9 @@ class EventContext(object):
         fileobj.flush()
 
     def dump_begin(self, fileobj):
-        self.dump(fileobj, self.get_begin_dict())
+        begin_dict = self.get_begin_dict()
+        self.dispatcher.dispatch(begin_dict)
+        self.dump(fileobj, {'uuid': begin_dict['uuid']})
 
     def dump_end(self, fileobj):
         self.dump(fileobj, self.get_end_dict(), flush=True)
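This hunk is the split named in the commit title: dump_begin() now sends the complete event dictionary over the callback queue and writes only a uuid marker to stdout, where the receiver later re-joins the two halves. A sketch of the payload shapes (field values are illustrative, not from the commit):

import json
import uuid

begin_dict = {
    'event': 'runner_on_ok',
    'uuid': str(uuid.uuid4()),
    'job_id': 123,                              # hypothetical
    'event_data': {'res': {'changed': True}},
}

# Published to the callback queue (full payload):
queue_payload = begin_dict

# Written to stdout (marker only, keeping captured output small):
stdout_payload = json.dumps({'uuid': begin_dict['uuid']})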
@@ -29,8 +29,8 @@ from ansible.plugins.callback import CallbackBase
 from ansible.plugins.callback.default import CallbackModule as DefaultCallbackModule
 
 # Tower Display Callback
-from tower_display_callback.events import event_context
-from tower_display_callback.minimal import CallbackModule as MinimalCallbackModule
+from .events import event_context
+from .minimal import CallbackModule as MinimalCallbackModule
 
 
 class BaseCallbackModule(CallbackBase):
@@ -21,6 +21,7 @@ logger = logging.getLogger('awx.main.commands.run_callback_receiver')
 class CallbackBrokerWorker(ConsumerMixin):
     def __init__(self, connection):
         self.connection = connection
+        self.partial_events = {}
 
     def get_consumers(self, Consumer, channel):
         return [Consumer(queues=[Queue(settings.CALLBACK_QUEUE,
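For orientation, the receiving class follows Kombu's ConsumerMixin pattern; a self-contained sketch of that shape (queue name and broker URL are placeholders, not the commit's values):

from kombu import Connection, Exchange, Queue
from kombu.mixins import ConsumerMixin

class Worker(ConsumerMixin):
    def __init__(self, connection):
        self.connection = connection  # required by ConsumerMixin
        self.partial_events = {}

    def get_consumers(self, Consumer, channel):
        exchange = Exchange('callback_tasks', type='direct')
        queue = Queue('callback_tasks', exchange, routing_key='callback_tasks')
        return [Consumer(queues=[queue], accept=['json'],
                         callbacks=[self.process_task])]

    def process_task(self, body, message):
        print(body)      # handle the event payload here
        message.ack()

if __name__ == '__main__':
    with Connection('amqp://guest:guest@localhost:5672//') as conn:
        Worker(conn).run()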
@@ -31,18 +32,28 @@ class CallbackBrokerWorker(ConsumerMixin):
 
     def process_task(self, body, message):
         try:
             if 'event' not in body:
                 raise Exception('Payload does not have an event')
             if 'job_id' not in body and 'ad_hoc_command_id' not in body:
                 raise Exception('Payload does not have a job_id or ad_hoc_command_id')
             if settings.DEBUG:
                 logger.info('Body: {}'.format(body))
                 logger.info('Message: {}'.format(message))
             try:
-                if 'job_id' in body:
-                    JobEvent.create_from_data(**body)
-                elif 'ad_hoc_command_id' in body:
-                    AdHocCommandEvent.create_from_data(**body)
+                # If event came directly from callback without counter/stdout,
+                # save it until the rest of the event arrives.
+                if 'counter' not in body:
+                    if 'uuid' in body:
+                        self.partial_events[body['uuid']] = body
+                # If event has counter, try to combine it with any event data
+                # already received for the same uuid, then create the actual
+                # job event record.
+                else:
+                    if 'uuid' in body:
+                        partial_event = self.partial_events.pop(body['uuid'], {})
+                        body.update(partial_event)
+                    if 'job_id' in body:
+                        JobEvent.create_from_data(**body)
+                    elif 'ad_hoc_command_id' in body:
+                        AdHocCommandEvent.create_from_data(**body)
             except DatabaseError as e:
                 logger.error('Database Error Saving Job Event: {}'.format(e))
         except Exception as exc:
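The merge logic above can be shown in isolation (a toy sketch, not commit code): the queue delivers the event body without a counter first, the stdout side later supplies the counter for the same uuid, and the worker combines the halves before saving.

partial_events = {}

def receive(body):
    # First half: no counter yet, so park it keyed by uuid.
    if 'counter' not in body:
        partial_events[body['uuid']] = body
        return None
    # Second half: fold in whatever arrived earlier for this uuid.
    body.update(partial_events.pop(body['uuid'], {}))
    return body

receive({'uuid': 'abc123', 'event': 'runner_on_ok', 'job_id': 1})
full = receive({'uuid': 'abc123', 'counter': 42})
assert full == {'uuid': 'abc123', 'event': 'runner_on_ok', 'job_id': 1, 'counter': 42}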
@@ -809,6 +809,7 @@ class RunJob(BaseTask):
         env['REST_API_URL'] = settings.INTERNAL_API_URL
         env['REST_API_TOKEN'] = job.task_auth_token or ''
         env['TOWER_HOST'] = settings.TOWER_URL_BASE
+        env['MAX_EVENT_RES'] = settings.MAX_EVENT_RES_DATA
         env['CALLBACK_QUEUE'] = settings.CALLBACK_QUEUE
         env['CALLBACK_CONNECTION'] = settings.BROKER_URL
         if getattr(settings, 'JOB_CALLBACK_DEBUG', False):
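These environment variables are how the dispatcher inside the callback plugin finds the broker: RunJob exports them into the playbook process environment, and CallbackQueueEventDispatcher.__init__ reads them back with os.getenv. Schematically (a sketch; the names match the diff, the values are illustrative):

import os

# Task runner side (values come from Tower settings):
env = dict(os.environ)
env['CALLBACK_QUEUE'] = 'callback_tasks'                            # settings.CALLBACK_QUEUE
env['CALLBACK_CONNECTION'] = 'amqp://guest:guest@localhost:5672//'  # settings.BROKER_URL
env['MAX_EVENT_RES'] = '700000'                                     # settings.MAX_EVENT_RES_DATA

# Callback plugin side (mirrors CallbackQueueEventDispatcher.__init__):
callback_connection = env.get('CALLBACK_CONNECTION', None)
connection_queue = env.get('CALLBACK_QUEUE', '')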
@@ -152,6 +152,10 @@ REMOTE_HOST_HEADERS = ['REMOTE_ADDR', 'REMOTE_HOST']
 # Note: This setting may be overridden by database settings.
 STDOUT_MAX_BYTES_DISPLAY = 1048576
 
+# The maximum size of the ansible callback event's res data structure
+# beyond this limit and the value will be removed
+MAX_EVENT_RES_DATA = 700000
+
 # Note: This setting may be overridden by database settings.
 EVENT_STDOUT_MAX_BYTES_DISPLAY = 1024
 
@@ -522,17 +526,6 @@ ANSIBLE_FORCE_COLOR = True
 # the celery task.
 AWX_TASK_ENV = {}
 
-# Maximum number of job events processed by the callback receiver worker process
-# before it recycles
-JOB_EVENT_RECYCLE_THRESHOLD = 3000
-
-# Number of workers used to proecess job events in parallel
-JOB_EVENT_WORKERS = 4
-
-# Maximum number of job events that can be waiting on a single worker queue before
-# it can be skipped as too busy
-JOB_EVENT_MAX_QUEUE_SIZE = 100
-
 # Flag to enable/disable updating hosts M2M when saving job events.
 CAPTURE_JOB_EVENT_HOSTS = False
 