Merge pull request #707 from ansible/jag/statsd

Add optional statsd metrics gathering
This commit is contained in:
Matthew Jones
2016-01-29 09:59:01 -05:00
10 changed files with 124 additions and 2 deletions

45
awx/lib/metrics.py Normal file
View File

@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
from functools import wraps
from django_statsd.clients import statsd
def task_timer(fn):
    """Decorate a task method so each call is counted and timed via statsd.

    The decorated method's receiver (``self``) must expose a dotted ``name``
    attribute; only the component after the last dot is used in the metric
    keys.  For every call two metrics are emitted:

    * ``tasks.<task>.<method>.count`` -- incremented before the call.
    * ``tasks.<task>.<method>.timer`` -- a ``statsd.timer`` context wrapped
      around the call itself.

    The return value of the wrapped method is passed through unchanged.
    """
    @wraps(fn)
    def timed_method(self, *args, **kwargs):
        # 'awx.main.tasks.run_job' -> 'run_job'
        task_label = self.name.rsplit('.', 1)[-1]
        method_label = fn.__name__

        statsd.incr('tasks.{}.{}.count'.format(task_label, method_label))

        timer_key = 'tasks.{}.{}.timer'.format(task_label, method_label)
        with statsd.timer(timer_key):
            return fn(self, *args, **kwargs)

    return timed_method
class BaseTimer(object):
    """Decorator object that counts and times calls under a fixed metric name.

    Instantiate once (for example with a module's ``__name__``) and use the
    instance as a decorator on functions or methods.
    """

    def __init__(self, name, prefix=None):
        """Store the metric base name.

        Only the part of ``name`` after its last dot is kept.  When
        ``prefix`` is truthy the stored name becomes ``<prefix>.<short name>``.
        """
        short_name = name.rsplit('.', 1)[-1]
        self.name = '{}.{}'.format(prefix, short_name) if prefix else short_name

    def __call__(self, fn):
        """Wrap ``fn`` so each call bumps a counter and runs inside a timer.

        Metric keys are ``<name>.<fn name>.count`` and
        ``<name>.<fn name>.timer``; the wrapped function's return value is
        passed through unchanged.
        """
        @wraps(fn)
        def timed_call(obj, *args, **kwargs):
            # self.name is read at call time, exactly like the metric keys.
            statsd.incr('{}.{}.count'.format(self.name, fn.__name__))
            with statsd.timer('{}.{}.timer'.format(self.name, fn.__name__)):
                return fn(obj, *args, **kwargs)

        return timed_call

View File

@@ -22,7 +22,9 @@ from django.db import connection
# AWX # AWX
from awx.main.models import * # noqa from awx.main.models import * # noqa
from awx.main.socket import Socket from awx.main.socket import Socket
from awx.lib.metrics import BaseTimer
fn_timer = BaseTimer(__name__)
logger = logging.getLogger('awx.main.commands.run_callback_receiver') logger = logging.getLogger('awx.main.commands.run_callback_receiver')
WORKERS = 4 WORKERS = 4
@@ -98,6 +100,7 @@ class CallbackReceiver(object):
break break
time.sleep(0.1) time.sleep(0.1)
@fn_timer
def write_queue_worker(self, preferred_queue, worker_queues, message): def write_queue_worker(self, preferred_queue, worker_queues, message):
queue_order = sorted(range(WORKERS), cmp=lambda x, y: -1 if x==preferred_queue else 0) queue_order = sorted(range(WORKERS), cmp=lambda x, y: -1 if x==preferred_queue else 0)
for queue_actual in queue_order: for queue_actual in queue_order:
@@ -161,6 +164,7 @@ class CallbackReceiver(object):
sys.exit(1) sys.exit(1)
last_parent_events[message['job_id']] = job_parent_events last_parent_events[message['job_id']] = job_parent_events
@fn_timer
@transaction.atomic @transaction.atomic
def process_job_event(self, data): def process_job_event(self, data):
# Sanity check: Do we need to do anything at all? # Sanity check: Do we need to do anything at all?
@@ -223,6 +227,7 @@ class CallbackReceiver(object):
logger.error('Database error saving job event: %s', e) logger.error('Database error saving job event: %s', e)
return None return None
@fn_timer
@transaction.atomic @transaction.atomic
def process_ad_hoc_event(self, data): def process_ad_hoc_event(self, data):
# Sanity check: Do we need to do anything at all? # Sanity check: Do we need to do anything at all?

View File

@@ -41,6 +41,7 @@ from django.utils.datastructures import SortedDict
from django.utils.timezone import now from django.utils.timezone import now
# AWX # AWX
from awx.lib.metrics import task_timer
from awx.main.constants import CLOUD_PROVIDERS from awx.main.constants import CLOUD_PROVIDERS
from awx.main.models import * # noqa from awx.main.models import * # noqa
from awx.main.queue import FifoQueue from awx.main.queue import FifoQueue
@@ -216,6 +217,7 @@ class BaseTask(Task):
model = None model = None
abstract = True abstract = True
@task_timer
def update_model(self, pk, _attempt=0, **updates): def update_model(self, pk, _attempt=0, **updates):
"""Reload the model instance from the database and update the """Reload the model instance from the database and update the
given fields. given fields.
@@ -285,6 +287,7 @@ class BaseTask(Task):
os.chmod(path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) os.chmod(path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
return path return path
@task_timer
def build_private_data_files(self, instance, **kwargs): def build_private_data_files(self, instance, **kwargs):
''' '''
Create a temporary files containing the private data. Create a temporary files containing the private data.
@@ -328,6 +331,7 @@ class BaseTask(Task):
'': '', '': '',
} }
@task_timer
def build_env(self, instance, **kwargs): def build_env(self, instance, **kwargs):
''' '''
Build environment dictionary for ansible-playbook. Build environment dictionary for ansible-playbook.
@@ -352,6 +356,7 @@ class BaseTask(Task):
env['PROOT_TMP_DIR'] = tower_settings.AWX_PROOT_BASE_PATH env['PROOT_TMP_DIR'] = tower_settings.AWX_PROOT_BASE_PATH
return env return env
@task_timer
def build_safe_env(self, instance, **kwargs): def build_safe_env(self, instance, **kwargs):
''' '''
Build environment dictionary, hiding potentially sensitive information Build environment dictionary, hiding potentially sensitive information
@@ -420,6 +425,7 @@ class BaseTask(Task):
''' '''
return SortedDict() return SortedDict()
@task_timer
def run_pexpect(self, instance, args, cwd, env, passwords, stdout_handle, def run_pexpect(self, instance, args, cwd, env, passwords, stdout_handle,
output_replacements=None): output_replacements=None):
''' '''
@@ -503,6 +509,7 @@ class BaseTask(Task):
Hook for any steps to run after job/task is complete. Hook for any steps to run after job/task is complete.
''' '''
@task_timer
def run(self, pk, **kwargs): def run(self, pk, **kwargs):
''' '''
Run the job/task and capture its output. Run the job/task and capture its output.
@@ -598,6 +605,7 @@ class RunJob(BaseTask):
name = 'awx.main.tasks.run_job' name = 'awx.main.tasks.run_job'
model = Job model = Job
@task_timer
def build_private_data(self, job, **kwargs): def build_private_data(self, job, **kwargs):
''' '''
Returns a dict of the form Returns a dict of the form
@@ -881,7 +889,7 @@ class RunProjectUpdate(BaseTask):
name = 'awx.main.tasks.run_project_update' name = 'awx.main.tasks.run_project_update'
model = ProjectUpdate model = ProjectUpdate
@task_timer
def build_private_data(self, project_update, **kwargs): def build_private_data(self, project_update, **kwargs):
''' '''
Return SSH private key data needed for this project update. Return SSH private key data needed for this project update.
@@ -1049,6 +1057,7 @@ class RunInventoryUpdate(BaseTask):
name = 'awx.main.tasks.run_inventory_update' name = 'awx.main.tasks.run_inventory_update'
model = InventoryUpdate model = InventoryUpdate
@task_timer
def build_private_data(self, inventory_update, **kwargs): def build_private_data(self, inventory_update, **kwargs):
"""Return private data needed for inventory update. """Return private data needed for inventory update.
If no private data is needed, return None. If no private data is needed, return None.
@@ -1320,6 +1329,7 @@ class RunAdHocCommand(BaseTask):
name = 'awx.main.tasks.run_ad_hoc_command' name = 'awx.main.tasks.run_ad_hoc_command'
model = AdHocCommand model = AdHocCommand
@task_timer
def build_private_data(self, ad_hoc_command, **kwargs): def build_private_data(self, ad_hoc_command, **kwargs):
''' '''
Return SSH private key data needed for this ad hoc command (only if Return SSH private key data needed for this ad hoc command (only if

View File

@@ -47,6 +47,24 @@ import zmq
import psutil import psutil
# Only use statsd if there's a statsd host in the environment
# otherwise just do a noop.
if os.environ.get('GRAPHITE_PORT_8125_UDP_ADDR'):
    from statsd import StatsClient
    statsd = StatsClient(host=os.environ['GRAPHITE_PORT_8125_UDP_ADDR'],
                         port=8125,
                         prefix='tower.job.event_callback',
                         maxudpsize=512)
else:
    class NoStatsTimer(object):
        """Inert timer usable as a context manager or as a decorator.

        ``statsd.timer(...)`` is used in both forms by this module, so the
        no-op client cannot simply hand back ``None`` for it.
        """

        def __call__(self, fn):
            # Decorator form: leave the decorated function untouched.
            return fn

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            # Never swallow exceptions raised inside the ``with`` body.
            return False

        def start(self):
            return self

        def stop(self, send=True):
            return self

        def send(self):
            return None

    class NoStatsClient(object):
        """Stand-in client used when no statsd host is configured.

        ``timer()`` returns a ``NoStatsTimer``; every other public attribute
        resolves to a callable that accepts anything and returns ``None``.
        """

        def timer(self, *args, **kwargs):
            return NoStatsTimer()

        def __getattr__(self, item):
            # Only reached for attributes not found normally.  Dunder
            # lookups must fail so protocol probes (copy, pickle, ...) do
            # not mistake the no-op lambda for a real implementation.
            if item.startswith('__'):
                raise AttributeError(item)
            return lambda *args, **kwargs: None

    statsd = NoStatsClient()
class TokenAuth(requests.auth.AuthBase): class TokenAuth(requests.auth.AuthBase):
def __init__(self, token): def __init__(self, token):
@@ -186,7 +204,8 @@ class BaseCallbackModule(object):
def _log_event(self, event, **event_data): def _log_event(self, event, **event_data):
if self.callback_consumer_port: if self.callback_consumer_port:
self._post_job_event_queue_msg(event, event_data) with statsd.timer('zmq_post_event_msg.{}'.format(event)):
self._post_job_event_queue_msg(event, event_data)
else: else:
self._post_rest_api_event(event, event_data) self._post_rest_api_event(event, event_data)
@@ -255,6 +274,7 @@ class BaseCallbackModule(object):
task=result._task, diff=diff) task=result._task, diff=diff)
@staticmethod @staticmethod
@statsd.timer('terminate_ssh_control_masters')
def terminate_ssh_control_masters(): def terminate_ssh_control_masters():
# Determine if control persist is being used and if any open sockets # Determine if control persist is being used and if any open sockets
# exist after running the playbook. # exist after running the playbook.

View File

@@ -66,6 +66,12 @@ PASSWORD_HASHERS = (
# Configure a default UUID for development only. # Configure a default UUID for development only.
SYSTEM_UUID = '00000000-0000-0000-0000-000000000000' SYSTEM_UUID = '00000000-0000-0000-0000-000000000000'
# statsd metrics settings (the STATSD_* names are presumably read by the
# django_statsd package -- confirm against its documentation).
# NOTE(review): 'graphite' looks like the hostname of a linked graphite/statsd
# container in the development environment -- confirm before reusing elsewhere.
STATSD_CLIENT = 'django_statsd.clients.normal'
STATSD_HOST = 'graphite'
STATSD_PORT = 8125
STATSD_PREFIX = 'tower'
STATSD_MAXUDPSIZE = 512
# If there is an `/etc/tower/settings.py`, include it. # If there is an `/etc/tower/settings.py`, include it.
# If there is a `/etc/tower/conf.d/*.py`, include them. # If there is a `/etc/tower/conf.d/*.py`, include them.
include(optional('/etc/tower/settings.py'), scope=locals()) include(optional('/etc/tower/settings.py'), scope=locals())

View File

@@ -13,3 +13,4 @@ from development import * # NOQA
DEBUG = False DEBUG = False
TEMPLATE_DEBUG = DEBUG TEMPLATE_DEBUG = DEBUG
SQL_DEBUG = DEBUG SQL_DEBUG = DEBUG
STATSD_CLIENT = 'django_statsd.clients.null'

View File

@@ -0,0 +1,5 @@
BSD and MPL
Portions of this are from commonware:
https://github.com/jsocol/commonware/blob/master/LICENSE

View File

@@ -0,0 +1,20 @@
Copyright (c) 2012, James Socol
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -24,6 +24,7 @@ django-polymorphic==0.5.3
django-radius==1.0.0 django-radius==1.0.0
djangorestframework==2.3.13 djangorestframework==2.3.13
django-split-settings==0.1.1 django-split-settings==0.1.1
django-statsd-mozilla==0.3.16
django-taggit==0.11.2 django-taggit==0.11.2
git+https://github.com/matburt/dm.xmlsec.binding.git@master#egg=dm.xmlsec.binding git+https://github.com/matburt/dm.xmlsec.binding.git@master#egg=dm.xmlsec.binding
dogpile.cache==0.5.6 dogpile.cache==0.5.6
@@ -113,6 +114,7 @@ requests-oauthlib==0.5.0
simplejson==3.6.0 simplejson==3.6.0
six==1.9.0 six==1.9.0
South==1.0.2 South==1.0.2
statsd==3.2.1
stevedore==1.3.0 stevedore==1.3.0
suds==0.4 suds==0.4
warlock==1.1.0 warlock==1.1.0

View File

@@ -7,6 +7,7 @@ tower:
- postgres - postgres
- redis - redis
- mongo - mongo
- graphite
volumes: volumes:
- ../:/tower_devel - ../:/tower_devel
postgres: postgres:
@@ -28,3 +29,10 @@ dockerui:
privileged: true privileged: true
volumes: volumes:
- /var/run/docker.sock:/var/run/docker.sock - /var/run/docker.sock:/var/run/docker.sock
graphite:
image: hopsoft/graphite-statsd
ports:
- "8001:80"
- "2003:2003"
- "8125:8125/udp"
- "8126:8126"