add a minimal framework for generating analytics/metrics

annotate queries & add license analytics
This commit is contained in:
Ryan Petrello 2019-01-28 09:28:32 -05:00 committed by Christian Adams
parent 7b4c63037a
commit c586fa9821
16 changed files with 544 additions and 5 deletions

View File

@ -32,6 +32,7 @@ from awx.api.views import (
OAuth2TokenList,
ApplicationOAuth2TokenList,
OAuth2ApplicationDetail,
MetricsView,
)
from .organization import urls as organization_urls
@ -131,6 +132,7 @@ v2_urls = [
url(r'^applications/(?P<pk>[0-9]+)/tokens/$', ApplicationOAuth2TokenList.as_view(), name='application_o_auth2_token_list'),
url(r'^tokens/$', OAuth2TokenList.as_view(), name='o_auth2_token_list'),
url(r'^', include(oauth2_urls)),
url(r'^metrics/$', MetricsView.as_view(), name='metrics_view'),
]
app_name = 'api'

View File

@ -318,6 +318,54 @@ class DashboardJobsGraphView(APIView):
return Response(dashboard_data)
class MetricsView(APIView):
view_name = _("Metrics")
swagger_topic = 'Metrics'
renderer_classes = [renderers.BrowsableAPIRenderer,
renderers.PlainTextRenderer,
JSONRenderer]
def get(self, request, format=None):
''' Show Metrics Details '''
# Temporary Imports
from awx.main.models.organization import UserSessionMembership
from django.contrib.sessions.models import Session
def _prepare_data(data):
metrics = ''
for metric in data:
metrics += metric + '\n'
return metrics
# Add active/expired, or only query active sessions
total_sessions = Session.objects.all().count()
active_sessions = Session.objects.filter(expire_date__gte=now()).count()
api_sessions = UserSessionMembership.objects.all().count()
channels_sessions = total_sessions - api_sessions
expired_sessions = total_sessions - active_sessions
data = []
data.append("# HELP awx_sessions_active counter A count of active sessions.")
data.append("# TYPE awx_sessions_active counter")
data.append("sessions.active_sessions {0} ".format(total_sessions))
data.append("# TYPE awx_sessions_websocket counter")
data.append("sessions.websocket_sessions {0} ".format(channels_sessions))
data.append("# TYPE awx_sessions_api counter")
data.append("sessions.api_sessions {0} ".format(api_sessions))
data.append("# TYPE sessions.active_sessions counter")
data.append("sessions.active_sessions {0}".format(active_sessions))
data.append("# TYPE sessions.expired_sessions counter")
data.append("sessions.expired_sessions {0}".format(expired_sessions))
return Response(_prepare_data(data))
class InstanceList(ListAPIView):
view_name = _("Instances")

View File

@ -103,6 +103,7 @@ class ApiVersionRootView(APIView):
data['credential_types'] = reverse('api:credential_type_list', request=request)
data['applications'] = reverse('api:o_auth2_application_list', request=request)
data['tokens'] = reverse('api:o_auth2_token_list', request=request)
data['metrics'] = reverse('api:metrics_view', request=request)
data['inventory'] = reverse('api:inventory_list', request=request)
data['inventory_scripts'] = reverse('api:inventory_script_list', request=request)
data['inventory_sources'] = reverse('api:inventory_source_list', request=request)
@ -157,6 +158,7 @@ class ApiV1PingView(APIView):
'ha': is_ha_environment(),
'version': get_awx_version(),
'active_node': settings.CLUSTER_HOST_ID,
'system_uuid': settings.system_uuid,
}
response['instances'] = []
@ -276,6 +278,3 @@ class ApiV1ConfigView(APIView):
except Exception:
# FIX: Log
return Response({"error": _("Failed to remove license.")}, status=status.HTTP_400_BAD_REQUEST)

View File

@ -0,0 +1 @@
from .core import register, gather, ship # noqa

View File

@ -0,0 +1,158 @@
import os.path
from django.db.models import Count
from django.conf import settings
from django.utils.timezone import now
from awx.conf.license import get_license
from awx.main.utils import (get_awx_version, get_ansible_version,
get_custom_venv_choices, camelcase_to_underscore)
from awx.main import models
from django.contrib.sessions.models import Session
from awx.main.analytics import register
#
# This module is used to define metrics collected by awx.main.analytics.gather()
# Each function is decorated with a key name, and should return a data
# structure that can be serialized to JSON
#
# @register('something')
# def something(since):
# # the generated archive will contain a `something.json` w/ this JSON
# return {'some': 'json'}
#
# All functions - when called - will be passed a datetime.datetime object,
# `since`, which represents the last time analytics were gathered (some metrics
# functions - like those that return metadata about playbook runs, may return
# data _since_ the last report date - i.e., new data in the last 24 hours)
#
@register('config')
def config(since):
license_info = get_license(show_key=False)
return {
'system_uuid': settings.SYSTEM_UUID,
'tower_url_base': settings.TOWER_URL_BASE,
'tower_version': get_awx_version(),
'ansible_version': get_ansible_version(),
'license_type': license_info.get('license_type', 'UNLICENSED'),
'free_instances': license_info.get('free instances', 0),
'license_expiry': license_info.get('time_remaining', 0),
'authentication_backends': settings.AUTHENTICATION_BACKENDS,
'logging_aggregators': settings.LOG_AGGREGATOR_LOGGERS
}
@register('counts')
def counts(since):
counts = {}
for cls in (models.Organization, models.Team, models.User,
models.Inventory, models.Credential, models.Project,
models.JobTemplate, models.WorkflowJobTemplate, models.Host,
models.Schedule, models.CustomInventoryScript,
models.NotificationTemplate):
counts[camelcase_to_underscore(cls.__name__)] = cls.objects.count()
venvs = get_custom_venv_choices()
counts['custom_virtualenvs'] = len([
v for v in venvs
if os.path.basename(v.rstrip('/')) != 'ansible'
])
counts['active_host_count'] = models.Host.objects.active_count()
counts['smart_inventories'] = models.Inventory.objects.filter(kind='smart').count()
counts['normal_inventories'] = models.Inventory.objects.filter(kind='').count()
active_sessions = Session.objects.filter(expire_date__gte=now()).count()
api_sessions = models.UserSessionMembership.objects.select_related('session').filter(session__expire_date__gte=now()).count()
channels_sessions = active_sessions - api_sessions
counts['active_sessions'] = active_sessions
counts['active_api_sessions'] = api_sessions
counts['active_channels_sessions'] = channels_sessions
counts['running_jobs'] = models.Job.objects.filter(status='running').count()
return counts
@register('org_counts')
def org_counts(since):
counts = {}
for org in models.Organization.objects.annotate(num_users=Count('member_role__members', distinct=True), num_teams=Count('teams', distinct=True)): # Use .values to make a dict of only the fields we can about where
counts[org.id] = {'name': org.name,
'users': org.num_users,
'teams': org.num_teams
}
return counts
@register('cred_type_counts')
def cred_type_counts(since):
counts = {}
for cred_type in models.CredentialType.objects.annotate(num_credentials=Count('credentials', distinct=True)):
counts[cred_type.id] = {'name': cred_type.name,
'credential_count': cred_type.num_credentials
}
return counts
@register('inventory_counts')
def inventory_counts(since):
counts = {}
from django.db.models import Count
for inv in models.Inventory.objects.annotate(num_sources=Count('inventory_sources', distinct=True), num_hosts=Count('hosts', distinct=True)).only('id', 'name', 'kind'):
counts[inv.id] = {'name': inv.name,
'kind': inv.kind,
'hosts': inv.num_hosts,
'sources': inv.num_sources
}
return counts
@register('projects_by_scm_type')
def projects_by_scm_type(since):
counts = dict(
(t[0] or 'manual', 0)
for t in models.Project.SCM_TYPE_CHOICES
)
for result in models.Project.objects.values('scm_type').annotate(
count=Count('scm_type')
).order_by('scm_type'):
counts[result['scm_type'] or 'manual'] = result['count']
return counts
@register('job_counts') #TODO: evaluate if we want this (was not an ask) Also, may think about annotating rather than grabbing objects for efficiency (even though there will likely be < 100 instances)
def job_counts(since):
counts = {}
counts['total_jobs'] = models.UnifiedJob.objects.all().count()
for instance in models.Instance.objects.all():
counts[instance.id] = {'uuid': instance.uuid,
'jobs_total': instance.jobs_total, # this is _all_ jobs run by that node
'jobs_running': instance.jobs_running, # this is jobs in running & waiting state
}
jobs_running = models.UnifiedJob.objects.filter(execution_node=instance, status__in=('running', 'waiting',)).count()
jobs_total = models.UnifiedJob.objects.filter(execution_node=instance).count()
counts['total_jobs'] = models.UnifiedJob.objects.annotate(running_jobs=)
for instance in models.Instance.objects.all():
counts[instance.id] = {'uuid': instance.uuid,
'jobs_total': instance.jobs_total, # this is _all_ jobs run by that node
'jobs_running': instance.jobs_running, # this is jobs in running & waiting state
}
jobs_running = models.UnifiedJob.objects.filter(execution_node=instance, status__in=('running', 'waiting',)).count()
jobs_total = models.UnifiedJob.objects.filter(execution_node=instance).count()
return counts
@register('jobs')
def jobs(since):
counts = {}
jobs = models.Job.objects.filter(created__gt=since)
counts['latest_jobs'] = models.Job.objects.filter(created__gt=since).count()
return counts

130
awx/main/analytics/core.py Normal file
View File

@ -0,0 +1,130 @@
import codecs
import inspect
import json
import logging
import os
import os.path
import tempfile
import shutil
import subprocess
from django.conf import settings
from django.utils.encoding import smart_str
from django.utils.timezone import now, timedelta
from rest_framework.exceptions import PermissionDenied
from awx.main.models import Job
from awx.main.access import access_registry
from awx.main.models.ha import TowerAnalyticsState
__all__ = ['register', 'gather', 'ship']
logger = logging.getLogger('awx.main.analytics')
def _valid_license():
try:
access_registry[Job](None).check_license()
except PermissionDenied:
logger.exception("A valid license was not found:")
return False
return True
def register(key):
"""
A decorator used to register a function as a metric collector.
Decorated functions should return JSON-serializable objects.
@register('projects_by_scm_type')
def projects_by_scm_type():
return {'git': 5, 'svn': 1, 'hg': 0}
"""
def decorate(f):
f.__awx_analytics_key__ = key
return f
return decorate
def gather(dest=None, module=None):
"""
Gather all defined metrics and write them as JSON files in a .tgz
:param dest: the (optional) absolute path to write a compressed tarball
:pararm module: the module to search for registered analytic collector
functions; defaults to awx.main.analytics.collectors
"""
import time # TODO: Remove this
start_time = time.time() # TODO: Remove this
run_now = now()
state = TowerAnalyticsState.get_solo()
last_run = state.last_run
logger.debug("Last analytics run was: {}".format(last_run))
state.last_run = run_now
state.save()
max_interval = now() - timedelta(days=7)
if last_run < max_interval or not last_run:
last_run = max_interval
if _valid_license() is False:
logger.exception("Invalid License provided, or No License Provided")
return
if module is None:
from awx.main.analytics import collectors
module = collectors
dest = dest or tempfile.mkdtemp(prefix='awx_analytics')
for name, func in inspect.getmembers(module):
if inspect.isfunction(func) and hasattr(func, '__awx_analytics_key__'):
key = func.__awx_analytics_key__
path = '{}.json'.format(os.path.join(dest, key))
with codecs.open(path, 'w', encoding='utf-8') as f:
try:
json.dump(func(last_run), f)
except Exception:
logger.exception("Could not generate metric {}.json".format(key))
f.close()
os.remove(f.name)
# can't use isoformat() since it has colons, which GNU tar doesn't like
tarname = '_'.join([
settings.SYSTEM_UUID,
run_now.strftime('%Y-%m-%d-%H%M%S%z')
])
tgz = shutil.make_archive(
os.path.join(os.path.dirname(dest), tarname),
'gztar',
dest
)
shutil.rmtree(dest)
print("Analytics Time --- %s seconds ---" % (time.time() - start_time)) # TODO: Remove this
return tgz
def ship(path):
"""
Ship gathered metrics via the Insights agent
"""
agent = 'insights-client'
if shutil.which(agent) is None:
logger.error('could not find {} on PATH'.format(agent))
return
logger.debug('shipping analytics file: {}'.format(path))
try:
cmd = [
agent, '--payload', path, '--content-type', settings.INSIGHTS_AGENT_MIME
]
output = smart_str(subprocess.check_output(cmd, timeout=60 * 5))
logger.debug(output)
except subprocess.CalledProcessError:
logger.exception('{} failure:'.format(cmd))
except subprocess.TimeoutExpired:
logger.exception('{} timeout:'.format(cmd))

View File

@ -0,0 +1,30 @@
import logging
from awx.main.analytics import gather, ship
from django.core.management.base import BaseCommand
class Command(BaseCommand):
'''
Gather AWX analytics data
'''
help = 'Gather AWX analytics data'
def add_arguments(self, parser):
parser.add_argument('--ship', dest='ship', action='store_true',
help='Enable to ship metrics via insights-client')
def init_logging(self):
self.logger = logging.getLogger('awx.main.analytics')
handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)
handler.setFormatter(logging.Formatter('%(message)s'))
self.logger.addHandler(handler)
self.logger.propagate = False
def handle(self, *args, **options):
tgz = gather()
self.init_logging()
self.logger.debug(tgz)
if options.get('ship'):
ship(tgz)

View File

@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.16 on 2019-01-28 14:27
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0061_v350_track_native_credentialtype_source'),
]
operations = [
migrations.CreateModel(
name='TowerAnalyticsState',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('last_run', models.DateTimeField(auto_now_add=True)),
],
options={
'abstract': False,
},
),
]

View File

@ -26,7 +26,7 @@ from awx.main.models.unified_jobs import UnifiedJob
from awx.main.utils import get_cpu_capacity, get_mem_capacity, get_system_task_capacity
from awx.main.models.mixins import RelatedJobsMixin
__all__ = ('Instance', 'InstanceGroup', 'JobOrigin', 'TowerScheduleState',)
__all__ = ('Instance', 'InstanceGroup', 'JobOrigin', 'TowerScheduleState', 'TowerAnalyticsState')
class HasPolicyEditsMixin(HasEditsMixin):
@ -251,6 +251,10 @@ class TowerScheduleState(SingletonModel):
schedule_last_run = models.DateTimeField(auto_now_add=True)
class TowerAnalyticsState(SingletonModel):
last_run = models.DateTimeField(auto_now_add=True)
class JobOrigin(models.Model):
"""A model representing the relationship between a unified job and
the instance that was responsible for starting that job.

View File

@ -156,7 +156,9 @@ class Profile(CreatedModifiedModel):
class UserSessionMembership(BaseModel):
'''
A lookup table for session membership given user.
A lookup table for API session membership given user. Note, there is a
different session created by channels for websockets using the same
underlying model.
'''
class Meta:

View File

@ -321,6 +321,19 @@ def send_notifications(notification_list, job_id=None):
logger.exception('Error saving notification {} result.'.format(notification.id))
@task()
def gather_analytics():
if settings.PENDO_TRACKING_STATE == 'off':
return
try:
tgz = analytics.gather()
logger.debug('gathered analytics: {}'.format(tgz))
analytics.ship(tgz)
finally:
if os.path.exists(tgz):
os.remove(tgz)
@task()
def run_administrative_checks():
logger.warn("Running administrative checks.")

View File

@ -0,0 +1,44 @@
import importlib
import json
import os
import tarfile
import pytest
from awx.main.analytics import gather, register
@register('example')
def example(since):
return {'awx': 123}
@register('bad_json')
def bad_json(since):
return set()
@register('throws_error')
def throws_error(since):
raise ValueError()
@pytest.mark.django_db
def test_gather():
tgz = gather(module=importlib.import_module(__name__))
files = {}
with tarfile.open(tgz, "r:gz") as archive:
for member in archive.getmembers():
files[member.name] = archive.extractfile(member)
# functions that returned valid JSON should show up
assert './example.json' in files.keys()
assert json.loads(files['./example.json'].read()) == {'awx': 123}
# functions that don't return serializable objects should not
assert './bad_json.json' not in files.keys()
assert './throws_error.json' not in files.keys()
try:
os.remove(tgz)
except Exception:
pass

View File

@ -0,0 +1,45 @@
import pytest
from awx.main import models
from awx.main.analytics import collectors
@pytest.mark.django_db
def test_empty():
assert collectors.counts(None) == {
'organization': 0,
'team': 0,
'user': 0,
'inventory': 0,
'credential': 0,
'project': 0,
'job_template': 0,
'workflow_job_template': 0,
'host': 0,
'schedule': 0,
'custom_inventory_script': 0,
'custom_virtualenvs': 1 # dev env ansible3
}
@pytest.mark.django_db
def test_database_counts(organization_factory, job_template_factory,
workflow_job_template_factory):
objs = organization_factory('org', superusers=['admin'])
jt = job_template_factory('test', organization=objs.organization,
inventory='test_inv', project='test_project',
credential='test_cred')
workflow_job_template_factory('test')
models.Team(organization=objs.organization).save()
models.Host(inventory=jt.inventory).save()
models.Schedule(
rrule='DTSTART;TZID=America/New_York:20300504T150000',
unified_job_template=jt.job_template
).save()
models.CustomInventoryScript(organization=objs.organization).save()
counts = collectors.counts(None)
for key in ('organization', 'team', 'user', 'inventory', 'credential',
'project', 'job_template', 'workflow_job_template', 'host',
'schedule', 'custom_inventory_script'):
assert counts[key] == 1

View File

@ -0,0 +1,32 @@
import pytest
import random
from awx.main.models import Project
from awx.main.analytics import collectors
@pytest.mark.django_db
def test_empty():
assert collectors.projects_by_scm_type(None) == {
'manual': 0,
'git': 0,
'svn': 0,
'hg': 0,
'insights': 0
}
@pytest.mark.django_db
@pytest.mark.parametrize('scm_type', [t[0] for t in Project.SCM_TYPE_CHOICES])
def test_multiple(scm_type):
expected = {
'manual': 0,
'git': 0,
'svn': 0,
'hg': 0,
'insights': 0
}
for i in range(random.randint(0, 10)):
Project(scm_type=scm_type).save()
expected[scm_type or 'manual'] += 1
assert collectors.projects_by_scm_type(None) == expected

View File

@ -5,6 +5,7 @@ import os
import re # noqa
import sys
from datetime import timedelta
from celery.schedules import crontab
# global settings
from django.conf import global_settings
@ -486,6 +487,10 @@ CELERYBEAT_SCHEDULE = {
'task': 'awx.main.tasks.purge_old_stdout_files',
'schedule': timedelta(days=7)
},
'gather_analytics': {
'task': 'awx.main.tasks.gather_analytics',
'schedule': crontab(hour=0)
},
'task_manager': {
'task': 'awx.main.scheduler.tasks.run_task_manager',
'schedule': timedelta(seconds=20),
@ -958,6 +963,7 @@ TOWER_ADMIN_ALERTS = True
TOWER_URL_BASE = "https://towerhost"
INSIGHTS_URL_BASE = "https://example.org"
INSIGHTS_AGENT_MIME = 'application/example'
TOWER_SETTINGS_MANIFEST = {}