add copy events table and remove events queries

add copy unifiedjob table, further optimization, rm since from job_instance queries
This commit is contained in:
Christian Adams
2019-03-12 10:30:01 -04:00
parent 636153d92c
commit a7368cec43
7 changed files with 166 additions and 75 deletions

View File

@@ -175,6 +175,9 @@ class SettingsRegistry(object):
# `PENDO_TRACKING_STATE` is disabled for the open source awx license # `PENDO_TRACKING_STATE` is disabled for the open source awx license
if setting == 'PENDO_TRACKING_STATE' and get_license().get('license_type') == 'open': if setting == 'PENDO_TRACKING_STATE' and get_license().get('license_type') == 'open':
field_instance.read_only = True field_instance.read_only = True
if setting == 'INSIGHTS_DATA_ENABLED' and get_license().get('license_type') == 'open':
field_instance.read_only = True
return field_instance return field_instance

View File

@@ -1,5 +1,6 @@
import os.path import os.path
from django.db import connection
from django.db.models import Count from django.db.models import Count
from django.conf import settings from django.conf import settings
from django.utils.timezone import now from django.utils.timezone import now
@@ -11,23 +12,21 @@ from awx.main import models
from django.contrib.sessions.models import Session from django.contrib.sessions.models import Session
from awx.main.analytics import register from awx.main.analytics import register
'''
This module is used to define metrics collected by awx.main.analytics.gather()
Each function is decorated with a key name, and should return a data
structure that can be serialized to JSON
@register('something')
def something(since):
# the generated archive will contain a `something.json` w/ this JSON
return {'some': 'json'}
# All functions - when called - will be passed a datetime.datetime object,
# This module is used to define metrics collected by awx.main.analytics.gather() `since`, which represents the last time analytics were gathered (some metrics
# Each function is decorated with a key name, and should return a data functions - like those that return metadata about playbook runs, may return
# structure that can be serialized to JSON data _since_ the last report date - i.e., new data in the last 24 hours)
# '''
# @register('something')
# def something(since):
# # the generated archive will contain a `something.json` w/ this JSON
# return {'some': 'json'}
#
# All functions - when called - will be passed a datetime.datetime object,
# `since`, which represents the last time analytics were gathered (some metrics
# functions - like those that return metadata about playbook runs, may return
# data _since_ the last report date - i.e., new data in the last 24 hours)
#
@register('config') @register('config')
@@ -41,16 +40,21 @@ def config(since):
'license_type': license_info.get('license_type', 'UNLICENSED'), 'license_type': license_info.get('license_type', 'UNLICENSED'),
'free_instances': license_info.get('free instances', 0), 'free_instances': license_info.get('free instances', 0),
'license_expiry': license_info.get('time_remaining', 0), 'license_expiry': license_info.get('time_remaining', 0),
'pendo_tracking': settings.PENDO_TRACKING_STATE,
'authentication_backends': settings.AUTHENTICATION_BACKENDS, 'authentication_backends': settings.AUTHENTICATION_BACKENDS,
'logging_aggregators': settings.LOG_AGGREGATOR_LOGGERS 'logging_aggregators': settings.LOG_AGGREGATOR_LOGGERS,
'external_logger_enabled': settings.LOG_AGGREGATOR_ENABLED,
'external_logger_type': getattr(settings, 'LOG_AGGREGATOR_TYPE', None),
} }
@register('counts') @register('counts')
def counts(since): def counts(since):
counts = {} counts = {}
for cls in (models.Organization, models.Team, models.User, for cls in (models.Organization, models.Team, models.User,
models.Inventory, models.Credential, models.Project, models.Inventory, models.Credential, models.Project,
models.JobTemplate, models.WorkflowJobTemplate, models.Host, models.JobTemplate, models.WorkflowJobTemplate,
models.UnifiedJob, models.Host,
models.Schedule, models.CustomInventoryScript, models.Schedule, models.CustomInventoryScript,
models.NotificationTemplate): models.NotificationTemplate):
counts[camelcase_to_underscore(cls.__name__)] = cls.objects.count() counts[camelcase_to_underscore(cls.__name__)] = cls.objects.count()
@@ -61,53 +65,63 @@ def counts(since):
if os.path.basename(v.rstrip('/')) != 'ansible' if os.path.basename(v.rstrip('/')) != 'ansible'
]) ])
counts['active_host_count'] = models.Host.objects.active_count() inv_counts = dict(models.Inventory.objects.order_by().values_list('kind').annotate(Count('kind')))
counts['smart_inventories'] = models.Inventory.objects.filter(kind='smart').count() inv_counts['normal'] = inv_counts.get('', 0)
counts['normal_inventories'] = models.Inventory.objects.filter(kind='').count() inv_counts['smart'] = inv_counts.get('smart', 0)
counts['inventories'] = inv_counts
counts['active_host_count'] = models.Host.objects.active_count()
active_sessions = Session.objects.filter(expire_date__gte=now()).count() active_sessions = Session.objects.filter(expire_date__gte=now()).count()
api_sessions = models.UserSessionMembership.objects.select_related('session').filter(session__expire_date__gte=now()).count() api_sessions = models.UserSessionMembership.objects.select_related('session').filter(session__expire_date__gte=now()).count()
channels_sessions = active_sessions - api_sessions channels_sessions = active_sessions - api_sessions
counts['active_sessions'] = active_sessions counts['active_sessions'] = active_sessions
counts['active_api_sessions'] = api_sessions counts['active_api_sessions'] = api_sessions
counts['active_channels_sessions'] = channels_sessions counts['active_channels_sessions'] = channels_sessions
counts['running_jobs'] = models.Job.objects.filter(status='running').count() counts['running_jobs'] = models.UnifiedJob.objects.filter(status__in=('running', 'waiting',)).count()
return counts return counts
@register('org_counts') @register('org_counts')
def org_counts(since): def org_counts(since):
counts = {} counts = {}
for org in models.Organization.objects.annotate(num_users=Count('member_role__members', distinct=True), for org in models.Organization.objects.annotate(num_users=Count('member_role__members', distinct=True),
num_teams=Count('teams', distinct=True)): # Use .values to make a dict of only the fields we can about where num_teams=Count('teams', distinct=True)).values('name', 'id', 'num_users', 'num_teams'):
counts[org.id] = {'name': org.name, counts[org['id']] = {'name': org['name'],
'users': org.num_users, 'users': org['num_users'],
'teams': org.num_teams 'teams': org['num_teams']
} }
return counts return counts
@register('cred_type_counts') @register('cred_type_counts')
def cred_type_counts(since): def cred_type_counts(since):
counts = {} counts = {}
for cred_type in models.CredentialType.objects.annotate(num_credentials=Count('credentials', distinct=True)): for cred_type in models.CredentialType.objects.annotate(num_credentials=Count(
counts[cred_type.id] = {'name': cred_type.name, 'credentials', distinct=True)).values('name', 'id', 'managed_by_tower', 'num_credentials'):
'credential_count': cred_type.num_credentials counts[cred_type['id']] = {'name': cred_type['name'],
} 'credential_count': cred_type['num_credentials'],
'managed_by_tower': cred_type['managed_by_tower']
}
return counts return counts
@register('inventory_counts') @register('inventory_counts')
def inventory_counts(since): def inventory_counts(since):
counts = {} counts = {}
from django.db.models import Count for inv in models.Inventory.objects.filter(kind='').annotate(num_sources=Count('inventory_sources', distinct=True),
for inv in models.Inventory.objects.annotate(num_sources=Count('inventory_sources', distinct=True), num_hosts=Count('hosts', distinct=True)).only('id', 'name', 'kind'):
num_hosts=Count('hosts', distinct=True)).only('id', 'name', 'kind'):
counts[inv.id] = {'name': inv.name, counts[inv.id] = {'name': inv.name,
'kind': inv.kind, 'kind': inv.kind,
'hosts': inv.num_hosts, 'hosts': inv.num_hosts,
'sources': inv.num_sources 'sources': inv.num_sources
} }
for smart_inv in models.Inventory.objects.filter(kind='smart'):
counts[smart_inv.id] = {'name': smart_inv.name,
'kind': smart_inv.kind,
'num_hosts': smart_inv.hosts.count(),
'num_sources': smart_inv.inventory_sources.count()
}
return counts return counts
@@ -124,24 +138,99 @@ def projects_by_scm_type(since):
return counts return counts
@register('instance_info')
def instance_info(since):
info = {}
instances = models.Instance.objects.values_list('hostname').annotate().values(
'uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'hostname', 'last_isolated_check')
for instance in instances:
info = {'uuid': instance['uuid'],
'version': instance['version'],
'capacity': instance['capacity'],
'cpu': instance['cpu'],
'memory': instance['memory'],
'managed_by_policy': instance['managed_by_policy'],
'last_isolated_check': instance['last_isolated_check'],
}
return info
@register('job_counts') @register('job_counts')
def job_counts(since): #TODO: Optimize -- for example, all of these are going to need to be restrained to the last 24 hours/INSIGHTS_SCHEDULE def job_counts(since):
counts = {} counts = {}
counts['total_jobs'] = models.UnifiedJob.objects.all().count() counts['total_jobs'] = models.UnifiedJob.objects.count()
counts['status'] = dict(models.UnifiedJob.objects.values_list('status').annotate(Count('status'))) counts['status'] = dict(models.UnifiedJob.objects.values_list('status').annotate(Count('status')))
for instance in models.Instance.objects.all(): counts['launch_type'] = dict(models.UnifiedJob.objects.values_list('launch_type').annotate(Count('launch_type')))
counts[instance.id] = {'uuid': instance.uuid,
'jobs_total': models.UnifiedJob.objects.filter(execution_node=instance.hostname, status__in=('running', 'waiting',)).count(),
'jobs_running': models.UnifiedJob.objects.filter(execution_node=instance.hostname).count(), # jobs in running & waiting state
'launch_type': dict(models.UnifiedJob.objects.filter(execution_node=instance.hostname).values_list('launch_type').annotate(Count('launch_type')))
}
return counts return counts
@register('job_instance_counts')
@register('jobs') def job_instance_counts(since):
def jobs(since):
counts = {} counts = {}
jobs = models.Job.objects.filter(created__gt=since) job_types = models.UnifiedJob.objects.values_list(
counts['latest_jobs'] = models.Job.objects.filter(created__gt=since).count() 'execution_node', 'launch_type').annotate(job_launch_type=Count('launch_type'))
for job in job_types:
counts.setdefault(job[0], {}).setdefault('status', {})[job[1]] = job[2]
job_statuses = models.UnifiedJob.objects.values_list(
'execution_node', 'status').annotate(job_status=Count('status'))
for job in job_statuses:
counts.setdefault(job[0], {}).setdefault('launch_type', {})[job[1]] = job[2]
return counts return counts
# Copies Job Events from db to a .csv to be shipped
def copy_tables(since, full_path):
def _copy_table(table, query, path):
events_file = os.path.join(path, table + '_table.csv')
write_data = open(events_file, 'w', encoding='utf-8')
with connection.cursor() as cursor:
cursor.copy_expert(query, write_data)
write_data.close()
return events_file
events_query = '''COPY (SELECT main_jobevent.id,
main_jobevent.created,
main_jobevent.uuid,
main_jobevent.parent_uuid,
main_jobevent.event,
main_jobevent.event_data::json->'task_action'
main_jobevent.failed,
main_jobevent.changed,
main_jobevent.playbook,
main_jobevent.play,
main_jobevent.task,
main_jobevent.role,
main_jobevent.job_id,
main_jobevent.host_id,
main_jobevent.host_name,
FROM main_jobevent
WHERE main_jobevent.created > {}
ORDER BY main_jobevent.id ASC) to stdout'''.format(since.strftime("'%Y-%m-%d %H:%M:%S'"))
_copy_table(table='events', query=events_query, path=full_path)
unified_job_query = '''COPY (SELECT main_unifiedjob.id,
main_unifiedjob.polymorphic_ctype_id,
django_content_type.model,
main_unifiedjob.created,
main_unifiedjob.name,
main_unifiedjob.unified_job_template_id,
main_unifiedjob.launch_type,
main_unifiedjob.schedule_id,
main_unifiedjob.execution_node,
main_unifiedjob.controller_node,
main_unifiedjob.cancel_flag,
main_unifiedjob.status,
main_unifiedjob.failed,
main_unifiedjob.started,
main_unifiedjob.finished,
main_unifiedjob.elapsed,
main_unifiedjob.job_explanation,
main_unifiedjob.instance_group_id
FROM main_unifiedjob, django_content_type
WHERE main_unifiedjob.created > {} and main_unifiedjob.polymorphic_ctype_id = django_content_type.id
ORDER BY main_unifiedjob.id ASC) to stdout'''.format(since.strftime("'%Y-%m-%d %H:%M:%S'"))
_copy_table(table='unified_jobs', query=unified_job_query, path=full_path)
return

View File

@@ -1,4 +1,3 @@
import codecs
import inspect import inspect
import json import json
import logging import logging
@@ -31,7 +30,7 @@ def _valid_license():
logger.exception("A valid license was not found:") logger.exception("A valid license was not found:")
return False return False
return True return True
def register(key): def register(key):
""" """
@@ -59,8 +58,6 @@ def gather(dest=None, module=None):
:pararm module: the module to search for registered analytic collector :pararm module: the module to search for registered analytic collector
functions; defaults to awx.main.analytics.collectors functions; defaults to awx.main.analytics.collectors
""" """
import time # TODO: Remove this
start_time = time.time() # TODO: Remove this
run_now = now() run_now = now()
state = TowerAnalyticsState.get_solo() state = TowerAnalyticsState.get_solo()
@@ -73,8 +70,8 @@ def gather(dest=None, module=None):
if last_run < max_interval or not last_run: if last_run < max_interval or not last_run:
last_run = max_interval last_run = max_interval
if settings.INSIGHTS_DATA_ENABLED: if not settings.INSIGHTS_DATA_ENABLED:
logger.exception("Insights not enabled. Analytics data not gathered.") logger.exception("Insights analytics not enabled")
return return
if _valid_license() is False: if _valid_license() is False:
@@ -90,14 +87,18 @@ def gather(dest=None, module=None):
if inspect.isfunction(func) and hasattr(func, '__awx_analytics_key__'): if inspect.isfunction(func) and hasattr(func, '__awx_analytics_key__'):
key = func.__awx_analytics_key__ key = func.__awx_analytics_key__
path = '{}.json'.format(os.path.join(dest, key)) path = '{}.json'.format(os.path.join(dest, key))
with codecs.open(path, 'w', encoding='utf-8') as f: with open(path, 'w', encoding='utf-8') as f:
try: try:
json.dump(func(last_run), f) json.dump(func(last_run), f)
except Exception: except Exception:
logger.exception("Could not generate metric {}.json".format(key)) logger.exception("Could not generate metric {}.json".format(key))
f.close() f.close()
os.remove(f.name) os.remove(f.name)
try:
collectors.copy_tables(since=last_run, full_path=dest)
except Exception:
logger.exception("Could not copy tables")
# can't use isoformat() since it has colons, which GNU tar doesn't like # can't use isoformat() since it has colons, which GNU tar doesn't like
tarname = '_'.join([ tarname = '_'.join([
settings.SYSTEM_UUID, settings.SYSTEM_UUID,
@@ -109,7 +110,6 @@ def gather(dest=None, module=None):
dest dest
) )
shutil.rmtree(dest) shutil.rmtree(dest)
print("Analytics Time --- %s seconds ---" % (time.time() - start_time)) # TODO: Remove this
return tgz return tgz

View File

@@ -307,8 +307,8 @@ register(
default=False, default=False,
label=_('Insights Analytics Data'), label=_('Insights Analytics Data'),
help_text=_('Enables Tower to gather analytics data about itself and send it to Insights.'), help_text=_('Enables Tower to gather analytics data about itself and send it to Insights.'),
category=_('Jobs'), category=_('System'),
category_slug='jobs', category_slug='system',
) )
register( register(

View File

@@ -13,20 +13,20 @@ def test_empty():
"active_sessions": 0, "active_sessions": 0,
"credential": 0, "credential": 0,
"custom_inventory_script": 0, "custom_inventory_script": 0,
"custom_virtualenvs": 1, # dev env ansible3 "custom_virtualenvs": 0, # dev env ansible3
"host": 0, "host": 0,
"inventory": 0, 'inventory': 0,
"inventories": {'normal': 0, 'smart': 0},
"job_template": 0, "job_template": 0,
"normal_inventories": 0,
"notification_template": 0, "notification_template": 0,
"organization": 0, "organization": 0,
"project": 0, "project": 0,
"running_jobs": 0, "running_jobs": 0,
"schedule": 0, "schedule": 0,
"smart_inventories": 0,
"team": 0, "team": 0,
"user": 0, "user": 0,
"workflow_job_template": 0 "workflow_job_template": 0,
'unified_job': 0
} }

View File

@@ -672,11 +672,10 @@ AWX_AUTO_DEPROVISION_INSTANCES = False
# Note: This setting may be overridden by database settings. # Note: This setting may be overridden by database settings.
PENDO_TRACKING_STATE = "off" PENDO_TRACKING_STATE = "off"
# Enables Insights data collection for Ansible Tower. Defaults to the value of PENDO_TRACKING_STATE # Enables Insights data collection for Ansible Tower.
if PENDO_TRACKING_STATE == "off": # Note: This setting may be overridden by database settings.
INSIGHTS_DATA_ENABLED = False INSIGHTS_DATA_ENABLED = False
else:
INSIGHTS_DATA_ENABLED = True
# Default list of modules allowed for ad hoc commands. # Default list of modules allowed for ad hoc commands.
# Note: This setting may be overridden by database settings. # Note: This setting may be overridden by database settings.

View File

@@ -17,8 +17,8 @@ register(
('anonymous', _('Anonymous')), ('anonymous', _('Anonymous')),
('detailed', _('Detailed')), ('detailed', _('Detailed')),
], ],
label=_('Analytics Tracking State'), label=_('Pendo Analytics Tracking State'),
help_text=_('Enable or Disable Analytics Tracking.'), help_text=_('Enable or Disable Pendo Analytics Tracking.'),
category=_('UI'), category=_('UI'),
category_slug='ui', category_slug='ui',
) )