add copy events table and remove events queries

add copy unifiedjob table, futher optimization, rm since from job_instance queries
This commit is contained in:
Christian Adams 2019-03-12 10:30:01 -04:00
parent 636153d92c
commit a7368cec43
7 changed files with 166 additions and 75 deletions

View File

@ -175,6 +175,9 @@ class SettingsRegistry(object):
# `PENDO_TRACKING_STATE` is disabled for the open source awx license
if setting == 'PENDO_TRACKING_STATE' and get_license().get('license_type') == 'open':
field_instance.read_only = True
if setting == 'INSIGHTS_DATA_ENABLED' and get_license().get('license_type') == 'open':
field_instance.read_only = True
return field_instance

View File

@ -1,5 +1,6 @@
import os.path
from django.db import connection
from django.db.models import Count
from django.conf import settings
from django.utils.timezone import now
@ -11,23 +12,21 @@ from awx.main import models
from django.contrib.sessions.models import Session
from awx.main.analytics import register
'''
This module is used to define metrics collected by awx.main.analytics.gather()
Each function is decorated with a key name, and should return a data
structure that can be serialized to JSON
@register('something')
def something(since):
# the generated archive will contain a `something.json` w/ this JSON
return {'some': 'json'}
#
# This module is used to define metrics collected by awx.main.analytics.gather()
# Each function is decorated with a key name, and should return a data
# structure that can be serialized to JSON
#
# @register('something')
# def something(since):
# # the generated archive will contain a `something.json` w/ this JSON
# return {'some': 'json'}
#
# All functions - when called - will be passed a datetime.datetime object,
# `since`, which represents the last time analytics were gathered (some metrics
# functions - like those that return metadata about playbook runs, may return
# data _since_ the last report date - i.e., new data in the last 24 hours)
#
All functions - when called - will be passed a datetime.datetime object,
`since`, which represents the last time analytics were gathered (some metrics
functions - like those that return metadata about playbook runs, may return
data _since_ the last report date - i.e., new data in the last 24 hours)
'''
@register('config')
@ -41,16 +40,21 @@ def config(since):
'license_type': license_info.get('license_type', 'UNLICENSED'),
'free_instances': license_info.get('free instances', 0),
'license_expiry': license_info.get('time_remaining', 0),
'pendo_tracking': settings.PENDO_TRACKING_STATE,
'authentication_backends': settings.AUTHENTICATION_BACKENDS,
'logging_aggregators': settings.LOG_AGGREGATOR_LOGGERS
'logging_aggregators': settings.LOG_AGGREGATOR_LOGGERS,
'external_logger_enabled': settings.LOG_AGGREGATOR_ENABLED,
'external_logger_type': getattr(settings, 'LOG_AGGREGATOR_TYPE', None),
}
@register('counts')
def counts(since):
counts = {}
for cls in (models.Organization, models.Team, models.User,
models.Inventory, models.Credential, models.Project,
models.JobTemplate, models.WorkflowJobTemplate, models.Host,
models.JobTemplate, models.WorkflowJobTemplate,
models.UnifiedJob, models.Host,
models.Schedule, models.CustomInventoryScript,
models.NotificationTemplate):
counts[camelcase_to_underscore(cls.__name__)] = cls.objects.count()
@ -61,53 +65,63 @@ def counts(since):
if os.path.basename(v.rstrip('/')) != 'ansible'
])
counts['active_host_count'] = models.Host.objects.active_count()
counts['smart_inventories'] = models.Inventory.objects.filter(kind='smart').count()
counts['normal_inventories'] = models.Inventory.objects.filter(kind='').count()
inv_counts = dict(models.Inventory.objects.order_by().values_list('kind').annotate(Count('kind')))
inv_counts['normal'] = inv_counts.get('', 0)
inv_counts['smart'] = inv_counts.get('smart', 0)
counts['inventories'] = inv_counts
counts['active_host_count'] = models.Host.objects.active_count()
active_sessions = Session.objects.filter(expire_date__gte=now()).count()
api_sessions = models.UserSessionMembership.objects.select_related('session').filter(session__expire_date__gte=now()).count()
channels_sessions = active_sessions - api_sessions
counts['active_sessions'] = active_sessions
counts['active_api_sessions'] = api_sessions
counts['active_channels_sessions'] = channels_sessions
counts['running_jobs'] = models.Job.objects.filter(status='running').count()
counts['running_jobs'] = models.UnifiedJob.objects.filter(status__in=('running', 'waiting',)).count()
return counts
@register('org_counts')
def org_counts(since):
counts = {}
for org in models.Organization.objects.annotate(num_users=Count('member_role__members', distinct=True),
num_teams=Count('teams', distinct=True)): # Use .values to make a dict of only the fields we can about where
counts[org.id] = {'name': org.name,
'users': org.num_users,
'teams': org.num_teams
}
num_teams=Count('teams', distinct=True)).values('name', 'id', 'num_users', 'num_teams'):
counts[org['id']] = {'name': org['name'],
'users': org['num_users'],
'teams': org['num_teams']
}
return counts
@register('cred_type_counts')
def cred_type_counts(since):
counts = {}
for cred_type in models.CredentialType.objects.annotate(num_credentials=Count('credentials', distinct=True)):
counts[cred_type.id] = {'name': cred_type.name,
'credential_count': cred_type.num_credentials
}
for cred_type in models.CredentialType.objects.annotate(num_credentials=Count(
'credentials', distinct=True)).values('name', 'id', 'managed_by_tower', 'num_credentials'):
counts[cred_type['id']] = {'name': cred_type['name'],
'credential_count': cred_type['num_credentials'],
'managed_by_tower': cred_type['managed_by_tower']
}
return counts
@register('inventory_counts')
def inventory_counts(since):
counts = {}
from django.db.models import Count
for inv in models.Inventory.objects.annotate(num_sources=Count('inventory_sources', distinct=True),
num_hosts=Count('hosts', distinct=True)).only('id', 'name', 'kind'):
for inv in models.Inventory.objects.filter(kind='').annotate(num_sources=Count('inventory_sources', distinct=True),
num_hosts=Count('hosts', distinct=True)).only('id', 'name', 'kind'):
counts[inv.id] = {'name': inv.name,
'kind': inv.kind,
'hosts': inv.num_hosts,
'sources': inv.num_sources
}
'kind': inv.kind,
'hosts': inv.num_hosts,
'sources': inv.num_sources
}
for smart_inv in models.Inventory.objects.filter(kind='smart'):
counts[smart_inv.id] = {'name': smart_inv.name,
'kind': smart_inv.kind,
'num_hosts': smart_inv.hosts.count(),
'num_sources': smart_inv.inventory_sources.count()
}
return counts
@ -124,24 +138,99 @@ def projects_by_scm_type(since):
return counts
@register('instance_info')
def instance_info(since):
info = {}
instances = models.Instance.objects.values_list('hostname').annotate().values(
'uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'hostname', 'last_isolated_check')
for instance in instances:
info = {'uuid': instance['uuid'],
'version': instance['version'],
'capacity': instance['capacity'],
'cpu': instance['cpu'],
'memory': instance['memory'],
'managed_by_policy': instance['managed_by_policy'],
'last_isolated_check': instance['last_isolated_check'],
}
return info
@register('job_counts')
def job_counts(since): #TODO: Optimize -- for example, all of these are going to need to be restrained to the last 24 hours/INSIGHTS_SCHEDULE
def job_counts(since):
counts = {}
counts['total_jobs'] = models.UnifiedJob.objects.all().count()
counts['total_jobs'] = models.UnifiedJob.objects.count()
counts['status'] = dict(models.UnifiedJob.objects.values_list('status').annotate(Count('status')))
for instance in models.Instance.objects.all():
counts[instance.id] = {'uuid': instance.uuid,
'jobs_total': models.UnifiedJob.objects.filter(execution_node=instance.hostname, status__in=('running', 'waiting',)).count(),
'jobs_running': models.UnifiedJob.objects.filter(execution_node=instance.hostname).count(), # jobs in running & waiting state
'launch_type': dict(models.UnifiedJob.objects.filter(execution_node=instance.hostname).values_list('launch_type').annotate(Count('launch_type')))
}
counts['launch_type'] = dict(models.UnifiedJob.objects.values_list('launch_type').annotate(Count('launch_type')))
return counts
@register('jobs')
def jobs(since):
@register('job_instance_counts')
def job_instance_counts(since):
counts = {}
jobs = models.Job.objects.filter(created__gt=since)
counts['latest_jobs'] = models.Job.objects.filter(created__gt=since).count()
job_types = models.UnifiedJob.objects.values_list(
'execution_node', 'launch_type').annotate(job_launch_type=Count('launch_type'))
for job in job_types:
counts.setdefault(job[0], {}).setdefault('status', {})[job[1]] = job[2]
job_statuses = models.UnifiedJob.objects.values_list(
'execution_node', 'status').annotate(job_status=Count('status'))
for job in job_statuses:
counts.setdefault(job[0], {}).setdefault('launch_type', {})[job[1]] = job[2]
return counts
# Copies Job Events from db to a .csv to be shipped
def copy_tables(since, full_path):
def _copy_table(table, query, path):
events_file = os.path.join(path, table + '_table.csv')
write_data = open(events_file, 'w', encoding='utf-8')
with connection.cursor() as cursor:
cursor.copy_expert(query, write_data)
write_data.close()
return events_file
events_query = '''COPY (SELECT main_jobevent.id,
main_jobevent.created,
main_jobevent.uuid,
main_jobevent.parent_uuid,
main_jobevent.event,
main_jobevent.event_data::json->'task_action'
main_jobevent.failed,
main_jobevent.changed,
main_jobevent.playbook,
main_jobevent.play,
main_jobevent.task,
main_jobevent.role,
main_jobevent.job_id,
main_jobevent.host_id,
main_jobevent.host_name,
FROM main_jobevent
WHERE main_jobevent.created > {}
ORDER BY main_jobevent.id ASC) to stdout'''.format(since.strftime("'%Y-%m-%d %H:%M:%S'"))
_copy_table(table='events', query=events_query, path=full_path)
unified_job_query = '''COPY (SELECT main_unifiedjob.id,
main_unifiedjob.polymorphic_ctype_id,
django_content_type.model,
main_unifiedjob.created,
main_unifiedjob.name,
main_unifiedjob.unified_job_template_id,
main_unifiedjob.launch_type,
main_unifiedjob.schedule_id,
main_unifiedjob.execution_node,
main_unifiedjob.controller_node,
main_unifiedjob.cancel_flag,
main_unifiedjob.status,
main_unifiedjob.failed,
main_unifiedjob.started,
main_unifiedjob.finished,
main_unifiedjob.elapsed,
main_unifiedjob.job_explanation,
main_unifiedjob.instance_group_id
FROM main_unifiedjob, django_content_type
WHERE main_unifiedjob.created > {} and main_unifiedjob.polymorphic_ctype_id = django_content_type.id
ORDER BY main_unifiedjob.id ASC) to stdout'''.format(since.strftime("'%Y-%m-%d %H:%M:%S'"))
_copy_table(table='unified_jobs', query=unified_job_query, path=full_path)
return

View File

@ -1,4 +1,3 @@
import codecs
import inspect
import json
import logging
@ -31,7 +30,7 @@ def _valid_license():
logger.exception("A valid license was not found:")
return False
return True
def register(key):
"""
@ -59,8 +58,6 @@ def gather(dest=None, module=None):
:pararm module: the module to search for registered analytic collector
functions; defaults to awx.main.analytics.collectors
"""
import time # TODO: Remove this
start_time = time.time() # TODO: Remove this
run_now = now()
state = TowerAnalyticsState.get_solo()
@ -73,8 +70,8 @@ def gather(dest=None, module=None):
if last_run < max_interval or not last_run:
last_run = max_interval
if settings.INSIGHTS_DATA_ENABLED:
logger.exception("Insights not enabled. Analytics data not gathered.")
if not settings.INSIGHTS_DATA_ENABLED:
logger.exception("Insights analytics not enabled")
return
if _valid_license() is False:
@ -90,14 +87,18 @@ def gather(dest=None, module=None):
if inspect.isfunction(func) and hasattr(func, '__awx_analytics_key__'):
key = func.__awx_analytics_key__
path = '{}.json'.format(os.path.join(dest, key))
with codecs.open(path, 'w', encoding='utf-8') as f:
with open(path, 'w', encoding='utf-8') as f:
try:
json.dump(func(last_run), f)
except Exception:
logger.exception("Could not generate metric {}.json".format(key))
f.close()
os.remove(f.name)
try:
collectors.copy_tables(since=last_run, full_path=dest)
except Exception:
logger.exception("Could not copy tables")
# can't use isoformat() since it has colons, which GNU tar doesn't like
tarname = '_'.join([
settings.SYSTEM_UUID,
@ -109,7 +110,6 @@ def gather(dest=None, module=None):
dest
)
shutil.rmtree(dest)
print("Analytics Time --- %s seconds ---" % (time.time() - start_time)) # TODO: Remove this
return tgz

View File

@ -307,8 +307,8 @@ register(
default=False,
label=_('Insights Analytics Data'),
help_text=_('Enables Tower to gather analytics data about itself and send it to Insights.'),
category=_('Jobs'),
category_slug='jobs',
category=_('System'),
category_slug='system',
)
register(

View File

@ -13,20 +13,20 @@ def test_empty():
"active_sessions": 0,
"credential": 0,
"custom_inventory_script": 0,
"custom_virtualenvs": 1, # dev env ansible3
"custom_virtualenvs": 0, # dev env ansible3
"host": 0,
"inventory": 0,
'inventory': 0,
"inventories": {'normal': 0, 'smart': 0},
"job_template": 0,
"normal_inventories": 0,
"notification_template": 0,
"organization": 0,
"project": 0,
"running_jobs": 0,
"schedule": 0,
"smart_inventories": 0,
"team": 0,
"user": 0,
"workflow_job_template": 0
"workflow_job_template": 0,
'unified_job': 0
}

View File

@ -672,11 +672,10 @@ AWX_AUTO_DEPROVISION_INSTANCES = False
# Note: This setting may be overridden by database settings.
PENDO_TRACKING_STATE = "off"
# Enables Insights data collection for Ansible Tower. Defaults to the value of PENDO_TRACKING_STATE
if PENDO_TRACKING_STATE == "off":
INSIGHTS_DATA_ENABLED = False
else:
INSIGHTS_DATA_ENABLED = True
# Enables Insights data collection for Ansible Tower.
# Note: This setting may be overridden by database settings.
INSIGHTS_DATA_ENABLED = False
# Default list of modules allowed for ad hoc commands.
# Note: This setting may be overridden by database settings.

View File

@ -17,8 +17,8 @@ register(
('anonymous', _('Anonymous')),
('detailed', _('Detailed')),
],
label=_('Analytics Tracking State'),
help_text=_('Enable or Disable Analytics Tracking.'),
label=_('Pendo Analytics Tracking State'),
help_text=_('Enable or Disable Pendo Analytics Tracking.'),
category=_('UI'),
category_slug='ui',
)