diff --git a/awx/api/views/root.py b/awx/api/views/root.py index 4a15936e9b..aeda19cdeb 100644 --- a/awx/api/views/root.py +++ b/awx/api/views/root.py @@ -21,6 +21,7 @@ import requests from awx.api.generics import APIView from awx.conf.registry import settings_registry +from awx.main.analytics import all_collectors from awx.main.ha import is_ha_environment from awx.main.utils import ( get_awx_version, @@ -252,6 +253,7 @@ class ApiV2ConfigView(APIView): ansible_version=get_ansible_version(), eula=render_to_string("eula.md") if license_data.get('license_type', 'UNLICENSED') != 'open' else '', analytics_status=pendo_state, + analytics_collectors=all_collectors(), become_methods=PRIVILEGE_ESCALATION_METHODS, ) diff --git a/awx/main/analytics/__init__.py b/awx/main/analytics/__init__.py index 9ab526f29b..4302dc9cf2 100644 --- a/awx/main/analytics/__init__.py +++ b/awx/main/analytics/__init__.py @@ -1 +1 @@ -from .core import register, gather, ship, table_version # noqa +from .core import all_collectors, register, gather, ship # noqa diff --git a/awx/main/analytics/collectors.py b/awx/main/analytics/collectors.py index ba27905092..01f7f37cdb 100644 --- a/awx/main/analytics/collectors.py +++ b/awx/main/analytics/collectors.py @@ -6,13 +6,14 @@ from django.db import connection from django.db.models import Count from django.conf import settings from django.utils.timezone import now +from django.utils.translation import ugettext_lazy as _ from awx.conf.license import get_license from awx.main.utils import (get_awx_version, get_ansible_version, get_custom_venv_choices, camelcase_to_underscore) from awx.main import models from django.contrib.sessions.models import Session -from awx.main.analytics import register, table_version +from awx.main.analytics import register ''' This module is used to define metrics collected by awx.main.analytics.gather() @@ -31,7 +32,7 @@ data _since_ the last report date - i.e., new data in the last 24 hours) ''' -@register('config', '1.1') 
+@register('config', '1.1', description=_('General platform configuration')) def config(since): license_info = get_license(show_key=False) install_type = 'traditional' @@ -63,7 +64,7 @@ def config(since): } -@register('counts', '1.0') +@register('counts', '1.0', description=_('Counts of objects such as organizations, inventories, and projects')) def counts(since): counts = {} for cls in (models.Organization, models.Team, models.User, @@ -98,7 +99,7 @@ def counts(since): return counts -@register('org_counts', '1.0') +@register('org_counts', '1.0', description=_('Counts of users and teams by organization')) def org_counts(since): counts = {} for org in models.Organization.objects.annotate(num_users=Count('member_role__members', distinct=True), @@ -110,7 +111,7 @@ def org_counts(since): return counts -@register('cred_type_counts', '1.0') +@register('cred_type_counts', '1.0', description=_('Counts of credentials by credential type')) def cred_type_counts(since): counts = {} for cred_type in models.CredentialType.objects.annotate(num_credentials=Count( @@ -122,7 +123,7 @@ def cred_type_counts(since): return counts -@register('inventory_counts', '1.2') +@register('inventory_counts', '1.2', description=_('Inventories, their inventory sources, and host counts')) def inventory_counts(since): counts = {} for inv in models.Inventory.objects.filter(kind='').annotate(num_sources=Count('inventory_sources', distinct=True), @@ -147,7 +148,7 @@ def inventory_counts(since): return counts -@register('projects_by_scm_type', '1.0') +@register('projects_by_scm_type', '1.0', description=_('Counts of projects by source control type')) def projects_by_scm_type(since): counts = dict( (t[0] or 'manual', 0) @@ -166,7 +167,7 @@ def _get_isolated_datetime(last_check): return last_check -@register('instance_info', '1.0') +@register('instance_info', '1.0', description=_('Cluster topology and capacity')) def instance_info(since, include_hostnames=False): info = {} instances = 
models.Instance.objects.values_list('hostname').values( @@ -192,7 +193,7 @@ def instance_info(since, include_hostnames=False): return info -@register('job_counts', '1.0') +@register('job_counts', '1.0', description=_('Counts of jobs by status')) def job_counts(since): counts = {} counts['total_jobs'] = models.UnifiedJob.objects.exclude(launch_type='sync').count() @@ -202,7 +203,7 @@ def job_counts(since): return counts -@register('job_instance_counts', '1.0') +@register('job_instance_counts', '1.0', description=_('Counts of jobs by execution node')) def job_instance_counts(since): counts = {} job_types = models.UnifiedJob.objects.exclude(launch_type='sync').values_list( @@ -217,7 +218,7 @@ def job_instance_counts(since): return counts -@register('query_info', '1.0') +@register('query_info', '1.0', description=_('Metadata about the analytics collected')) def query_info(since, collection_type): query_info = {} query_info['last_run'] = str(since) @@ -226,21 +227,17 @@ def query_info(since, collection_type): return query_info -# Copies Job Events from db to a .csv to be shipped -@table_version('events_table.csv', '1.1') -@table_version('unified_jobs_table.csv', '1.1') -@table_version('unified_job_template_table.csv', '1.0') -@table_version('workflow_job_node_table.csv', '1.0') -@table_version('workflow_job_template_node_table.csv', '1.0') -def copy_tables(since, full_path, subset=None): - def _copy_table(table, query, path): - file_path = os.path.join(path, table + '_table.csv') - file = open(file_path, 'w', encoding='utf-8') - with connection.cursor() as cursor: - cursor.copy_expert(query, file) - file.close() - return file_path +def _copy_table(table, query, path): + file_path = os.path.join(path, table + '_table.csv') + file = open(file_path, 'w', encoding='utf-8') + with connection.cursor() as cursor: + cursor.copy_expert(query, file) + file.close() + return file_path + +@register('events_table', '1.1', format='csv', description=_('Automation task records')) +def 
events_table(since, full_path): events_query = '''COPY (SELECT main_jobevent.id, main_jobevent.created, main_jobevent.uuid, @@ -264,9 +261,11 @@ def copy_tables(since, full_path, subset=None): FROM main_jobevent WHERE main_jobevent.created > {} ORDER BY main_jobevent.id ASC) TO STDOUT WITH CSV HEADER'''.format(since.strftime("'%Y-%m-%d %H:%M:%S'")) - if not subset or 'events' in subset: - _copy_table(table='events', query=events_query, path=full_path) + return _copy_table(table='events', query=events_query, path=full_path) + +@register('unified_jobs_table', '1.1', format='csv', description=_('Data on jobs run')) +def unified_jobs_table(since, full_path): unified_job_query = '''COPY (SELECT main_unifiedjob.id, main_unifiedjob.polymorphic_ctype_id, django_content_type.model, @@ -297,9 +296,11 @@ def copy_tables(since, full_path, subset=None): WHERE (main_unifiedjob.created > {0} OR main_unifiedjob.finished > {0}) AND main_unifiedjob.launch_type != 'sync' ORDER BY main_unifiedjob.id ASC) TO STDOUT WITH CSV HEADER'''.format(since.strftime("'%Y-%m-%d %H:%M:%S'")) - if not subset or 'unified_jobs' in subset: - _copy_table(table='unified_jobs', query=unified_job_query, path=full_path) + return _copy_table(table='unified_jobs', query=unified_job_query, path=full_path) + +@register('unified_job_template_table', '1.0', format='csv', description=_('Data on job templates')) +def unified_job_template_table(since, full_path): unified_job_template_query = '''COPY (SELECT main_unifiedjobtemplate.id, main_unifiedjobtemplate.polymorphic_ctype_id, django_content_type.model, @@ -318,9 +319,11 @@ def copy_tables(since, full_path, subset=None): FROM main_unifiedjobtemplate, django_content_type WHERE main_unifiedjobtemplate.polymorphic_ctype_id = django_content_type.id ORDER BY main_unifiedjobtemplate.id ASC) TO STDOUT WITH CSV HEADER''' - if not subset or 'unified_job_template' in subset: - _copy_table(table='unified_job_template', query=unified_job_template_query, path=full_path) + 
return _copy_table(table='unified_job_template', query=unified_job_template_query, path=full_path) + +@register('workflow_job_node_table', '1.0', format='csv', description=_('Data on workflow runs')) +def workflow_job_node_table(since, full_path): workflow_job_node_query = '''COPY (SELECT main_workflowjobnode.id, main_workflowjobnode.created, main_workflowjobnode.modified, @@ -351,9 +354,11 @@ def copy_tables(since, full_path, subset=None): ) always_nodes ON main_workflowjobnode.id = always_nodes.from_workflowjobnode_id WHERE main_workflowjobnode.modified > {} ORDER BY main_workflowjobnode.id ASC) TO STDOUT WITH CSV HEADER'''.format(since.strftime("'%Y-%m-%d %H:%M:%S'")) - if not subset or 'workflow_job_node' in subset: - _copy_table(table='workflow_job_node', query=workflow_job_node_query, path=full_path) + return _copy_table(table='workflow_job_node', query=workflow_job_node_query, path=full_path) + +@register('workflow_job_template_node_table', '1.0', format='csv', description=_('Data on workflows')) +def workflow_job_template_node_table(since, full_path): workflow_job_template_node_query = '''COPY (SELECT main_workflowjobtemplatenode.id, main_workflowjobtemplatenode.created, main_workflowjobtemplatenode.modified, @@ -381,7 +386,4 @@ def copy_tables(since, full_path, subset=None): GROUP BY from_workflowjobtemplatenode_id ) always_nodes ON main_workflowjobtemplatenode.id = always_nodes.from_workflowjobtemplatenode_id ORDER BY main_workflowjobtemplatenode.id ASC) TO STDOUT WITH CSV HEADER''' - if not subset or 'workflow_job_template_node' in subset: - _copy_table(table='workflow_job_template_node', query=workflow_job_template_node_query, path=full_path) - - return + return _copy_table(table='workflow_job_template_node', query=workflow_job_template_node_query, path=full_path) diff --git a/awx/main/analytics/core.py b/awx/main/analytics/core.py index bab62b4a3c..310f77dc16 100644 --- a/awx/main/analytics/core.py +++ b/awx/main/analytics/core.py @@ -18,7 +18,7 @@ 
from awx.main.models.ha import TowerAnalyticsState from awx.main.utils import get_awx_http_client_headers, set_environ -__all__ = ['register', 'gather', 'ship', 'table_version'] +__all__ = ['all_collectors', 'register', 'gather', 'ship'] logger = logging.getLogger('awx.main.analytics') @@ -37,11 +37,27 @@ def _valid_license(): return True -def register(key, version): +def all_collectors(): + from awx.main.analytics import collectors + + collector_dict = {} + module = collectors + for name, func in inspect.getmembers(module): + if inspect.isfunction(func) and hasattr(func, '__awx_analytics_key__'): + key = func.__awx_analytics_key__ + desc = func.__awx_analytics_description__ or '' + version = func.__awx_analytics_version__ + collector_dict[key] = {'name': key, 'version': version, 'description': desc} + return collector_dict + + +def register(key, version, description=None, format='json'): """ A decorator used to register a function as a metric collector. - Decorated functions should return JSON-serializable objects. + Decorated functions should do the following based on format: + - json: return JSON-serializable objects.
+ - csv: write CSV data to a filename named 'key' @register('projects_by_scm_type', 1) def projects_by_scm_type(): @@ -51,28 +67,19 @@ def register(key, version): def decorate(f): f.__awx_analytics_key__ = key f.__awx_analytics_version__ = version + f.__awx_analytics_description__ = description + f.__awx_analytics_type__ = format return f return decorate -def table_version(file_name, version): - - global manifest - manifest[file_name] = version - - def decorate(f): - return f - - return decorate - - -def gather(dest=None, module=None, collection_type='scheduled'): +def gather(dest=None, module=None, subset=None, collection_type='scheduled'): """ Gather all defined metrics and write them as JSON files in a .tgz :param dest: the (optional) absolute path to write a compressed tarball - :pararm module: the module to search for registered analytic collector + :param module: the module to search for registered analytic collector functions; defaults to awx.main.analytics.collectors """ @@ -93,14 +100,21 @@ def gather(dest=None, module=None, collection_type='scheduled'): logger.error("Automation Analytics not enabled. 
Use --dry-run to gather locally without sending.") return + collector_list = [] if module is None: from awx.main.analytics import collectors module = collectors - + for name, func in inspect.getmembers(module): + if ( + inspect.isfunction(func) and + hasattr(func, '__awx_analytics_key__') and + (not subset or name in subset) + ): + collector_list.append((name, func)) dest = dest or tempfile.mkdtemp(prefix='awx_analytics') - for name, func in inspect.getmembers(module): - if inspect.isfunction(func) and hasattr(func, '__awx_analytics_key__'): + for name, func in collector_list: + if func.__awx_analytics_type__ == 'json': key = func.__awx_analytics_key__ manifest['{}.json'.format(key)] = func.__awx_analytics_version__ path = '{}.json'.format(os.path.join(dest, key)) @@ -114,7 +128,14 @@ def gather(dest=None, module=None, collection_type='scheduled'): logger.exception("Could not generate metric {}.json".format(key)) f.close() os.remove(f.name) - + elif func.__awx_analytics_type__ == 'csv': + key = func.__awx_analytics_key__ + manifest['{}.csv'.format(key)] = func.__awx_analytics_version__ + try: + func(last_run, full_path=dest) + except Exception: + logger.exception("Could not generate metric {}.csv".format(key)) + path = os.path.join(dest, 'manifest.json') with open(path, 'w', encoding='utf-8') as f: try: @@ -124,11 +145,6 @@ def gather(dest=None, module=None, collection_type='scheduled'): f.close() os.remove(f.name) - try: - collectors.copy_tables(since=last_run, full_path=dest) - except Exception: - logger.exception("Could not copy tables") - # can't use isoformat() since it has colons, which GNU tar doesn't like tarname = '_'.join([ settings.SYSTEM_UUID, diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 59f7eb50cb..3e1acc2996 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -373,8 +373,9 @@ def gather_analytics(): if acquired is False: logger.debug('Not gathering analytics, another task holds lock') return + subset = list(analytics.all_collectors().keys())
try: - tgz = analytics.gather() + tgz = analytics.gather(subset=subset) if not tgz: return logger.info('gathered analytics: {}'.format(tgz)) diff --git a/awx/main/tests/functional/analytics/test_collectors.py b/awx/main/tests/functional/analytics/test_collectors.py index ff53ac6bb4..fad64c7b86 100644 --- a/awx/main/tests/functional/analytics/test_collectors.py +++ b/awx/main/tests/functional/analytics/test_collectors.py @@ -86,7 +86,7 @@ def test_copy_tables_unified_job_query( job_name = job_template.create_unified_job().name with tempfile.TemporaryDirectory() as tmpdir: - collectors.copy_tables(time_start, tmpdir, subset="unified_jobs") + collectors.unified_jobs_table(time_start, tmpdir) with open(os.path.join(tmpdir, "unified_jobs_table.csv")) as f: lines = "".join([line for line in f]) @@ -134,7 +134,7 @@ def test_copy_tables_workflow_job_node_query(sqlite_copy_expert, workflow_job): time_start = now() - timedelta(hours=9) with tempfile.TemporaryDirectory() as tmpdir: - collectors.copy_tables(time_start, tmpdir, subset="workflow_job_node_query") + collectors.workflow_job_node_table(time_start, tmpdir) with open(os.path.join(tmpdir, "workflow_job_node_table.csv")) as f: reader = csv.reader(f) # Pop the headers