Refactor analytics collectors.

- Use a single registration decorator for all collectors (the table_version decorator is removed)
- Add description fields to each collector
- Add automation analytics collector information to /api/v2/config
Bill Nottingham 2020-09-09 17:10:14 -04:00
parent dff7667532
commit 1c4b06fe1e
6 changed files with 87 additions and 66 deletions

View File

@@ -21,6 +21,7 @@ import requests
from awx.api.generics import APIView
from awx.conf.registry import settings_registry
+from awx.main.analytics import all_collectors
from awx.main.ha import is_ha_environment
from awx.main.utils import (
get_awx_version,
@@ -252,6 +253,7 @@ class ApiV2ConfigView(APIView):
ansible_version=get_ansible_version(),
eula=render_to_string("eula.md") if license_data.get('license_type', 'UNLICENSED') != 'open' else '',
analytics_status=pendo_state,
+analytics_collectors=all_collectors(),
become_methods=PRIVILEGE_ESCALATION_METHODS,
)
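
The new field can be checked straight from the endpoint. A minimal sketch, assuming a reachable AWX instance (the host and credentials below are hypothetical placeholders):

import requests

# Sketch only: substitute a real AWX host and credentials.
resp = requests.get('https://awx.example.com/api/v2/config/',
                    auth=('admin', 'password'))
# Each entry carries the collector's key, version, and description.
for key, info in resp.json()['analytics_collectors'].items():
    print(key, info['version'], info['description'])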

View File

@@ -1 +1 @@
-from .core import register, gather, ship, table_version # noqa
+from .core import all_collectors, register, gather, ship # noqa
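
With table_version gone, the package's public surface reduces to four names. A quick import check, assuming an AWX environment:

from awx.main.analytics import all_collectors, register, gather, ship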

View File

@@ -6,13 +6,14 @@ from django.db import connection
from django.db.models import Count
from django.conf import settings
from django.utils.timezone import now
+from django.utils.translation import ugettext_lazy as _
from awx.conf.license import get_license
from awx.main.utils import (get_awx_version, get_ansible_version,
get_custom_venv_choices, camelcase_to_underscore)
from awx.main import models
from django.contrib.sessions.models import Session
-from awx.main.analytics import register, table_version
+from awx.main.analytics import register
'''
This module is used to define metrics collected by awx.main.analytics.gather()
@@ -31,7 +32,7 @@ data _since_ the last report date - i.e., new data in the last 24 hours)
'''
-@register('config', '1.1')
+@register('config', '1.1', description=_('General platform configuration'))
def config(since):
license_info = get_license(show_key=False)
install_type = 'traditional'
@@ -63,7 +64,7 @@ def config(since):
}
-@register('counts', '1.0')
+@register('counts', '1.0', description=_('Counts of objects such as organizations, inventories, and projects'))
def counts(since):
counts = {}
for cls in (models.Organization, models.Team, models.User,
@@ -98,7 +99,7 @@ def counts(since):
return counts
-@register('org_counts', '1.0')
+@register('org_counts', '1.0', description=_('Counts of users and teams by organization'))
def org_counts(since):
counts = {}
for org in models.Organization.objects.annotate(num_users=Count('member_role__members', distinct=True),
@@ -110,7 +111,7 @@ def org_counts(since):
return counts
-@register('cred_type_counts', '1.0')
+@register('cred_type_counts', '1.0', description=_('Counts of credentials by credential type'))
def cred_type_counts(since):
counts = {}
for cred_type in models.CredentialType.objects.annotate(num_credentials=Count(
@@ -122,7 +123,7 @@ def cred_type_counts(since):
return counts
-@register('inventory_counts', '1.2')
+@register('inventory_counts', '1.2', description=_('Inventories, their inventory sources, and host counts'))
def inventory_counts(since):
counts = {}
for inv in models.Inventory.objects.filter(kind='').annotate(num_sources=Count('inventory_sources', distinct=True),
@@ -147,7 +148,7 @@ def inventory_counts(since):
return counts
-@register('projects_by_scm_type', '1.0')
+@register('projects_by_scm_type', '1.0', description=_('Counts of projects by source control type'))
def projects_by_scm_type(since):
counts = dict(
(t[0] or 'manual', 0)
@@ -166,7 +167,7 @@ def _get_isolated_datetime(last_check):
return last_check
-@register('instance_info', '1.0')
+@register('instance_info', '1.0', description=_('Cluster topology and capacity'))
def instance_info(since, include_hostnames=False):
info = {}
instances = models.Instance.objects.values_list('hostname').values(
@@ -192,7 +193,7 @@ def instance_info(since, include_hostnames=False):
return info
-@register('job_counts', '1.0')
+@register('job_counts', '1.0', description=_('Counts of jobs by status'))
def job_counts(since):
counts = {}
counts['total_jobs'] = models.UnifiedJob.objects.exclude(launch_type='sync').count()
@@ -202,7 +203,7 @@ def job_counts(since):
return counts
-@register('job_instance_counts', '1.0')
+@register('job_instance_counts', '1.0', description=_('Counts of jobs by execution node'))
def job_instance_counts(since):
counts = {}
job_types = models.UnifiedJob.objects.exclude(launch_type='sync').values_list(
@@ -217,7 +218,7 @@ def job_instance_counts(since):
return counts
-@register('query_info', '1.0')
+@register('query_info', '1.0', description=_('Metadata about the analytics collected'))
def query_info(since, collection_type):
query_info = {}
query_info['last_run'] = str(since)
@@ -226,21 +227,17 @@ def query_info(since, collection_type):
return query_info
# Copies Job Events from db to a .csv to be shipped
-@table_version('events_table.csv', '1.1')
-@table_version('unified_jobs_table.csv', '1.1')
-@table_version('unified_job_template_table.csv', '1.0')
-@table_version('workflow_job_node_table.csv', '1.0')
-@table_version('workflow_job_template_node_table.csv', '1.0')
-def copy_tables(since, full_path, subset=None):
-def _copy_table(table, query, path):
-file_path = os.path.join(path, table + '_table.csv')
-file = open(file_path, 'w', encoding='utf-8')
-with connection.cursor() as cursor:
-cursor.copy_expert(query, file)
-file.close()
-return file_path
+def _copy_table(table, query, path):
+file_path = os.path.join(path, table + '_table.csv')
+file = open(file_path, 'w', encoding='utf-8')
+with connection.cursor() as cursor:
+cursor.copy_expert(query, file)
+file.close()
+return file_path
+@register('events_table', '1.1', format='csv', description=_('Automation task records'))
+def events_table(since, full_path):
events_query = '''COPY (SELECT main_jobevent.id,
main_jobevent.created,
main_jobevent.uuid,
@@ -264,9 +261,11 @@ def copy_tables(since, full_path, subset=None):
FROM main_jobevent
WHERE main_jobevent.created > {}
ORDER BY main_jobevent.id ASC) TO STDOUT WITH CSV HEADER'''.format(since.strftime("'%Y-%m-%d %H:%M:%S'"))
-if not subset or 'events' in subset:
-_copy_table(table='events', query=events_query, path=full_path)
+return _copy_table(table='events', query=events_query, path=full_path)
+@register('unified_jobs_table', '1.1', format='csv', description=_('Data on jobs run'))
+def unified_jobs_table(since, full_path):
unified_job_query = '''COPY (SELECT main_unifiedjob.id,
main_unifiedjob.polymorphic_ctype_id,
django_content_type.model,
@@ -297,9 +296,11 @@ def copy_tables(since, full_path, subset=None):
WHERE (main_unifiedjob.created > {0} OR main_unifiedjob.finished > {0})
AND main_unifiedjob.launch_type != 'sync'
ORDER BY main_unifiedjob.id ASC) TO STDOUT WITH CSV HEADER'''.format(since.strftime("'%Y-%m-%d %H:%M:%S'"))
-if not subset or 'unified_jobs' in subset:
-_copy_table(table='unified_jobs', query=unified_job_query, path=full_path)
+return _copy_table(table='unified_jobs', query=unified_job_query, path=full_path)
+@register('unified_job_template_table', '1.0', format='csv', description=_('Data on job templates'))
+def unified_job_template_table(since, full_path):
unified_job_template_query = '''COPY (SELECT main_unifiedjobtemplate.id,
main_unifiedjobtemplate.polymorphic_ctype_id,
django_content_type.model,
@@ -318,9 +319,11 @@ def copy_tables(since, full_path, subset=None):
FROM main_unifiedjobtemplate, django_content_type
WHERE main_unifiedjobtemplate.polymorphic_ctype_id = django_content_type.id
ORDER BY main_unifiedjobtemplate.id ASC) TO STDOUT WITH CSV HEADER'''
-if not subset or 'unified_job_template' in subset:
-_copy_table(table='unified_job_template', query=unified_job_template_query, path=full_path)
+return _copy_table(table='unified_job_template', query=unified_job_template_query, path=full_path)
+@register('workflow_job_node_table', '1.0', format='csv', description=_('Data on workflow runs'))
+def workflow_job_node_table(since, full_path):
workflow_job_node_query = '''COPY (SELECT main_workflowjobnode.id,
main_workflowjobnode.created,
main_workflowjobnode.modified,
@@ -351,9 +354,11 @@ def copy_tables(since, full_path, subset=None):
) always_nodes ON main_workflowjobnode.id = always_nodes.from_workflowjobnode_id
WHERE main_workflowjobnode.modified > {}
ORDER BY main_workflowjobnode.id ASC) TO STDOUT WITH CSV HEADER'''.format(since.strftime("'%Y-%m-%d %H:%M:%S'"))
-if not subset or 'workflow_job_node' in subset:
-_copy_table(table='workflow_job_node', query=workflow_job_node_query, path=full_path)
+return _copy_table(table='workflow_job_node', query=workflow_job_node_query, path=full_path)
+@register('workflow_job_template_node_table', '1.0', format='csv', description=_('Data on workflows'))
+def workflow_job_template_node_table(since, full_path):
workflow_job_template_node_query = '''COPY (SELECT main_workflowjobtemplatenode.id,
main_workflowjobtemplatenode.created,
main_workflowjobtemplatenode.modified,
@@ -381,7 +386,4 @@ def copy_tables(since, full_path, subset=None):
GROUP BY from_workflowjobtemplatenode_id
) always_nodes ON main_workflowjobtemplatenode.id = always_nodes.from_workflowjobtemplatenode_id
ORDER BY main_workflowjobtemplatenode.id ASC) TO STDOUT WITH CSV HEADER'''
-if not subset or 'workflow_job_template_node' in subset:
-_copy_table(table='workflow_job_template_node', query=workflow_job_template_node_query, path=full_path)
-return
+return _copy_table(table='workflow_job_template_node', query=workflow_job_template_node_query, path=full_path)
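
Each CSV table is now its own registered collector that hands a COPY query to the shared _copy_table() helper. A minimal sketch of adding one more under the same conventions (the table and columns are hypothetical and do not exist in AWX):

# Hypothetical collector: 'main_example' is not a real AWX table.
@register('example_table', '1.0', format='csv', description=_('Example rows'))
def example_table(since, full_path):
    example_query = '''COPY (SELECT main_example.id,
                       main_example.created
                       FROM main_example
                       WHERE main_example.created > {}
                       ORDER BY main_example.id ASC) TO STDOUT WITH CSV HEADER'''.format(
        since.strftime("'%Y-%m-%d %H:%M:%S'"))
    # _copy_table appends '_table.csv', so table='example' writes example_table.csv.
    return _copy_table(table='example', query=example_query, path=full_path)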

View File

@@ -18,7 +18,7 @@ from awx.main.models.ha import TowerAnalyticsState
from awx.main.utils import get_awx_http_client_headers, set_environ
-__all__ = ['register', 'gather', 'ship', 'table_version']
+__all__ = ['register', 'gather', 'ship']
logger = logging.getLogger('awx.main.analytics')
@@ -37,11 +37,27 @@ def _valid_license():
return True
-def register(key, version):
+def all_collectors():
+from awx.main.analytics import collectors
+collector_dict = {}
+module = collectors
+for name, func in inspect.getmembers(module):
+if inspect.isfunction(func) and hasattr(func, '__awx_analytics_key__'):
+key = func.__awx_analytics_key__
+desc = func.__awx_analytics_description__ or ''
+version = func.__awx_analytics_version__
+collector_dict[key] = {'name': key, 'version': version, 'description': desc}
+return collector_dict
+def register(key, version, description=None, format='json'):
"""
A decorator used to register a function as a metric collector.
-Decorated functions should return JSON-serializable objects.
+Decorated functions should do the following based on format:
+- json: return JSON-serializable objects.
+- csv: write CSV data to a filename named 'key'
@register('projects_by_scm_type', 1)
def projects_by_scm_type():
@@ -51,28 +67,19 @@ def register(key, version):
def decorate(f):
f.__awx_analytics_key__ = key
f.__awx_analytics_version__ = version
+f.__awx_analytics_description__ = description
+f.__awx_analytics_type__ = format
return f
return decorate
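
To illustrate what the decorator attaches, a hypothetical collector (the name and body are made up; real collectors live in awx.main.analytics.collectors, which is also where all_collectors() looks for them):

# Hypothetical JSON collector; per the module conventions it should only
# report data newer than `since`.
@register('example_counts', '1.0', description='Example object counts')
def example_counts(since):
    return {'widgets': 0}

# register() only annotates the function:
assert example_counts.__awx_analytics_key__ == 'example_counts'
assert example_counts.__awx_analytics_version__ == '1.0'
assert example_counts.__awx_analytics_type__ == 'json'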
-def table_version(file_name, version):
-global manifest
-manifest[file_name] = version
-def decorate(f):
-return f
-return decorate
-def gather(dest=None, module=None, collection_type='scheduled'):
+def gather(dest=None, module=None, subset=None, collection_type='scheduled'):
"""
Gather all defined metrics and write them as JSON files in a .tgz
:param dest: the (optional) absolute path to write a compressed tarball
-:pararm module: the module to search for registered analytic collector
+:param module: the module to search for registered analytic collector
functions; defaults to awx.main.analytics.collectors
"""
@@ -93,14 +100,21 @@ def gather(dest=None, module=None, collection_type='scheduled'):
logger.error("Automation Analytics not enabled. Use --dry-run to gather locally without sending.")
return
+collector_list = []
if module is None:
from awx.main.analytics import collectors
module = collectors
+for name, func in inspect.getmembers(module):
+if (
+inspect.isfunction(func) and
+hasattr(func, '__awx_analytics_key__') and
+(not subset or name in subset)
+):
+collector_list.append((name, func))
dest = dest or tempfile.mkdtemp(prefix='awx_analytics')
-for name, func in inspect.getmembers(module):
-if inspect.isfunction(func) and hasattr(func, '__awx_analytics_key__'):
+for name, func in collector_list:
+if func.__awx_analytics_type__ == 'json':
key = func.__awx_analytics_key__
manifest['{}.json'.format(key)] = func.__awx_analytics_version__
path = '{}.json'.format(os.path.join(dest, key))
@@ -114,7 +128,14 @@ def gather(dest=None, module=None, collection_type='scheduled'):
logger.exception("Could not generate metric {}.json".format(key))
f.close()
os.remove(f.name)
+elif func.__awx_analytics_type__ == 'csv':
+key = func.__awx_analytics_key__
+manifest['{}.csv'.format(key)] = func.__awx_analytics_version__
+try:
+func(last_run, full_path=dest)
+except Exception:
+logger.exception("Could not generate metric {}.csv".format(key))
path = os.path.join(dest, 'manifest.json')
with open(path, 'w', encoding='utf-8') as f:
try:
@@ -124,11 +145,6 @@ def gather(dest=None, module=None, collection_type='scheduled'):
f.close()
os.remove(f.name)
-try:
-collectors.copy_tables(since=last_run, full_path=dest)
-except Exception:
-logger.exception("Could not copy tables")
# can't use isoformat() since it has colons, which GNU tar doesn't like
tarname = '_'.join([
settings.SYSTEM_UUID,
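
With the subset parameter, gather() can now be limited to named collectors. A minimal sketch, assuming an AWX environment with analytics enabled:

from awx.main.analytics import gather

# Collect only the 'config' and 'counts' metrics; gather() returns the
# tarball path on success, or None (e.g. when analytics is disabled).
tgz = gather(subset=['config', 'counts'])
print(tgz)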

View File

@@ -373,8 +373,9 @@ def gather_analytics():
if acquired is False:
logger.debug('Not gathering analytics, another task holds lock')
return
+subset = list(all_collectors().keys())
try:
-tgz = analytics.gather()
+tgz = analytics.gather(subset=subset)
if not tgz:
return
logger.info('gathered analytics: {}'.format(tgz))

View File

@@ -86,7 +86,7 @@ def test_copy_tables_unified_job_query(
job_name = job_template.create_unified_job().name
with tempfile.TemporaryDirectory() as tmpdir:
-collectors.copy_tables(time_start, tmpdir, subset="unified_jobs")
+collectors.unified_jobs_table(time_start, tmpdir)
with open(os.path.join(tmpdir, "unified_jobs_table.csv")) as f:
lines = "".join([line for line in f])
@@ -134,7 +134,7 @@ def test_copy_tables_workflow_job_node_query(sqlite_copy_expert, workflow_job):
time_start = now() - timedelta(hours=9)
with tempfile.TemporaryDirectory() as tmpdir:
-collectors.copy_tables(time_start, tmpdir, subset="workflow_job_node_query")
+collectors.workflow_job_node_table(time_start, tmpdir)
with open(os.path.join(tmpdir, "workflow_job_node_table.csv")) as f:
reader = csv.reader(f)
# Pop the headers