From b7227113bef986d35dbf6735c583f25916aa94b1 Mon Sep 17 00:00:00 2001
From: Ladislav Smola
Date: Thu, 5 Mar 2020 14:52:46 +0100
Subject: [PATCH 1/4] Use modified to check if job should be sent to analytics

It can take several hours for a job to go from pending to a successful or
failed state, and we also need to send jobs whose state has changed;
otherwise the analytics will be incorrect.
---
 awx/main/analytics/collectors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/awx/main/analytics/collectors.py b/awx/main/analytics/collectors.py
index 17158d853f..0f15538567 100644
--- a/awx/main/analytics/collectors.py
+++ b/awx/main/analytics/collectors.py
@@ -277,7 +277,7 @@ def copy_tables(since, full_path):
                    FROM main_unifiedjob
                    JOIN django_content_type ON main_unifiedjob.polymorphic_ctype_id = django_content_type.id
                    JOIN main_organization ON main_organization.id = main_unifiedjob.organization_id
-                   WHERE main_unifiedjob.created > {}
+                   WHERE main_unifiedjob.modified > {}
                    AND main_unifiedjob.launch_type != 'sync'
                    ORDER BY main_unifiedjob.id ASC) TO STDOUT WITH CSV HEADER'''.format(since.strftime("'%Y-%m-%d %H:%M:%S'"))
     _copy_table(table='unified_jobs', query=unified_job_query, path=full_path)

From 6a503e152a217073e56003b7809907dbed2cf828 Mon Sep 17 00:00:00 2001
From: Ladislav Smola
Date: Thu, 5 Mar 2020 14:49:49 +0100
Subject: [PATCH 2/4] Also send workflows as part of unified jobs

Workflows do not have a record in main_job, therefore the JOIN was
ignoring them. We need a LEFT JOIN to include workflows as well.

It also seems like we are not able to get a link to organizations from
workflows? When looking at
#/organizations?organization_search=page_size:20;order_by:name,
we don't seem to list a relation to workflows. Is it possible to get it
from somewhere?
---
 awx/main/analytics/collectors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/awx/main/analytics/collectors.py b/awx/main/analytics/collectors.py
index 0f15538567..601ad039b6 100644
--- a/awx/main/analytics/collectors.py
+++ b/awx/main/analytics/collectors.py
@@ -276,7 +276,7 @@ def copy_tables(since, full_path):
                    main_unifiedjob.instance_group_id
                    FROM main_unifiedjob
                    JOIN django_content_type ON main_unifiedjob.polymorphic_ctype_id = django_content_type.id
-                   JOIN main_organization ON main_organization.id = main_unifiedjob.organization_id
+                   LEFT JOIN main_organization ON main_organization.id = main_unifiedjob.organization_id
                    WHERE main_unifiedjob.modified > {}
                    AND main_unifiedjob.launch_type != 'sync'
                    ORDER BY main_unifiedjob.id ASC) TO STDOUT WITH CSV HEADER'''.format(since.strftime("'%Y-%m-%d %H:%M:%S'"))

From 6a86af5b43336f6524060cc49bcc2b86728ce797 Mon Sep 17 00:00:00 2001
From: Ladislav Smola
Date: Wed, 18 Mar 2020 10:16:07 +0100
Subject: [PATCH 3/4] Use indexed timestamps

Use created and finished, which are indexed, to fetch jobs in all of
their states. If a job is not finished yet, we might not get the right
terminal status, but that should be OK for now.
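
As an illustration (not part of this patch), one way to sanity-check that
the new predicate can actually use those indexes is to run EXPLAIN from a
Django shell; the cutoff date below is just an example value:

    from django.db import connection

    with connection.cursor() as cursor:
        cursor.execute(
            "EXPLAIN SELECT id FROM main_unifiedjob "
            "WHERE created > %s OR finished > %s",
            ['2020-03-01 00:00:00', '2020-03-01 00:00:00'],
        )
        for (line,) in cursor.fetchall():
            # With btree indexes on created and finished, the plan should
            # show index scans (e.g. a BitmapOr) instead of the sequential
            # scan that filtering on the unindexed modified column forces.
            print(line)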
---
 awx/main/analytics/collectors.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/awx/main/analytics/collectors.py b/awx/main/analytics/collectors.py
index 601ad039b6..922c3b0648 100644
--- a/awx/main/analytics/collectors.py
+++ b/awx/main/analytics/collectors.py
@@ -277,8 +277,8 @@ def copy_tables(since, full_path):
                    FROM main_unifiedjob
                    JOIN django_content_type ON main_unifiedjob.polymorphic_ctype_id = django_content_type.id
                    LEFT JOIN main_organization ON main_organization.id = main_unifiedjob.organization_id
-                   WHERE main_unifiedjob.modified > {}
-                   AND main_unifiedjob.launch_type != 'sync'
+                   WHERE (main_unifiedjob.created > {0} OR main_unifiedjob.finished > {0})
+                   AND main_unifiedjob.launch_type != 'sync'
                    ORDER BY main_unifiedjob.id ASC) TO STDOUT WITH CSV HEADER'''.format(since.strftime("'%Y-%m-%d %H:%M:%S'"))
     _copy_table(table='unified_jobs', query=unified_job_query, path=full_path)

From 1f9f86974a1095e6064baaba9a92d1885912803c Mon Sep 17 00:00:00 2001
From: chris meyers
Date: Wed, 8 Apr 2020 16:27:53 -0400
Subject: [PATCH 4/4] test analytics table output

* unified_jobs output should include derived jobs, i.e. project update,
  inventory update, and job
* This PR adds a test to ensure that.
---
 .../functional/analytics/test_collectors.py  | 76 +++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 awx/main/tests/functional/analytics/test_collectors.py

diff --git a/awx/main/tests/functional/analytics/test_collectors.py b/awx/main/tests/functional/analytics/test_collectors.py
new file mode 100644
index 0000000000..bf470df574
--- /dev/null
+++ b/awx/main/tests/functional/analytics/test_collectors.py
@@ -0,0 +1,76 @@
+import pytest
+import tempfile
+import os
+import shutil
+import csv
+
+from django.utils.timezone import now
+from django.db.backends.sqlite3.base import SQLiteCursorWrapper
+
+from awx.main.analytics import collectors
+
+from awx.main.models import (
+    ProjectUpdate,
+    InventorySource,
+)
+
+
+@pytest.fixture
+def sqlite_copy_expert(request):
+    # copy_expert is postgres-specific and SQLite does not support it; mock
+    # its behavior so we can verify the CSV file the query output is written to
+    path = tempfile.mkdtemp(prefix='copied_tables')
+
+    def write_stdout(self, sql, fd):
+        # It would be nicer to properly dissect the SQL query and verify it
+        # that way; for now we take the naive approach of string stripping.
+        assert sql.startswith('COPY (')
+        assert sql.endswith(') TO STDOUT WITH CSV HEADER')
+
+        sql = sql.replace('COPY (', '')
+        sql = sql.replace(') TO STDOUT WITH CSV HEADER', '')
+
+        # Remove JSON-style expressions, which SQLite cannot parse
+        # TODO: replace JSON-style expressions with SQLite equivalents
+        sql_new = []
+        for line in sql.split('\n'):
+            if 'main_jobevent.event_data::' not in line:
+                sql_new.append(line)
+        sql = '\n'.join(sql_new)
+
+        self.execute(sql)
+        results = self.fetchall()
+        headers = [i[0] for i in self.description]
+
+        csv_handle = csv.writer(fd, delimiter=',', quoting=csv.QUOTE_ALL, escapechar='\\', lineterminator='\n')
+        csv_handle.writerow(headers)
+        csv_handle.writerows(results)
+
+
+    setattr(SQLiteCursorWrapper, 'copy_expert', write_stdout)
+    request.addfinalizer(lambda: shutil.rmtree(path))
+    request.addfinalizer(lambda: delattr(SQLiteCursorWrapper, 'copy_expert'))
+    return path
+
+
+@pytest.mark.django_db
+def test_copy_tables_unified_job_query(sqlite_copy_expert, project, inventory, job_template):
+    '''
+    Ensure that various unified job types are in the output of the query.
+    '''
+
+    time_start = now()
+    inv_src = InventorySource.objects.create(name="inventory_update1", inventory=inventory, source='gce')
+
+    project_update_name = ProjectUpdate.objects.create(project=project, name="project_update1").name
+    inventory_update_name = inv_src.create_unified_job().name
+    job_name = job_template.create_unified_job().name
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.copy_tables(time_start, tmpdir)
+        with open(os.path.join(tmpdir, 'unified_jobs_table.csv')) as f:
+            lines = f.read()
+
+        assert project_update_name in lines
+        assert inventory_update_name in lines
+        assert job_name in lines
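
For context on what the fixture above intercepts: copy_tables() hands each
assembled COPY statement to the cursor's copy_expert() method, psycopg2's
fast CSV-export path. Below is a minimal sketch of that call shape, an
assumption for illustration rather than the actual AWX helper (the real
_copy_table lives in awx/main/analytics/collectors.py; only the
'_table.csv' suffix is confirmed by the file name the test reads back):

    import os
    from django.db import connection

    def _copy_table(table, query, path):
        # Sketch only: write one table's query results to <table>_table.csv.
        file_path = os.path.join(path, table + '_table.csv')
        with open(file_path, 'w', encoding='utf-8') as fd:
            with connection.cursor() as cursor:
                # On PostgreSQL this dispatches to psycopg2's COPY ... TO
                # STDOUT; under the fixture, write_stdout() runs the inner
                # SELECT on SQLite and writes an equivalent CSV instead.
                cursor.copy_expert(query, fd)
        return file_path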