Merge pull request #10003 from jbradberry/analytics-4-week-logging

Log adjustments made to the analytics interval due to the 4-week limit SUMMARY If the user passes in the since and/or until parameters, sometimes they will be truncated to limit the data to only 4 weeks. Emit a log message when this happens. ISSUE TYPE Bugfix Pull Request COMPONENT NAME API AWX VERSION Reviewed-by: Jim Ladd <None> Reviewed-by: Chris Meyers <None> Reviewed-by: Jeff Bradberry <None>
2026-06-21 06:37:45 -02:30 · 2021-04-27 13:55:56 +00:00
parent 35e778b7ae 584fda5178
commit bf892443df
3 changed files with 250 additions and 33 deletions
--- a/awx/main/analytics/collectors.py
+++ b/awx/main/analytics/collectors.py
@@ -36,7 +36,7 @@ data _since_ the last report date - i.e., new data in the last 24 hours)
 """


-def trivial_slicing(key, since, until):
+def trivial_slicing(key, since, until, last_gather):
    if since is not None:
        return [(since, until)]

@@ -45,11 +45,11 @@ def trivial_slicing(key, since, until):
    horizon = until - timedelta(weeks=4)
    last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
    last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
-    last_entry = max(last_entries.get(key) or settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon)
+    last_entry = max(last_entries.get(key) or last_gather, horizon)
    return [(last_entry, until)]


-def four_hour_slicing(key, since, until):
+def four_hour_slicing(key, since, until, last_gather):
    if since is not None:
        last_entry = since
    else:
@@ -58,7 +58,7 @@ def four_hour_slicing(key, since, until):
        horizon = until - timedelta(weeks=4)
        last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
        last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
-        last_entry = max(last_entries.get(key) or settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon)
+        last_entry = max(last_entries.get(key) or last_gather, horizon)

    start, end = last_entry, None
    while start < until:
@@ -67,15 +67,14 @@ def four_hour_slicing(key, since, until):
        start = end


-def events_slicing(key, since, until):
+def events_slicing(key, since, until, last_gather):
    from awx.conf.models import Setting

-    last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER
    last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
    last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
    horizon = until - timedelta(weeks=4)

-    lower = since or last_gather or horizon
+    lower = since or last_gather
    if not since and last_entries.get(key):
        lower = horizon
    pk_values = models.JobEvent.objects.filter(created__gte=lower, created__lte=until).aggregate(Min('pk'), Max('pk'))
--- a/awx/main/analytics/core.py
+++ b/awx/main/analytics/core.py
@@ -116,6 +116,51 @@ def package(target, data, timestamp):
        return None


+def calculate_collection_interval(since, until):
+    _now = now()
+
+    # Make sure that the endpoints are not in the future.
+    if until is not None and until > _now:
+        until = _now
+        logger.warning(f"End of the collection interval is in the future, setting to {_now}.")
+    if since is not None and since > _now:
+        since = _now
+        logger.warning(f"Start of the collection interval is in the future, setting to {_now}.")
+
+    # The value of `until` needs to be concrete, so resolve it.  If it wasn't passed in,
+    # set it to `now`, but only if that isn't more than 4 weeks ahead of a passed-in
+    # `since` parameter.
+    if since is not None:
+        if until is not None:
+            if until > since + timedelta(weeks=4):
+                until = since + timedelta(weeks=4)
+                logger.warning(f"End of the collection interval is greater than 4 weeks from start, setting end to {until}.")
+        else:  # until is None
+            until = min(since + timedelta(weeks=4), _now)
+    elif until is None:
+        until = _now
+
+    if since and since >= until:
+        logger.warning("Start of the collection interval is later than the end, ignoring request.")
+        raise ValueError
+
+    # The ultimate beginning of the interval needs to be compared to 4 weeks prior to
+    # `until`, but we want to keep `since` empty if it wasn't passed in because we use that
+    # case to know whether to use the bookkeeping settings variables to decide the start of
+    # the interval.
+    horizon = until - timedelta(weeks=4)
+    if since is not None and since < horizon:
+        since = horizon
+        logger.warning(f"Start of the collection interval is more than 4 weeks prior to {until}, setting to {horizon}.")
+
+    last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon
+    if last_gather < horizon:
+        last_gather = horizon
+        logger.warning(f"Last analytics run was more than 4 weeks prior to {until}, using {horizon} instead.")
+
+    return since, until, last_gather
+
+
 def gather(dest=None, module=None, subset=None, since=None, until=None, collection_type='scheduled'):
    """
    Gather all defined metrics and write them as JSON files in a .tgz
@@ -148,30 +193,12 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
        from awx.main.analytics import collectors
        from awx.main.signals import disable_activity_stream

-        _now = now()
-
-        # Make sure that the endpoints are not in the future.
-        until = None if until is None else min(until, _now)
-        since = None if since is None else min(since, _now)
-
-        if since and not until:
-            # If `since` is explicit but not `until`, `since` should be used to calculate the 4-week limit
-            until = min(since + timedelta(weeks=4), _now)
-        else:
-            until = _now if until is None else until
-
-        horizon = until - timedelta(weeks=4)
-        if since is not None:
-            # Make sure the start isn't more than 4 weeks prior to `until`.
-            since = max(since, horizon)
-
-        if since and since >= until:
-            logger.warning("Start of the collection interval is later than the end, ignoring request.")
-            return None
-
        logger.debug("Last analytics run was: {}".format(settings.AUTOMATION_ANALYTICS_LAST_GATHER))
-        # LAST_GATHER time should always get truncated to less than 4 weeks back.
-        last_gather = max(settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon)
+
+        try:
+            since, until, last_gather = calculate_collection_interval(since, until)
+        except ValueError:
+            return None

        last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
        last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
@@ -201,7 +228,7 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
            key = func.__awx_analytics_key__
            filename = f'{key}.json'
            try:
-                last_entry = max(last_entries.get(key) or last_gather, horizon)
+                last_entry = max(last_entries.get(key) or last_gather, until - timedelta(weeks=4))
                results = (func(since or last_entry, collection_type=collection_type, until=until), func.__awx_analytics_version__)
                json.dumps(results)  # throwaway check to see if the data is json-serializable
                data[filename] = results
@@ -233,9 +260,9 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
                # allowed to be None, and will fall back to LAST_ENTRIES[key] or to
                # LAST_GATHER (truncated appropriately to match the 4-week limit).
                if func.__awx_expensive__:
-                    slices = func.__awx_expensive__(key, since, until)
+                    slices = func.__awx_expensive__(key, since, until, last_gather)
                else:
-                    slices = collectors.trivial_slicing(key, since, until)
+                    slices = collectors.trivial_slicing(key, since, until, last_gather)

                for start, end in slices:
                    files = func(start, full_path=gather_dir, until=end)
--- a/awx/main/tests/unit/analytics/test_collection_interval.py
+++ b/awx/main/tests/unit/analytics/test_collection_interval.py
@@ -0,0 +1,191 @@
+import logging
+import pytest
+
+from django.utils.timezone import now, timedelta
+
+from awx.main.analytics.core import calculate_collection_interval
+
+# This is some bullshit.
+logger = logging.getLogger('awx.main.analytics')
+logger.propagate = True
+
+epsilon = timedelta(minutes=1)
+
+
+class TestIntervalWithSinceAndUntil:
+    @pytest.mark.parametrize('gather', [None, 2, 6])
+    def test_ok(self, caplog, settings, gather):
+        _now = now()
+        _prior = _now - timedelta(weeks=gather) if gather is not None else None
+        until = _now
+        since = until - timedelta(weeks=3)
+        settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior
+
+        new_since, new_until, last_gather = calculate_collection_interval(since, until)
+        assert new_since == since
+        assert new_until == until
+
+        # 1 log message, if LAST_GATHER was more than 4 weeks ago
+        expected = 1 if gather and gather > 4 else 0
+        assert len(caplog.records) == expected
+        assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected
+
+        # last_gather is always set to something even if LAST_GATHER was empty
+        assert last_gather is not None
+        assert abs(until - last_gather - timedelta(weeks=gather if gather and gather <= 4 else 4)) < epsilon
+
+    def test_both_in_future(self, caplog):
+        since = now() + timedelta(weeks=1)
+        until = since + timedelta(weeks=1)
+
+        with pytest.raises(ValueError):
+            calculate_collection_interval(since, until)
+
+        # log message each for `since` and `until` getting chopped, then another for the empty interval
+        assert len(caplog.records) == 3
+        assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 2
+        assert sum(1 for msg in caplog.messages if "later than the end" in msg) == 1
+
+    def test_until_in_future(self, caplog):
+        _now = now()
+        since = _now - timedelta(weeks=1)
+        until = _now + timedelta(weeks=1)
+
+        new_since, new_until, _ = calculate_collection_interval(since, until)
+        assert new_since == since
+        assert abs(_now - new_until) < epsilon  # `until` gets truncated to now()
+
+        # log message for `until` getting chopped
+        assert len(caplog.records) == 1
+        assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 1
+
+    def test_interval_too_large(self, caplog):
+        until = now()
+        since = until - timedelta(weeks=5)
+
+        new_since, new_until, _ = calculate_collection_interval(since, until)
+        # interval is 4 weeks counting forward from explicit `since`
+        assert new_since == since
+        assert new_until == since + timedelta(weeks=4)
+
+        # log message for `until` getting chopped
+        assert len(caplog.records) == 1
+        assert sum(1 for msg in caplog.messages if "greater than 4 weeks from start" in msg) == 1
+
+    def test_reversed(self, caplog):
+        since = now()
+        until = since - timedelta(weeks=3)
+
+        with pytest.raises(ValueError):
+            calculate_collection_interval(since, until)
+
+        # log message for the empty interval
+        assert len(caplog.records) == 1
+        assert sum(1 for msg in caplog.messages if "later than the end" in msg) == 1
+
+
+class TestIntervalWithSinceOnly:
+    @pytest.mark.parametrize('gather', [None, 2, 6])
+    def test_ok(self, caplog, settings, gather):
+        _now = now()
+        _prior = _now - timedelta(weeks=gather) if gather is not None else None
+        since = _now - timedelta(weeks=2)
+        until = None  # until is going to wind up being effectively now.
+        settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior
+
+        new_since, new_until, last_gather = calculate_collection_interval(since, until)
+        # `since` is only 2 weeks back, so now() is within the 4-week cutoff and can be used for `until`
+        assert new_since == since
+        assert abs(new_until - _now) < epsilon
+
+        # 1 log message, if LAST_GATHER was more than 4 weeks ago
+        expected = 1 if gather and gather > 4 else 0
+        assert len(caplog.records) == expected
+        assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected
+
+        # last_gather is always set to something even if LAST_GATHER was empty
+        assert last_gather is not None
+        assert abs(_now - last_gather - timedelta(weeks=gather if gather and gather <= 4 else 4)) < epsilon
+
+    def test_since_more_than_4_weeks_before_now(self, caplog):
+        since = now() - timedelta(weeks=5)
+        until = None  # until is going to wind up being effectively now.
+
+        new_since, new_until, last_gather = calculate_collection_interval(since, until)
+        # interval is 4 weeks counting forward from explicit `since`
+        assert new_since == since
+        assert new_until == since + timedelta(weeks=4)
+
+        # no logs, since no explicit parameters were truncated or adjusted
+        assert len(caplog.records) == 0
+
+    def test_since_in_future(self, caplog):
+        since = now() + timedelta(weeks=1)
+        until = None
+
+        with pytest.raises(ValueError):
+            calculate_collection_interval(since, until)
+
+        # log message for `since` getting chopped, and another for the empty interval
+        assert len(caplog.records) == 2
+        assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 1
+        assert sum(1 for msg in caplog.messages if "later than the end" in msg) == 1
+
+
+class TestIntervalWithUntilOnly:
+    @pytest.mark.parametrize('gather', [None, 2, 6])
+    def test_ok(self, caplog, settings, gather):
+        _now = now()
+        _prior = _now - timedelta(weeks=gather) if gather is not None else None
+        since = None
+        until = _now - timedelta(weeks=1)
+        settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior
+
+        new_since, new_until, last_gather = calculate_collection_interval(since, until)
+        assert new_since is None  # this allows LAST_ENTRIES[key] to be the fallback
+        assert new_until == until
+
+        # last_gather is always set to something even if LAST_GATHER was empty
+        assert last_gather is not None
+        # since `until` is 1 week ago, the 4-week cutoff is 5 weeks ago
+        assert abs(_now - last_gather - timedelta(weeks=gather if gather and gather <= 5 else 5)) < epsilon
+
+        # 1 log message for LAST_GATHER, if it was more than 4 weeks before `until`
+        expected = 1 if gather and gather > 5 else 0
+        assert len(caplog.records) == expected
+        assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected
+
+    def test_until_in_future(self, caplog):
+        _now = now()
+        since = None
+        until = _now + timedelta(weeks=1)
+
+        new_since, new_until, _ = calculate_collection_interval(since, until)
+        assert new_since is None  # this allows LAST_ENTRIES[key] to be the fallback
+        assert abs(new_until - _now) < epsilon  # `until` gets truncated to now()
+
+        # log message for `until` getting chopped
+        assert len(caplog.records) == 1
+        assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 1
+
+
+class TestIntervalWithNoParams:
+    @pytest.mark.parametrize('gather', [None, 2, 6])
+    def test_ok(self, caplog, settings, gather):
+        _now = now()
+        _prior = _now - timedelta(weeks=gather) if gather is not None else None
+        since, until = None, None
+        settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior
+
+        new_since, new_until, last_gather = calculate_collection_interval(since, until)
+        assert new_since is None  # this allows LAST_ENTRIES[key] to be the fallback
+        assert abs(new_until - _now) < epsilon  # `until` defaults to now()
+
+        # last_gather is always set to something even if LAST_GATHER was empty
+        assert last_gather is not None
+        assert abs(_now - last_gather - timedelta(weeks=gather if gather and gather <= 4 else 4)) < epsilon
+
+        # 1 log message for LAST_GATHER, if it was more than 4 weeks before now
+        expected = 1 if gather and gather > 4 else 0
+        assert len(caplog.records) == expected
+        assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected