diff --git a/awx/main/analytics/collectors.py b/awx/main/analytics/collectors.py index e7fc7a8c90..a7919772b3 100644 --- a/awx/main/analytics/collectors.py +++ b/awx/main/analytics/collectors.py @@ -36,7 +36,7 @@ data _since_ the last report date - i.e., new data in the last 24 hours) """ -def trivial_slicing(key, since, until): +def trivial_slicing(key, since, until, last_gather): if since is not None: return [(since, until)] @@ -45,11 +45,11 @@ def trivial_slicing(key, since, until): horizon = until - timedelta(weeks=4) last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first() last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook) - last_entry = max(last_entries.get(key) or settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon) + last_entry = max(last_entries.get(key) or last_gather, horizon) return [(last_entry, until)] -def four_hour_slicing(key, since, until): +def four_hour_slicing(key, since, until, last_gather): if since is not None: last_entry = since else: @@ -58,7 +58,7 @@ def four_hour_slicing(key, since, until): horizon = until - timedelta(weeks=4) last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first() last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook) - last_entry = max(last_entries.get(key) or settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon) + last_entry = max(last_entries.get(key) or last_gather, horizon) start, end = last_entry, None while start < until: @@ -67,15 +67,14 @@ def four_hour_slicing(key, since, until): start = end -def events_slicing(key, since, until): +def events_slicing(key, since, until, last_gather): from awx.conf.models import Setting - last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first() last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook) horizon = until - timedelta(weeks=4) - lower = since or last_gather or horizon + lower = since or last_gather if not since and last_entries.get(key): lower = horizon pk_values = models.JobEvent.objects.filter(created__gte=lower, created__lte=until).aggregate(Min('pk'), Max('pk')) diff --git a/awx/main/analytics/core.py b/awx/main/analytics/core.py index 27d7b5312c..936a2bed28 100644 --- a/awx/main/analytics/core.py +++ b/awx/main/analytics/core.py @@ -116,6 +116,51 @@ def package(target, data, timestamp): return None +def calculate_collection_interval(since, until): + _now = now() + + # Make sure that the endpoints are not in the future. + if until is not None and until > _now: + until = _now + logger.warning(f"End of the collection interval is in the future, setting to {_now}.") + if since is not None and since > _now: + since = _now + logger.warning(f"Start of the collection interval is in the future, setting to {_now}.") + + # The value of `until` needs to be concrete, so resolve it. If it wasn't passed in, + # set it to `now`, but only if that isn't more than 4 weeks ahead of a passed-in + # `since` parameter. + if since is not None: + if until is not None: + if until > since + timedelta(weeks=4): + until = since + timedelta(weeks=4) + logger.warning(f"End of the collection interval is greater than 4 weeks from start, setting end to {until}.") + else: # until is None + until = min(since + timedelta(weeks=4), _now) + elif until is None: + until = _now + + if since and since >= until: + logger.warning("Start of the collection interval is later than the end, ignoring request.") + raise ValueError + + # The ultimate beginning of the interval needs to be compared to 4 weeks prior to + # `until`, but we want to keep `since` empty if it wasn't passed in because we use that + # case to know whether to use the bookkeeping settings variables to decide the start of + # the interval. + horizon = until - timedelta(weeks=4) + if since is not None and since < horizon: + since = horizon + logger.warning(f"Start of the collection interval is more than 4 weeks prior to {until}, setting to {horizon}.") + + last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon + if last_gather < horizon: + last_gather = horizon + logger.warning(f"Last analytics run was more than 4 weeks prior to {until}, using {horizon} instead.") + + return since, until, last_gather + + def gather(dest=None, module=None, subset=None, since=None, until=None, collection_type='scheduled'): """ Gather all defined metrics and write them as JSON files in a .tgz @@ -148,30 +193,12 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti from awx.main.analytics import collectors from awx.main.signals import disable_activity_stream - _now = now() - - # Make sure that the endpoints are not in the future. - until = None if until is None else min(until, _now) - since = None if since is None else min(since, _now) - - if since and not until: - # If `since` is explicit but not `until`, `since` should be used to calculate the 4-week limit - until = min(since + timedelta(weeks=4), _now) - else: - until = _now if until is None else until - - horizon = until - timedelta(weeks=4) - if since is not None: - # Make sure the start isn't more than 4 weeks prior to `until`. - since = max(since, horizon) - - if since and since >= until: - logger.warning("Start of the collection interval is later than the end, ignoring request.") - return None - logger.debug("Last analytics run was: {}".format(settings.AUTOMATION_ANALYTICS_LAST_GATHER)) - # LAST_GATHER time should always get truncated to less than 4 weeks back. - last_gather = max(settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon) + + try: + since, until, last_gather = calculate_collection_interval(since, until) + except ValueError: + return None last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first() last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook) @@ -201,7 +228,7 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti key = func.__awx_analytics_key__ filename = f'{key}.json' try: - last_entry = max(last_entries.get(key) or last_gather, horizon) + last_entry = max(last_entries.get(key) or last_gather, until - timedelta(weeks=4)) results = (func(since or last_entry, collection_type=collection_type, until=until), func.__awx_analytics_version__) json.dumps(results) # throwaway check to see if the data is json-serializable data[filename] = results @@ -233,9 +260,9 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti # allowed to be None, and will fall back to LAST_ENTRIES[key] or to # LAST_GATHER (truncated appropriately to match the 4-week limit). if func.__awx_expensive__: - slices = func.__awx_expensive__(key, since, until) + slices = func.__awx_expensive__(key, since, until, last_gather) else: - slices = collectors.trivial_slicing(key, since, until) + slices = collectors.trivial_slicing(key, since, until, last_gather) for start, end in slices: files = func(start, full_path=gather_dir, until=end) diff --git a/awx/main/tests/unit/analytics/test_collection_interval.py b/awx/main/tests/unit/analytics/test_collection_interval.py new file mode 100644 index 0000000000..4bb91377dc --- /dev/null +++ b/awx/main/tests/unit/analytics/test_collection_interval.py @@ -0,0 +1,191 @@ +import logging +import pytest + +from django.utils.timezone import now, timedelta + +from awx.main.analytics.core import calculate_collection_interval + +# This is some bullshit. +logger = logging.getLogger('awx.main.analytics') +logger.propagate = True + +epsilon = timedelta(minutes=1) + + +class TestIntervalWithSinceAndUntil: + @pytest.mark.parametrize('gather', [None, 2, 6]) + def test_ok(self, caplog, settings, gather): + _now = now() + _prior = _now - timedelta(weeks=gather) if gather is not None else None + until = _now + since = until - timedelta(weeks=3) + settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior + + new_since, new_until, last_gather = calculate_collection_interval(since, until) + assert new_since == since + assert new_until == until + + # 1 log message, if LAST_GATHER was more than 4 weeks ago + expected = 1 if gather and gather > 4 else 0 + assert len(caplog.records) == expected + assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected + + # last_gather is always set to something even if LAST_GATHER was empty + assert last_gather is not None + assert abs(until - last_gather - timedelta(weeks=gather if gather and gather <= 4 else 4)) < epsilon + + def test_both_in_future(self, caplog): + since = now() + timedelta(weeks=1) + until = since + timedelta(weeks=1) + + with pytest.raises(ValueError): + calculate_collection_interval(since, until) + + # log message each for `since` and `until` getting chopped, then another for the empty interval + assert len(caplog.records) == 3 + assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 2 + assert sum(1 for msg in caplog.messages if "later than the end" in msg) == 1 + + def test_until_in_future(self, caplog): + _now = now() + since = _now - timedelta(weeks=1) + until = _now + timedelta(weeks=1) + + new_since, new_until, _ = calculate_collection_interval(since, until) + assert new_since == since + assert abs(_now - new_until) < epsilon # `until` gets truncated to now() + + # log message for `until` getting chopped + assert len(caplog.records) == 1 + assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 1 + + def test_interval_too_large(self, caplog): + until = now() + since = until - timedelta(weeks=5) + + new_since, new_until, _ = calculate_collection_interval(since, until) + # interval is 4 weeks counting forward from explicit `since` + assert new_since == since + assert new_until == since + timedelta(weeks=4) + + # log message for `until` getting chopped + assert len(caplog.records) == 1 + assert sum(1 for msg in caplog.messages if "greater than 4 weeks from start" in msg) == 1 + + def test_reversed(self, caplog): + since = now() + until = since - timedelta(weeks=3) + + with pytest.raises(ValueError): + calculate_collection_interval(since, until) + + # log message for the empty interval + assert len(caplog.records) == 1 + assert sum(1 for msg in caplog.messages if "later than the end" in msg) == 1 + + +class TestIntervalWithSinceOnly: + @pytest.mark.parametrize('gather', [None, 2, 6]) + def test_ok(self, caplog, settings, gather): + _now = now() + _prior = _now - timedelta(weeks=gather) if gather is not None else None + since = _now - timedelta(weeks=2) + until = None # until is going to wind up being effectively now. + settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior + + new_since, new_until, last_gather = calculate_collection_interval(since, until) + # `since` is only 2 weeks back, so now() is within the 4-week cutoff and can be used for `until` + assert new_since == since + assert abs(new_until - _now) < epsilon + + # 1 log message, if LAST_GATHER was more than 4 weeks ago + expected = 1 if gather and gather > 4 else 0 + assert len(caplog.records) == expected + assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected + + # last_gather is always set to something even if LAST_GATHER was empty + assert last_gather is not None + assert abs(_now - last_gather - timedelta(weeks=gather if gather and gather <= 4 else 4)) < epsilon + + def test_since_more_than_4_weeks_before_now(self, caplog): + since = now() - timedelta(weeks=5) + until = None # until is going to wind up being effectively now. + + new_since, new_until, last_gather = calculate_collection_interval(since, until) + # interval is 4 weeks counting forward from explicit `since` + assert new_since == since + assert new_until == since + timedelta(weeks=4) + + # no logs, since no explicit parameters were truncated or adjusted + assert len(caplog.records) == 0 + + def test_since_in_future(self, caplog): + since = now() + timedelta(weeks=1) + until = None + + with pytest.raises(ValueError): + calculate_collection_interval(since, until) + + # log message for `since` getting chopped, and another for the empty interval + assert len(caplog.records) == 2 + assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 1 + assert sum(1 for msg in caplog.messages if "later than the end" in msg) == 1 + + +class TestIntervalWithUntilOnly: + @pytest.mark.parametrize('gather', [None, 2, 6]) + def test_ok(self, caplog, settings, gather): + _now = now() + _prior = _now - timedelta(weeks=gather) if gather is not None else None + since = None + until = _now - timedelta(weeks=1) + settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior + + new_since, new_until, last_gather = calculate_collection_interval(since, until) + assert new_since is None # this allows LAST_ENTRIES[key] to be the fallback + assert new_until == until + + # last_gather is always set to something even if LAST_GATHER was empty + assert last_gather is not None + # since `until` is 1 week ago, the 4-week cutoff is 5 weeks ago + assert abs(_now - last_gather - timedelta(weeks=gather if gather and gather <= 5 else 5)) < epsilon + + # 1 log message for LAST_GATHER, if it was more than 4 weeks before `until` + expected = 1 if gather and gather > 5 else 0 + assert len(caplog.records) == expected + assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected + + def test_until_in_future(self, caplog): + _now = now() + since = None + until = _now + timedelta(weeks=1) + + new_since, new_until, _ = calculate_collection_interval(since, until) + assert new_since is None # this allows LAST_ENTRIES[key] to be the fallback + assert abs(new_until - _now) < epsilon # `until` gets truncated to now() + + # log message for `until` getting chopped + assert len(caplog.records) == 1 + assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 1 + + +class TestIntervalWithNoParams: + @pytest.mark.parametrize('gather', [None, 2, 6]) + def test_ok(self, caplog, settings, gather): + _now = now() + _prior = _now - timedelta(weeks=gather) if gather is not None else None + since, until = None, None + settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior + + new_since, new_until, last_gather = calculate_collection_interval(since, until) + assert new_since is None # this allows LAST_ENTRIES[key] to be the fallback + assert abs(new_until - _now) < epsilon # `until` defaults to now() + + # last_gather is always set to something even if LAST_GATHER was empty + assert last_gather is not None + assert abs(_now - last_gather - timedelta(weeks=gather if gather and gather <= 4 else 4)) < epsilon + + # 1 log message for LAST_GATHER, if it was more than 4 weeks before now + expected = 1 if gather and gather > 4 else 0 + assert len(caplog.records) == expected + assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected