Merge pull request #10003 from jbradberry/analytics-4-week-logging

Log adjustments made to the analytics interval due to the 4-week limit

SUMMARY
If the user passes in the since and/or until parameters, sometimes they will be truncated to limit the data to only 4 weeks.  Emit a log message when this happens.
ISSUE TYPE

Bugfix Pull Request

COMPONENT NAME

API

AWX VERSION

Reviewed-by: Jim Ladd <None>
Reviewed-by: Chris Meyers <None>
Reviewed-by: Jeff Bradberry <None>
This commit is contained in:
softwarefactory-project-zuul[bot]
2021-04-27 13:55:56 +00:00
committed by GitHub
3 changed files with 250 additions and 33 deletions

View File

@@ -36,7 +36,7 @@ data _since_ the last report date - i.e., new data in the last 24 hours)
"""
def trivial_slicing(key, since, until):
def trivial_slicing(key, since, until, last_gather):
if since is not None:
return [(since, until)]
@@ -45,11 +45,11 @@ def trivial_slicing(key, since, until):
horizon = until - timedelta(weeks=4)
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
last_entry = max(last_entries.get(key) or settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon)
last_entry = max(last_entries.get(key) or last_gather, horizon)
return [(last_entry, until)]
def four_hour_slicing(key, since, until):
def four_hour_slicing(key, since, until, last_gather):
if since is not None:
last_entry = since
else:
@@ -58,7 +58,7 @@ def four_hour_slicing(key, since, until):
horizon = until - timedelta(weeks=4)
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
last_entry = max(last_entries.get(key) or settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon)
last_entry = max(last_entries.get(key) or last_gather, horizon)
start, end = last_entry, None
while start < until:
@@ -67,15 +67,14 @@ def four_hour_slicing(key, since, until):
start = end
def events_slicing(key, since, until):
def events_slicing(key, since, until, last_gather):
from awx.conf.models import Setting
last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
horizon = until - timedelta(weeks=4)
lower = since or last_gather or horizon
lower = since or last_gather
if not since and last_entries.get(key):
lower = horizon
pk_values = models.JobEvent.objects.filter(created__gte=lower, created__lte=until).aggregate(Min('pk'), Max('pk'))

View File

@@ -116,6 +116,51 @@ def package(target, data, timestamp):
return None
def calculate_collection_interval(since, until):
_now = now()
# Make sure that the endpoints are not in the future.
if until is not None and until > _now:
until = _now
logger.warning(f"End of the collection interval is in the future, setting to {_now}.")
if since is not None and since > _now:
since = _now
logger.warning(f"Start of the collection interval is in the future, setting to {_now}.")
# The value of `until` needs to be concrete, so resolve it. If it wasn't passed in,
# set it to `now`, but only if that isn't more than 4 weeks ahead of a passed-in
# `since` parameter.
if since is not None:
if until is not None:
if until > since + timedelta(weeks=4):
until = since + timedelta(weeks=4)
logger.warning(f"End of the collection interval is greater than 4 weeks from start, setting end to {until}.")
else: # until is None
until = min(since + timedelta(weeks=4), _now)
elif until is None:
until = _now
if since and since >= until:
logger.warning("Start of the collection interval is later than the end, ignoring request.")
raise ValueError
# The ultimate beginning of the interval needs to be compared to 4 weeks prior to
# `until`, but we want to keep `since` empty if it wasn't passed in because we use that
# case to know whether to use the bookkeeping settings variables to decide the start of
# the interval.
horizon = until - timedelta(weeks=4)
if since is not None and since < horizon:
since = horizon
logger.warning(f"Start of the collection interval is more than 4 weeks prior to {until}, setting to {horizon}.")
last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon
if last_gather < horizon:
last_gather = horizon
logger.warning(f"Last analytics run was more than 4 weeks prior to {until}, using {horizon} instead.")
return since, until, last_gather
def gather(dest=None, module=None, subset=None, since=None, until=None, collection_type='scheduled'):
"""
Gather all defined metrics and write them as JSON files in a .tgz
@@ -148,30 +193,12 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
from awx.main.analytics import collectors
from awx.main.signals import disable_activity_stream
_now = now()
# Make sure that the endpoints are not in the future.
until = None if until is None else min(until, _now)
since = None if since is None else min(since, _now)
if since and not until:
# If `since` is explicit but not `until`, `since` should be used to calculate the 4-week limit
until = min(since + timedelta(weeks=4), _now)
else:
until = _now if until is None else until
horizon = until - timedelta(weeks=4)
if since is not None:
# Make sure the start isn't more than 4 weeks prior to `until`.
since = max(since, horizon)
if since and since >= until:
logger.warning("Start of the collection interval is later than the end, ignoring request.")
return None
logger.debug("Last analytics run was: {}".format(settings.AUTOMATION_ANALYTICS_LAST_GATHER))
# LAST_GATHER time should always get truncated to less than 4 weeks back.
last_gather = max(settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon)
try:
since, until, last_gather = calculate_collection_interval(since, until)
except ValueError:
return None
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
@@ -201,7 +228,7 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
key = func.__awx_analytics_key__
filename = f'{key}.json'
try:
last_entry = max(last_entries.get(key) or last_gather, horizon)
last_entry = max(last_entries.get(key) or last_gather, until - timedelta(weeks=4))
results = (func(since or last_entry, collection_type=collection_type, until=until), func.__awx_analytics_version__)
json.dumps(results) # throwaway check to see if the data is json-serializable
data[filename] = results
@@ -233,9 +260,9 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
# allowed to be None, and will fall back to LAST_ENTRIES[key] or to
# LAST_GATHER (truncated appropriately to match the 4-week limit).
if func.__awx_expensive__:
slices = func.__awx_expensive__(key, since, until)
slices = func.__awx_expensive__(key, since, until, last_gather)
else:
slices = collectors.trivial_slicing(key, since, until)
slices = collectors.trivial_slicing(key, since, until, last_gather)
for start, end in slices:
files = func(start, full_path=gather_dir, until=end)

View File

@@ -0,0 +1,191 @@
import logging
import pytest
from django.utils.timezone import now, timedelta
from awx.main.analytics.core import calculate_collection_interval
# This is some bullshit.
logger = logging.getLogger('awx.main.analytics')
logger.propagate = True
epsilon = timedelta(minutes=1)
class TestIntervalWithSinceAndUntil:
@pytest.mark.parametrize('gather', [None, 2, 6])
def test_ok(self, caplog, settings, gather):
_now = now()
_prior = _now - timedelta(weeks=gather) if gather is not None else None
until = _now
since = until - timedelta(weeks=3)
settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior
new_since, new_until, last_gather = calculate_collection_interval(since, until)
assert new_since == since
assert new_until == until
# 1 log message, if LAST_GATHER was more than 4 weeks ago
expected = 1 if gather and gather > 4 else 0
assert len(caplog.records) == expected
assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected
# last_gather is always set to something even if LAST_GATHER was empty
assert last_gather is not None
assert abs(until - last_gather - timedelta(weeks=gather if gather and gather <= 4 else 4)) < epsilon
def test_both_in_future(self, caplog):
since = now() + timedelta(weeks=1)
until = since + timedelta(weeks=1)
with pytest.raises(ValueError):
calculate_collection_interval(since, until)
# log message each for `since` and `until` getting chopped, then another for the empty interval
assert len(caplog.records) == 3
assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 2
assert sum(1 for msg in caplog.messages if "later than the end" in msg) == 1
def test_until_in_future(self, caplog):
_now = now()
since = _now - timedelta(weeks=1)
until = _now + timedelta(weeks=1)
new_since, new_until, _ = calculate_collection_interval(since, until)
assert new_since == since
assert abs(_now - new_until) < epsilon # `until` gets truncated to now()
# log message for `until` getting chopped
assert len(caplog.records) == 1
assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 1
def test_interval_too_large(self, caplog):
until = now()
since = until - timedelta(weeks=5)
new_since, new_until, _ = calculate_collection_interval(since, until)
# interval is 4 weeks counting forward from explicit `since`
assert new_since == since
assert new_until == since + timedelta(weeks=4)
# log message for `until` getting chopped
assert len(caplog.records) == 1
assert sum(1 for msg in caplog.messages if "greater than 4 weeks from start" in msg) == 1
def test_reversed(self, caplog):
since = now()
until = since - timedelta(weeks=3)
with pytest.raises(ValueError):
calculate_collection_interval(since, until)
# log message for the empty interval
assert len(caplog.records) == 1
assert sum(1 for msg in caplog.messages if "later than the end" in msg) == 1
class TestIntervalWithSinceOnly:
@pytest.mark.parametrize('gather', [None, 2, 6])
def test_ok(self, caplog, settings, gather):
_now = now()
_prior = _now - timedelta(weeks=gather) if gather is not None else None
since = _now - timedelta(weeks=2)
until = None # until is going to wind up being effectively now.
settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior
new_since, new_until, last_gather = calculate_collection_interval(since, until)
# `since` is only 2 weeks back, so now() is within the 4-week cutoff and can be used for `until`
assert new_since == since
assert abs(new_until - _now) < epsilon
# 1 log message, if LAST_GATHER was more than 4 weeks ago
expected = 1 if gather and gather > 4 else 0
assert len(caplog.records) == expected
assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected
# last_gather is always set to something even if LAST_GATHER was empty
assert last_gather is not None
assert abs(_now - last_gather - timedelta(weeks=gather if gather and gather <= 4 else 4)) < epsilon
def test_since_more_than_4_weeks_before_now(self, caplog):
since = now() - timedelta(weeks=5)
until = None # until is going to wind up being effectively now.
new_since, new_until, last_gather = calculate_collection_interval(since, until)
# interval is 4 weeks counting forward from explicit `since`
assert new_since == since
assert new_until == since + timedelta(weeks=4)
# no logs, since no explicit parameters were truncated or adjusted
assert len(caplog.records) == 0
def test_since_in_future(self, caplog):
since = now() + timedelta(weeks=1)
until = None
with pytest.raises(ValueError):
calculate_collection_interval(since, until)
# log message for `since` getting chopped, and another for the empty interval
assert len(caplog.records) == 2
assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 1
assert sum(1 for msg in caplog.messages if "later than the end" in msg) == 1
class TestIntervalWithUntilOnly:
@pytest.mark.parametrize('gather', [None, 2, 6])
def test_ok(self, caplog, settings, gather):
_now = now()
_prior = _now - timedelta(weeks=gather) if gather is not None else None
since = None
until = _now - timedelta(weeks=1)
settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior
new_since, new_until, last_gather = calculate_collection_interval(since, until)
assert new_since is None # this allows LAST_ENTRIES[key] to be the fallback
assert new_until == until
# last_gather is always set to something even if LAST_GATHER was empty
assert last_gather is not None
# since `until` is 1 week ago, the 4-week cutoff is 5 weeks ago
assert abs(_now - last_gather - timedelta(weeks=gather if gather and gather <= 5 else 5)) < epsilon
# 1 log message for LAST_GATHER, if it was more than 4 weeks before `until`
expected = 1 if gather and gather > 5 else 0
assert len(caplog.records) == expected
assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected
def test_until_in_future(self, caplog):
_now = now()
since = None
until = _now + timedelta(weeks=1)
new_since, new_until, _ = calculate_collection_interval(since, until)
assert new_since is None # this allows LAST_ENTRIES[key] to be the fallback
assert abs(new_until - _now) < epsilon # `until` gets truncated to now()
# log message for `until` getting chopped
assert len(caplog.records) == 1
assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 1
class TestIntervalWithNoParams:
@pytest.mark.parametrize('gather', [None, 2, 6])
def test_ok(self, caplog, settings, gather):
_now = now()
_prior = _now - timedelta(weeks=gather) if gather is not None else None
since, until = None, None
settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior
new_since, new_until, last_gather = calculate_collection_interval(since, until)
assert new_since is None # this allows LAST_ENTRIES[key] to be the fallback
assert abs(new_until - _now) < epsilon # `until` defaults to now()
# last_gather is always set to something even if LAST_GATHER was empty
assert last_gather is not None
assert abs(_now - last_gather - timedelta(weeks=gather if gather and gather <= 4 else 4)) < epsilon
# 1 log message for LAST_GATHER, if it was more than 4 weeks before now
expected = 1 if gather and gather > 4 else 0
assert len(caplog.records) == expected
assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected