mirror of
https://github.com/ansible/awx.git
synced 2026-03-14 15:37:29 -02:30
Merge pull request #10003 from jbradberry/analytics-4-week-logging
Log adjustments made to the analytics interval due to the 4-week limit SUMMARY If the user passes in the since and/or until parameters, sometimes they will be truncated to limit the data to only 4 weeks. Emit a log message when this happens. ISSUE TYPE Bugfix Pull Request COMPONENT NAME API AWX VERSION Reviewed-by: Jim Ladd <None> Reviewed-by: Chris Meyers <None> Reviewed-by: Jeff Bradberry <None>
This commit is contained in:
@@ -36,7 +36,7 @@ data _since_ the last report date - i.e., new data in the last 24 hours)
|
||||
"""
|
||||
|
||||
|
||||
def trivial_slicing(key, since, until):
|
||||
def trivial_slicing(key, since, until, last_gather):
|
||||
if since is not None:
|
||||
return [(since, until)]
|
||||
|
||||
@@ -45,11 +45,11 @@ def trivial_slicing(key, since, until):
|
||||
horizon = until - timedelta(weeks=4)
|
||||
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
|
||||
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
|
||||
last_entry = max(last_entries.get(key) or settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon)
|
||||
last_entry = max(last_entries.get(key) or last_gather, horizon)
|
||||
return [(last_entry, until)]
|
||||
|
||||
|
||||
def four_hour_slicing(key, since, until):
|
||||
def four_hour_slicing(key, since, until, last_gather):
|
||||
if since is not None:
|
||||
last_entry = since
|
||||
else:
|
||||
@@ -58,7 +58,7 @@ def four_hour_slicing(key, since, until):
|
||||
horizon = until - timedelta(weeks=4)
|
||||
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
|
||||
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
|
||||
last_entry = max(last_entries.get(key) or settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon)
|
||||
last_entry = max(last_entries.get(key) or last_gather, horizon)
|
||||
|
||||
start, end = last_entry, None
|
||||
while start < until:
|
||||
@@ -67,15 +67,14 @@ def four_hour_slicing(key, since, until):
|
||||
start = end
|
||||
|
||||
|
||||
def events_slicing(key, since, until):
|
||||
def events_slicing(key, since, until, last_gather):
|
||||
from awx.conf.models import Setting
|
||||
|
||||
last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER
|
||||
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
|
||||
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
|
||||
horizon = until - timedelta(weeks=4)
|
||||
|
||||
lower = since or last_gather or horizon
|
||||
lower = since or last_gather
|
||||
if not since and last_entries.get(key):
|
||||
lower = horizon
|
||||
pk_values = models.JobEvent.objects.filter(created__gte=lower, created__lte=until).aggregate(Min('pk'), Max('pk'))
|
||||
|
||||
@@ -116,6 +116,51 @@ def package(target, data, timestamp):
|
||||
return None
|
||||
|
||||
|
||||
def calculate_collection_interval(since, until):
|
||||
_now = now()
|
||||
|
||||
# Make sure that the endpoints are not in the future.
|
||||
if until is not None and until > _now:
|
||||
until = _now
|
||||
logger.warning(f"End of the collection interval is in the future, setting to {_now}.")
|
||||
if since is not None and since > _now:
|
||||
since = _now
|
||||
logger.warning(f"Start of the collection interval is in the future, setting to {_now}.")
|
||||
|
||||
# The value of `until` needs to be concrete, so resolve it. If it wasn't passed in,
|
||||
# set it to `now`, but only if that isn't more than 4 weeks ahead of a passed-in
|
||||
# `since` parameter.
|
||||
if since is not None:
|
||||
if until is not None:
|
||||
if until > since + timedelta(weeks=4):
|
||||
until = since + timedelta(weeks=4)
|
||||
logger.warning(f"End of the collection interval is greater than 4 weeks from start, setting end to {until}.")
|
||||
else: # until is None
|
||||
until = min(since + timedelta(weeks=4), _now)
|
||||
elif until is None:
|
||||
until = _now
|
||||
|
||||
if since and since >= until:
|
||||
logger.warning("Start of the collection interval is later than the end, ignoring request.")
|
||||
raise ValueError
|
||||
|
||||
# The ultimate beginning of the interval needs to be compared to 4 weeks prior to
|
||||
# `until`, but we want to keep `since` empty if it wasn't passed in because we use that
|
||||
# case to know whether to use the bookkeeping settings variables to decide the start of
|
||||
# the interval.
|
||||
horizon = until - timedelta(weeks=4)
|
||||
if since is not None and since < horizon:
|
||||
since = horizon
|
||||
logger.warning(f"Start of the collection interval is more than 4 weeks prior to {until}, setting to {horizon}.")
|
||||
|
||||
last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon
|
||||
if last_gather < horizon:
|
||||
last_gather = horizon
|
||||
logger.warning(f"Last analytics run was more than 4 weeks prior to {until}, using {horizon} instead.")
|
||||
|
||||
return since, until, last_gather
|
||||
|
||||
|
||||
def gather(dest=None, module=None, subset=None, since=None, until=None, collection_type='scheduled'):
|
||||
"""
|
||||
Gather all defined metrics and write them as JSON files in a .tgz
|
||||
@@ -148,30 +193,12 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
|
||||
from awx.main.analytics import collectors
|
||||
from awx.main.signals import disable_activity_stream
|
||||
|
||||
_now = now()
|
||||
|
||||
# Make sure that the endpoints are not in the future.
|
||||
until = None if until is None else min(until, _now)
|
||||
since = None if since is None else min(since, _now)
|
||||
|
||||
if since and not until:
|
||||
# If `since` is explicit but not `until`, `since` should be used to calculate the 4-week limit
|
||||
until = min(since + timedelta(weeks=4), _now)
|
||||
else:
|
||||
until = _now if until is None else until
|
||||
|
||||
horizon = until - timedelta(weeks=4)
|
||||
if since is not None:
|
||||
# Make sure the start isn't more than 4 weeks prior to `until`.
|
||||
since = max(since, horizon)
|
||||
|
||||
if since and since >= until:
|
||||
logger.warning("Start of the collection interval is later than the end, ignoring request.")
|
||||
return None
|
||||
|
||||
logger.debug("Last analytics run was: {}".format(settings.AUTOMATION_ANALYTICS_LAST_GATHER))
|
||||
# LAST_GATHER time should always get truncated to less than 4 weeks back.
|
||||
last_gather = max(settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon)
|
||||
|
||||
try:
|
||||
since, until, last_gather = calculate_collection_interval(since, until)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
|
||||
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
|
||||
@@ -201,7 +228,7 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
|
||||
key = func.__awx_analytics_key__
|
||||
filename = f'{key}.json'
|
||||
try:
|
||||
last_entry = max(last_entries.get(key) or last_gather, horizon)
|
||||
last_entry = max(last_entries.get(key) or last_gather, until - timedelta(weeks=4))
|
||||
results = (func(since or last_entry, collection_type=collection_type, until=until), func.__awx_analytics_version__)
|
||||
json.dumps(results) # throwaway check to see if the data is json-serializable
|
||||
data[filename] = results
|
||||
@@ -233,9 +260,9 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
|
||||
# allowed to be None, and will fall back to LAST_ENTRIES[key] or to
|
||||
# LAST_GATHER (truncated appropriately to match the 4-week limit).
|
||||
if func.__awx_expensive__:
|
||||
slices = func.__awx_expensive__(key, since, until)
|
||||
slices = func.__awx_expensive__(key, since, until, last_gather)
|
||||
else:
|
||||
slices = collectors.trivial_slicing(key, since, until)
|
||||
slices = collectors.trivial_slicing(key, since, until, last_gather)
|
||||
|
||||
for start, end in slices:
|
||||
files = func(start, full_path=gather_dir, until=end)
|
||||
|
||||
191
awx/main/tests/unit/analytics/test_collection_interval.py
Normal file
191
awx/main/tests/unit/analytics/test_collection_interval.py
Normal file
@@ -0,0 +1,191 @@
|
||||
import logging
|
||||
import pytest
|
||||
|
||||
from django.utils.timezone import now, timedelta
|
||||
|
||||
from awx.main.analytics.core import calculate_collection_interval
|
||||
|
||||
# This is some bullshit.
|
||||
logger = logging.getLogger('awx.main.analytics')
|
||||
logger.propagate = True
|
||||
|
||||
epsilon = timedelta(minutes=1)
|
||||
|
||||
|
||||
class TestIntervalWithSinceAndUntil:
|
||||
@pytest.mark.parametrize('gather', [None, 2, 6])
|
||||
def test_ok(self, caplog, settings, gather):
|
||||
_now = now()
|
||||
_prior = _now - timedelta(weeks=gather) if gather is not None else None
|
||||
until = _now
|
||||
since = until - timedelta(weeks=3)
|
||||
settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior
|
||||
|
||||
new_since, new_until, last_gather = calculate_collection_interval(since, until)
|
||||
assert new_since == since
|
||||
assert new_until == until
|
||||
|
||||
# 1 log message, if LAST_GATHER was more than 4 weeks ago
|
||||
expected = 1 if gather and gather > 4 else 0
|
||||
assert len(caplog.records) == expected
|
||||
assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected
|
||||
|
||||
# last_gather is always set to something even if LAST_GATHER was empty
|
||||
assert last_gather is not None
|
||||
assert abs(until - last_gather - timedelta(weeks=gather if gather and gather <= 4 else 4)) < epsilon
|
||||
|
||||
def test_both_in_future(self, caplog):
|
||||
since = now() + timedelta(weeks=1)
|
||||
until = since + timedelta(weeks=1)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
calculate_collection_interval(since, until)
|
||||
|
||||
# log message each for `since` and `until` getting chopped, then another for the empty interval
|
||||
assert len(caplog.records) == 3
|
||||
assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 2
|
||||
assert sum(1 for msg in caplog.messages if "later than the end" in msg) == 1
|
||||
|
||||
def test_until_in_future(self, caplog):
|
||||
_now = now()
|
||||
since = _now - timedelta(weeks=1)
|
||||
until = _now + timedelta(weeks=1)
|
||||
|
||||
new_since, new_until, _ = calculate_collection_interval(since, until)
|
||||
assert new_since == since
|
||||
assert abs(_now - new_until) < epsilon # `until` gets truncated to now()
|
||||
|
||||
# log message for `until` getting chopped
|
||||
assert len(caplog.records) == 1
|
||||
assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 1
|
||||
|
||||
def test_interval_too_large(self, caplog):
|
||||
until = now()
|
||||
since = until - timedelta(weeks=5)
|
||||
|
||||
new_since, new_until, _ = calculate_collection_interval(since, until)
|
||||
# interval is 4 weeks counting forward from explicit `since`
|
||||
assert new_since == since
|
||||
assert new_until == since + timedelta(weeks=4)
|
||||
|
||||
# log message for `until` getting chopped
|
||||
assert len(caplog.records) == 1
|
||||
assert sum(1 for msg in caplog.messages if "greater than 4 weeks from start" in msg) == 1
|
||||
|
||||
def test_reversed(self, caplog):
|
||||
since = now()
|
||||
until = since - timedelta(weeks=3)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
calculate_collection_interval(since, until)
|
||||
|
||||
# log message for the empty interval
|
||||
assert len(caplog.records) == 1
|
||||
assert sum(1 for msg in caplog.messages if "later than the end" in msg) == 1
|
||||
|
||||
|
||||
class TestIntervalWithSinceOnly:
|
||||
@pytest.mark.parametrize('gather', [None, 2, 6])
|
||||
def test_ok(self, caplog, settings, gather):
|
||||
_now = now()
|
||||
_prior = _now - timedelta(weeks=gather) if gather is not None else None
|
||||
since = _now - timedelta(weeks=2)
|
||||
until = None # until is going to wind up being effectively now.
|
||||
settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior
|
||||
|
||||
new_since, new_until, last_gather = calculate_collection_interval(since, until)
|
||||
# `since` is only 2 weeks back, so now() is within the 4-week cutoff and can be used for `until`
|
||||
assert new_since == since
|
||||
assert abs(new_until - _now) < epsilon
|
||||
|
||||
# 1 log message, if LAST_GATHER was more than 4 weeks ago
|
||||
expected = 1 if gather and gather > 4 else 0
|
||||
assert len(caplog.records) == expected
|
||||
assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected
|
||||
|
||||
# last_gather is always set to something even if LAST_GATHER was empty
|
||||
assert last_gather is not None
|
||||
assert abs(_now - last_gather - timedelta(weeks=gather if gather and gather <= 4 else 4)) < epsilon
|
||||
|
||||
def test_since_more_than_4_weeks_before_now(self, caplog):
|
||||
since = now() - timedelta(weeks=5)
|
||||
until = None # until is going to wind up being effectively now.
|
||||
|
||||
new_since, new_until, last_gather = calculate_collection_interval(since, until)
|
||||
# interval is 4 weeks counting forward from explicit `since`
|
||||
assert new_since == since
|
||||
assert new_until == since + timedelta(weeks=4)
|
||||
|
||||
# no logs, since no explicit parameters were truncated or adjusted
|
||||
assert len(caplog.records) == 0
|
||||
|
||||
def test_since_in_future(self, caplog):
|
||||
since = now() + timedelta(weeks=1)
|
||||
until = None
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
calculate_collection_interval(since, until)
|
||||
|
||||
# log message for `since` getting chopped, and another for the empty interval
|
||||
assert len(caplog.records) == 2
|
||||
assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 1
|
||||
assert sum(1 for msg in caplog.messages if "later than the end" in msg) == 1
|
||||
|
||||
|
||||
class TestIntervalWithUntilOnly:
|
||||
@pytest.mark.parametrize('gather', [None, 2, 6])
|
||||
def test_ok(self, caplog, settings, gather):
|
||||
_now = now()
|
||||
_prior = _now - timedelta(weeks=gather) if gather is not None else None
|
||||
since = None
|
||||
until = _now - timedelta(weeks=1)
|
||||
settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior
|
||||
|
||||
new_since, new_until, last_gather = calculate_collection_interval(since, until)
|
||||
assert new_since is None # this allows LAST_ENTRIES[key] to be the fallback
|
||||
assert new_until == until
|
||||
|
||||
# last_gather is always set to something even if LAST_GATHER was empty
|
||||
assert last_gather is not None
|
||||
# since `until` is 1 week ago, the 4-week cutoff is 5 weeks ago
|
||||
assert abs(_now - last_gather - timedelta(weeks=gather if gather and gather <= 5 else 5)) < epsilon
|
||||
|
||||
# 1 log message for LAST_GATHER, if it was more than 4 weeks before `until`
|
||||
expected = 1 if gather and gather > 5 else 0
|
||||
assert len(caplog.records) == expected
|
||||
assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected
|
||||
|
||||
def test_until_in_future(self, caplog):
|
||||
_now = now()
|
||||
since = None
|
||||
until = _now + timedelta(weeks=1)
|
||||
|
||||
new_since, new_until, _ = calculate_collection_interval(since, until)
|
||||
assert new_since is None # this allows LAST_ENTRIES[key] to be the fallback
|
||||
assert abs(new_until - _now) < epsilon # `until` gets truncated to now()
|
||||
|
||||
# log message for `until` getting chopped
|
||||
assert len(caplog.records) == 1
|
||||
assert sum(1 for msg in caplog.messages if "is in the future" in msg) == 1
|
||||
|
||||
|
||||
class TestIntervalWithNoParams:
|
||||
@pytest.mark.parametrize('gather', [None, 2, 6])
|
||||
def test_ok(self, caplog, settings, gather):
|
||||
_now = now()
|
||||
_prior = _now - timedelta(weeks=gather) if gather is not None else None
|
||||
since, until = None, None
|
||||
settings.AUTOMATION_ANALYTICS_LAST_GATHER = _prior
|
||||
|
||||
new_since, new_until, last_gather = calculate_collection_interval(since, until)
|
||||
assert new_since is None # this allows LAST_ENTRIES[key] to be the fallback
|
||||
assert abs(new_until - _now) < epsilon # `until` defaults to now()
|
||||
|
||||
# last_gather is always set to something even if LAST_GATHER was empty
|
||||
assert last_gather is not None
|
||||
assert abs(_now - last_gather - timedelta(weeks=gather if gather and gather <= 4 else 4)) < epsilon
|
||||
|
||||
# 1 log message for LAST_GATHER, if it was more than 4 weeks before now
|
||||
expected = 1 if gather and gather > 4 else 0
|
||||
assert len(caplog.records) == expected
|
||||
assert sum(1 for msg in caplog.messages if "more than 4 weeks prior" in msg) == expected
|
||||
Reference in New Issue
Block a user