Break out the main interval trimming calculation into a new function

This commit is contained in:
Jeff Bradberry 2021-04-23 13:37:22 -04:00
parent f4d848e596
commit 4857c5edcb
2 changed files with 58 additions and 50 deletions

View File

@ -36,7 +36,7 @@ data _since_ the last report date - i.e., new data in the last 24 hours)
"""
def trivial_slicing(key, since, until):
def trivial_slicing(key, since, until, last_gather):
if since is not None:
return [(since, until)]
@ -45,11 +45,11 @@ def trivial_slicing(key, since, until):
horizon = until - timedelta(weeks=4)
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
last_entry = max(last_entries.get(key) or settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon)
last_entry = max(last_entries.get(key) or last_gather, horizon)
return [(last_entry, until)]
def four_hour_slicing(key, since, until):
def four_hour_slicing(key, since, until, last_gather):
if since is not None:
last_entry = since
else:
@ -58,7 +58,7 @@ def four_hour_slicing(key, since, until):
horizon = until - timedelta(weeks=4)
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
last_entry = max(last_entries.get(key) or settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon)
last_entry = max(last_entries.get(key) or last_gather, horizon)
start, end = last_entry, None
while start < until:
@ -67,15 +67,14 @@ def four_hour_slicing(key, since, until):
start = end
def events_slicing(key, since, until):
def events_slicing(key, since, until, last_gather):
from awx.conf.models import Setting
last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
horizon = until - timedelta(weeks=4)
lower = since or last_gather or horizon
lower = since or last_gather
if not since and last_entries.get(key):
lower = horizon
pk_values = models.JobEvent.objects.filter(created__gte=lower, created__lte=until).aggregate(Min('pk'), Max('pk'))

View File

@ -116,6 +116,51 @@ def package(target, data, timestamp):
return None
def calculate_collection_interval(since, until):
_now = now()
# Make sure that the endpoints are not in the future.
if until is not None and until > _now:
until = _now
logger.warning(f"End of the collection interval is in the future, setting to {_now}.")
if since is not None and since > _now:
since = _now
logger.warning(f"Start of the collection interval is in the future, setting to {_now}.")
# The value of `until` needs to be concrete, so resolve it. If it wasn't passed in,
# set it to `now`, but only if that isn't more than 4 weeks ahead of a passed-in
# `since` parameter.
if since is not None:
if until is not None:
if until > since + timedelta(weeks=4):
until = since + timedelta(weeks=4)
logger.warning(f"End of the collection interval is greater than 4 weeks from start, setting end to {until}.")
else: # until is None
until = min(since + timedelta(weeks=4), _now)
elif until is None:
until = _now
if since and since >= until:
logger.warning("Start of the collection interval is later than the end, ignoring request.")
raise ValueError
# The ultimate beginning of the interval needs to be compared to 4 weeks prior to
# `until`, but we want to keep `since` empty if it wasn't passed in because we use that
# case to know whether to use the bookkeeping settings variables to decide the start of
# the interval.
horizon = until - timedelta(weeks=4)
if since is not None and since < horizon:
since = horizon
logger.warning(f"Start of the collection interval is more than 4 weeks prior to {until}, setting to {horizon}.")
last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon
if last_gather < horizon:
last_gather = horizon
logger.warning(f"Last analytics run was more than 4 weeks prior to {until}, using {horizon} instead.")
return since, until, last_gather
def gather(dest=None, module=None, subset=None, since=None, until=None, collection_type='scheduled'):
"""
Gather all defined metrics and write them as JSON files in a .tgz
@ -148,48 +193,12 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
from awx.main.analytics import collectors
from awx.main.signals import disable_activity_stream
_now = now()
# Make sure that the endpoints are not in the future.
if until is not None and until > _now:
until = _now
logger.warning(f"End of the collection interval is in the future, setting to {_now}.")
if since is not None and since > _now:
since = _now
logger.warning(f"Start of the collection interval is in the future, setting to {_now}.")
# The value of `until` needs to be concrete, so resolve it. If it wasn't passed in,
# set it to `now`, but only if that isn't more than 4 weeks ahead of a passed-in
# `since` parameter.
if since is not None:
if until is not None:
if until > since + timedelta(weeks=4):
until = since + timedelta(weeks=4)
logger.warning(f"End of the collection interval is greater than 4 weeks from start, setting end to {until}.")
else: # until is None
until = min(since + timedelta(weeks=4), _now)
elif until is None:
until = _now
if since and since >= until:
logger.warning("Start of the collection interval is later than the end, ignoring request.")
return None
# The ultimate beginning of the interval needs to be compared to 4 weeks prior to
# `until`, but we want to keep `since` empty if it wasn't passed in because we use that
# case to know whether to use the bookkeeping settings variables to decide the start of
# the interval.
horizon = until - timedelta(weeks=4)
if since is not None and since < horizon:
since = horizon
logger.warning(f"Start of the collection interval is more than 4 weeks prior to {until}, setting to {horizon}.")
logger.debug("Last analytics run was: {}".format(settings.AUTOMATION_ANALYTICS_LAST_GATHER))
last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon
if last_gather < horizon:
last_gather = horizon
logger.warning(f"Last analytics run was more than 4 weeks prior to {until}, using {horizon} instead.")
try:
since, until, last_gather = calculate_collection_interval(since, until)
except ValueError:
return None
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
@ -219,7 +228,7 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
key = func.__awx_analytics_key__
filename = f'{key}.json'
try:
last_entry = max(last_entries.get(key) or last_gather, horizon)
last_entry = max(last_entries.get(key) or last_gather, until - timedelta(weeks=4))
results = (func(since or last_entry, collection_type=collection_type, until=until), func.__awx_analytics_version__)
json.dumps(results) # throwaway check to see if the data is json-serializable
data[filename] = results
@ -251,9 +260,9 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
# allowed to be None, and will fall back to LAST_ENTRIES[key] or to
# LAST_GATHER (truncated appropriately to match the 4-week limit).
if func.__awx_expensive__:
slices = func.__awx_expensive__(key, since, until)
slices = func.__awx_expensive__(key, since, until, last_gather)
else:
slices = collectors.trivial_slicing(key, since, until)
slices = collectors.trivial_slicing(key, since, until, last_gather)
for start, end in slices:
files = func(start, full_path=gather_dir, until=end)