Merge pull request #13809 from ansible/feature_usage-collection-pt2

Enhance usage metrics collection
This commit is contained in:
John Westcott IV 2023-04-07 11:44:59 -04:00 committed by GitHub
commit 3ce68ced1e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 230 additions and 42 deletions

View File

@ -6,7 +6,7 @@ import platform
import distro
from django.db import connection
from django.db.models import Count
from django.db.models import Count, Min
from django.conf import settings
from django.contrib.sessions.models import Session
from django.utils.timezone import now, timedelta
@ -35,7 +35,7 @@ data _since_ the last report date - i.e., new data in the last 24 hours)
"""
def trivial_slicing(key, since, until, last_gather):
def trivial_slicing(key, since, until, last_gather, **kwargs):
if since is not None:
return [(since, until)]
@ -48,7 +48,7 @@ def trivial_slicing(key, since, until, last_gather):
return [(last_entry, until)]
def four_hour_slicing(key, since, until, last_gather):
def four_hour_slicing(key, since, until, last_gather, **kwargs):
if since is not None:
last_entry = since
else:
@ -69,6 +69,54 @@ def four_hour_slicing(key, since, until, last_gather):
start = end
def host_metric_slicing(key, since, until, last_gather, **kwargs):
    """
    Compute (start, end) datetime slices for the host_metric collector.

    Unlike the other slicers this does not start 4 weeks ago: it either
    continues incrementally from the last shipped entry for `key`, or walks
    the whole table in 30-day slices on the first run or when the caller
    requests a periodic full sync via kwargs['full_sync_enabled'].

    Yields (start, end) tuples; yields nothing when the table is empty.
    """
    from awx.main.models.inventory import HostMetric

    if since is not None:
        # BUGFIX: this function is a generator (it yields below), so a plain
        # `return [(since, until)]` would end the generator WITHOUT producing
        # any slice -- an explicit `since` was silently ignored. Yield the
        # single slice instead.
        yield (since, until)
        return

    from awx.conf.models import Setting

    # Did the caller request a full-table sync?
    full_sync_enabled = kwargs.get('full_sync_enabled', False)
    last_entry = None
    if not full_sync_enabled:
        #
        # If not, try incremental sync first: continue from the last shipped
        # entry recorded in AUTOMATION_ANALYTICS_LAST_ENTRIES.
        #
        last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
        last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
        last_entry = last_entries.get(key)

        if not last_entry:
            #
            # Never shipped before => switch to full sync.
            #
            full_sync_enabled = True

    if full_sync_enabled:
        #
        # Full sync starts from the lowest timestamp present in the table.
        #
        min_dates = HostMetric.objects.aggregate(min_last_automation=Min('last_automation'), min_last_deleted=Min('last_deleted'))
        if min_dates['min_last_automation'] and min_dates['min_last_deleted']:
            last_entry = min(min_dates['min_last_automation'], min_dates['min_last_deleted'])
        elif min_dates['min_last_automation'] or min_dates['min_last_deleted']:
            last_entry = min_dates['min_last_automation'] or min_dates['min_last_deleted']

    if not last_entry:
        # empty table: nothing to collect
        return

    # Walk forward in 30-day slices until `until` is reached.
    start, end = last_entry, None
    while start < until:
        end = min(start + timedelta(days=30), until)
        yield (start, end)
        start = end
def _identify_lower(key, since, until, last_gather):
from awx.conf.models import Setting
@ -537,3 +585,25 @@ def workflow_job_template_node_table(since, full_path, **kwargs):
) always_nodes ON main_workflowjobtemplatenode.id = always_nodes.from_workflowjobtemplatenode_id
ORDER BY main_workflowjobtemplatenode.id ASC) TO STDOUT WITH CSV HEADER'''
return _copy_table(table='workflow_job_template_node', query=workflow_job_template_node_query, path=full_path)
@register(
    'host_metric_table', '1.0', format='csv', description=_('Host Metric data, incremental/full sync'), expensive=host_metric_slicing, full_sync_interval=30
)
def host_metric_table(since, full_path, until, **kwargs):
    """Dump HostMetric rows touched within (since, until] to a CSV file.

    A row qualifies when either its last_automation or last_deleted timestamp
    falls inside the slice; output is ordered by id.
    """
    # Hoist the boundary strings; each appears twice in the query.
    lower = since.isoformat()
    upper = until.isoformat()
    host_metric_query = '''COPY (SELECT main_hostmetric.id,
                                    main_hostmetric.hostname,
                                    main_hostmetric.first_automation,
                                    main_hostmetric.last_automation,
                                    main_hostmetric.last_deleted,
                                    main_hostmetric.deleted,
                                    main_hostmetric.automated_counter,
                                    main_hostmetric.deleted_counter,
                                    main_hostmetric.used_in_inventories
                            FROM main_hostmetric
                            WHERE (main_hostmetric.last_automation > '{}' AND main_hostmetric.last_automation <= '{}') OR
                                  (main_hostmetric.last_deleted > '{}' AND main_hostmetric.last_deleted <= '{}')
                            ORDER BY main_hostmetric.id ASC) TO STDOUT WITH CSV HEADER'''.format(lower, upper, lower, upper)
    return _copy_table(table='host_metric', query=host_metric_query, path=full_path)

View File

@ -52,7 +52,7 @@ def all_collectors():
}
def register(key, version, description=None, format='json', expensive=None):
def register(key, version, description=None, format='json', expensive=None, full_sync_interval=None):
"""
A decorator used to register a function as a metric collector.
@ -71,6 +71,7 @@ def register(key, version, description=None, format='json', expensive=None):
f.__awx_analytics_description__ = description
f.__awx_analytics_type__ = format
f.__awx_expensive__ = expensive
f.__awx_full_sync_interval__ = full_sync_interval
return f
return decorate
@ -259,10 +260,19 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
# These slicer functions may return a generator. The `since` parameter is
# allowed to be None, and will fall back to LAST_ENTRIES[key] or to
# LAST_GATHER (truncated appropriately to match the 4-week limit).
#
# Or it can force full table sync if interval is given
kwargs = dict()
full_sync_enabled = False
if func.__awx_full_sync_interval__:
last_full_sync = last_entries.get(f"{key}_full")
full_sync_enabled = not last_full_sync or last_full_sync < now() - timedelta(days=func.__awx_full_sync_interval__)
kwargs['full_sync_enabled'] = full_sync_enabled
if func.__awx_expensive__:
slices = func.__awx_expensive__(key, since, until, last_gather)
slices = func.__awx_expensive__(key, since, until, last_gather, **kwargs)
else:
slices = collectors.trivial_slicing(key, since, until, last_gather)
slices = collectors.trivial_slicing(key, since, until, last_gather, **kwargs)
for start, end in slices:
files = func(start, full_path=gather_dir, until=end)
@ -301,6 +311,12 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
succeeded = False
logger.exception("Could not generate metric {}".format(filename))
# update full sync timestamp if successfully shipped
if full_sync_enabled and collection_type != 'dry-run' and succeeded:
with disable_activity_stream():
last_entries[f"{key}_full"] = now()
settings.AUTOMATION_ANALYTICS_LAST_ENTRIES = json.dumps(last_entries, cls=DjangoJSONEncoder)
if collection_type != 'dry-run':
if succeeded:
for fpath in tarfiles:

View File

@ -822,6 +822,15 @@ register(
category_slug='system',
)
# Timestamp of the most recent HostMetric soft-delete cleanup run; written by
# HostMetric.cleanup_task() on success and read by the periodic
# cleanup_host_metrics task to enforce the minimum interval between runs.
register(
    'CLEANUP_HOST_METRICS_LAST_TS',
    field_class=fields.DateTimeField,
    label=_('Last cleanup date for HostMetrics'),
    allow_null=True,
    category=_('System'),
    category_slug='system',
)
def logging_validate(serializer, attrs):
if not serializer.instance or not hasattr(serializer.instance, 'LOG_AGGREGATOR_HOST') or not hasattr(serializer.instance, 'LOG_AGGREGATOR_TYPE'):

View File

@ -0,0 +1,22 @@
from awx.main.models import HostMetric
from django.core.management.base import BaseCommand
from django.conf import settings
class Command(BaseCommand):
    """
    Run soft-deleting of HostMetrics
    """

    help = 'Run soft-deleting of HostMetrics'

    def add_arguments(self, parser):
        # Optional override of the cutoff; when omitted, handle() falls back
        # to the CLEANUP_HOST_METRICS_THRESHOLD setting.
        parser.add_argument('--months-ago', type=int, dest='months-ago', action='store', help='Threshold in months for soft-deleting')

    def handle(self, *args, **options):
        # CLI value wins; otherwise use the configured threshold (12 months default).
        # (The original `options.get(...) or None` followed by a second
        # falsiness check was redundant -- a single `or` fallback suffices.)
        months_ago = options.get('months-ago') or getattr(settings, 'CLEANUP_HOST_METRICS_THRESHOLD', 12)
        HostMetric.cleanup_task(months_ago)

View File

@ -9,6 +9,8 @@ import re
import copy
import os.path
from urllib.parse import urljoin
import dateutil.relativedelta
import yaml
# Django
@ -888,6 +890,23 @@ class HostMetric(models.Model):
self.deleted = False
self.save(update_fields=['deleted'])
@classmethod
def cleanup_task(cls, months_ago):
    """
    Soft-delete HostMetric records whose last_automation is older than
    `months_ago` months: set deleted=True, increment deleted_counter and
    stamp last_deleted, then record CLEANUP_HOST_METRICS_LAST_TS.

    A non-integer or non-positive `months_ago` is logged as an error and the
    cleanup is skipped (no exception propagates to the caller).
    """
    try:
        months_ago = int(months_ago)
        if months_ago <= 0:
            raise ValueError()

        last_automation_before = now() - dateutil.relativedelta.relativedelta(months=months_ago)
        logger.info(f'Cleanup [HostMetric]: soft-deleting records last automated before {last_automation_before}')
        # Use `cls` rather than hard-coding HostMetric: idiomatic for a
        # classmethod and keeps subclasses (if any) operating on themselves.
        cls.active_objects.filter(last_automation__lt=last_automation_before).update(
            deleted=True, deleted_counter=models.F('deleted_counter') + 1, last_deleted=now()
        )
        settings.CLEANUP_HOST_METRICS_LAST_TS = now()
    except (TypeError, ValueError):
        logger.error(f"Cleanup [HostMetric]: months_ago({months_ago}) has to be a positive integer value")
class HostMetricSummaryMonthly(models.Model):
"""

View File

@ -47,6 +47,7 @@ from awx.main.models import (
Inventory,
SmartInventoryMembership,
Job,
HostMetric,
)
from awx.main.constants import ACTIVE_STATES
from awx.main.dispatch.publish import task
@ -378,6 +379,20 @@ def cleanup_images_and_files():
_cleanup_images_and_files()
@task(queue=get_task_queuename)
def cleanup_host_metrics():
    """Periodic task: run the HostMetric soft-delete cleanup, but at most once
    per CLEANUP_HOST_METRICS_INTERVAL days."""
    from awx.conf.models import Setting
    from rest_framework.fields import DateTimeField

    # When did the cleanup last run? (persisted setting; may be absent/empty)
    setting_row = Setting.objects.filter(key='CLEANUP_HOST_METRICS_LAST_TS').first()
    if setting_row and setting_row.value:
        previous_run = DateTimeField().to_internal_value(setting_row.value)
    else:
        previous_run = None

    # Minimum spacing between two runs, configured in days.
    interval_seconds = getattr(settings, 'CLEANUP_HOST_METRICS_INTERVAL', 30) * 86400
    is_due = previous_run is None or (now() - previous_run).total_seconds() > interval_seconds
    if is_due:
        threshold_months = getattr(settings, 'CLEANUP_HOST_METRICS_THRESHOLD', 12)
        HostMetric.cleanup_task(threshold_months)
@task(queue=get_task_queuename)
def cluster_node_health_check(node):
"""

View File

@ -388,9 +388,13 @@ class Licenser(object):
if subscription_model == SUBSCRIPTION_USAGE_MODEL_UNIQUE_HOSTS:
automated_instances = HostMetric.active_objects.count()
first_host = HostMetric.active_objects.only('first_automation').order_by('first_automation').first()
attrs['deleted_instances'] = HostMetric.objects.filter(deleted=True).count()
attrs['reactivated_instances'] = HostMetric.active_objects.filter(deleted_counter__gte=1).count()
else:
automated_instances = HostMetric.objects.count()
automated_instances = 0
first_host = HostMetric.objects.only('first_automation').order_by('first_automation').first()
attrs['deleted_instances'] = 0
attrs['reactivated_instances'] = 0
if first_host:
automated_since = int(first_host.first_automation.timestamp())

View File

@ -475,6 +475,7 @@ CELERYBEAT_SCHEDULE = {
'receptor_reaper': {'task': 'awx.main.tasks.system.awx_receptor_workunit_reaper', 'schedule': timedelta(seconds=60)},
'send_subsystem_metrics': {'task': 'awx.main.analytics.analytics_tasks.send_subsystem_metrics', 'schedule': timedelta(seconds=20)},
'cleanup_images': {'task': 'awx.main.tasks.system.cleanup_images_and_files', 'schedule': timedelta(hours=3)},
'cleanup_host_metrics': {'task': 'awx.main.tasks.system.cleanup_host_metrics', 'schedule': timedelta(days=1)},
}
# Django Caching Configuration
@ -1052,3 +1053,10 @@ UI_NEXT = True
# - '': No model - Subscription not counted from Host Metrics
# - 'unique_managed_hosts': Compliant = automated - deleted hosts (using /api/v2/host_metrics/)
SUBSCRIPTION_USAGE_MODEL = ''
# Host metrics cleanup - last time of the cleanup run (soft-deleting records);
# updated by HostMetric.cleanup_task() after a successful run.
CLEANUP_HOST_METRICS_LAST_TS = None
# Host metrics cleanup - minimal interval between two cleanups in days
CLEANUP_HOST_METRICS_INTERVAL = 30  # days
# Host metrics cleanup - soft-delete HostMetric records with last_automation < [threshold] (in months)
CLEANUP_HOST_METRICS_THRESHOLD = 12  # months

View File

@ -24,7 +24,7 @@ const HelperText = styled(PFHelperText)`
`;
function SubscriptionDetail() {
const { me = {}, license_info, version } = useConfig();
const { me = {}, license_info, version, systemConfig } = useConfig();
const baseURL = '/settings/subscription';
const tabsArray = [
{
@ -56,35 +56,38 @@ function SubscriptionDetail() {
<RoutedTabs tabsArray={tabsArray} />
<CardBody>
<DetailList>
<Detail
dataCy="subscription-status"
label={t`Status`}
value={
license_info.compliant ? (
<>
<Label variant="outline" color="green" icon={<CheckIcon />}>
{t`Compliant`}
</Label>
<HelperText>
<HelperTextItem>{t`The number of hosts you have automated against is below your subscription count.`}</HelperTextItem>
</HelperText>
</>
) : (
<>
<Label
variant="outline"
color="red"
icon={<ExclamationCircleIcon />}
>
{t`Out of compliance`}
</Label>
<HelperText>
<HelperTextItem>{t`You have automated against more hosts than your subscription allows.`}</HelperTextItem>
</HelperText>
</>
)
}
/>
{systemConfig?.SUBSCRIPTION_USAGE_MODEL ===
'unique_managed_hosts' && (
<Detail
dataCy="subscription-status"
label={t`Status`}
value={
license_info.compliant ? (
<>
<Label variant="outline" color="green" icon={<CheckIcon />}>
{t`Compliant`}
</Label>
<HelperText>
<HelperTextItem>{t`The number of hosts you have automated against is below your subscription count.`}</HelperTextItem>
</HelperText>
</>
) : (
<>
<Label
variant="outline"
color="red"
icon={<ExclamationCircleIcon />}
>
{t`Out of compliance`}
</Label>
<HelperText>
<HelperTextItem>{t`You have automated against more hosts than your subscription allows.`}</HelperTextItem>
</HelperText>
</>
)
}
/>
)}
{typeof automatedInstancesCount !== 'undefined' &&
automatedInstancesCount !== null && (
<Detail
@ -107,11 +110,30 @@ function SubscriptionDetail() {
label={t`Hosts imported`}
value={license_info.current_instances}
/>
<Detail
dataCy="subscription-hosts-remaining"
label={t`Hosts remaining`}
value={license_info.free_instances}
/>
{systemConfig?.SUBSCRIPTION_USAGE_MODEL ===
'unique_managed_hosts' && (
<Detail
dataCy="subscription-hosts-remaining"
label={t`Hosts remaining`}
value={license_info.free_instances}
/>
)}
{systemConfig?.SUBSCRIPTION_USAGE_MODEL ===
'unique_managed_hosts' && (
<Detail
dataCy="subscription-hosts-deleted"
label={t`Hosts deleted`}
value={license_info.deleted_instances}
/>
)}
{systemConfig?.SUBSCRIPTION_USAGE_MODEL ===
'unique_managed_hosts' && (
<Detail
dataCy="subscription-hosts-reactivated"
label={t`Active hosts previously deleted`}
value={license_info.reactivated_instances}
/>
)}
{license_info.instance_count < 9999999 && (
<Detail
dataCy="subscription-hosts-available"

View File

@ -31,6 +31,9 @@ const config = {
trial: false,
valid_key: true,
},
systemConfig: {
SUBSCRIPTION_USAGE_MODEL: 'unique_managed_hosts',
},
};
describe('<SubscriptionDetail />', () => {