Merge pull request #13809 from ansible/feature_usage-collection-pt2

Enhance usage metrics collection
This commit is contained in:
John Westcott IV
2023-04-07 11:44:59 -04:00
committed by GitHub
10 changed files with 230 additions and 42 deletions

View File

@@ -6,7 +6,7 @@ import platform
import distro import distro
from django.db import connection from django.db import connection
from django.db.models import Count from django.db.models import Count, Min
from django.conf import settings from django.conf import settings
from django.contrib.sessions.models import Session from django.contrib.sessions.models import Session
from django.utils.timezone import now, timedelta from django.utils.timezone import now, timedelta
@@ -35,7 +35,7 @@ data _since_ the last report date - i.e., new data in the last 24 hours)
""" """
def trivial_slicing(key, since, until, last_gather): def trivial_slicing(key, since, until, last_gather, **kwargs):
if since is not None: if since is not None:
return [(since, until)] return [(since, until)]
@@ -48,7 +48,7 @@ def trivial_slicing(key, since, until, last_gather):
return [(last_entry, until)] return [(last_entry, until)]
def four_hour_slicing(key, since, until, last_gather): def four_hour_slicing(key, since, until, last_gather, **kwargs):
if since is not None: if since is not None:
last_entry = since last_entry = since
else: else:
@@ -69,6 +69,54 @@ def four_hour_slicing(key, since, until, last_gather):
start = end start = end
def host_metric_slicing(key, since, until, last_gather, **kwargs):
    """
    Generate (start, end) slices for HostMetric gathering.

    Unlike the other slicers this doesn't start 4 weeks ago: it ships the
    whole table on the first run (or when a periodic full sync is due via
    kwargs['full_sync_enabled']) and resumes incrementally from the last
    recorded entry otherwise. Slices are capped at 30 days each.
    """
    # An explicit window was requested: emit it as the single slice.
    # NOTE: this function is a generator (it yields below), so the window
    # must be *yielded* — a `return [(since, until)]` would be discarded
    # by the iterating caller and no data would be gathered.
    if since is not None:
        yield (since, until)
        return

    # Deferred imports so the explicit-window path above stays model-free.
    from awx.main.models.inventory import HostMetric
    from awx.conf.models import Setting

    # Check if full sync should be done
    full_sync_enabled = kwargs.get('full_sync_enabled', False)
    last_entry = None
    if not full_sync_enabled:
        # If not, try incremental sync first: resume from the last
        # recorded entry for this collector key.
        last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
        last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
        last_entry = last_entries.get(key)
        if not last_entry:
            # If not done before, switch to full sync
            full_sync_enabled = True
    if full_sync_enabled:
        # Find the lowest date for full sync — the earliest timestamp
        # present in either tracked column.
        min_dates = HostMetric.objects.aggregate(min_last_automation=Min('last_automation'), min_last_deleted=Min('last_deleted'))
        if min_dates['min_last_automation'] and min_dates['min_last_deleted']:
            last_entry = min(min_dates['min_last_automation'], min_dates['min_last_deleted'])
        elif min_dates['min_last_automation'] or min_dates['min_last_deleted']:
            last_entry = min_dates['min_last_automation'] or min_dates['min_last_deleted']

    if not last_entry:
        # empty table — nothing to slice
        return

    # Walk forward in 30-day windows until `until` is reached.
    start = last_entry
    while start < until:
        end = min(start + timedelta(days=30), until)
        yield (start, end)
        start = end
def _identify_lower(key, since, until, last_gather): def _identify_lower(key, since, until, last_gather):
from awx.conf.models import Setting from awx.conf.models import Setting
@@ -537,3 +585,25 @@ def workflow_job_template_node_table(since, full_path, **kwargs):
) always_nodes ON main_workflowjobtemplatenode.id = always_nodes.from_workflowjobtemplatenode_id ) always_nodes ON main_workflowjobtemplatenode.id = always_nodes.from_workflowjobtemplatenode_id
ORDER BY main_workflowjobtemplatenode.id ASC) TO STDOUT WITH CSV HEADER''' ORDER BY main_workflowjobtemplatenode.id ASC) TO STDOUT WITH CSV HEADER'''
return _copy_table(table='workflow_job_template_node', query=workflow_job_template_node_query, path=full_path) return _copy_table(table='workflow_job_template_node', query=workflow_job_template_node_query, path=full_path)
@register(
    'host_metric_table', '1.0', format='csv', description=_('Host Metric data, incremental/full sync'), expensive=host_metric_slicing, full_sync_interval=30
)
def host_metric_table(since, full_path, until, **kwargs):
    """Copy HostMetric rows touched within (since, until] into a CSV file."""
    # A row qualifies when either its last automation or its last deletion
    # timestamp falls inside the slice window.
    lower = since.isoformat()
    upper = until.isoformat()
    query_template = '''COPY (SELECT main_hostmetric.id,
main_hostmetric.hostname,
main_hostmetric.first_automation,
main_hostmetric.last_automation,
main_hostmetric.last_deleted,
main_hostmetric.deleted,
main_hostmetric.automated_counter,
main_hostmetric.deleted_counter,
main_hostmetric.used_in_inventories
FROM main_hostmetric
WHERE (main_hostmetric.last_automation > '{}' AND main_hostmetric.last_automation <= '{}') OR
(main_hostmetric.last_deleted > '{}' AND main_hostmetric.last_deleted <= '{}')
ORDER BY main_hostmetric.id ASC) TO STDOUT WITH CSV HEADER'''
    return _copy_table(table='host_metric', query=query_template.format(lower, upper, lower, upper), path=full_path)

View File

@@ -52,7 +52,7 @@ def all_collectors():
} }
def register(key, version, description=None, format='json', expensive=None): def register(key, version, description=None, format='json', expensive=None, full_sync_interval=None):
""" """
A decorator used to register a function as a metric collector. A decorator used to register a function as a metric collector.
@@ -71,6 +71,7 @@ def register(key, version, description=None, format='json', expensive=None):
f.__awx_analytics_description__ = description f.__awx_analytics_description__ = description
f.__awx_analytics_type__ = format f.__awx_analytics_type__ = format
f.__awx_expensive__ = expensive f.__awx_expensive__ = expensive
f.__awx_full_sync_interval__ = full_sync_interval
return f return f
return decorate return decorate
@@ -259,10 +260,19 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
# These slicer functions may return a generator. The `since` parameter is # These slicer functions may return a generator. The `since` parameter is
# allowed to be None, and will fall back to LAST_ENTRIES[key] or to # allowed to be None, and will fall back to LAST_ENTRIES[key] or to
# LAST_GATHER (truncated appropriately to match the 4-week limit). # LAST_GATHER (truncated appropriately to match the 4-week limit).
#
# Or it can force full table sync if interval is given
kwargs = dict()
full_sync_enabled = False
if func.__awx_full_sync_interval__:
last_full_sync = last_entries.get(f"{key}_full")
full_sync_enabled = not last_full_sync or last_full_sync < now() - timedelta(days=func.__awx_full_sync_interval__)
kwargs['full_sync_enabled'] = full_sync_enabled
if func.__awx_expensive__: if func.__awx_expensive__:
slices = func.__awx_expensive__(key, since, until, last_gather) slices = func.__awx_expensive__(key, since, until, last_gather, **kwargs)
else: else:
slices = collectors.trivial_slicing(key, since, until, last_gather) slices = collectors.trivial_slicing(key, since, until, last_gather, **kwargs)
for start, end in slices: for start, end in slices:
files = func(start, full_path=gather_dir, until=end) files = func(start, full_path=gather_dir, until=end)
@@ -301,6 +311,12 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
succeeded = False succeeded = False
logger.exception("Could not generate metric {}".format(filename)) logger.exception("Could not generate metric {}".format(filename))
# update full sync timestamp if successfully shipped
if full_sync_enabled and collection_type != 'dry-run' and succeeded:
with disable_activity_stream():
last_entries[f"{key}_full"] = now()
settings.AUTOMATION_ANALYTICS_LAST_ENTRIES = json.dumps(last_entries, cls=DjangoJSONEncoder)
if collection_type != 'dry-run': if collection_type != 'dry-run':
if succeeded: if succeeded:
for fpath in tarfiles: for fpath in tarfiles:

View File

@@ -822,6 +822,15 @@ register(
category_slug='system', category_slug='system',
) )
# Timestamp of the most recent HostMetric cleanup run. Written by
# HostMetric.cleanup_task after a successful soft-delete pass and read by the
# periodic cleanup_host_metrics task to enforce a minimum interval between runs.
register(
    'CLEANUP_HOST_METRICS_LAST_TS',
    field_class=fields.DateTimeField,
    label=_('Last cleanup date for HostMetrics'),
    allow_null=True,
    category=_('System'),
    category_slug='system',
)
def logging_validate(serializer, attrs): def logging_validate(serializer, attrs):
if not serializer.instance or not hasattr(serializer.instance, 'LOG_AGGREGATOR_HOST') or not hasattr(serializer.instance, 'LOG_AGGREGATOR_TYPE'): if not serializer.instance or not hasattr(serializer.instance, 'LOG_AGGREGATOR_HOST') or not hasattr(serializer.instance, 'LOG_AGGREGATOR_TYPE'):

View File

@@ -0,0 +1,22 @@
from awx.main.models import HostMetric
from django.core.management.base import BaseCommand
from django.conf import settings
class Command(BaseCommand):
    """
    Management command that soft-deletes stale HostMetric records.
    """

    help = 'Run soft-deleting of HostMetrics'

    def add_arguments(self, parser):
        # Optional override for the age threshold; falls back to settings.
        parser.add_argument('--months-ago', type=int, dest='months-ago', action='store', help='Threshold in months for soft-deleting')

    def handle(self, *args, **options):
        # Use the CLI value when given (and truthy), otherwise the configured
        # default threshold of 12 months.
        threshold = options.get('months-ago') or getattr(settings, 'CLEANUP_HOST_METRICS_THRESHOLD', 12)
        HostMetric.cleanup_task(threshold)

View File

@@ -9,6 +9,8 @@ import re
import copy import copy
import os.path import os.path
from urllib.parse import urljoin from urllib.parse import urljoin
import dateutil.relativedelta
import yaml import yaml
# Django # Django
@@ -888,6 +890,23 @@ class HostMetric(models.Model):
self.deleted = False self.deleted = False
self.save(update_fields=['deleted']) self.save(update_fields=['deleted'])
@classmethod
def cleanup_task(cls, months_ago):
    """
    Soft-delete HostMetric records whose last automation is older than
    ``months_ago`` months.

    Matching rows get ``deleted=True``, an incremented ``deleted_counter``
    and a fresh ``last_deleted`` stamp; the run time is then recorded in
    ``settings.CLEANUP_HOST_METRICS_LAST_TS``. An invalid threshold is
    logged and the cleanup is skipped.
    """
    # Validate the threshold up front so ORM/setting errors below are not
    # mistaken for a bad months_ago value.
    try:
        months_ago = int(months_ago)
        if months_ago <= 0:
            raise ValueError()
    except (TypeError, ValueError):
        logger.error(f"Cleanup [HostMetric]: months_ago({months_ago}) has to be a positive integer value")
        return

    last_automation_before = now() - dateutil.relativedelta.relativedelta(months=months_ago)
    logger.info(f'Cleanup [HostMetric]: soft-deleting records last automated before {last_automation_before}')
    # Use cls (not the hard-coded class name) so subclasses behave correctly.
    cls.active_objects.filter(last_automation__lt=last_automation_before).update(
        deleted=True, deleted_counter=models.F('deleted_counter') + 1, last_deleted=now()
    )
    settings.CLEANUP_HOST_METRICS_LAST_TS = now()
class HostMetricSummaryMonthly(models.Model): class HostMetricSummaryMonthly(models.Model):
""" """

View File

@@ -47,6 +47,7 @@ from awx.main.models import (
Inventory, Inventory,
SmartInventoryMembership, SmartInventoryMembership,
Job, Job,
HostMetric,
) )
from awx.main.constants import ACTIVE_STATES from awx.main.constants import ACTIVE_STATES
from awx.main.dispatch.publish import task from awx.main.dispatch.publish import task
@@ -378,6 +379,20 @@ def cleanup_images_and_files():
_cleanup_images_and_files() _cleanup_images_and_files()
@task(queue=get_task_queuename)
def cleanup_host_metrics():
    """Periodic task: run HostMetric soft-deletion at most once per configured interval."""
    from awx.conf.models import Setting
    from rest_framework.fields import DateTimeField

    # Parse the timestamp of the previous cleanup run, if any was recorded.
    setting_row = Setting.objects.filter(key='CLEANUP_HOST_METRICS_LAST_TS').first()
    previous_run = None
    if setting_row and setting_row.value:
        previous_run = DateTimeField().to_internal_value(setting_row.value)

    # Minimum gap between two cleanups, configured in days.
    min_gap_seconds = 86400 * getattr(settings, 'CLEANUP_HOST_METRICS_INTERVAL', 30)
    if previous_run is None or (now() - previous_run).total_seconds() > min_gap_seconds:
        HostMetric.cleanup_task(getattr(settings, 'CLEANUP_HOST_METRICS_THRESHOLD', 12))
@task(queue=get_task_queuename) @task(queue=get_task_queuename)
def cluster_node_health_check(node): def cluster_node_health_check(node):
""" """

View File

@@ -388,9 +388,13 @@ class Licenser(object):
if subscription_model == SUBSCRIPTION_USAGE_MODEL_UNIQUE_HOSTS: if subscription_model == SUBSCRIPTION_USAGE_MODEL_UNIQUE_HOSTS:
automated_instances = HostMetric.active_objects.count() automated_instances = HostMetric.active_objects.count()
first_host = HostMetric.active_objects.only('first_automation').order_by('first_automation').first() first_host = HostMetric.active_objects.only('first_automation').order_by('first_automation').first()
attrs['deleted_instances'] = HostMetric.objects.filter(deleted=True).count()
attrs['reactivated_instances'] = HostMetric.active_objects.filter(deleted_counter__gte=1).count()
else: else:
automated_instances = HostMetric.objects.count() automated_instances = 0
first_host = HostMetric.objects.only('first_automation').order_by('first_automation').first() first_host = HostMetric.objects.only('first_automation').order_by('first_automation').first()
attrs['deleted_instances'] = 0
attrs['reactivated_instances'] = 0
if first_host: if first_host:
automated_since = int(first_host.first_automation.timestamp()) automated_since = int(first_host.first_automation.timestamp())

View File

@@ -475,6 +475,7 @@ CELERYBEAT_SCHEDULE = {
'receptor_reaper': {'task': 'awx.main.tasks.system.awx_receptor_workunit_reaper', 'schedule': timedelta(seconds=60)}, 'receptor_reaper': {'task': 'awx.main.tasks.system.awx_receptor_workunit_reaper', 'schedule': timedelta(seconds=60)},
'send_subsystem_metrics': {'task': 'awx.main.analytics.analytics_tasks.send_subsystem_metrics', 'schedule': timedelta(seconds=20)}, 'send_subsystem_metrics': {'task': 'awx.main.analytics.analytics_tasks.send_subsystem_metrics', 'schedule': timedelta(seconds=20)},
'cleanup_images': {'task': 'awx.main.tasks.system.cleanup_images_and_files', 'schedule': timedelta(hours=3)}, 'cleanup_images': {'task': 'awx.main.tasks.system.cleanup_images_and_files', 'schedule': timedelta(hours=3)},
'cleanup_host_metrics': {'task': 'awx.main.tasks.system.cleanup_host_metrics', 'schedule': timedelta(days=1)},
} }
# Django Caching Configuration # Django Caching Configuration
@@ -1052,3 +1053,10 @@ UI_NEXT = True
# - '': No model - Subscription not counted from Host Metrics # - '': No model - Subscription not counted from Host Metrics
# - 'unique_managed_hosts': Compliant = automated - deleted hosts (using /api/v2/host_metrics/) # - 'unique_managed_hosts': Compliant = automated - deleted hosts (using /api/v2/host_metrics/)
SUBSCRIPTION_USAGE_MODEL = '' SUBSCRIPTION_USAGE_MODEL = ''
# Host metrics cleanup - last time of the cleanup run (soft-deleting records)
CLEANUP_HOST_METRICS_LAST_TS = None
# Host metrics cleanup - minimal interval between two cleanups in days
CLEANUP_HOST_METRICS_INTERVAL = 30 # days
# Host metrics cleanup - soft-delete HostMetric records with last_automation < [threshold] (in months)
CLEANUP_HOST_METRICS_THRESHOLD = 12 # months

View File

@@ -24,7 +24,7 @@ const HelperText = styled(PFHelperText)`
`; `;
function SubscriptionDetail() { function SubscriptionDetail() {
const { me = {}, license_info, version } = useConfig(); const { me = {}, license_info, version, systemConfig } = useConfig();
const baseURL = '/settings/subscription'; const baseURL = '/settings/subscription';
const tabsArray = [ const tabsArray = [
{ {
@@ -56,35 +56,38 @@ function SubscriptionDetail() {
<RoutedTabs tabsArray={tabsArray} /> <RoutedTabs tabsArray={tabsArray} />
<CardBody> <CardBody>
<DetailList> <DetailList>
<Detail {systemConfig?.SUBSCRIPTION_USAGE_MODEL ===
dataCy="subscription-status" 'unique_managed_hosts' && (
label={t`Status`} <Detail
value={ dataCy="subscription-status"
license_info.compliant ? ( label={t`Status`}
<> value={
<Label variant="outline" color="green" icon={<CheckIcon />}> license_info.compliant ? (
{t`Compliant`} <>
</Label> <Label variant="outline" color="green" icon={<CheckIcon />}>
<HelperText> {t`Compliant`}
<HelperTextItem>{t`The number of hosts you have automated against is below your subscription count.`}</HelperTextItem> </Label>
</HelperText> <HelperText>
</> <HelperTextItem>{t`The number of hosts you have automated against is below your subscription count.`}</HelperTextItem>
) : ( </HelperText>
<> </>
<Label ) : (
variant="outline" <>
color="red" <Label
icon={<ExclamationCircleIcon />} variant="outline"
> color="red"
{t`Out of compliance`} icon={<ExclamationCircleIcon />}
</Label> >
<HelperText> {t`Out of compliance`}
<HelperTextItem>{t`You have automated against more hosts than your subscription allows.`}</HelperTextItem> </Label>
</HelperText> <HelperText>
</> <HelperTextItem>{t`You have automated against more hosts than your subscription allows.`}</HelperTextItem>
) </HelperText>
} </>
/> )
}
/>
)}
{typeof automatedInstancesCount !== 'undefined' && {typeof automatedInstancesCount !== 'undefined' &&
automatedInstancesCount !== null && ( automatedInstancesCount !== null && (
<Detail <Detail
@@ -107,11 +110,30 @@ function SubscriptionDetail() {
label={t`Hosts imported`} label={t`Hosts imported`}
value={license_info.current_instances} value={license_info.current_instances}
/> />
<Detail {systemConfig?.SUBSCRIPTION_USAGE_MODEL ===
dataCy="subscription-hosts-remaining" 'unique_managed_hosts' && (
label={t`Hosts remaining`} <Detail
value={license_info.free_instances} dataCy="subscription-hosts-remaining"
/> label={t`Hosts remaining`}
value={license_info.free_instances}
/>
)}
{systemConfig?.SUBSCRIPTION_USAGE_MODEL ===
'unique_managed_hosts' && (
<Detail
dataCy="subscription-hosts-deleted"
label={t`Hosts deleted`}
value={license_info.deleted_instances}
/>
)}
{systemConfig?.SUBSCRIPTION_USAGE_MODEL ===
'unique_managed_hosts' && (
<Detail
dataCy="subscription-hosts-reactivated"
label={t`Active hosts previously deleted`}
value={license_info.reactivated_instances}
/>
)}
{license_info.instance_count < 9999999 && ( {license_info.instance_count < 9999999 && (
<Detail <Detail
dataCy="subscription-hosts-available" dataCy="subscription-hosts-available"

View File

@@ -31,6 +31,9 @@ const config = {
trial: false, trial: false,
valid_key: true, valid_key: true,
}, },
systemConfig: {
SUBSCRIPTION_USAGE_MODEL: 'unique_managed_hosts',
},
}; };
describe('<SubscriptionDetail />', () => { describe('<SubscriptionDetail />', () => {