mirror of
https://github.com/ansible/awx.git
synced 2026-01-11 01:57:35 -03:30
HostMetrics: Hard auto-cleanup (#14255)
Fix host metric settings Cleanup_host_metric command with default params Fix order of host metric cleanups
This commit is contained in:
parent
5ce2055431
commit
660dab439b
@ -1,22 +1,22 @@
|
||||
from awx.main.models import HostMetric
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.conf import settings
|
||||
from awx.main.tasks.host_metrics import HostMetricTask
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""
|
||||
Run soft-deleting of HostMetrics
|
||||
This command provides cleanup task for HostMetric model.
|
||||
There are two modes, which run in following order:
|
||||
- soft cleanup
|
||||
- - Perform soft-deletion of all host metrics last automated 12 months ago or before.
|
||||
This is the same as issuing a DELETE request to /api/v2/host_metrics/N/ for all host metrics that match the criteria.
|
||||
- - updates columns delete, deleted_counter and last_deleted
|
||||
- hard cleanup
|
||||
- - Permanently erase from the database all host metrics last automated 36 months ago or before.
|
||||
This operation happens after the soft deletion has finished.
|
||||
"""
|
||||
|
||||
help = 'Run soft-deleting of HostMetrics'
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument('--months-ago', type=int, dest='months-ago', action='store', help='Threshold in months for soft-deleting')
|
||||
help = 'Run soft and hard-deletion of HostMetrics'
|
||||
|
||||
def handle(self, *args, **options):
|
||||
months_ago = options.get('months-ago') or None
|
||||
|
||||
if not months_ago:
|
||||
months_ago = getattr(settings, 'CLEANUP_HOST_METRICS_SOFT_THRESHOLD', 12)
|
||||
|
||||
HostMetric.cleanup_task(months_ago)
|
||||
HostMetricTask().cleanup(soft_threshold=settings.CLEANUP_HOST_METRICS_SOFT_THRESHOLD, hard_threshold=settings.CLEANUP_HOST_METRICS_HARD_THRESHOLD)
|
||||
|
||||
@ -10,7 +10,6 @@ import copy
|
||||
import os.path
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import dateutil.relativedelta
|
||||
import yaml
|
||||
|
||||
# Django
|
||||
@ -890,23 +889,6 @@ class HostMetric(models.Model):
|
||||
self.deleted = False
|
||||
self.save(update_fields=['deleted'])
|
||||
|
||||
@classmethod
|
||||
def cleanup_task(cls, months_ago):
|
||||
try:
|
||||
months_ago = int(months_ago)
|
||||
if months_ago <= 0:
|
||||
raise ValueError()
|
||||
|
||||
last_automation_before = now() - dateutil.relativedelta.relativedelta(months=months_ago)
|
||||
|
||||
logger.info(f'cleanup_host_metrics: soft-deleting records last automated before {last_automation_before}')
|
||||
HostMetric.active_objects.filter(last_automation__lt=last_automation_before).update(
|
||||
deleted=True, deleted_counter=models.F('deleted_counter') + 1, last_deleted=now()
|
||||
)
|
||||
settings.CLEANUP_HOST_METRICS_LAST_TS = now()
|
||||
except (TypeError, ValueError):
|
||||
logger.error(f"cleanup_host_metrics: months_ago({months_ago}) has to be a positive integer value")
|
||||
|
||||
|
||||
class HostMetricSummaryMonthly(models.Model):
|
||||
"""
|
||||
|
||||
10
awx/main/tasks/helpers.py
Normal file
10
awx/main/tasks/helpers.py
Normal file
@ -0,0 +1,10 @@
|
||||
from django.utils.timezone import now
|
||||
from rest_framework.fields import DateTimeField
|
||||
|
||||
|
||||
def is_run_threshold_reached(setting, threshold_seconds):
|
||||
last_time = DateTimeField().to_internal_value(setting) if setting else None
|
||||
if not last_time:
|
||||
return True
|
||||
else:
|
||||
return (now() - last_time).total_seconds() > threshold_seconds
|
||||
@ -3,33 +3,90 @@ from dateutil.relativedelta import relativedelta
|
||||
import logging
|
||||
|
||||
from django.conf import settings
|
||||
from django.db.models import Count
|
||||
from django.db.models import Count, F
|
||||
from django.db.models.functions import TruncMonth
|
||||
from django.utils.timezone import now
|
||||
from rest_framework.fields import DateTimeField
|
||||
from awx.main.dispatch import get_task_queuename
|
||||
from awx.main.dispatch.publish import task
|
||||
from awx.main.models.inventory import HostMetric, HostMetricSummaryMonthly
|
||||
from awx.main.tasks.helpers import is_run_threshold_reached
|
||||
from awx.conf.license import get_license
|
||||
|
||||
logger = logging.getLogger('awx.main.tasks.host_metric_summary_monthly')
|
||||
logger = logging.getLogger('awx.main.tasks.host_metrics')
|
||||
|
||||
|
||||
@task(queue=get_task_queuename)
|
||||
def cleanup_host_metrics():
|
||||
if is_run_threshold_reached(getattr(settings, 'CLEANUP_HOST_METRICS_LAST_TS', None), getattr(settings, 'CLEANUP_HOST_METRICS_INTERVAL', 30) * 86400):
|
||||
logger.info(f"Executing cleanup_host_metrics, last ran at {getattr(settings, 'CLEANUP_HOST_METRICS_LAST_TS', '---')}")
|
||||
HostMetricTask().cleanup(
|
||||
soft_threshold=getattr(settings, 'CLEANUP_HOST_METRICS_SOFT_THRESHOLD', 12),
|
||||
hard_threshold=getattr(settings, 'CLEANUP_HOST_METRICS_HARD_THRESHOLD', 36),
|
||||
)
|
||||
logger.info("Finished cleanup_host_metrics")
|
||||
|
||||
|
||||
@task(queue=get_task_queuename)
|
||||
def host_metric_summary_monthly():
|
||||
"""Run cleanup host metrics summary monthly task each week"""
|
||||
if _is_run_threshold_reached(
|
||||
getattr(settings, 'HOST_METRIC_SUMMARY_TASK_LAST_TS', None), getattr(settings, 'HOST_METRIC_SUMMARY_TASK_INTERVAL', 7) * 86400
|
||||
):
|
||||
if is_run_threshold_reached(getattr(settings, 'HOST_METRIC_SUMMARY_TASK_LAST_TS', None), getattr(settings, 'HOST_METRIC_SUMMARY_TASK_INTERVAL', 7) * 86400):
|
||||
logger.info(f"Executing host_metric_summary_monthly, last ran at {getattr(settings, 'HOST_METRIC_SUMMARY_TASK_LAST_TS', '---')}")
|
||||
HostMetricSummaryMonthlyTask().execute()
|
||||
logger.info("Finished host_metric_summary_monthly")
|
||||
|
||||
|
||||
def _is_run_threshold_reached(setting, threshold_seconds):
|
||||
last_time = DateTimeField().to_internal_value(setting) if setting else DateTimeField().to_internal_value('1970-01-01')
|
||||
class HostMetricTask:
|
||||
"""
|
||||
This class provides cleanup task for HostMetric model.
|
||||
There are two modes:
|
||||
- soft cleanup (updates columns delete, deleted_counter and last_deleted)
|
||||
- hard cleanup (deletes from the db)
|
||||
"""
|
||||
|
||||
return (now() - last_time).total_seconds() > threshold_seconds
|
||||
def cleanup(self, soft_threshold=None, hard_threshold=None):
|
||||
"""
|
||||
Main entrypoint, runs either soft cleanup, hard cleanup or both
|
||||
|
||||
:param soft_threshold: (int)
|
||||
:param hard_threshold: (int)
|
||||
"""
|
||||
if hard_threshold is not None:
|
||||
self.hard_cleanup(hard_threshold)
|
||||
if soft_threshold is not None:
|
||||
self.soft_cleanup(soft_threshold)
|
||||
|
||||
settings.CLEANUP_HOST_METRICS_LAST_TS = now()
|
||||
|
||||
@staticmethod
|
||||
def soft_cleanup(threshold=None):
|
||||
if threshold is None:
|
||||
threshold = getattr(settings, 'CLEANUP_HOST_METRICS_SOFT_THRESHOLD', 12)
|
||||
|
||||
try:
|
||||
threshold = int(threshold)
|
||||
except (ValueError, TypeError) as e:
|
||||
raise type(e)("soft_threshold has to be convertible to number") from e
|
||||
|
||||
last_automation_before = now() - relativedelta(months=threshold)
|
||||
rows = HostMetric.active_objects.filter(last_automation__lt=last_automation_before).update(
|
||||
deleted=True, deleted_counter=F('deleted_counter') + 1, last_deleted=now()
|
||||
)
|
||||
logger.info(f'cleanup_host_metrics: soft-deleted records last automated before {last_automation_before}, affected rows: {rows}')
|
||||
|
||||
@staticmethod
|
||||
def hard_cleanup(threshold=None):
|
||||
if threshold is None:
|
||||
threshold = getattr(settings, 'CLEANUP_HOST_METRICS_HARD_THRESHOLD', 36)
|
||||
|
||||
try:
|
||||
threshold = int(threshold)
|
||||
except (ValueError, TypeError) as e:
|
||||
raise type(e)("hard_threshold has to be convertible to number") from e
|
||||
|
||||
last_deleted_before = now() - relativedelta(months=threshold)
|
||||
queryset = HostMetric.objects.filter(deleted=True, last_deleted__lt=last_deleted_before)
|
||||
rows = queryset.delete()
|
||||
logger.info(f'cleanup_host_metrics: hard-deleted records which were soft deleted before {last_deleted_before}, affected rows: {rows[0]}')
|
||||
|
||||
|
||||
class HostMetricSummaryMonthlyTask:
|
||||
|
||||
@ -48,7 +48,6 @@ from awx.main.models import (
|
||||
Inventory,
|
||||
SmartInventoryMembership,
|
||||
Job,
|
||||
HostMetric,
|
||||
convert_jsonfields,
|
||||
)
|
||||
from awx.main.constants import ACTIVE_STATES
|
||||
@ -64,6 +63,7 @@ from awx.main.utils.common import (
|
||||
|
||||
from awx.main.utils.reload import stop_local_services
|
||||
from awx.main.utils.pglock import advisory_lock
|
||||
from awx.main.tasks.helpers import is_run_threshold_reached
|
||||
from awx.main.tasks.receptor import get_receptor_ctl, worker_info, worker_cleanup, administrative_workunit_reaper, write_receptor_config
|
||||
from awx.main.consumers import emit_channel_notification
|
||||
from awx.main import analytics
|
||||
@ -368,9 +368,7 @@ def send_notifications(notification_list, job_id=None):
|
||||
|
||||
@task(queue=get_task_queuename)
|
||||
def gather_analytics():
|
||||
from awx.conf.models import Setting
|
||||
|
||||
if is_run_threshold_reached(Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_GATHER').first(), settings.AUTOMATION_ANALYTICS_GATHER_INTERVAL):
|
||||
if is_run_threshold_reached(getattr(settings, 'AUTOMATION_ANALYTICS_LAST_GATHER', None), settings.AUTOMATION_ANALYTICS_GATHER_INTERVAL):
|
||||
analytics.gather()
|
||||
|
||||
|
||||
@ -427,29 +425,6 @@ def cleanup_images_and_files():
|
||||
_cleanup_images_and_files()
|
||||
|
||||
|
||||
@task(queue=get_task_queuename)
|
||||
def cleanup_host_metrics():
|
||||
"""Run cleanup host metrics ~each month"""
|
||||
# TODO: move whole method to host_metrics in follow-up PR
|
||||
from awx.conf.models import Setting
|
||||
|
||||
if is_run_threshold_reached(
|
||||
Setting.objects.filter(key='CLEANUP_HOST_METRICS_LAST_TS').first(), getattr(settings, 'CLEANUP_HOST_METRICS_INTERVAL', 30) * 86400
|
||||
):
|
||||
months_ago = getattr(settings, 'CLEANUP_HOST_METRICS_SOFT_THRESHOLD', 12)
|
||||
logger.info("Executing cleanup_host_metrics")
|
||||
HostMetric.cleanup_task(months_ago)
|
||||
logger.info("Finished cleanup_host_metrics")
|
||||
|
||||
|
||||
def is_run_threshold_reached(setting, threshold_seconds):
|
||||
from rest_framework.fields import DateTimeField
|
||||
|
||||
last_time = DateTimeField().to_internal_value(setting.value) if setting and setting.value else DateTimeField().to_internal_value('1970-01-01')
|
||||
|
||||
return (now() - last_time).total_seconds() > threshold_seconds
|
||||
|
||||
|
||||
@task(queue=get_task_queuename)
|
||||
def cluster_node_health_check(node):
|
||||
"""
|
||||
|
||||
@ -0,0 +1,78 @@
|
||||
import pytest
|
||||
|
||||
from awx.main.tasks.host_metrics import HostMetricTask
|
||||
from awx.main.models.inventory import HostMetric
|
||||
from awx.main.tests.factories.fixtures import mk_host_metric
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_no_host_metrics():
|
||||
"""No-crash test"""
|
||||
assert HostMetric.objects.count() == 0
|
||||
HostMetricTask().cleanup(soft_threshold=0, hard_threshold=0)
|
||||
HostMetricTask().cleanup(soft_threshold=24, hard_threshold=42)
|
||||
assert HostMetric.objects.count() == 0
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_delete_exception():
|
||||
"""Crash test"""
|
||||
with pytest.raises(ValueError):
|
||||
HostMetricTask().soft_cleanup("")
|
||||
with pytest.raises(TypeError):
|
||||
HostMetricTask().hard_cleanup(set())
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@pytest.mark.parametrize('threshold', [settings.CLEANUP_HOST_METRICS_SOFT_THRESHOLD, 20])
|
||||
def test_soft_delete(threshold):
|
||||
"""Metrics with last_automation < threshold are updated to deleted=True"""
|
||||
mk_host_metric('host_1', first_automation=ago(months=1), last_automation=ago(months=1), deleted=False)
|
||||
mk_host_metric('host_2', first_automation=ago(months=1), last_automation=ago(months=1), deleted=True)
|
||||
mk_host_metric('host_3', first_automation=ago(months=1), last_automation=ago(months=threshold, hours=-1), deleted=False)
|
||||
mk_host_metric('host_4', first_automation=ago(months=1), last_automation=ago(months=threshold, hours=-1), deleted=True)
|
||||
mk_host_metric('host_5', first_automation=ago(months=1), last_automation=ago(months=threshold, hours=1), deleted=False)
|
||||
mk_host_metric('host_6', first_automation=ago(months=1), last_automation=ago(months=threshold, hours=1), deleted=True)
|
||||
mk_host_metric('host_7', first_automation=ago(months=1), last_automation=ago(months=42), deleted=False)
|
||||
mk_host_metric('host_8', first_automation=ago(months=1), last_automation=ago(months=42), deleted=True)
|
||||
|
||||
assert HostMetric.objects.count() == 8
|
||||
assert HostMetric.active_objects.count() == 4
|
||||
|
||||
for i in range(2):
|
||||
HostMetricTask().cleanup(soft_threshold=threshold)
|
||||
assert HostMetric.objects.count() == 8
|
||||
|
||||
hostnames = set(HostMetric.objects.filter(deleted=False).order_by('hostname').values_list('hostname', flat=True))
|
||||
assert hostnames == {'host_1', 'host_3'}
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@pytest.mark.parametrize('threshold', [settings.CLEANUP_HOST_METRICS_HARD_THRESHOLD, 20])
|
||||
def test_hard_delete(threshold):
|
||||
"""Metrics with last_deleted < threshold and deleted=True are deleted from the db"""
|
||||
mk_host_metric('host_1', first_automation=ago(months=1), last_deleted=ago(months=1), deleted=False)
|
||||
mk_host_metric('host_2', first_automation=ago(months=1), last_deleted=ago(months=1), deleted=True)
|
||||
mk_host_metric('host_3', first_automation=ago(months=1), last_deleted=ago(months=threshold, hours=-1), deleted=False)
|
||||
mk_host_metric('host_4', first_automation=ago(months=1), last_deleted=ago(months=threshold, hours=-1), deleted=True)
|
||||
mk_host_metric('host_5', first_automation=ago(months=1), last_deleted=ago(months=threshold, hours=1), deleted=False)
|
||||
mk_host_metric('host_6', first_automation=ago(months=1), last_deleted=ago(months=threshold, hours=1), deleted=True)
|
||||
mk_host_metric('host_7', first_automation=ago(months=1), last_deleted=ago(months=42), deleted=False)
|
||||
mk_host_metric('host_8', first_automation=ago(months=1), last_deleted=ago(months=42), deleted=True)
|
||||
|
||||
assert HostMetric.objects.count() == 8
|
||||
assert HostMetric.active_objects.count() == 4
|
||||
|
||||
for i in range(2):
|
||||
HostMetricTask().cleanup(hard_threshold=threshold)
|
||||
assert HostMetric.objects.count() == 6
|
||||
|
||||
hostnames = set(HostMetric.objects.order_by('hostname').values_list('hostname', flat=True))
|
||||
assert hostnames == {'host_1', 'host_2', 'host_3', 'host_4', 'host_5', 'host_7'}
|
||||
|
||||
|
||||
def ago(months=0, hours=0):
|
||||
return timezone.now() - relativedelta(months=months, hours=hours)
|
||||
@ -470,7 +470,7 @@ CELERYBEAT_SCHEDULE = {
|
||||
'receptor_reaper': {'task': 'awx.main.tasks.system.awx_receptor_workunit_reaper', 'schedule': timedelta(seconds=60)},
|
||||
'send_subsystem_metrics': {'task': 'awx.main.analytics.analytics_tasks.send_subsystem_metrics', 'schedule': timedelta(seconds=20)},
|
||||
'cleanup_images': {'task': 'awx.main.tasks.system.cleanup_images_and_files', 'schedule': timedelta(hours=3)},
|
||||
'cleanup_host_metrics': {'task': 'awx.main.tasks.system.cleanup_host_metrics', 'schedule': timedelta(hours=3, minutes=30)},
|
||||
'cleanup_host_metrics': {'task': 'awx.main.tasks.host_metrics.cleanup_host_metrics', 'schedule': timedelta(hours=3, minutes=30)},
|
||||
'host_metric_summary_monthly': {'task': 'awx.main.tasks.host_metrics.host_metric_summary_monthly', 'schedule': timedelta(hours=4)},
|
||||
}
|
||||
|
||||
@ -1049,7 +1049,7 @@ UI_NEXT = True
|
||||
# - 'unique_managed_hosts': Compliant = automated - deleted hosts (using /api/v2/host_metrics/)
|
||||
SUBSCRIPTION_USAGE_MODEL = ''
|
||||
|
||||
# Host metrics cleanup - last time of the cleanup run (soft-deleting records)
|
||||
# Host metrics cleanup - last time of the task/command run
|
||||
CLEANUP_HOST_METRICS_LAST_TS = None
|
||||
# Host metrics cleanup - minimal interval between two cleanups in days
|
||||
CLEANUP_HOST_METRICS_INTERVAL = 30 # days
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user