From 44e5db39ab3dfdba2f9557abe7194ad20c504f5c Mon Sep 17 00:00:00 2001
From: Chris Meyers
Date: Fri, 10 Apr 2015 07:34:26 -0400
Subject: [PATCH] implemented cleanup_facts command + tests

---
 awx/main/management/commands/cleanup_facts.py | 130 +++++++++++++
 awx/main/tests/commands/__init__.py           |   7 +-
 awx/main/tests/commands/base.py               |  20 ++
 awx/main/tests/commands/cleanup_facts.py      | 172 ++++++++++++++++++
 4 files changed, 327 insertions(+), 2 deletions(-)
 create mode 100644 awx/main/management/commands/cleanup_facts.py
 create mode 100644 awx/main/tests/commands/cleanup_facts.py

diff --git a/awx/main/management/commands/cleanup_facts.py b/awx/main/management/commands/cleanup_facts.py
new file mode 100644
index 0000000000..ad1028f9bf
--- /dev/null
+++ b/awx/main/management/commands/cleanup_facts.py
@@ -0,0 +1,130 @@
+# Copyright (c) 2015 Ansible, Inc.
+# All Rights Reserved
+
+# Python
+import re
+from dateutil.relativedelta import relativedelta
+import logging
+from datetime import datetime, timedelta
+from optparse import make_option
+from dateutil.relativedelta import relativedelta
+
+# Django
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+
+# AWX
+from awx.fact.models.fact import * # noqa
+
+OLDER_THAN = 'older_than'
+GRANULARITY = 'granularity'
+
+logger = logging.getLogger('awx.main.commands.cleanup_facts')
+
+class CleanupFacts(object):
+    def __init__(self):
+        self.timestamp = None
+
+    def cleanup(self, older_than_abs, granularity):
+        '''Thin out the fact scans stamped at or before older_than_abs.
+
+        Steps backwards in (pivot - granularity, pivot] windows until the
+        oldest FactVersion is passed. In each window the versions are grouped
+        by host (sorted newest first) and all but the last entry of every
+        group are deleted together with their Fact documents.
+        Returns the summed results of the Fact delete() calls.
+        '''
+        fact_oldest = FactVersion.objects.all().order_by('timestamp').first()
+        if not fact_oldest:
+            return 0
+
+        total = 0
+        date_pivot = older_than_abs
+        while date_pivot > fact_oldest.timestamp:
+            date_pivot_next = date_pivot - granularity
+            kv = {
+                'timestamp__lte': date_pivot,
+                'timestamp__gt': date_pivot_next,
+            }
+            version_objs = FactVersion.objects.filter(**kv).order_by('-timestamp')
+
+            # Transform array -> {host_id} = [<fact_version>, <fact_version>, ...]
+            # TODO: If this set gets large then we can use mongo to transform the data set for us.
+            host_ids = {}
+            for obj in version_objs:
+                k = obj.host.id
+                if k not in host_ids:
+                    host_ids[k] = []
+                host_ids[k].append(obj)
+
+            for k in host_ids:
+                ids = [fact.id for fact in host_ids[k]]
+                fact_ids = [fact.fact.id for fact in host_ids[k]]
+                # Remove 1 entry (the last one in the list) so that it survives
+                ids.pop()
+                fact_ids.pop()
+                # delete the rest
+                FactVersion.objects.filter(id__in=ids).delete()
+                # FIXME: if this crashes here then we are inconsistent
+                count = Fact.objects.filter(id__in=fact_ids).delete()
+                total += count
+
+            date_pivot = date_pivot_next
+        return total
+
+    def run(self, older_than, granularity):
+        '''Clean up facts older than (now - older_than), window by window.
+        older_than and granularity are of type relativedelta; prints the deleted count.
+        '''
+        t = datetime.now()
+        deleted_count = self.cleanup(t - older_than, granularity)
+        print("Deleted %d facts." % deleted_count)
+
+class Command(BaseCommand):
+    help = 'Cleanup facts. For each host older than the value specified, keep one fact scan for each time window (granularity).'
+    option_list = BaseCommand.option_list + (
+        make_option('--older_than',
+                    dest='older_than',
+                    default=None,
+                    help='Specify the relative time to consider facts older than (w)eek (d)ay or (y)ear (i.e. 5d, 2w, 1y).'),
+        make_option('--granularity',
+                    dest='granularity',
+                    default=None,
+                    help='Window duration to group same hosts by for deletion (w)eek (d)ay or (y)ear (i.e. 5d, 2w, 1y).'),)
+
+    def __init__(self):
+        super(Command, self).__init__()
+
+    def string_time_to_timestamp(self, time_string):
+        '''Parse "<digits><unit>" (unit is one of y, m, w, d), e.g. '2w', into a
+        relativedelta. Returns None when time_string cannot be parsed.'''
+        units = {
+            'y': 'years',
+            'd': 'days',
+            'w': 'weeks',
+            'm': 'months'
+        }
+        try:
+            match = re.match(r'(?P<value>[0-9]+)(?P<unit>.*)', time_string)
+            group = match.groupdict()
+            unit_name = units[group['unit']]
+            return relativedelta(**{unit_name: int(group['value'])})
+        except (KeyError, TypeError, AttributeError):
+            return None
+
+    @transaction.atomic
+    def handle(self, *args, **options):
+        '''Validate --older_than and --granularity, then run the cleanup; raises CommandError on bad input.'''
+        if not all([options[GRANULARITY], options[OLDER_THAN]]):
+            raise CommandError('Both --granularity and --older_than are required.')
+
+        older_than = self.string_time_to_timestamp(options[OLDER_THAN])
+        granularity = self.string_time_to_timestamp(options[GRANULARITY])
+
+        if older_than is None:
+            raise CommandError('--older_than invalid value "%s"' % options[OLDER_THAN])
+        if not granularity:  # None (unparsable) or a zero-length window, which would never advance the pivot
+            raise CommandError('--granularity invalid value "%s"' % options[GRANULARITY])
+
+        CleanupFacts().run(older_than, granularity)
+
diff --git a/awx/main/tests/commands/__init__.py b/awx/main/tests/commands/__init__.py
index 7a1446f52a..7626f0f739 100644
--- a/awx/main/tests/commands/__init__.py
+++ b/awx/main/tests/commands/__init__.py
@@ -1,5 +1,8 @@
 # Copyright (c) 2015 Ansible, Inc.
 # All Rights Reserved
 
-from awx.main.tests.commands.run_fact_cache_receiver import * # noqa
-from awx.main.tests.commands.commands_monolithic import * # noqa
+from __future__ import absolute_import
+
+from .run_fact_cache_receiver import * # noqa
+from .commands_monolithic import * # noqa
+from .cleanup_facts import * # noqa
diff --git a/awx/main/tests/commands/base.py b/awx/main/tests/commands/base.py
index 575eb08cf4..b8e9434f25 100644
--- a/awx/main/tests/commands/base.py
+++ b/awx/main/tests/commands/base.py
@@ -1,9 +1,11 @@
 # Copyright (c) 2015 Ansible, Inc.
 # All Rights Reserved
 
+# Python
 import StringIO
 import sys
 import json
+from dateutil.relativedelta import relativedelta
 
 # Django
 from django.core.management import call_command
@@ -11,11 +13,29 @@ from django.core.management import call_command
 # AWX
 from awx.main.models import * # noqa
 from awx.main.tests.base import BaseTestMixin
+from awx.fact.models import * # noqa
 
 class BaseCommandMixin(BaseTestMixin):
     '''
     Base class for tests that run management commands.
     '''
+    def create_hosts_and_facts(self, epoch, host_count, facts_per_host):
+        '''Create host_count FactHost rows and, for each one, facts_per_host
+        'packages' scans stamped epoch, epoch - 1 year, epoch - 2 years, ...'''
+        self.hosts = []
+        for i in range(0, host_count):
+            host = FactHost.objects.create(hostname='host_%d' % i)
+            self.hosts.append(host)
+
+        self.fact = {'hello': 'world'}
+        self.facts = []
+        self.versions = []
+        for host in self.hosts:
+            for i in range(0, facts_per_host):
+                t = epoch - relativedelta(years=i)
+                (fact_obj, version_obj) = Fact.add_fact(t, self.fact, host, 'packages')
+                self.facts.append(fact_obj)
+                self.versions.append(version_obj)
 
     def create_test_inventories(self):
         self.setup_users()
diff --git a/awx/main/tests/commands/cleanup_facts.py b/awx/main/tests/commands/cleanup_facts.py
new file mode 100644
index 0000000000..c10f5cd90b
--- /dev/null
+++ b/awx/main/tests/commands/cleanup_facts.py
@@ -0,0 +1,172 @@
+# Copyright (c) 2015 Ansible, Inc.
+# All Rights Reserved
+
+# Python
+from datetime import datetime
+from dateutil.relativedelta import relativedelta
+import mock
+
+# Django
+from django.core.management.base import CommandError
+
+# AWX
+from awx.main.tests.base import BaseTest, MongoDBRequired
+from awx.main.tests.commands.base import BaseCommandMixin
+from awx.main.management.commands.cleanup_facts import Command, CleanupFacts
+from awx.fact.models.fact import * # noqa
+
+__all__ = ['CommandTest','CleanupFactsUnitTest', 'CleanupFactsCommandFunctionalTest']
+
+class CleanupFactsCommandFunctionalTest(BaseCommandMixin, BaseTest, MongoDBRequired):
+    def test_invoke_zero_ok(self):
+        self.create_hosts_and_facts(datetime(year=2015, day=2, month=1, microsecond=0), 10, 20)
+
+        result, stdout, stderr = self.run_command('cleanup_facts', granularity='2y', older_than='1d')
+        self.assertEqual(stdout, 'Deleted %s facts.\n' % ((200 / 2)))
+
+    def test_invoke_zero_deleted(self):
+        result, stdout, stderr = self.run_command('cleanup_facts', granularity='1w',older_than='5d')
+        self.assertEqual(stdout, 'Deleted 0 facts.\n')
+
+    def test_invoke_params_required(self):
+        result, stdout, stderr = self.run_command('cleanup_facts')
+        self.assertIsInstance(result, CommandError)
+        self.assertEqual(str(result), 'Both --granularity and --older_than are required.')
+
+class CommandTest(BaseTest):
+    @mock.patch('awx.main.management.commands.cleanup_facts.CleanupFacts.run')
+    def test_parameters_ok(self, run):
+
+        kv = {
+            'older_than': '1d',
+            'granularity': '1d',
+        }
+        cmd = Command()
+        cmd.handle(None, **kv)
+        run.assert_called_once_with(relativedelta(days=1), relativedelta(days=1))
+
+    def test_string_time_to_timestamp_ok(self):
+        kvs = [
+            {
+                'time': '2w',
+                'timestamp': relativedelta(weeks=2),
+                'msg': '2 weeks',
+            },
+            {
+                'time': '23d',
+                'timestamp': relativedelta(days=23),
+                'msg': '23 days',
+            },
+            {
+                'time': '11m',
+                'timestamp': relativedelta(months=11),
+                'msg': '11 months',
+            },
+            {
+                'time': '14y',
+                'timestamp': relativedelta(years=14),
+                'msg': '14 years',
+            },
+        ]
+        for kv in kvs:
+            cmd = Command()
+            res = cmd.string_time_to_timestamp(kv['time'])
+            self.assertEqual(kv['timestamp'], res, "%s should convert to %s" % (kv['time'], kv['msg']))
+
+    def test_string_time_to_timestamp_invalid(self):
+        kvs = [
+            {
+                'time': '2weeks',
+                'msg': 'weeks instead of w',
+            },
+            {
+                'time': '2days',
+                'msg': 'days instead of d',
+            },
+            {
+                'time': '23',
+                'msg': 'no unit specified',
+            },
+            {
+                'time': None,
+                'msg': 'no value specified',
+            },
+            {
+                'time': 'zigzag',
+                'msg': 'random string specified',
+            },
+        ]
+        for kv in kvs:
+            cmd = Command()
+            res = cmd.string_time_to_timestamp(kv['time'])
+            self.assertIsNone(res, kv['msg'])
+
+    # Mock run() just in case, but it should never get called because an error should be thrown
+    @mock.patch('awx.main.management.commands.cleanup_facts.CleanupFacts.run')
+    def test_parameters_fail(self, run):
+        kvs = [
+            {
+                'older_than': '1week',
+                'granularity': '1d',
+                'msg': 'Invalid older_than param value',
+            },
+            {
+                'older_than': '1d',
+                'granularity': '1year',
+                'msg': 'Invalid granularity param value',
+            }
+        ]
+        for kv in kvs:
+            cmd = Command()
+            with self.assertRaises(CommandError):
+                cmd.handle(None, older_than=kv['older_than'], granularity=kv['granularity'])
+
+class CleanupFactsUnitTest(BaseCommandMixin, BaseTest, MongoDBRequired):
+    def setUp(self):
+        super(CleanupFactsUnitTest, self).setUp()
+
+        self.datetime_base = datetime(year=2015, day=2, month=1, microsecond=0)
+        self.HOSTS = 10
+        self.FACTS_PER_HOST = 20
+
+        self.create_hosts_and_facts(self.datetime_base, self.HOSTS, self.FACTS_PER_HOST)
+
+    def test_cleanup_logic(self):
+        '''
+        Create 10 hosts with 20 facts each. A single fact a year for 20 years.
+        After cleanup, there should be 10 facts for each host.
+        Then ensure the correct facts are deleted.
+        '''
+        cleanup_facts = CleanupFacts()
+        fact_oldest = FactVersion.objects.all().order_by('timestamp').first()
+        granularity = relativedelta(years=2)
+
+        deleted_count = cleanup_facts.cleanup(self.datetime_base, granularity)
+        self.assertEqual(deleted_count, (self.FACTS_PER_HOST * self.HOSTS) / 2)
+
+        # Check the number of facts per host
+        for host in self.hosts:
+            count = FactVersion.objects.filter(host=host).count()
+            self.assertEqual(count, self.FACTS_PER_HOST / 2, "should have half the number of FactVersion per host for host %s" % host.hostname)
+
+            count = Fact.objects.filter(host=host).count()
+            self.assertEqual(count, self.FACTS_PER_HOST / 2, "should have half the number of Fact per host")
+
+        # Ensure that only 1 fact exists per granularity time
+        for host in self.hosts:
+            # Restart from the newest window for every host; otherwise only the first host is checked
+            date_pivot = self.datetime_base
+            while date_pivot > fact_oldest.timestamp:
+                date_pivot_next = date_pivot - granularity
+                kv = {
+                    'timestamp__lte': date_pivot,
+                    'timestamp__gt': date_pivot_next,
+                    'host': host,
+                }
+                count = FactVersion.objects.filter(**kv).count()
+                self.assertEqual(count, 1, "should only be 1 FactVersion per the 2 year granularity")
+                count = Fact.objects.filter(**kv).count()
+                self.assertEqual(count, 1, "should only be 1 Fact per the 2 year granularity")
+                date_pivot = date_pivot_next
+
+
+