Merge pull request #133 from chrismeyersfsu/feature-cleanup_facts

implemented cleanup_facts command + tests
This commit is contained in:
Chris Meyers 2015-04-13 15:10:10 -04:00
commit bbe23b0071
4 changed files with 323 additions and 2 deletions

View File

@ -0,0 +1,126 @@
# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved
# Python
import re
from dateutil.relativedelta import relativedelta
from datetime import datetime
from optparse import make_option
# Django
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
# AWX
from awx.fact.models.fact import * # noqa
OLDER_THAN = 'older_than'
GRANULARITY = 'granularity'
class CleanupFacts(object):
    """Thin out stored fact scans so that each host keeps one scan per time window."""

    def __init__(self):
        self.timestamp = None

    # Algorithm:
    #   Start the search window at older_than_abs and walk backwards in time
    #   until the oldest stored FactVersion has been passed.
    #   For every window (pivot - granularity, pivot]:
    #       group the FactVersion entries by host, newest first
    #       for each host, delete every entry except the last (oldest) one;
    #       which entry survives is an arbitrary decision
    #       pivot -= granularity
    def cleanup(self, older_than_abs, granularity):
        """Delete redundant fact scans older than ``older_than_abs``.

        older_than_abs -- absolute point in time; only entries with a
                          timestamp at or before it are considered.
        granularity    -- window length subtracted from the pivot on every
                          step (a relativedelta when called from run()).

        Returns the accumulated value reported by the Fact delete() calls
        (presumably the number of Fact documents removed -- confirm against
        the query backend in use).

        Raises ValueError when subtracting granularity does not move the
        pivot back in time, because the window loop would never terminate.
        """
        # A zero or negative window leaves the pivot where it is (or moves it
        # forward), which would spin forever in the loop below.
        if older_than_abs - granularity >= older_than_abs:
            raise ValueError('granularity must be a positive duration')

        fact_oldest = FactVersion.objects.all().order_by('timestamp').first()
        if not fact_oldest:
            return 0

        total = 0
        date_pivot = older_than_abs
        while date_pivot > fact_oldest.timestamp:
            date_pivot_next = date_pivot - granularity
            kv = {
                'timestamp__lte': date_pivot,
                'timestamp__gt': date_pivot_next,
            }
            version_objs = FactVersion.objects.filter(**kv).order_by('-timestamp')

            # Transform array -> {host_id} = [<fact_version>, <fact_version>, ...]
            # TODO: If this set gets large then we can use mongo to transform the data set for us.
            host_ids = {}
            for obj in version_objs:
                host_ids.setdefault(obj.host.id, []).append(obj)

            for k in host_ids:
                # Everything but the final entry of the group is deleted; the
                # group is sorted newest first, so the oldest scan survives.
                doomed = host_ids[k][:-1]
                ids = [version.id for version in doomed]
                fact_ids = [version.fact.id for version in doomed]

                FactVersion.objects.filter(id__in=ids).delete()
                # FIXME: if this crashes here then we are inconsistent
                count = Fact.objects.filter(id__in=fact_ids).delete()
                total += count

            date_pivot = date_pivot_next

        return total

    def run(self, older_than, granularity):
        """Run a cleanup relative to the current time and print the result.

        older_than and granularity are of type relativedelta.
        """
        t = datetime.now()
        deleted_count = self.cleanup(t - older_than, granularity)
        print("Deleted %d facts." % deleted_count)
class Command(BaseCommand):
    """Management command that parses the time options and runs CleanupFacts."""

    help = 'Cleanup facts. For each host older than the value specified, keep one fact scan for each time window (granularity).'
    option_list = BaseCommand.option_list + (
        make_option('--older_than',
                    dest='older_than',
                    default=None,
                    help='Specify the relative time to consider facts older than (w)eek (d)ay (m)onth or (y)ear (i.e. 5d, 2w, 1y).'),
        make_option('--granularity',
                    dest='granularity',
                    default=None,
                    help='Window duration to group same hosts by for deletion (w)eek (d)ay (m)onth or (y)ear (i.e. 5d, 2w, 1y).'),)

    def __init__(self):
        super(Command, self).__init__()

    def string_time_to_timestamp(self, time_string):
        """Convert a string such as '5d', '2w', '11m' or '1y' to a relativedelta.

        Returns None when the string is not a number followed by exactly one
        of the known unit letters (or is not a string at all).
        """
        units = {
            'y': 'years',
            'd': 'days',
            'w': 'weeks',
            'm': 'months'
        }
        try:
            match = re.match(r'(?P<value>[0-9]+)(?P<unit>.*)', time_string)
            # match is None for non-matching input -> AttributeError below
            group = match.groupdict()
            # Unknown or missing unit letters -> KeyError
            units_verbose = units[group['unit']]
            return relativedelta(**{units_verbose: int(group['value'])})
        except (KeyError, TypeError, AttributeError):
            return None

    @transaction.atomic
    def handle(self, *args, **options):
        """Validate --older_than / --granularity and run the cleanup.

        Raises CommandError when either option is missing or cannot be
        parsed, or when the granularity is a zero-length duration.
        """
        cleanup_facts = CleanupFacts()
        if not all([options[GRANULARITY], options[OLDER_THAN]]):
            raise CommandError('Both --granularity and --older_than are required.')

        older_than = self.string_time_to_timestamp(options[OLDER_THAN])
        granularity = self.string_time_to_timestamp(options[GRANULARITY])

        if older_than is None:
            raise CommandError('--older_than invalid value "%s"' % options[OLDER_THAN])
        # A zero-length window (e.g. "0d") would never advance the cleanup
        # loop, so it is rejected just like an unparsable value.
        if granularity is None or granularity == relativedelta():
            raise CommandError('--granularity invalid value "%s"' % options[GRANULARITY])

        cleanup_facts.run(older_than, granularity)

View File

@ -1,5 +1,8 @@
# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved
from awx.main.tests.commands.run_fact_cache_receiver import * # noqa
from awx.main.tests.commands.commands_monolithic import * # noqa
from __future__ import absolute_import
from .run_fact_cache_receiver import * # noqa
from .commands_monolithic import * # noqa
from .cleanup_facts import * # noqa

View File

@ -1,9 +1,11 @@
# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved
# Python
import StringIO
import sys
import json
from dateutil.relativedelta import relativedelta
# Django
from django.core.management import call_command
@ -11,11 +13,29 @@ from django.core.management import call_command
# AWX
from awx.main.models import * # noqa
from awx.main.tests.base import BaseTestMixin
from awx.fact.models import * # noqa
class BaseCommandMixin(BaseTestMixin):
'''
Base class for tests that run management commands.
'''
def create_hosts_and_facts(self, epoch, host_count, facts_per_host):
    """Populate fixture hosts and one 'packages' fact per host per year.

    Creates ``host_count`` FactHost entries named host_0, host_1, ... and,
    for each of them, ``facts_per_host`` facts whose timestamps step back
    one year at a time starting at ``epoch``. The created objects are kept
    on self.hosts, self.facts and self.versions.
    """
    self.hosts = []
    for index in range(host_count):
        self.hosts.append(FactHost.objects.create(hostname='host_%d' % index))

    self.fact = {'hello': 'world'}
    self.facts = []
    self.versions = []

    for fact_host in self.hosts:
        for years_back in range(facts_per_host):
            scanned_at = epoch - relativedelta(years=years_back)
            created = Fact.add_fact(scanned_at, self.fact, fact_host, 'packages')
            (fact_obj, version_obj) = created
            self.facts.append(fact_obj)
            self.versions.append(version_obj)
def create_test_inventories(self):
self.setup_users()

View File

@ -0,0 +1,172 @@
# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved
# Python
from datetime import datetime
from dateutil.relativedelta import relativedelta
import mock
#Django
from django.core.management.base import CommandError
# AWX
from awx.main.tests.base import BaseTest, MongoDBRequired
from awx.main.tests.commands.base import BaseCommandMixin
from awx.main.management.commands.cleanup_facts import Command, CleanupFacts
from awx.fact.models.fact import * # noqa
__all__ = ['CommandTest','CleanupFactsUnitTest', 'CleanupFactsCommandFunctionalTest']
class CleanupFactsCommandFunctionalTest(BaseCommandMixin, BaseTest, MongoDBRequired):
    """End-to-end checks of the cleanup_facts management command."""

    def test_invoke_zero_ok(self):
        """Half of the 10 x 20 yearly facts go away with a 2 year window."""
        epoch = datetime(year=2015, day=2, month=1, microsecond=0)
        self.create_hosts_and_facts(epoch, 10, 20)
        result, stdout, stderr = self.run_command(
            'cleanup_facts', granularity='2y', older_than='1d')
        expected_output = 'Deleted %s facts.\n' % ((200 / 2))
        self.assertEqual(stdout, expected_output)

    def test_invoke_zero_deleted(self):
        """With no facts stored, the command reports zero deletions."""
        result, stdout, stderr = self.run_command(
            'cleanup_facts', granularity='1w', older_than='5d')
        self.assertEqual(stdout, 'Deleted 0 facts.\n')

    def test_invoke_params_required(self):
        """Omitting both options yields the 'required' CommandError."""
        result, stdout, stderr = self.run_command('cleanup_facts')
        self.assertIsInstance(result, CommandError)
        self.assertEqual(
            str(result), 'Both --granularity and --older_than are required.')
class CommandTest(BaseTest):
    """Option parsing and validation tests for the cleanup_facts Command."""

    @mock.patch('awx.main.management.commands.cleanup_facts.CleanupFacts.run')
    def test_parameters_ok(self, run):
        """Valid options are converted and forwarded to CleanupFacts.run()."""
        options = {'older_than': '1d', 'granularity': '1d'}
        Command().handle(None, **options)
        one_day = relativedelta(days=1)
        run.assert_called_once_with(one_day, relativedelta(days=1))

    def test_string_time_to_timestamp_ok(self):
        """Each supported unit letter maps to the matching relativedelta."""
        cases = [
            {'time': '2w', 'timestamp': relativedelta(weeks=2), 'msg': '2 weeks'},
            {'time': '23d', 'timestamp': relativedelta(days=23), 'msg': '23 days'},
            {'time': '11m', 'timestamp': relativedelta(months=11), 'msg': '11 months'},
            {'time': '14y', 'timestamp': relativedelta(years=14), 'msg': '14 years'},
        ]
        for case in cases:
            command = Command()
            converted = command.string_time_to_timestamp(case['time'])
            failure_text = "%s should convert to %s" % (case['time'], case['msg'])
            self.assertEqual(case['timestamp'], converted, failure_text)

    def test_string_time_to_timestamp_invalid(self):
        """Malformed time strings are converted to None."""
        cases = [
            {'time': '2weeks', 'msg': 'weeks instead of w'},
            {'time': '2days', 'msg': 'days instead of d'},
            {'time': '23', 'msg': 'no unit specified'},
            {'time': None, 'msg': 'no value specified'},
            {'time': 'zigzag', 'msg': 'random string specified'},
        ]
        for case in cases:
            command = Command()
            converted = command.string_time_to_timestamp(case['time'])
            self.assertIsNone(converted, case['msg'])

    # Mock run() just in case, but it should never get called because an error should be thrown
    @mock.patch('awx.main.management.commands.cleanup_facts.CleanupFacts.run')
    def test_parameters_fail(self, run):
        """An unparsable value for either option raises CommandError."""
        cases = [
            {
                'older_than': '1week',
                'granularity': '1d',
                'msg': 'Invalid older_than param value',
            },
            {
                'older_than': '1d',
                'granularity': '1year',
                'msg': 'Invalid granularity param value',
            },
        ]
        for case in cases:
            command = Command()
            with self.assertRaises(CommandError):
                command.handle(
                    None,
                    older_than=case['older_than'],
                    granularity=case['granularity'])
class CleanupFactsUnitTest(BaseCommandMixin, BaseTest, MongoDBRequired):
    """Exercises CleanupFacts.cleanup() directly against stored facts."""

    def setUp(self):
        super(CleanupFactsUnitTest, self).setUp()
        self.datetime_base = datetime(year=2015, day=2, month=1, microsecond=0)
        self.HOSTS = 10
        self.FACTS_PER_HOST = 20
        self.create_hosts_and_facts(self.datetime_base, self.HOSTS, self.FACTS_PER_HOST)

    def test_cleanup_logic(self):
        """
        Create 10 hosts with 20 facts each. A single fact a year for 20 years.
        After cleanup, there should be 10 facts for each host.
        Then ensure the correct facts are deleted.
        """
        cleanup_facts = CleanupFacts()
        # Captured before the cleanup so the verification below walks the
        # same range of windows the cleanup itself walked.
        fact_oldest = FactVersion.objects.all().order_by('timestamp').first()
        granularity = relativedelta(years=2)

        deleted_count = cleanup_facts.cleanup(self.datetime_base, granularity)
        self.assertEqual(deleted_count, (self.FACTS_PER_HOST * self.HOSTS) // 2)

        # Check the number of facts per host
        for host in self.hosts:
            count = FactVersion.objects.filter(host=host).count()
            self.assertEqual(count, self.FACTS_PER_HOST // 2,
                             "should have half the number of FactVersion per host for host %s" % host.hostname)

            count = Fact.objects.filter(host=host).count()
            self.assertEqual(count, self.FACTS_PER_HOST // 2,
                             "should have half the number of Fact per host")

        # Ensure that only 1 fact exists per granularity time
        for host in self.hosts:
            # Restart from the newest window for every host; otherwise only
            # the first host is ever verified.
            date_pivot = self.datetime_base
            while date_pivot > fact_oldest.timestamp:
                date_pivot_next = date_pivot - granularity
                kv = {
                    'timestamp__lte': date_pivot,
                    'timestamp__gt': date_pivot_next,
                    'host': host,
                }
                count = FactVersion.objects.filter(**kv).count()
                self.assertEqual(count, 1, "should only be 1 FactVersion per the 2 year granularity")
                count = Fact.objects.filter(**kv).count()
                self.assertEqual(count, 1, "should only be 1 Fact per the 2 year granularity")
                date_pivot = date_pivot_next