Correctly delete facts when module isn't specified. Also made deletion algorithm more simple and memory efficient by not loading in records into memory.

This commit is contained in:
Chris Meyers
2015-06-05 17:05:20 -04:00
parent c7dfb4dd84
commit 16682ba830
2 changed files with 71 additions and 35 deletions

View File

@@ -36,48 +36,44 @@ class CleanupFacts(object):
if not fact_oldest: if not fact_oldest:
return 0 return 0
kv = {
'timestamp__lte': older_than_abs
}
if module:
kv['module'] = module
# Special case, granularity=0x where x is d, w, or y # Special case, granularity=0x where x is d, w, or y
# The intent is to delete all facts < older_than_abs # The intent is to delete all facts < older_than_abs
if granularity == relativedelta(): if granularity == relativedelta():
flag_delete_all = True return FactVersion.objects.filter(**kv).order_by('-timestamp').delete()
total = 0 total = 0
date_pivot = older_than_abs date_pivot = older_than_abs
while date_pivot > fact_oldest.timestamp: while date_pivot > fact_oldest.timestamp:
date_pivot_next = date_pivot - granularity date_pivot_next = date_pivot - granularity
# For the current time window.
# Delete all facts expect the fact that matches the largest timestamp.
kv = { kv = {
'timestamp__lte': date_pivot 'timestamp__lte': date_pivot
} }
if not flag_delete_all:
kv['timestamp__gt'] = date_pivot_next
if module: if module:
kv['module'] = module kv['module'] = module
version_objs = FactVersion.objects.filter(**kv).order_by('-timestamp')
if flag_delete_all: fact_version_objs = FactVersion.objects.filter(**kv).order_by('-timestamp').limit(1)
total = version_objs.delete() if fact_version_objs:
break fact_version_obj = fact_version_objs[0]
kv = {
# Transform array -> {host_id} = [<fact_version>, <fact_version>, ...] 'timestamp__lt': fact_version_obj.timestamp,
# TODO: If this set gets large then we can use mongo to transform the data set for us. 'timestamp__gt': date_pivot_next
host_ids = {} }
for obj in version_objs: if module:
k = obj.host.id kv['module'] = module
if k not in host_ids: count = FactVersion.objects.filter(**kv).delete()
host_ids[k] = [] # FIXME: These two deletes should be a transaction
host_ids[k].append(obj) count = Fact.objects.filter(**kv).delete()
for k in host_ids:
ids = [fact.id for fact in host_ids[k]]
fact_ids = [fact.fact.id for fact in host_ids[k]]
# Remove 1 entry
ids.pop()
fact_ids.pop()
# delete the rest
count = FactVersion.objects.filter(id__in=ids).delete()
# FIXME: if this crashes here then we are inconsistent
count = Fact.objects.filter(id__in=fact_ids).delete()
total += count total += count
date_pivot = date_pivot_next date_pivot = date_pivot_next

View File

@@ -149,11 +149,11 @@ class CleanupFactsUnitTest(BaseCommandMixin, BaseTest, MongoDBRequired):
self.builder = FactScanBuilder() self.builder = FactScanBuilder()
self.builder.add_fact('ansible', TEST_FACT_ANSIBLE) self.builder.add_fact('ansible', TEST_FACT_ANSIBLE)
self.builder.add_fact('packages', TEST_FACT_PACKAGES)
self.builder.build(scan_count=20, host_count=10) self.builder.build(scan_count=20, host_count=10)
''' '''
Create 10 hosts with 20 facts each. A single fact a year for 20 years. Create 10 hosts with 40 facts each. After cleanup, there should be 20 facts for each host.
After cleanup, there should be 10 facts for each host.
Then ensure the correct facts are deleted. Then ensure the correct facts are deleted.
''' '''
def test_cleanup_logic(self): def test_cleanup_logic(self):
@@ -162,17 +162,18 @@ class CleanupFactsUnitTest(BaseCommandMixin, BaseTest, MongoDBRequired):
granularity = relativedelta(years=2) granularity = relativedelta(years=2)
deleted_count = cleanup_facts.cleanup(self.builder.get_timestamp(0), granularity) deleted_count = cleanup_facts.cleanup(self.builder.get_timestamp(0), granularity)
self.assertEqual(deleted_count, (self.builder.get_scan_count() * self.builder.get_host_count()) / 2) self.assertEqual(deleted_count, 2 * (self.builder.get_scan_count() * self.builder.get_host_count()) / 2)
# Check the number of facts per host # Check the number of facts per host
for host in self.builder.get_hosts(): for host in self.builder.get_hosts():
count = FactVersion.objects.filter(host=host).count() count = FactVersion.objects.filter(host=host).count()
self.assertEqual(count, self.builder.get_scan_count() / 2, "should have half the number of FactVersion per host for host %s") scan_count = (2 * self.builder.get_scan_count()) / 2
self.assertEqual(count, scan_count)
count = Fact.objects.filter(host=host).count() count = Fact.objects.filter(host=host).count()
self.assertEqual(count, self.builder.get_scan_count() / 2, "should have half the number of Fact per host") self.assertEqual(count, scan_count)
# Ensure that only 1 fact exists per granularity time # Ensure that only 2 facts (ansible and packages) exists per granularity time
date_pivot = self.builder.get_timestamp(0) date_pivot = self.builder.get_timestamp(0)
for host in self.builder.get_hosts(): for host in self.builder.get_hosts():
while date_pivot > fact_oldest.timestamp: while date_pivot > fact_oldest.timestamp:
@@ -183,11 +184,50 @@ class CleanupFactsUnitTest(BaseCommandMixin, BaseTest, MongoDBRequired):
'host': host, 'host': host,
} }
count = FactVersion.objects.filter(**kv).count() count = FactVersion.objects.filter(**kv).count()
self.assertEqual(count, 1, "should only be 1 FactVersion per the 2 year granularity") self.assertEqual(count, 2, "should only be 2 FactVersion per the 2 year granularity")
count = Fact.objects.filter(**kv).count() count = Fact.objects.filter(**kv).count()
self.assertEqual(count, 1, "should only be 1 Fact per the 2 year granularity") self.assertEqual(count, 2, "should only be 2 Fact per the 2 year granularity")
date_pivot = date_pivot_next
'''
Create 10 hosts with 40 facts each. After cleanup, there should be 30 facts for each host.
Then ensure the correct facts are deleted.
'''
def test_cleanup_module(self):
cleanup_facts = CleanupFacts()
fact_oldest = FactVersion.objects.all().order_by('timestamp').first()
granularity = relativedelta(years=2)
deleted_count = cleanup_facts.cleanup(self.builder.get_timestamp(0), granularity, module='ansible')
self.assertEqual(deleted_count, (self.builder.get_scan_count() * self.builder.get_host_count()) / 2)
# Check the number of facts per host
for host in self.builder.get_hosts():
count = FactVersion.objects.filter(host=host).count()
self.assertEqual(count, 30)
count = Fact.objects.filter(host=host).count()
self.assertEqual(count, 30)
# Ensure that only 1 ansible fact exists per granularity time
date_pivot = self.builder.get_timestamp(0)
for host in self.builder.get_hosts():
while date_pivot > fact_oldest.timestamp:
date_pivot_next = date_pivot - granularity
kv = {
'timestamp__lte': date_pivot,
'timestamp__gt': date_pivot_next,
'host': host,
'module': 'ansible',
}
count = FactVersion.objects.filter(**kv).count()
self.assertEqual(count, 1)
count = Fact.objects.filter(**kv).count()
self.assertEqual(count, 1)
date_pivot = date_pivot_next date_pivot = date_pivot_next