From 16682ba830fd0f128ed3b82a1fefc3ecbd8ab0dc Mon Sep 17 00:00:00 2001 From: Chris Meyers Date: Fri, 5 Jun 2015 17:05:20 -0400 Subject: [PATCH] Correctly delete facts when module isn't specified. Also made deletion algorithm more simple and memory efficient by not loading in records into memory. --- awx/main/management/commands/cleanup_facts.py | 50 ++++++++--------- awx/main/tests/commands/cleanup_facts.py | 56 ++++++++++++++++--- 2 files changed, 71 insertions(+), 35 deletions(-) diff --git a/awx/main/management/commands/cleanup_facts.py b/awx/main/management/commands/cleanup_facts.py index 0ff743b15e..97effac2bf 100644 --- a/awx/main/management/commands/cleanup_facts.py +++ b/awx/main/management/commands/cleanup_facts.py @@ -36,48 +36,44 @@ class CleanupFacts(object): if not fact_oldest: return 0 + kv = { + 'timestamp__lte': older_than_abs + } + if module: + kv['module'] = module + # Special case, granularity=0x where x is d, w, or y # The intent is to delete all facts < older_than_abs if granularity == relativedelta(): - flag_delete_all = True + return FactVersion.objects.filter(**kv).order_by('-timestamp').delete() total = 0 + date_pivot = older_than_abs while date_pivot > fact_oldest.timestamp: date_pivot_next = date_pivot - granularity + + # For the current time window. + # Delete all facts expect the fact that matches the largest timestamp. kv = { 'timestamp__lte': date_pivot } - if not flag_delete_all: - kv['timestamp__gt'] = date_pivot_next if module: kv['module'] = module - version_objs = FactVersion.objects.filter(**kv).order_by('-timestamp') - if flag_delete_all: - total = version_objs.delete() - break - - # Transform array -> {host_id} = [, , ...] - # TODO: If this set gets large then we can use mongo to transform the data set for us. - host_ids = {} - for obj in version_objs: - k = obj.host.id - if k not in host_ids: - host_ids[k] = [] - host_ids[k].append(obj) - - for k in host_ids: - ids = [fact.id for fact in host_ids[k]] - fact_ids = [fact.fact.id for fact in host_ids[k]] - # Remove 1 entry - ids.pop() - fact_ids.pop() - # delete the rest - count = FactVersion.objects.filter(id__in=ids).delete() - # FIXME: if this crashes here then we are inconsistent - count = Fact.objects.filter(id__in=fact_ids).delete() + fact_version_objs = FactVersion.objects.filter(**kv).order_by('-timestamp').limit(1) + if fact_version_objs: + fact_version_obj = fact_version_objs[0] + kv = { + 'timestamp__lt': fact_version_obj.timestamp, + 'timestamp__gt': date_pivot_next + } + if module: + kv['module'] = module + count = FactVersion.objects.filter(**kv).delete() + # FIXME: These two deletes should be a transaction + count = Fact.objects.filter(**kv).delete() total += count date_pivot = date_pivot_next diff --git a/awx/main/tests/commands/cleanup_facts.py b/awx/main/tests/commands/cleanup_facts.py index 4d40acbcb2..2e397b201b 100644 --- a/awx/main/tests/commands/cleanup_facts.py +++ b/awx/main/tests/commands/cleanup_facts.py @@ -149,11 +149,11 @@ class CleanupFactsUnitTest(BaseCommandMixin, BaseTest, MongoDBRequired): self.builder = FactScanBuilder() self.builder.add_fact('ansible', TEST_FACT_ANSIBLE) + self.builder.add_fact('packages', TEST_FACT_PACKAGES) self.builder.build(scan_count=20, host_count=10) ''' - Create 10 hosts with 20 facts each. A single fact a year for 20 years. - After cleanup, there should be 10 facts for each host. + Create 10 hosts with 40 facts each. After cleanup, there should be 20 facts for each host. Then ensure the correct facts are deleted. ''' def test_cleanup_logic(self): @@ -162,17 +162,18 @@ class CleanupFactsUnitTest(BaseCommandMixin, BaseTest, MongoDBRequired): granularity = relativedelta(years=2) deleted_count = cleanup_facts.cleanup(self.builder.get_timestamp(0), granularity) - self.assertEqual(deleted_count, (self.builder.get_scan_count() * self.builder.get_host_count()) / 2) + self.assertEqual(deleted_count, 2 * (self.builder.get_scan_count() * self.builder.get_host_count()) / 2) # Check the number of facts per host for host in self.builder.get_hosts(): count = FactVersion.objects.filter(host=host).count() - self.assertEqual(count, self.builder.get_scan_count() / 2, "should have half the number of FactVersion per host for host %s") + scan_count = (2 * self.builder.get_scan_count()) / 2 + self.assertEqual(count, scan_count) count = Fact.objects.filter(host=host).count() - self.assertEqual(count, self.builder.get_scan_count() / 2, "should have half the number of Fact per host") + self.assertEqual(count, scan_count) - # Ensure that only 1 fact exists per granularity time + # Ensure that only 2 facts (ansible and packages) exists per granularity time date_pivot = self.builder.get_timestamp(0) for host in self.builder.get_hosts(): while date_pivot > fact_oldest.timestamp: @@ -183,11 +184,50 @@ class CleanupFactsUnitTest(BaseCommandMixin, BaseTest, MongoDBRequired): 'host': host, } count = FactVersion.objects.filter(**kv).count() - self.assertEqual(count, 1, "should only be 1 FactVersion per the 2 year granularity") + self.assertEqual(count, 2, "should only be 2 FactVersion per the 2 year granularity") count = Fact.objects.filter(**kv).count() - self.assertEqual(count, 1, "should only be 1 Fact per the 2 year granularity") + self.assertEqual(count, 2, "should only be 2 Fact per the 2 year granularity") + date_pivot = date_pivot_next + + ''' + Create 10 hosts with 40 facts each. After cleanup, there should be 30 facts for each host. + Then ensure the correct facts are deleted. + ''' + def test_cleanup_module(self): + cleanup_facts = CleanupFacts() + fact_oldest = FactVersion.objects.all().order_by('timestamp').first() + granularity = relativedelta(years=2) + + deleted_count = cleanup_facts.cleanup(self.builder.get_timestamp(0), granularity, module='ansible') + self.assertEqual(deleted_count, (self.builder.get_scan_count() * self.builder.get_host_count()) / 2) + + # Check the number of facts per host + for host in self.builder.get_hosts(): + count = FactVersion.objects.filter(host=host).count() + self.assertEqual(count, 30) + + count = Fact.objects.filter(host=host).count() + self.assertEqual(count, 30) + + # Ensure that only 1 ansible fact exists per granularity time + date_pivot = self.builder.get_timestamp(0) + for host in self.builder.get_hosts(): + while date_pivot > fact_oldest.timestamp: + date_pivot_next = date_pivot - granularity + kv = { + 'timestamp__lte': date_pivot, + 'timestamp__gt': date_pivot_next, + 'host': host, + 'module': 'ansible', + } + count = FactVersion.objects.filter(**kv).count() + self.assertEqual(count, 1) + count = Fact.objects.filter(**kv).count() + self.assertEqual(count, 1) date_pivot = date_pivot_next + +