From 391a4aa4394e4155327333158b63332768b8d190 Mon Sep 17 00:00:00 2001 From: Chris Meyers Date: Wed, 6 May 2015 10:50:51 -0400 Subject: [PATCH] fixed single_fact query For each host, return only the FIRST (most recent) fact whose timestamp is less than or equal to the query timestamp. --- awx/fact/models/fact.py | 59 +++++++++--- awx/fact/tests/models/fact/base.py | 23 +++++ .../models/fact/fact_get_single_facts.py | 93 ++++++++++--------- awx/fact/tests/models/fact/fact_simple.py | 18 +--- 4 files changed, 120 insertions(+), 73 deletions(-) diff --git a/awx/fact/models/fact.py b/awx/fact/models/fact.py index 1a5d0f64ed..e9a7e30cf7 100644 --- a/awx/fact/models/fact.py +++ b/awx/fact/models/fact.py @@ -101,22 +101,55 @@ class Fact(Document): @staticmethod def get_single_facts(hostnames, fact_key, fact_value, timestamp, module): - host_ids = FactHost.objects.filter(hostname__in=hostnames).values_list('id') - if not host_ids or len(host_ids) == 0: - return None - kv = { - 'host__in': host_ids, - 'timestamp__lte': timestamp, - 'module': module, + 'hostname': { + '$in': hostnames, + } } - facts = FactVersion.objects.filter(**kv).values_list('fact') - if not facts or len(facts) == 0: + fields = { + '_id': 1 + } + host_ids = FactHost._get_collection().find(kv, fields) + if not host_ids or host_ids.count() == 0: return None - # TODO: Make sure the below doesn't trigger a query to get the fact record - # It's unclear as to if mongoengine will query the full fact when the id is referenced. - # This is not a logic problem, but a performance problem. - fact_ids = [fact.id for fact in facts] + # TODO: use mongo to transform [{_id: <>}, {_id: <>},...] into [_id, _id,...] 
+ host_ids = [e['_id'] for e in host_ids] + + pipeline = [] + match = { + 'host': { + '$in': host_ids + }, + 'timestamp': { + '$lte': timestamp + }, + 'module': module + } + sort = { + 'timestamp': -1 + } + group = { + '_id': '$host', + 'timestamp': { + '$first': '$timestamp' + }, + 'fact': { + '$first': '$fact' + } + } + project = { + '_id': 0, + 'fact': 1, + } + pipeline.append({'$match': match}) # noqa + pipeline.append({'$sort': sort}) # noqa + pipeline.append({'$group': group}) # noqa + pipeline.append({'$project': project}) # noqa + q = FactVersion._get_collection().aggregate(pipeline) + if not q or 'result' not in q or len(q['result']) == 0: + return None + # TODO: use mongo to transform [{fact: <>}, {fact: <>},...] into [fact, fact,...] + fact_ids = [fact['fact'] for fact in q['result']] kv = { 'fact.%s' % fact_key : fact_value, diff --git a/awx/fact/tests/models/fact/base.py b/awx/fact/tests/models/fact/base.py index 3d8c4653f0..28544dab4e 100644 --- a/awx/fact/tests/models/fact/base.py +++ b/awx/fact/tests/models/fact/base.py @@ -4,6 +4,8 @@ # Python from __future__ import absolute_import from awx.main.tests.base import BaseTest, MongoDBRequired +from copy import deepcopy +from datetime import datetime # AWX from awx.fact.models.fact import * # noqa @@ -32,3 +34,24 @@ class BaseFactTest(BaseTest, MongoDBRequired): def create_host_document(self, data): data['add_fact_data']['host'] = FactHost(hostname=data['hostname']).save() + + def create_fact_scans(self, data, host_count=1, scan_count=1): + timestamps = [] + self.fact_data = [] + self.fact_objs = [] + self.hostnames = [FactHost(hostname='%s_%s' % (data['hostname'], i)).save() for i in range(0, host_count)] + for i in range(0, scan_count): + self.fact_data.append([]) + self.fact_objs.append([]) + for j in range(0, host_count): + data = deepcopy(data) + t = datetime.now().replace(year=2015 - i, microsecond=0) + data['add_fact_data']['timestamp'] = t + data['add_fact_data']['host'] = self.hostnames[j] + 
(f, v) = Fact.add_fact(**data['add_fact_data']) + timestamps.append(t) + + self.fact_data[i].append(data) + self.fact_objs[i].append(f) + + return timestamps diff --git a/awx/fact/tests/models/fact/fact_get_single_facts.py b/awx/fact/tests/models/fact/fact_get_single_facts.py index 7322ee2853..9b5f25cb8b 100644 --- a/awx/fact/tests/models/fact/fact_get_single_facts.py +++ b/awx/fact/tests/models/fact/fact_get_single_facts.py @@ -4,7 +4,6 @@ # Python from __future__ import absolute_import from datetime import datetime -from copy import deepcopy # Django @@ -12,27 +11,24 @@ from copy import deepcopy from awx.fact.models.fact import * # noqa from .base import BaseFactTest -__all__ = ['FactGetSingleFactsTest'] +__all__ = ['FactGetSingleFactsTest', 'FactGetSingleFactsMultipleScansTest',] TEST_FACT_PACKAGES = [ { "name": "accountsservice", "architecture": "amd64", - "name": "accountsservice", "source": "apt", "version": "0.6.35-0ubuntu7.1" }, { "name": "acpid", "architecture": "amd64", - "name": "acpid", "source": "apt", "version": "1:2.0.21-1ubuntu2" }, { "name": "adduser", "architecture": "all", - "name": "adduser", "source": "apt", "version": "3.113+nmu3ubuntu3" }, @@ -62,32 +58,16 @@ TEST_FACT_NESTED_DATA = { class FactGetSingleFactsTest(BaseFactTest): - def create_fact_scans_unique_hosts(self, data, host_count): - self.fact_data = [] - self.fact_objs = [] - self.hostnames = [] - for i in range(1, host_count + 1): - fact_data = deepcopy(data) - fact_data['hostname'] = fact_data['hostname'] % (i) - fact_data['add_fact_data']['timestamp'] = datetime.now().replace(year=2015 - i) - BaseFactTest.normalize_timestamp(fact_data) - - self.create_host_document(fact_data) - (fact_obj, version_obj) = Fact.add_fact(**fact_data['add_fact_data']) - - self.fact_data.append(fact_data) - self.fact_objs.append(fact_obj) - self.hostnames.append(fact_data['hostname']) - - def setup_test_fact_data(self): + def setUp(self): + super(FactGetSingleFactsTest, self).setUp() self.host_count = 20 
- self.create_fact_scans_unique_hosts(TEST_FACT_DATA, self.host_count) - - def setup_test_fact_nested_data(self): - self.host_count = 20 - self.create_fact_scans_unique_hosts(TEST_FACT_NESTED_DATA, self.host_count) + self.timestamp = datetime.now().replace(year=2016) + self.create_fact_scans(TEST_FACT_DATA, self.host_count, scan_count=1) + self.hosts = [self.hostnames[i].hostname for i in range(0, self.host_count)] def check_query_results(self, facts_known, facts): + self.assertIsNotNone(facts) + self.assertEqual(len(facts_known), len(facts), "More or less facts found than expected") # Ensure only 'acpid' is returned for fact in facts: self.assertEqual(len(fact.fact), 1) @@ -109,33 +89,56 @@ class FactGetSingleFactsTest(BaseFactTest): self.assertEqual(len(facts_dict[key].fact), 1) def check_query_results_nested(self, facts): + self.assertIsNotNone(facts) for fact in facts: self.assertEqual(len(fact.fact), 1) self.assertEqual(fact.fact['nested'][0]['name'], 'acpid') - def test_get_single_facts_ok(self): - self.setup_test_fact_data() + def test_single_host(self): + self.hosts = [self.hostnames[i].hostname for i in range(0, 1)] + facts = Fact.get_single_facts(self.hosts, 'name', 'acpid', self.timestamp, 'packages') - timestamp = datetime.now().replace(year=2016) - facts = Fact.get_single_facts(self.hostnames, 'name', 'acpid', timestamp, 'packages') - self.assertIsNotNone(facts) + self.check_query_results(self.fact_objs[0][:1], facts) - self.check_query_results(self.fact_objs, facts) + def test_all(self): + facts = Fact.get_single_facts(self.hosts, 'name', 'acpid', self.timestamp, 'packages') - def test_get_single_facts_subset_by_timestamp(self): - self.setup_test_fact_data() + self.check_query_results(self.fact_objs[0], facts) - timestamp = datetime.now().replace(year=2010) - facts = Fact.get_single_facts(self.hostnames, 'name', 'acpid', timestamp, 'packages') - self.assertIsNotNone(facts) + def test_subset_hosts(self): + self.hosts = [self.hostnames[i].hostname for i 
in range(0, (self.host_count / 2))] + facts = Fact.get_single_facts(self.hosts, 'name', 'acpid', self.timestamp, 'packages') - self.check_query_results(self.fact_objs[4:], facts) + self.check_query_results(self.fact_objs[0][:(self.host_count / 2)], facts) def test_get_single_facts_nested(self): - self.setup_test_fact_nested_data() - - timestamp = datetime.now().replace(year=2016) - facts = Fact.get_single_facts(self.hostnames, 'nested.name', 'acpid', timestamp, 'packages') - self.assertIsNotNone(facts) + facts = Fact.get_single_facts(self.hosts, 'nested.name', 'acpid', self.timestamp, 'packages') self.check_query_results_nested(facts) + +class FactGetSingleFactsMultipleScansTest(BaseFactTest): + def setUp(self): + super(FactGetSingleFactsMultipleScansTest, self).setUp() + self.create_fact_scans(TEST_FACT_DATA, host_count=10, scan_count=10) + + def test_1_host(self): + timestamp = datetime.now().replace(year=2016) + facts = Fact.get_single_facts([self.hostnames[0].hostname], 'name', 'acpid', timestamp, 'packages') + self.assertEqual(len(facts), 1) + self.assertEqual(facts[0], self.fact_objs[0][0]) + + def test_multiple_hosts(self): + timestamp = datetime.now().replace(year=2016) + hosts = [self.hostnames[i].hostname for i in range(0, 3)] + facts = Fact.get_single_facts(hosts, 'name', 'acpid', timestamp, 'packages') + self.assertEqual(len(facts), 3) + for i, fact in enumerate(facts): + self.assertEqual(fact, self.fact_objs[0][i]) + + def test_middle_of_timeline(self): + timestamp = datetime.now().replace(year=2013) + hosts = [self.hostnames[i].hostname for i in range(0, 3)] + facts = Fact.get_single_facts(hosts, 'name', 'acpid', timestamp, 'packages') + self.assertEqual(len(facts), 3) + for i, fact in enumerate(facts): + self.assertEqual(fact, self.fact_objs[2][i]) diff --git a/awx/fact/tests/models/fact/fact_simple.py b/awx/fact/tests/models/fact/fact_simple.py index 9103416c22..65b1d960ed 100644 --- a/awx/fact/tests/models/fact/fact_simple.py +++ 
b/awx/fact/tests/models/fact/fact_simple.py @@ -51,18 +51,6 @@ TEST_FACT_DATA = { # Strip off microseconds because mongo has less precision BaseFactTest.normalize_timestamp(TEST_FACT_DATA) -def create_fact_scans(count=1): - timestamps = [] - for i in range(0, count): - data = deepcopy(TEST_FACT_DATA) - t = datetime.now().replace(year=2015 - i, microsecond=0) - data['add_fact_data']['timestamp'] = t - (f, v) = Fact.add_fact(**data['add_fact_data']) - timestamps.append(t) - - return timestamps - - class FactHostTest(BaseFactTest): def test_create_host(self): host = FactHost(hostname=TEST_FACT_DATA['hostname']) @@ -138,13 +126,13 @@ class FactGetHostVersionTest(BaseFactTest): class FactGetHostTimelineTest(BaseFactTest): def setUp(self): super(FactGetHostTimelineTest, self).setUp() - self.create_host_document(TEST_FACT_DATA) + #self.create_host_document(TEST_FACT_DATA) self.scans = 20 - self.timestamps = create_fact_scans(self.scans) + self.timestamps = self.create_fact_scans(TEST_FACT_DATA, host_count=1, scan_count=self.scans) def test_get_host_timeline_ok(self): - timestamps = Fact.get_host_timeline(hostname=TEST_FACT_DATA['hostname'], module=TEST_FACT_DATA['add_fact_data']['module']) + timestamps = Fact.get_host_timeline(hostname=self.hostnames[0].hostname, module=TEST_FACT_DATA['add_fact_data']['module']) self.assertIsNotNone(timestamps) self.assertEqual(len(timestamps), len(self.timestamps)) for i in range(0, self.scans):