fixed single_fact query

For each host, return only the first fact (the most recent one) whose timestamp is less than or equal to the query timestamp.
This commit is contained in:
Chris Meyers
2015-05-06 10:50:51 -04:00
parent e784595119
commit 391a4aa439
4 changed files with 120 additions and 73 deletions

View File

@@ -101,22 +101,55 @@ class Fact(Document):
@staticmethod
def get_single_facts(hostnames, fact_key, fact_value, timestamp, module):
host_ids = FactHost.objects.filter(hostname__in=hostnames).values_list('id')
if not host_ids or len(host_ids) == 0:
return None
kv = {
'host__in': host_ids,
'timestamp__lte': timestamp,
'module': module,
'hostname': {
'$in': hostnames,
}
}
facts = FactVersion.objects.filter(**kv).values_list('fact')
if not facts or len(facts) == 0:
fields = {
'_id': 1
}
host_ids = FactHost._get_collection().find(kv, fields)
if not host_ids or host_ids.count() == 0:
return None
# TODO: Make sure the below doesn't trigger a query to get the fact record
# It's unclear as to if mongoengine will query the full fact when the id is referenced.
# This is not a logic problem, but a performance problem.
fact_ids = [fact.id for fact in facts]
# TODO: use mongo to transform [{_id: <>}, {_id: <>},...] into [_id, _id,...]
host_ids = [e['_id'] for e in host_ids]
pipeline = []
match = {
'host': {
'$in': host_ids
},
'timestamp': {
'$lte': timestamp
},
'module': module
}
sort = {
'timestamp': -1
}
group = {
'_id': '$host',
'timestamp': {
'$first': '$timestamp'
},
'fact': {
'$first': '$fact'
}
}
project = {
'_id': 0,
'fact': 1,
}
pipeline.append({'$match': match}) # noqa
pipeline.append({'$sort': sort}) # noqa
pipeline.append({'$group': group}) # noqa
pipeline.append({'$project': project}) # noqa
q = FactVersion._get_collection().aggregate(pipeline)
if not q or 'result' not in q or len(q['result']) == 0:
return None
# TODO: use mongo to transform [{fact: <>}, {fact: <>},...] into [fact, fact,...]
fact_ids = [fact['fact'] for fact in q['result']]
kv = {
'fact.%s' % fact_key : fact_value,

View File

@@ -4,6 +4,8 @@
# Python
from __future__ import absolute_import
from awx.main.tests.base import BaseTest, MongoDBRequired
from copy import deepcopy
from datetime import datetime
# AWX
from awx.fact.models.fact import * # noqa
@@ -32,3 +34,24 @@ class BaseFactTest(BaseTest, MongoDBRequired):
def create_host_document(self, data):
    """Save a FactHost for ``data['hostname']`` and attach it to ``data``.

    The value returned by ``save()`` is stored in place under
    ``data['add_fact_data']['host']``; nothing is returned.
    """
    host_document = FactHost(hostname=data['hostname'])
    data['add_fact_data']['host'] = host_document.save()
def create_fact_scans(self, data, host_count=1, scan_count=1):
    """Create ``scan_count`` fact scans for each of ``host_count`` new hosts.

    One FactHost named ``<data['hostname']>_<index>`` is saved per host and
    kept in ``self.hostnames`` (these are the saved host objects, not bare
    strings). Scan ``i`` is stamped with the current time moved to year
    ``2015 - i``, so scan 0 is the newest. For scan ``i`` and host ``j`` the
    input used is recorded in ``self.fact_data[i][j]`` and the first element
    of the ``Fact.add_fact`` result in ``self.fact_objs[i][j]``.

    The caller's ``data`` is never modified; every fact is built from a
    deep copy.

    Returns the list of timestamps used, one entry per (scan, host) pair in
    creation order.
    """
    timestamps = []
    self.fact_data = []
    self.fact_objs = []
    self.hostnames = [
        FactHost(hostname='%s_%s' % (data['hostname'], index)).save()
        for index in range(host_count)
    ]

    # Read the clock once so every host in a scan shares the same timestamp.
    # Microseconds are dropped because mongo stores less precision.
    base_time = datetime.now().replace(microsecond=0)
    # replace(year=...) raises ValueError for Feb 29 when the target year is
    # not a leap year, so step back one day before shifting years.
    if (base_time.month, base_time.day) == (2, 29):
        base_time = base_time.replace(day=28)

    for scan_index in range(scan_count):
        scan_time = base_time.replace(year=2015 - scan_index)
        scan_data = []
        scan_objs = []
        self.fact_data.append(scan_data)
        self.fact_objs.append(scan_objs)
        for host in self.hostnames:
            host_data = deepcopy(data)
            host_data['add_fact_data']['timestamp'] = scan_time
            host_data['add_fact_data']['host'] = host
            fact_obj, _version_obj = Fact.add_fact(**host_data['add_fact_data'])
            timestamps.append(scan_time)
            scan_data.append(host_data)
            scan_objs.append(fact_obj)
    return timestamps

View File

@@ -4,7 +4,6 @@
# Python
from __future__ import absolute_import
from datetime import datetime
from copy import deepcopy
# Django
@@ -12,27 +11,24 @@ from copy import deepcopy
from awx.fact.models.fact import * # noqa
from .base import BaseFactTest
__all__ = ['FactGetSingleFactsTest']
__all__ = ['FactGetSingleFactsTest', 'FactGetSingleFactsMultipleScansTest',]
TEST_FACT_PACKAGES = [
{
"name": "accountsservice",
"architecture": "amd64",
"name": "accountsservice",
"source": "apt",
"version": "0.6.35-0ubuntu7.1"
},
{
"name": "acpid",
"architecture": "amd64",
"name": "acpid",
"source": "apt",
"version": "1:2.0.21-1ubuntu2"
},
{
"name": "adduser",
"architecture": "all",
"name": "adduser",
"source": "apt",
"version": "3.113+nmu3ubuntu3"
},
@@ -62,32 +58,16 @@ TEST_FACT_NESTED_DATA = {
class FactGetSingleFactsTest(BaseFactTest):
def create_fact_scans_unique_hosts(self, data, host_count):
self.fact_data = []
self.fact_objs = []
self.hostnames = []
for i in range(1, host_count + 1):
fact_data = deepcopy(data)
fact_data['hostname'] = fact_data['hostname'] % (i)
fact_data['add_fact_data']['timestamp'] = datetime.now().replace(year=2015 - i)
BaseFactTest.normalize_timestamp(fact_data)
self.create_host_document(fact_data)
(fact_obj, version_obj) = Fact.add_fact(**fact_data['add_fact_data'])
self.fact_data.append(fact_data)
self.fact_objs.append(fact_obj)
self.hostnames.append(fact_data['hostname'])
def setup_test_fact_data(self):
def setUp(self):
super(FactGetSingleFactsTest, self).setUp()
self.host_count = 20
self.create_fact_scans_unique_hosts(TEST_FACT_DATA, self.host_count)
def setup_test_fact_nested_data(self):
self.host_count = 20
self.create_fact_scans_unique_hosts(TEST_FACT_NESTED_DATA, self.host_count)
self.timestamp = datetime.now().replace(year=2016)
self.create_fact_scans(TEST_FACT_DATA, self.host_count, scan_count=1)
self.hosts = [self.hostnames[i].hostname for i in range(0, self.host_count)]
def check_query_results(self, facts_known, facts):
self.assertIsNotNone(facts)
self.assertEqual(len(facts_known), len(facts), "More or less facts found than expected")
# Ensure only 'acpid' is returned
for fact in facts:
self.assertEqual(len(fact.fact), 1)
@@ -109,33 +89,56 @@ class FactGetSingleFactsTest(BaseFactTest):
self.assertEqual(len(facts_dict[key].fact), 1)
def check_query_results_nested(self, facts):
self.assertIsNotNone(facts)
for fact in facts:
self.assertEqual(len(fact.fact), 1)
self.assertEqual(fact.fact['nested'][0]['name'], 'acpid')
def test_get_single_facts_ok(self):
self.setup_test_fact_data()
def test_single_host(self):
self.hosts = [self.hostnames[i].hostname for i in range(0, 1)]
facts = Fact.get_single_facts(self.hosts, 'name', 'acpid', self.timestamp, 'packages')
timestamp = datetime.now().replace(year=2016)
facts = Fact.get_single_facts(self.hostnames, 'name', 'acpid', timestamp, 'packages')
self.assertIsNotNone(facts)
self.check_query_results(self.fact_objs[0][:1], facts)
self.check_query_results(self.fact_objs, facts)
def test_all(self):
facts = Fact.get_single_facts(self.hosts, 'name', 'acpid', self.timestamp, 'packages')
def test_get_single_facts_subset_by_timestamp(self):
self.setup_test_fact_data()
self.check_query_results(self.fact_objs[0], facts)
timestamp = datetime.now().replace(year=2010)
facts = Fact.get_single_facts(self.hostnames, 'name', 'acpid', timestamp, 'packages')
self.assertIsNotNone(facts)
def test_subset_hosts(self):
self.hosts = [self.hostnames[i].hostname for i in range(0, (self.host_count / 2))]
facts = Fact.get_single_facts(self.hosts, 'name', 'acpid', self.timestamp, 'packages')
self.check_query_results(self.fact_objs[4:], facts)
self.check_query_results(self.fact_objs[0][:(self.host_count / 2)], facts)
def test_get_single_facts_nested(self):
self.setup_test_fact_nested_data()
timestamp = datetime.now().replace(year=2016)
facts = Fact.get_single_facts(self.hostnames, 'nested.name', 'acpid', timestamp, 'packages')
self.assertIsNotNone(facts)
facts = Fact.get_single_facts(self.hosts, 'nested.name', 'acpid', self.timestamp, 'packages')
self.check_query_results_nested(facts)
class FactGetSingleFactsMultipleScansTest(BaseFactTest):
    """Check Fact.get_single_facts when every host has several scans.

    setUp creates 10 scans for each of 10 hosts through create_fact_scans,
    which stamps scan ``i`` with year ``2015 - i``; scan 0 is the newest.
    """

    def setUp(self):
        super(FactGetSingleFactsMultipleScansTest, self).setUp()
        self.create_fact_scans(TEST_FACT_DATA, host_count=10, scan_count=10)

    def test_1_host(self):
        # A cutoff in 2016 is later than every scan; expect the newest one.
        cutoff = datetime.now().replace(year=2016)
        wanted_hosts = [self.hostnames[0].hostname]
        found = Fact.get_single_facts(wanted_hosts, 'name', 'acpid', cutoff, 'packages')
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0], self.fact_objs[0][0])

    def test_multiple_hosts(self):
        # Same cutoff as above, but for the first three hosts.
        cutoff = datetime.now().replace(year=2016)
        wanted_hosts = [host.hostname for host in self.hostnames[:3]]
        found = Fact.get_single_facts(wanted_hosts, 'name', 'acpid', cutoff, 'packages')
        self.assertEqual(len(found), 3)
        for fact, expected in zip(found, self.fact_objs[0]):
            self.assertEqual(fact, expected)

    def test_middle_of_timeline(self):
        # A cutoff in 2013 should select the scan stamped 2013 (index 2).
        cutoff = datetime.now().replace(year=2013)
        wanted_hosts = [host.hostname for host in self.hostnames[:3]]
        found = Fact.get_single_facts(wanted_hosts, 'name', 'acpid', cutoff, 'packages')
        self.assertEqual(len(found), 3)
        for fact, expected in zip(found, self.fact_objs[2]):
            self.assertEqual(fact, expected)

View File

@@ -51,18 +51,6 @@ TEST_FACT_DATA = {
# Strip off microseconds because mongo has less precision
BaseFactTest.normalize_timestamp(TEST_FACT_DATA)
def create_fact_scans(count=1):
    """Add ``count`` fact scans built from TEST_FACT_DATA.

    Scan ``i`` is stamped with the current time moved to year ``2015 - i``,
    so the first scan is the newest. TEST_FACT_DATA itself is not modified;
    each scan is added from a deep copy.

    Returns the list of timestamps used, in creation order.
    """
    # Read the clock once; microseconds are dropped because mongo stores
    # less precision.
    base_time = datetime.now().replace(microsecond=0)
    # replace(year=...) raises ValueError for Feb 29 when the target year is
    # not a leap year, so step back one day before shifting years.
    if (base_time.month, base_time.day) == (2, 29):
        base_time = base_time.replace(day=28)

    timestamps = []
    for scan_index in range(count):
        scan_data = deepcopy(TEST_FACT_DATA)
        scan_time = base_time.replace(year=2015 - scan_index)
        scan_data['add_fact_data']['timestamp'] = scan_time
        Fact.add_fact(**scan_data['add_fact_data'])
        timestamps.append(scan_time)
    return timestamps
class FactHostTest(BaseFactTest):
def test_create_host(self):
host = FactHost(hostname=TEST_FACT_DATA['hostname'])
@@ -138,13 +126,13 @@ class FactGetHostVersionTest(BaseFactTest):
class FactGetHostTimelineTest(BaseFactTest):
def setUp(self):
super(FactGetHostTimelineTest, self).setUp()
self.create_host_document(TEST_FACT_DATA)
#self.create_host_document(TEST_FACT_DATA)
self.scans = 20
self.timestamps = create_fact_scans(self.scans)
self.timestamps = self.create_fact_scans(TEST_FACT_DATA, host_count=1, scan_count=self.scans)
def test_get_host_timeline_ok(self):
timestamps = Fact.get_host_timeline(hostname=TEST_FACT_DATA['hostname'], module=TEST_FACT_DATA['add_fact_data']['module'])
timestamps = Fact.get_host_timeline(hostname=self.hostnames[0].hostname, module=TEST_FACT_DATA['add_fact_data']['module'])
self.assertIsNotNone(timestamps)
self.assertEqual(len(timestamps), len(self.timestamps))
for i in range(0, self.scans):