[constructed-inventory] Save facts on model for original host (#13700)

* Save facts on model for original host

Redirect to original host for ansible facts

Use current inventory hosts for facts instance_id filter
Thanks to Gabe for identifying this bug

* Fix spelling of queryset

Co-authored-by: Rick Elrod <rick@elrod.me>

* Fix sign error with facts expiry - from review

---------

Co-authored-by: Rick Elrod <rick@elrod.me>
Alan Rominger 2023-03-25 21:43:41 -04:00 committed by Rick Elrod
parent 3f5a4cb6f1
commit 16ad27099e
7 changed files with 119 additions and 78 deletions
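For context: a host in a constructed inventory mirrors a host in an input inventory and records the original host's primary key (as a string) in its instance_id field; both the serializer links and the fact-cache queryset below rely on that linkage. A minimal sketch of it, assuming a configured AWX/Django environment and hypothetical inventory names (in real AWX the mirror rows are created by the constructed inventory update, not by hand):

from awx.main.models import Host, Inventory

# Hypothetical objects for illustration; not part of this commit.
input_inv = Inventory.objects.get(name='my-input-inventory')        # assumed to exist
constructed_inv = Inventory.objects.get(name='my-constructed-inv')  # kind == 'constructed'

original = Host.objects.create(inventory=input_inv, name='node1')
mirror = Host.objects.create(
    inventory=constructed_inv,
    name='node1',
    instance_id=str(original.id),  # points back at the source-of-truth host
)
assert int(mirror.instance_id) == original.id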

View File

@@ -1883,6 +1883,7 @@ class HostSerializer(BaseSerializerWithVariables):
)
if obj.inventory.kind == 'constructed':
res['original_host'] = self.reverse('api:host_detail', kwargs={'pk': obj.instance_id})
res['ansible_facts'] = self.reverse('api:host_ansible_facts_detail', kwargs={'pk': obj.instance_id})
if obj.inventory:
res['inventory'] = self.reverse('api:inventory_detail', kwargs={'pk': obj.inventory.pk})
if obj.last_job:
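With this change, both related links for a constructed-inventory host point at the original host. An illustrative shape of the result (pk 42 is made up):

# Illustrative 'related' links for a constructed host whose instance_id is '42':
related = {
    'original_host': '/api/v2/hosts/42/',
    'ansible_facts': '/api/v2/hosts/42/ansible_facts/',
}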

View File

@@ -29,7 +29,7 @@ from django.utils.safestring import mark_safe
from django.utils.timezone import now
from django.views.decorators.csrf import csrf_exempt
from django.template.loader import render_to_string
from django.http import HttpResponse
from django.http import HttpResponse, HttpResponseRedirect
from django.contrib.contenttypes.models import ContentType
from django.utils.translation import gettext_lazy as _
@@ -1619,6 +1619,14 @@ class HostAnsibleFactsDetail(RetrieveAPIView):
model = models.Host
serializer_class = serializers.AnsibleFactsSerializer
def get(self, request, *args, **kwargs):
obj = self.get_object()
if obj.inventory.kind == 'constructed':
# If this is a constructed inventory host, it is not the source of truth about facts
# redirect to the original input inventory host instead
return HttpResponseRedirect(reverse('api:host_ansible_facts_detail', kwargs={'pk': obj.instance_id}, request=self.request))
return super().get(request, *args, **kwargs)
class InventoryHostsList(HostRelatedSearchMixin, SubListCreateAttachDetachAPIView):
model = models.Host
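From a client's perspective, requesting facts for a constructed host now yields a redirect to the input-inventory host. A sketch using the requests library, with a hypothetical base URL and host ids (auth omitted):

import requests

# Hypothetical ids: host 99 is in a constructed inventory whose
# instance_id points at original host 42.
resp = requests.get(
    'https://awx.example.org/api/v2/hosts/99/ansible_facts/',
    allow_redirects=False,  # observe the redirect instead of following it
)
assert resp.status_code == 302
assert resp.headers['Location'].endswith('/api/v2/hosts/42/ansible_facts/')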

View File

@@ -111,3 +111,6 @@ ANSIBLE_RUNNER_NEEDS_UPDATE_MESSAGE = (
# Values for setting SUBSCRIPTION_USAGE_MODEL
SUBSCRIPTION_USAGE_MODEL_UNIQUE_HOSTS = 'unique_managed_hosts'
# Shared list of fields to load when building a host queryset for writing or finalizing facts
HOST_FACTS_FIELDS = ('name', 'ansible_facts', 'ansible_facts_modified', 'modified', 'inventory_id')

View File

@@ -11,6 +11,7 @@ from urllib.parse import urljoin
from django.conf import settings
from django.core.exceptions import ValidationError
from django.db import models
from django.db.models.functions import Cast
# from django.core.cache import cache
from django.utils.translation import gettext_lazy as _
@@ -21,6 +22,7 @@ from rest_framework.exceptions import ParseError
# AWX
from awx.api.versioning import reverse
from awx.main.constants import HOST_FACTS_FIELDS
from awx.main.models.base import (
BaseModel,
CreatedModifiedModel,
@@ -834,6 +836,27 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
def get_notification_friendly_name(self):
return "Job"
def get_hosts_for_fact_cache(self):
"""
Builds the queryset to use for writing or finalizing the fact cache;
these need to be the 'real' hosts associated with the job.
For constructed inventories, that means the original (input inventory) hosts;
when slicing, that means only returning hosts in that slice.
"""
Host = JobHostSummary._meta.get_field('host').related_model
if not self.inventory_id:
return Host.objects.none()
if self.inventory.kind == 'constructed':
id_field = Host._meta.get_field('id')
host_qs = Host.objects.filter(id__in=self.inventory.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field)))
else:
host_qs = self.inventory.hosts
host_qs = host_qs.only(*HOST_FACTS_FIELDS)
host_qs = self.inventory.get_sliced_hosts(host_qs, self.job_slice_number, self.job_slice_count)
return host_qs
class LaunchTimeConfigBase(BaseModel):
"""

View File

@@ -12,28 +12,16 @@ from django.utils.timezone import now
# AWX
from awx.main.utils.common import log_excess_runtime
from awx.main.models.inventory import Host
logger = logging.getLogger('awx.main.tasks.facts')
system_tracking_logger = logging.getLogger('awx.analytics.system_tracking')
def _get_inventory_hosts(inventory, slice_number, slice_count, only=('name', 'ansible_facts', 'ansible_facts_modified', 'modified', 'inventory_id'), **filters):
"""Return value is an iterable for the relevant hosts for this job"""
if not inventory:
return []
host_queryset = inventory.hosts.only(*only)
if filters:
host_queryset = host_queryset.filter(**filters)
host_queryset = inventory.get_sliced_hosts(host_queryset, slice_number, slice_count)
if isinstance(host_queryset, QuerySet):
return host_queryset.iterator()
return host_queryset
@log_excess_runtime(logger, debug_cutoff=0.01, msg='Inventory {inventory_id} host facts prepared for {written_ct} hosts, took {delta:.3f} s', add_log_data=True)
def start_fact_cache(inventory, destination, log_data, timeout=None, slice_number=0, slice_count=1):
log_data['inventory_id'] = inventory.id
def start_fact_cache(hosts, destination, log_data, timeout=None, inventory_id=None):
log_data['inventory_id'] = inventory_id
log_data['written_ct'] = 0
try:
os.makedirs(destination, mode=0o700)
@@ -42,15 +30,14 @@ def start_fact_cache(inventory, destination, log_data, timeout=None, slice_numbe
if timeout is None:
timeout = settings.ANSIBLE_FACT_CACHE_TIMEOUT
if timeout > 0:
# exclude hosts with fact data older than `settings.ANSIBLE_FACT_CACHE_TIMEOUT` seconds
timeout = now() - datetime.timedelta(seconds=timeout)
hosts = _get_inventory_hosts(inventory, slice_number, slice_count, ansible_facts_modified__gte=timeout)
else:
hosts = _get_inventory_hosts(inventory, slice_number, slice_count)
if isinstance(hosts, QuerySet):
hosts = hosts.iterator()
last_filepath_written = None
for host in hosts:
if (not host.ansible_facts_modified) or (timeout and host.ansible_facts_modified < now() - datetime.timedelta(seconds=timeout)):
continue # facts are expired - do not write them
filepath = os.sep.join(map(str, [destination, host.name]))
if not os.path.realpath(filepath).startswith(destination):
system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
@@ -76,13 +63,17 @@ def start_fact_cache(inventory, destination, log_data, timeout=None, slice_numbe
msg='Inventory {inventory_id} host facts: updated {updated_ct}, cleared {cleared_ct}, unchanged {unmodified_ct}, took {delta:.3f} s',
add_log_data=True,
)
def finish_fact_cache(inventory, destination, facts_write_time, log_data, slice_number=0, slice_count=1, job_id=None):
log_data['inventory_id'] = inventory.id
def finish_fact_cache(hosts, destination, facts_write_time, log_data, job_id=None, inventory_id=None):
log_data['inventory_id'] = inventory_id
log_data['updated_ct'] = 0
log_data['unmodified_ct'] = 0
log_data['cleared_ct'] = 0
if isinstance(hosts, QuerySet):
hosts = hosts.iterator()
hosts_to_update = []
for host in _get_inventory_hosts(inventory, slice_number, slice_count):
for host in hosts:
filepath = os.sep.join(map(str, [destination, host.name]))
if not os.path.realpath(filepath).startswith(destination):
system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
@@ -120,7 +111,7 @@ def finish_fact_cache(inventory, destination, facts_write_time, log_data, slice_
system_tracking_logger.info('Facts cleared for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name)))
log_data['cleared_ct'] += 1
if len(hosts_to_update) > 100:
inventory.hosts.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])
Host.objects.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])
hosts_to_update = []
if hosts_to_update:
inventory.hosts.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])
Host.objects.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])
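The "sign error" mentioned in the commit message concerns the direction of the expiry comparison in start_fact_cache: facts are expired when they were last modified before now minus the timeout, not after. A minimal standalone sketch of the check used in the loop above:

import datetime
from django.utils.timezone import now

def facts_expired(ansible_facts_modified, timeout_seconds):
    # No recorded modification time means nothing fresh enough to write.
    if not ansible_facts_modified:
        return True
    if not timeout_seconds:
        return False  # a timeout of 0 disables expiry entirely
    # Expired when last modified *before* the cutoff (now - timeout).
    return ansible_facts_modified < now() - datetime.timedelta(seconds=timeout_seconds)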

View File

@@ -37,6 +37,7 @@ from awx.main.constants import (
MAX_ISOLATED_PATH_COLON_DELIMITER,
CONTAINER_VOLUMES_MOUNT_TYPES,
ACTIVE_STATES,
HOST_FACTS_FIELDS,
)
from awx.main.models import (
Instance,
@@ -1084,10 +1085,7 @@ class RunJob(SourceControlMixin, BaseTask):
if self.should_use_fact_cache():
job.log_lifecycle("start_job_fact_cache")
self.facts_write_time = start_fact_cache(
job.inventory,
os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'),
slice_number=job.job_slice_number,
slice_count=job.job_slice_count,
job.get_hosts_for_fact_cache(), os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'), inventory_id=job.inventory_id
)
def build_project_dir(self, job, private_data_dir):
@@ -1105,12 +1103,11 @@ class RunJob(SourceControlMixin, BaseTask):
if self.should_use_fact_cache():
job.log_lifecycle("finish_job_fact_cache")
finish_fact_cache(
job.inventory,
job.get_hosts_for_fact_cache(),
os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'),
facts_write_time=self.facts_write_time,
slice_number=job.job_slice_number,
slice_count=job.job_slice_count,
job_id=job.id,
inventory_id=job.inventory_id,
)
def final_run_hook(self, job, status, private_data_dir):
@@ -1555,7 +1552,11 @@ class RunInventoryUpdate(SourceControlMixin, BaseTask):
source_inv_path = self.write_inventory_file(input_inventory, private_data_dir, f'hosts_{input_inventory.id}', script_params)
args.append(to_container_path(source_inv_path, private_data_dir))
# Include any facts from input inventories so they can be used in filters
start_fact_cache(input_inventory, os.path.join(private_data_dir, 'artifacts', str(inventory_update.id), 'fact_cache'))
start_fact_cache(
input_inventory.hosts.only(*HOST_FACTS_FIELDS),
os.path.join(private_data_dir, 'artifacts', str(inventory_update.id), 'fact_cache'),
inventory_id=input_inventory.id,
)
# Add arguments for the source inventory file/script/thing
rel_path = self.pseudo_build_inventory(inventory_update, private_data_dir)
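The net effect on callers: both cache functions now take an explicit hosts iterable (or queryset) instead of an inventory, with log_data injected by the log_excess_runtime decorator. A sketch of the updated calling convention, with illustrative variables mirroring the call sites above:

import os

hosts = job.get_hosts_for_fact_cache()  # original hosts, already sliced
dest = os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache')

write_time = start_fact_cache(hosts, dest, inventory_id=job.inventory_id)
# ... the playbook runs against the cache directory here ...
finish_fact_cache(hosts, dest, write_time, job_id=job.id, inventory_id=job.inventory_id)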

View File

@@ -6,40 +6,35 @@ import time
import pytest
from awx.main.models import (
Job,
Inventory,
Host,
)
from awx.main.tasks.facts import start_fact_cache, finish_fact_cache
from django.utils.timezone import now
from datetime import timedelta
@pytest.fixture
def hosts():
def ref_time():
return now() - timedelta(seconds=5)
@pytest.fixture
def hosts(ref_time):
inventory = Inventory(id=5)
return [
Host(name='host1', ansible_facts={"a": 1, "b": 2}, inventory=inventory),
Host(name='host2', ansible_facts={"a": 1, "b": 2}, inventory=inventory),
Host(name='host3', ansible_facts={"a": 1, "b": 2}, inventory=inventory),
Host(name=u'Iñtërnâtiônàlizætiøn', ansible_facts={"a": 1, "b": 2}, inventory=inventory),
Host(name='host1', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=ref_time, inventory=inventory),
Host(name='host2', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=ref_time, inventory=inventory),
Host(name='host3', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=ref_time, inventory=inventory),
Host(name=u'Iñtërnâtiônàlizætiøn', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=ref_time, inventory=inventory),
]
@pytest.fixture
def inventory(mocker, hosts):
mocker.patch('awx.main.tasks.facts._get_inventory_hosts', return_value=hosts)
return Inventory(id=5)
@pytest.fixture
def job(mocker, inventory):
j = Job(inventory=inventory, id=2)
# j._get_inventory_hosts = mocker.Mock(return_value=hosts)
return j
def test_start_job_fact_cache(hosts, job, inventory, tmpdir):
def test_start_job_fact_cache(hosts, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
last_modified = start_fact_cache(inventory, fact_cache, timeout=0)
last_modified = start_fact_cache(hosts, fact_cache, timeout=0)
for host in hosts:
filepath = os.path.join(fact_cache, host.name)
@@ -49,26 +44,43 @@ def test_start_job_fact_cache(hosts, job, inventory, tmpdir):
assert os.path.getmtime(filepath) <= last_modified
def test_fact_cache_with_invalid_path_traversal(job, inventory, tmpdir, mocker):
mocker.patch(
'awx.main.tasks.facts._get_inventory_hosts',
return_value=[
Host(
name='../foo',
ansible_facts={"a": 1, "b": 2},
),
],
)
def test_fact_cache_with_invalid_path_traversal(tmpdir):
hosts = [
Host(
name='../foo',
ansible_facts={"a": 1, "b": 2},
),
]
fact_cache = os.path.join(tmpdir, 'facts')
start_fact_cache(inventory, fact_cache, timeout=0)
start_fact_cache(hosts, fact_cache, timeout=0)
# a file called "foo" should _not_ be written outside the facts dir
assert os.listdir(os.path.join(fact_cache, '..')) == ['facts']
def test_finish_job_fact_cache_with_existing_data(job, hosts, inventory, mocker, tmpdir):
def test_start_job_fact_cache_past_timeout(hosts, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
last_modified = start_fact_cache(inventory, fact_cache, timeout=0)
# the hosts fixture was modified 5s ago, which is more than 2s
last_modified = start_fact_cache(hosts, fact_cache, timeout=2)
assert last_modified is None
for host in hosts:
assert not os.path.exists(os.path.join(fact_cache, host.name))
def test_start_job_fact_cache_within_timeout(hosts, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
# the hosts fixture was modified 5s ago, which is less than 7s
last_modified = start_fact_cache(hosts, fact_cache, timeout=7)
assert last_modified
for host in hosts:
assert os.path.exists(os.path.join(fact_cache, host.name))
def test_finish_job_fact_cache_with_existing_data(hosts, mocker, tmpdir, ref_time):
fact_cache = os.path.join(tmpdir, 'facts')
last_modified = start_fact_cache(hosts, fact_cache, timeout=0)
bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update')
@@ -84,18 +96,19 @@ def test_finish_job_fact_cache_with_existing_data(job, hosts, inventory, mocker,
new_modification_time = time.time() + 3600
os.utime(filepath, (new_modification_time, new_modification_time))
finish_fact_cache(inventory, fact_cache, last_modified)
finish_fact_cache(hosts, fact_cache, last_modified)
for host in (hosts[0], hosts[2], hosts[3]):
assert host.ansible_facts == {"a": 1, "b": 2}
assert host.ansible_facts_modified is None
assert host.ansible_facts_modified == ref_time
assert hosts[1].ansible_facts == ansible_facts_new
assert hosts[1].ansible_facts_modified > ref_time
bulk_update.assert_called_once_with([hosts[1]], ['ansible_facts', 'ansible_facts_modified'])
def test_finish_job_fact_cache_with_bad_data(job, hosts, inventory, mocker, tmpdir):
def test_finish_job_fact_cache_with_bad_data(hosts, mocker, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
last_modified = start_fact_cache(inventory, fact_cache, timeout=0)
last_modified = start_fact_cache(hosts, fact_cache, timeout=0)
bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update')
@@ -107,22 +120,23 @@ def test_finish_job_fact_cache_with_bad_data(job, hosts, inventory, mocker, tmpd
new_modification_time = time.time() + 3600
os.utime(filepath, (new_modification_time, new_modification_time))
finish_fact_cache(inventory, fact_cache, last_modified)
finish_fact_cache(hosts, fact_cache, last_modified)
bulk_update.assert_not_called()
def test_finish_job_fact_cache_clear(job, hosts, inventory, mocker, tmpdir):
def test_finish_job_fact_cache_clear(hosts, mocker, ref_time, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
last_modified = start_fact_cache(inventory, fact_cache, timeout=0)
last_modified = start_fact_cache(hosts, fact_cache, timeout=0)
bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update')
os.remove(os.path.join(fact_cache, hosts[1].name))
finish_fact_cache(inventory, fact_cache, last_modified)
finish_fact_cache(hosts, fact_cache, last_modified)
for host in (hosts[0], hosts[2], hosts[3]):
assert host.ansible_facts == {"a": 1, "b": 2}
assert host.ansible_facts_modified is None
assert host.ansible_facts_modified == ref_time
assert hosts[1].ansible_facts == {}
assert hosts[1].ansible_facts_modified > ref_time
bulk_update.assert_called_once_with([hosts[1]], ['ansible_facts', 'ansible_facts_modified'])