diff --git a/awx/api/serializers.py b/awx/api/serializers.py index de5c0661fc..4b3a62c841 100644 --- a/awx/api/serializers.py +++ b/awx/api/serializers.py @@ -1883,6 +1883,7 @@ class HostSerializer(BaseSerializerWithVariables): ) if obj.inventory.kind == 'constructed': res['original_host'] = self.reverse('api:host_detail', kwargs={'pk': obj.instance_id}) + res['ansible_facts'] = self.reverse('api:host_ansible_facts_detail', kwargs={'pk': obj.instance_id}) if obj.inventory: res['inventory'] = self.reverse('api:inventory_detail', kwargs={'pk': obj.inventory.pk}) if obj.last_job: diff --git a/awx/api/views/__init__.py b/awx/api/views/__init__.py index d023f92984..e7f1d5cf8a 100644 --- a/awx/api/views/__init__.py +++ b/awx/api/views/__init__.py @@ -29,7 +29,7 @@ from django.utils.safestring import mark_safe from django.utils.timezone import now from django.views.decorators.csrf import csrf_exempt from django.template.loader import render_to_string -from django.http import HttpResponse +from django.http import HttpResponse, HttpResponseRedirect from django.contrib.contenttypes.models import ContentType from django.utils.translation import gettext_lazy as _ @@ -1619,6 +1619,14 @@ class HostAnsibleFactsDetail(RetrieveAPIView): model = models.Host serializer_class = serializers.AnsibleFactsSerializer + def get(self, request, *args, **kwargs): + obj = self.get_object() + if obj.inventory.kind == 'constructed': + # If this is a constructed inventory host, it is not the source of truth about facts + # redirect to the original input inventory host instead + return HttpResponseRedirect(reverse('api:host_ansible_facts_detail', kwargs={'pk': obj.instance_id}, request=self.request)) + return super().get(request, *args, **kwargs) + class InventoryHostsList(HostRelatedSearchMixin, SubListCreateAttachDetachAPIView): model = models.Host diff --git a/awx/main/constants.py b/awx/main/constants.py index 8450c9145e..32d8a2184c 100644 --- a/awx/main/constants.py +++ b/awx/main/constants.py 
@@ -111,3 +111,6 @@ ANSIBLE_RUNNER_NEEDS_UPDATE_MESSAGE = ( # Values for setting SUBSCRIPTION_USAGE_MODEL SUBSCRIPTION_USAGE_MODEL_UNIQUE_HOSTS = 'unique_managed_hosts' + +# Shared tuple of host field names to load with QuerySet.only() when building querysets for writing or finalizing facts +HOST_FACTS_FIELDS = ('name', 'ansible_facts', 'ansible_facts_modified', 'modified', 'inventory_id') diff --git a/awx/main/models/jobs.py b/awx/main/models/jobs.py index daad187d97..5e55683c20 100644 --- a/awx/main/models/jobs.py +++ b/awx/main/models/jobs.py @@ -11,6 +11,7 @@ from urllib.parse import urljoin from django.conf import settings from django.core.exceptions import ValidationError from django.db import models +from django.db.models.functions import Cast # from django.core.cache import cache from django.utils.translation import gettext_lazy as _ @@ -21,6 +22,7 @@ from rest_framework.exceptions import ParseError # AWX from awx.api.versioning import reverse +from awx.main.constants import HOST_FACTS_FIELDS from awx.main.models.base import ( BaseModel, CreatedModifiedModel, @@ -834,6 +836,27 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana def get_notification_friendly_name(self): return "Job" + def get_hosts_for_fact_cache(self): + """ + Builds the queryset to use for writing or finalizing the fact cache + these need to be the 'real' hosts associated with the job. 
+ For constructed inventories, that means the original (input inventory) hosts + when slicing, that means only returning hosts in that slice + """ + Host = JobHostSummary._meta.get_field('host').related_model + if not self.inventory_id: + return Host.objects.none() + + if self.inventory.kind == 'constructed': + id_field = Host._meta.get_field('id') + host_qs = Host.objects.filter(id__in=self.inventory.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field))) + else: + host_qs = self.inventory.hosts + + host_qs = host_qs.only(*HOST_FACTS_FIELDS) + host_qs = self.inventory.get_sliced_hosts(host_qs, self.job_slice_number, self.job_slice_count) + return host_qs + class LaunchTimeConfigBase(BaseModel): """ diff --git a/awx/main/tasks/facts.py b/awx/main/tasks/facts.py index ba48bc2249..3db5f13091 100644 --- a/awx/main/tasks/facts.py +++ b/awx/main/tasks/facts.py @@ -12,28 +12,16 @@ from django.utils.timezone import now # AWX from awx.main.utils.common import log_excess_runtime +from awx.main.models.inventory import Host logger = logging.getLogger('awx.main.tasks.facts') system_tracking_logger = logging.getLogger('awx.analytics.system_tracking') -def _get_inventory_hosts(inventory, slice_number, slice_count, only=('name', 'ansible_facts', 'ansible_facts_modified', 'modified', 'inventory_id'), **filters): - """Return value is an iterable for the relevant hosts for this job""" - if not inventory: - return [] - host_queryset = inventory.hosts.only(*only) - if filters: - host_queryset = host_queryset.filter(**filters) - host_queryset = inventory.get_sliced_hosts(host_queryset, slice_number, slice_count) - if isinstance(host_queryset, QuerySet): - return host_queryset.iterator() - return host_queryset - - @log_excess_runtime(logger, debug_cutoff=0.01, msg='Inventory {inventory_id} host facts prepared for {written_ct} hosts, took {delta:.3f} s', add_log_data=True) -def start_fact_cache(inventory, destination, log_data, timeout=None, slice_number=0, 
slice_count=1): - log_data['inventory_id'] = inventory.id +def start_fact_cache(hosts, destination, log_data, timeout=None, inventory_id=None): + log_data['inventory_id'] = inventory_id log_data['written_ct'] = 0 try: os.makedirs(destination, mode=0o700) @@ -42,15 +30,14 @@ def start_fact_cache(inventory, destination, log_data, timeout=None, slice_numbe if timeout is None: timeout = settings.ANSIBLE_FACT_CACHE_TIMEOUT - if timeout > 0: - # exclude hosts with fact data older than `settings.ANSIBLE_FACT_CACHE_TIMEOUT seconds` - timeout = now() - datetime.timedelta(seconds=timeout) - hosts = _get_inventory_hosts(inventory, slice_number, slice_count, ansible_facts_modified__gte=timeout) - else: - hosts = _get_inventory_hosts(inventory, slice_number, slice_count) + + if isinstance(hosts, QuerySet): + hosts = hosts.iterator() last_filepath_written = None for host in hosts: + if (not host.ansible_facts_modified) or (timeout and host.ansible_facts_modified < now() - datetime.timedelta(seconds=timeout)): + continue # facts are expired - do not write them filepath = os.sep.join(map(str, [destination, host.name])) if not os.path.realpath(filepath).startswith(destination): system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name))) @@ -76,13 +63,17 @@ def start_fact_cache(inventory, destination, log_data, timeout=None, slice_numbe msg='Inventory {inventory_id} host facts: updated {updated_ct}, cleared {cleared_ct}, unchanged {unmodified_ct}, took {delta:.3f} s', add_log_data=True, ) -def finish_fact_cache(inventory, destination, facts_write_time, log_data, slice_number=0, slice_count=1, job_id=None): - log_data['inventory_id'] = inventory.id +def finish_fact_cache(hosts, destination, facts_write_time, log_data, job_id=None, inventory_id=None): + log_data['inventory_id'] = inventory_id log_data['updated_ct'] = 0 log_data['unmodified_ct'] = 0 log_data['cleared_ct'] = 0 + + if isinstance(hosts, QuerySet): + hosts = hosts.iterator() + 
hosts_to_update = [] - for host in _get_inventory_hosts(inventory, slice_number, slice_count): + for host in hosts: filepath = os.sep.join(map(str, [destination, host.name])) if not os.path.realpath(filepath).startswith(destination): system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name))) @@ -120,7 +111,7 @@ def finish_fact_cache(inventory, destination, facts_write_time, log_data, slice_ system_tracking_logger.info('Facts cleared for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name))) log_data['cleared_ct'] += 1 if len(hosts_to_update) > 100: - inventory.hosts.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified']) + Host.objects.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified']) hosts_to_update = [] if hosts_to_update: - inventory.hosts.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified']) + Host.objects.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified']) diff --git a/awx/main/tasks/jobs.py b/awx/main/tasks/jobs.py index 1bb886a557..74286faa20 100644 --- a/awx/main/tasks/jobs.py +++ b/awx/main/tasks/jobs.py @@ -37,6 +37,7 @@ from awx.main.constants import ( MAX_ISOLATED_PATH_COLON_DELIMITER, CONTAINER_VOLUMES_MOUNT_TYPES, ACTIVE_STATES, + HOST_FACTS_FIELDS, ) from awx.main.models import ( Instance, @@ -1084,10 +1085,7 @@ class RunJob(SourceControlMixin, BaseTask): if self.should_use_fact_cache(): job.log_lifecycle("start_job_fact_cache") self.facts_write_time = start_fact_cache( - job.inventory, - os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'), - slice_number=job.job_slice_number, - slice_count=job.job_slice_count, + job.get_hosts_for_fact_cache(), os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'), inventory_id=job.inventory_id ) def build_project_dir(self, job, private_data_dir): @@ -1105,12 +1103,11 @@ class RunJob(SourceControlMixin, BaseTask): if 
self.should_use_fact_cache(): job.log_lifecycle("finish_job_fact_cache") finish_fact_cache( - job.inventory, + job.get_hosts_for_fact_cache(), os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'), facts_write_time=self.facts_write_time, - slice_number=job.job_slice_number, - slice_count=job.job_slice_count, job_id=job.id, + inventory_id=job.inventory_id, ) def final_run_hook(self, job, status, private_data_dir): @@ -1555,7 +1552,11 @@ class RunInventoryUpdate(SourceControlMixin, BaseTask): source_inv_path = self.write_inventory_file(input_inventory, private_data_dir, f'hosts_{input_inventory.id}', script_params) args.append(to_container_path(source_inv_path, private_data_dir)) # Include any facts from input inventories so they can be used in filters - start_fact_cache(input_inventory, os.path.join(private_data_dir, 'artifacts', str(inventory_update.id), 'fact_cache')) + start_fact_cache( + input_inventory.hosts.only(*HOST_FACTS_FIELDS), + os.path.join(private_data_dir, 'artifacts', str(inventory_update.id), 'fact_cache'), + inventory_id=input_inventory.id, + ) # Add arguments for the source inventory file/script/thing rel_path = self.pseudo_build_inventory(inventory_update, private_data_dir) diff --git a/awx/main/tests/unit/models/test_jobs.py b/awx/main/tests/unit/models/test_jobs.py index d17a434fb1..4f05a82535 100644 --- a/awx/main/tests/unit/models/test_jobs.py +++ b/awx/main/tests/unit/models/test_jobs.py @@ -6,40 +6,35 @@ import time import pytest from awx.main.models import ( - Job, Inventory, Host, ) from awx.main.tasks.facts import start_fact_cache, finish_fact_cache +from django.utils.timezone import now + +from datetime import timedelta + @pytest.fixture -def hosts(): +def ref_time(): + return now() - timedelta(seconds=5) + + +@pytest.fixture +def hosts(ref_time): inventory = Inventory(id=5) return [ - Host(name='host1', ansible_facts={"a": 1, "b": 2}, inventory=inventory), - Host(name='host2', ansible_facts={"a": 1, "b": 2}, 
inventory=inventory), - Host(name='host3', ansible_facts={"a": 1, "b": 2}, inventory=inventory), - Host(name=u'Iñtërnâtiônàlizætiøn', ansible_facts={"a": 1, "b": 2}, inventory=inventory), + Host(name='host1', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=ref_time, inventory=inventory), + Host(name='host2', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=ref_time, inventory=inventory), + Host(name='host3', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=ref_time, inventory=inventory), + Host(name=u'Iñtërnâtiônàlizætiøn', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=ref_time, inventory=inventory), ] -@pytest.fixture -def inventory(mocker, hosts): - mocker.patch('awx.main.tasks.facts._get_inventory_hosts', return_value=hosts) - return Inventory(id=5) - - -@pytest.fixture -def job(mocker, inventory): - j = Job(inventory=inventory, id=2) - # j._get_inventory_hosts = mocker.Mock(return_value=hosts) - return j - - -def test_start_job_fact_cache(hosts, job, inventory, tmpdir): +def test_start_job_fact_cache(hosts, tmpdir): fact_cache = os.path.join(tmpdir, 'facts') - last_modified = start_fact_cache(inventory, fact_cache, timeout=0) + last_modified = start_fact_cache(hosts, fact_cache, timeout=0) for host in hosts: filepath = os.path.join(fact_cache, host.name) @@ -49,26 +44,43 @@ def test_start_job_fact_cache(hosts, job, inventory, tmpdir): assert os.path.getmtime(filepath) <= last_modified -def test_fact_cache_with_invalid_path_traversal(job, inventory, tmpdir, mocker): - mocker.patch( - 'awx.main.tasks.facts._get_inventory_hosts', - return_value=[ - Host( - name='../foo', - ansible_facts={"a": 1, "b": 2}, - ), - ], - ) +def test_fact_cache_with_invalid_path_traversal(tmpdir): + hosts = [ + Host( + name='../foo', + ansible_facts={"a": 1, "b": 2}, + ), + ] fact_cache = os.path.join(tmpdir, 'facts') - start_fact_cache(inventory, fact_cache, timeout=0) + start_fact_cache(hosts, fact_cache, timeout=0) # a file called "foo" should _not_ be 
written outside the facts dir assert os.listdir(os.path.join(fact_cache, '..')) == ['facts'] -def test_finish_job_fact_cache_with_existing_data(job, hosts, inventory, mocker, tmpdir): +def test_start_job_fact_cache_past_timeout(hosts, tmpdir): fact_cache = os.path.join(tmpdir, 'facts') - last_modified = start_fact_cache(inventory, fact_cache, timeout=0) + # the hosts fixture was modified 5s ago, which is more than 2s + last_modified = start_fact_cache(hosts, fact_cache, timeout=2) + assert last_modified is None + + for host in hosts: + assert not os.path.exists(os.path.join(fact_cache, host.name)) + + +def test_start_job_fact_cache_within_timeout(hosts, tmpdir): + fact_cache = os.path.join(tmpdir, 'facts') + # the hosts fixture was modified 5s ago, which is less than 7s + last_modified = start_fact_cache(hosts, fact_cache, timeout=7) + assert last_modified + + for host in hosts: + assert os.path.exists(os.path.join(fact_cache, host.name)) + + +def test_finish_job_fact_cache_with_existing_data(hosts, mocker, tmpdir, ref_time): + fact_cache = os.path.join(tmpdir, 'facts') + last_modified = start_fact_cache(hosts, fact_cache, timeout=0) bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update') @@ -84,18 +96,19 @@ def test_finish_job_fact_cache_with_existing_data(job, hosts, inventory, mocker, new_modification_time = time.time() + 3600 os.utime(filepath, (new_modification_time, new_modification_time)) - finish_fact_cache(inventory, fact_cache, last_modified) + finish_fact_cache(hosts, fact_cache, last_modified) for host in (hosts[0], hosts[2], hosts[3]): assert host.ansible_facts == {"a": 1, "b": 2} - assert host.ansible_facts_modified is None + assert host.ansible_facts_modified == ref_time assert hosts[1].ansible_facts == ansible_facts_new + assert hosts[1].ansible_facts_modified > ref_time bulk_update.assert_called_once_with([hosts[1]], ['ansible_facts', 'ansible_facts_modified']) -def test_finish_job_fact_cache_with_bad_data(job, hosts, inventory, 
mocker, tmpdir): +def test_finish_job_fact_cache_with_bad_data(hosts, mocker, tmpdir): fact_cache = os.path.join(tmpdir, 'facts') - last_modified = start_fact_cache(inventory, fact_cache, timeout=0) + last_modified = start_fact_cache(hosts, fact_cache, timeout=0) bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update') @@ -107,22 +120,23 @@ def test_finish_job_fact_cache_with_bad_data(job, hosts, inventory, mocker, tmpd new_modification_time = time.time() + 3600 os.utime(filepath, (new_modification_time, new_modification_time)) - finish_fact_cache(inventory, fact_cache, last_modified) + finish_fact_cache(hosts, fact_cache, last_modified) bulk_update.assert_not_called() -def test_finish_job_fact_cache_clear(job, hosts, inventory, mocker, tmpdir): +def test_finish_job_fact_cache_clear(hosts, mocker, ref_time, tmpdir): fact_cache = os.path.join(tmpdir, 'facts') - last_modified = start_fact_cache(inventory, fact_cache, timeout=0) + last_modified = start_fact_cache(hosts, fact_cache, timeout=0) bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update') os.remove(os.path.join(fact_cache, hosts[1].name)) - finish_fact_cache(inventory, fact_cache, last_modified) + finish_fact_cache(hosts, fact_cache, last_modified) for host in (hosts[0], hosts[2], hosts[3]): assert host.ansible_facts == {"a": 1, "b": 2} - assert host.ansible_facts_modified is None + assert host.ansible_facts_modified == ref_time assert hosts[1].ansible_facts == {} + assert hosts[1].ansible_facts_modified > ref_time bulk_update.assert_called_once_with([hosts[1]], ['ansible_facts', 'ansible_facts_modified'])