diff --git a/awx/main/models/jobs.py b/awx/main/models/jobs.py index ae586a19c7..81d57c60bf 100644 --- a/awx/main/models/jobs.py +++ b/awx/main/models/jobs.py @@ -845,6 +845,21 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana def get_notification_friendly_name(self): return "Job" + def get_source_hosts_for_constructed_inventory(self): + """Return a QuerySet of the source (input inventory) hosts for a constructed inventory. + + Constructed inventory hosts have an instance_id pointing to the real + host in the input inventory. This resolves those references and returns + a proper QuerySet (never a list), suitable for use with finish_fact_cache. + """ + Host = JobHostSummary._meta.get_field('host').related_model + if not self.inventory_id: + return Host.objects.none() + id_field = Host._meta.get_field('id') + return Host.objects.filter(id__in=self.inventory.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field))).only( + *HOST_FACTS_FIELDS + ) + def get_hosts_for_fact_cache(self): """ Builds the queryset to use for writing or finalizing the fact cache @@ -852,17 +867,15 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana For constructed inventories, that means the original (input inventory) hosts when slicing, that means only returning hosts in that slice """ - Host = JobHostSummary._meta.get_field('host').related_model if not self.inventory_id: + Host = JobHostSummary._meta.get_field('host').related_model return Host.objects.none() if self.inventory.kind == 'constructed': - id_field = Host._meta.get_field('id') - host_qs = Host.objects.filter(id__in=self.inventory.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field))) + host_qs = self.get_source_hosts_for_constructed_inventory() else: - host_qs = self.inventory.hosts + host_qs = self.inventory.hosts.only(*HOST_FACTS_FIELDS) - host_qs = host_qs.only(*HOST_FACTS_FIELDS) host_qs = self.inventory.get_sliced_hosts(host_qs, self.job_slice_number, self.job_slice_count) return host_qs diff --git a/awx/main/tasks/facts.py b/awx/main/tasks/facts.py index c9e7bbfa54..8fbdae9019 100644 --- a/awx/main/tasks/facts.py +++ b/awx/main/tasks/facts.py @@ -51,7 +51,14 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_ os.chmod(f.name, 0o600) json.dump(host.ansible_facts, f) log_data['written_ct'] += 1 - last_write_time = os.path.getmtime(filepath) + # Backdate the file by 2 seconds so finish_fact_cache can reliably + # distinguish these reference files from files updated by ansible. + # This guarantees fact file mtime < summary file mtime even with + # zipfile's 2-second timestamp rounding during artifact transfer. + mtime = os.path.getmtime(filepath) + backdated = mtime - 2 + os.utime(filepath, (backdated, backdated)) + last_write_time = backdated except IOError: logger.error(f'facts for host {smart_str(host.name)} could not be cached') continue @@ -74,7 +81,7 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_ msg='Inventory {inventory_id} host facts: updated {updated_ct}, cleared {cleared_ct}, unchanged {unmodified_ct}, took {delta:.3f} s', add_log_data=True, ) -def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=None): +def finish_fact_cache(host_qs, artifacts_dir, job_id=None, inventory_id=None, job_created=None, log_data=None): log_data = log_data or {} log_data['inventory_id'] = inventory_id log_data['updated_ct'] = 0 @@ -95,7 +102,7 @@ def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=No return host_names = summary.get('hosts_cached', []) - hosts_cached = Host.objects.filter(name__in=host_names).order_by('id').iterator() + hosts_cached = host_qs.filter(name__in=host_names).order_by('id').iterator() # Path where individual fact files were written fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache') hosts_to_update = [] @@ -138,14 +145,25 @@ def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=No else: # if the file goes missing, ansible removed it (likely via clear_facts) # if the file goes missing, but the host has not started facts, then we should not clear the facts - host.ansible_facts = {} - host.ansible_facts_modified = now() - hosts_to_update.append(host) - logger.info(f'Facts cleared for inventory {smart_str(host.inventory.name)} host {smart_str(host.name)}') - log_data['cleared_ct'] += 1 + if job_created and host.ansible_facts_modified and host.ansible_facts_modified > job_created: + logger.warning( + f'Skipping fact clear for host {smart_str(host.name)} in job {job_id} ' + f'inventory {inventory_id}: host ansible_facts_modified ' + f'({host.ansible_facts_modified.isoformat()}) is after this job\'s ' + f'created time ({job_created.isoformat()}). ' + f'A concurrent job likely updated this host\'s facts while this job was running.' + ) + log_data['unmodified_ct'] += 1 + else: + host.ansible_facts = {} + host.ansible_facts_modified = now() + hosts_to_update.append(host) + logger.info(f'Facts cleared for inventory {smart_str(host.inventory.name)} host {smart_str(host.name)}') + log_data['cleared_ct'] += 1 if len(hosts_to_update) >= 100: bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified']) hosts_to_update = [] bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified']) + logger.debug(f'Updated {log_data["updated_ct"]} host facts for inventory {inventory_id} in job {job_id}') diff --git a/awx/main/tasks/jobs.py b/awx/main/tasks/jobs.py index 0ad6f05186..f57c80f8a3 100644 --- a/awx/main/tasks/jobs.py +++ b/awx/main/tasks/jobs.py @@ -1302,10 +1302,16 @@ class RunJob(SourceControlMixin, BaseTask): return if self.should_use_fact_cache() and self.runner_callback.artifacts_processed: job.log_lifecycle("finish_job_fact_cache") + if job.inventory.kind == 'constructed': + hosts_qs = job.get_source_hosts_for_constructed_inventory() + else: + hosts_qs = job.inventory.hosts finish_fact_cache( + hosts_qs, artifacts_dir=os.path.join(private_data_dir, 'artifacts', str(job.id)), job_id=job.id, inventory_id=job.inventory_id, + job_created=job.created, ) def final_run_hook(self, job, status, private_data_dir): diff --git a/awx/main/tests/data/projects/facts/gather_slow.yml b/awx/main/tests/data/projects/facts/gather_slow.yml new file mode 100644 index 0000000000..e39f7f8d4c --- /dev/null +++ b/awx/main/tests/data/projects/facts/gather_slow.yml @@ -0,0 +1,21 @@ +--- +# Generated by Claude Opus 4.6 (claude-opus-4-6). + +- hosts: all + vars: + extra_value: "" + gather_facts: false + connection: local + tasks: + - name: set a custom fact + set_fact: + foo: "bar{{ extra_value }}" + bar: + a: + b: + - "c" + - "d" + cacheable: true + - name: sleep to create overlap window for concurrent job testing + wait_for: + timeout: 2 diff --git a/awx/main/tests/functional/models/test_job.py b/awx/main/tests/functional/models/test_job.py index e2ac17fb43..3e15c1fc9e 100644 --- a/awx/main/tests/functional/models/test_job.py +++ b/awx/main/tests/functional/models/test_job.py @@ -1,6 +1,6 @@ import pytest -from awx.main.models import JobTemplate, Job, JobHostSummary, WorkflowJob, Inventory, Project, Organization +from awx.main.models import JobTemplate, Job, JobHostSummary, WorkflowJob, Inventory, Host, Project, Organization @pytest.mark.django_db @@ -87,3 +87,47 @@ class TestSlicingModels: unified_job = job_template.create_unified_job(job_slice_count=2) assert isinstance(unified_job, Job) + + +@pytest.mark.django_db +class TestGetSourceHostsForConstructedInventory: + """Tests for Job.get_source_hosts_for_constructed_inventory""" + + def test_returns_source_hosts_via_instance_id(self): + """Constructed hosts with instance_id pointing to source hosts are resolved correctly.""" + org = Organization.objects.create(name='test-org') + inv_input = Inventory.objects.create(organization=org, name='input-inv') + source_host1 = inv_input.hosts.create(name='host1') + source_host2 = inv_input.hosts.create(name='host2') + + inv_constructed = Inventory.objects.create(organization=org, name='constructed-inv', kind='constructed') + inv_constructed.input_inventories.add(inv_input) + Host.objects.create(inventory=inv_constructed, name='host1', instance_id=str(source_host1.id)) + Host.objects.create(inventory=inv_constructed, name='host2', instance_id=str(source_host2.id)) + + job = Job.objects.create(name='test-job', inventory=inv_constructed) + result = job.get_source_hosts_for_constructed_inventory() + + assert set(result.values_list('id', flat=True)) == {source_host1.id, source_host2.id} + + def test_no_inventory_returns_empty(self): + """A job with no inventory returns an empty queryset.""" + job = Job.objects.create(name='test-job') + result = job.get_source_hosts_for_constructed_inventory() + assert result.count() == 0 + + def test_ignores_hosts_without_instance_id(self): + """Hosts with empty instance_id are excluded from the result.""" + org = Organization.objects.create(name='test-org') + inv_input = Inventory.objects.create(organization=org, name='input-inv') + source_host = inv_input.hosts.create(name='host1') + + inv_constructed = Inventory.objects.create(organization=org, name='constructed-inv', kind='constructed') + inv_constructed.input_inventories.add(inv_input) + Host.objects.create(inventory=inv_constructed, name='host1', instance_id=str(source_host.id)) + Host.objects.create(inventory=inv_constructed, name='host-no-ref', instance_id='') + + job = Job.objects.create(name='test-job', inventory=inv_constructed) + result = job.get_source_hosts_for_constructed_inventory() + + assert list(result.values_list('id', flat=True)) == [source_host.id] diff --git a/awx/main/tests/functional/tasks/test_fact_cache.py b/awx/main/tests/functional/tasks/test_fact_cache.py new file mode 100644 index 0000000000..1ca43da034 --- /dev/null +++ b/awx/main/tests/functional/tasks/test_fact_cache.py @@ -0,0 +1,188 @@ +"""Functional tests for start_fact_cache / finish_fact_cache. + +These tests use real database objects (via pytest-django) and real files +on disk, but do not launch jobs or subprocesses. Fact files are written +by start_fact_cache and then manipulated to simulate ansible output +before calling finish_fact_cache. + +Generated by Claude Opus 4.6 (claude-opus-4-6). +""" + +import json +import os +import time +from datetime import timedelta + +import pytest + +from django.utils.timezone import now + +from awx.main.models import Host, Inventory +from awx.main.tasks.facts import start_fact_cache, finish_fact_cache + + +@pytest.fixture +def artifacts_dir(tmp_path): + d = tmp_path / 'artifacts' + d.mkdir() + return str(d) + + +@pytest.mark.django_db +class TestFinishFactCacheScoping: + """finish_fact_cache must only update hosts matched by the provided queryset.""" + + def test_same_hostname_different_inventories(self, organization, artifacts_dir): + """Two inventories share a hostname; only the targeted one should be updated. + + Generated by Claude Opus 4.6 (claude-opus-4-6). + """ + inv1 = Inventory.objects.create(organization=organization, name='scope-inv1') + inv2 = Inventory.objects.create(organization=organization, name='scope-inv2') + + host1 = inv1.hosts.create(name='shared') + host2 = inv2.hosts.create(name='shared') + + # Give both hosts initial facts + for h in (host1, host2): + h.ansible_facts = {'original': True} + h.ansible_facts_modified = now() + h.save(update_fields=['ansible_facts', 'ansible_facts_modified']) + + # start_fact_cache writes reference files for inv1's hosts + start_fact_cache(inv1.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv1.id) + + # Simulate ansible writing new facts for 'shared' + fact_file = os.path.join(artifacts_dir, 'fact_cache', 'shared') + future = time.time() + 60 + with open(fact_file, 'w') as f: + json.dump({'updated': True}, f) + os.utime(fact_file, (future, future)) + + # finish with inv1's hosts as the queryset + finish_fact_cache(inv1.hosts, artifacts_dir=artifacts_dir, inventory_id=inv1.id) + + host1.refresh_from_db() + host2.refresh_from_db() + + assert host1.ansible_facts == {'updated': True} + assert host2.ansible_facts == {'original': True}, 'Host in a different inventory was modified despite not being in the queryset' + + +@pytest.mark.django_db +class TestFinishFactCacheConcurrentProtection: + """finish_fact_cache must not clear facts that a concurrent job updated.""" + + def test_skip_clear_when_facts_modified_after_job_created(self, organization, artifacts_dir): + """If a host's facts were updated after the job was created, do not clear them. + + Generated by Claude Opus 4.6 (claude-opus-4-6). + """ + inv = Inventory.objects.create(organization=organization, name='concurrent-inv') + host = inv.hosts.create(name='target') + + job_created = now() - timedelta(minutes=5) + + # start_fact_cache writes a reference file (host has no facts yet → no file written) + start_fact_cache(inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv.id) + + # Simulate a concurrent job updating this host's facts AFTER our job was created + host.ansible_facts = {'from_concurrent_job': True} + host.ansible_facts_modified = now() + host.save(update_fields=['ansible_facts', 'ansible_facts_modified']) + + # The fact file is missing (our job's ansible didn't target this host), + # which would normally trigger a clear. But the concurrent protection + # should skip it because ansible_facts_modified > job_created. + fact_file = os.path.join(artifacts_dir, 'fact_cache', host.name) + if os.path.exists(fact_file): + os.remove(fact_file) + + finish_fact_cache( + inv.hosts, + artifacts_dir=artifacts_dir, + inventory_id=inv.id, + job_created=job_created, + ) + + host.refresh_from_db() + assert host.ansible_facts == {'from_concurrent_job': True}, 'Facts set by a concurrent job were cleared despite ansible_facts_modified > job_created' + + def test_clear_when_facts_predate_job(self, organization, artifacts_dir): + """If facts predate the job, a missing file should still clear them. + + Generated by Claude Opus 4.6 (claude-opus-4-6). + """ + inv = Inventory.objects.create(organization=organization, name='clear-inv') + host = inv.hosts.create(name='stale') + + old_time = now() - timedelta(hours=1) + host.ansible_facts = {'stale': True} + host.ansible_facts_modified = old_time + host.save(update_fields=['ansible_facts', 'ansible_facts_modified']) + + job_created = now() - timedelta(minutes=5) + + start_fact_cache(inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv.id) + + # Remove the fact file to simulate ansible's clear_facts + os.remove(os.path.join(artifacts_dir, 'fact_cache', host.name)) + + finish_fact_cache( + inv.hosts, + artifacts_dir=artifacts_dir, + inventory_id=inv.id, + job_created=job_created, + ) + + host.refresh_from_db() + assert host.ansible_facts == {}, 'Stale facts should have been cleared when the fact file is missing ' 'and ansible_facts_modified predates job_created' + + +@pytest.mark.django_db +class TestConstructedInventoryFactCache: + """finish_fact_cache with a constructed inventory queryset must target source hosts.""" + + def test_facts_resolve_to_source_host(self, organization, artifacts_dir): + """Facts must be written to the source host, not the constructed copy. + + Generated by Claude Opus 4.6 (claude-opus-4-6). + """ + from django.db.models.functions import Cast + + inv_input = Inventory.objects.create(organization=organization, name='ci-input') + source_host = inv_input.hosts.create(name='webserver') + + inv_constructed = Inventory.objects.create(organization=organization, name='ci-constructed', kind='constructed') + inv_constructed.input_inventories.add(inv_input) + constructed_host = Host.objects.create( + inventory=inv_constructed, + name='webserver', + instance_id=str(source_host.id), + ) + + # Build the same queryset that get_hosts_for_fact_cache uses + id_field = Host._meta.get_field('id') + source_qs = Host.objects.filter(id__in=inv_constructed.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field))) + + # Give the source host initial facts so start_fact_cache writes a file + source_host.ansible_facts = {'role': 'web'} + source_host.ansible_facts_modified = now() + source_host.save(update_fields=['ansible_facts', 'ansible_facts_modified']) + + start_fact_cache(source_qs, artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv_constructed.id) + + # Simulate ansible writing updated facts + fact_file = os.path.join(artifacts_dir, 'fact_cache', 'webserver') + future = time.time() + 60 + with open(fact_file, 'w') as f: + json.dump({'role': 'web', 'deployed': True}, f) + os.utime(fact_file, (future, future)) + + finish_fact_cache(source_qs, artifacts_dir=artifacts_dir, inventory_id=inv_constructed.id) + + source_host.refresh_from_db() + constructed_host.refresh_from_db() + + assert source_host.ansible_facts == {'role': 'web', 'deployed': True} + assert not constructed_host.ansible_facts, f'Facts were stored on the constructed host: {constructed_host.ansible_facts!r}' diff --git a/awx/main/tests/live/tests/test_concurrent_fact_cache.py b/awx/main/tests/live/tests/test_concurrent_fact_cache.py new file mode 100644 index 0000000000..1bbd2337f0 --- /dev/null +++ b/awx/main/tests/live/tests/test_concurrent_fact_cache.py @@ -0,0 +1,351 @@ +"""Tests for concurrent fact caching with --limit. + +Reproduces bugs where concurrent jobs targeting different hosts via --limit +incorrectly modify (clear or revert) facts for hosts outside their limit. + +Customer report: concurrent jobs on the same job template with different limits +cause facts set by an earlier-finishing job to be rolled back when the +later-finishing job completes. + +See: https://github.com/jritter/concurrent-aap-fact-caching + +Generated by Claude Opus 4.6 (claude-opus-4-6). +""" + +import logging + +import pytest + +from django.utils.timezone import now + +from awx.api.versioning import reverse +from awx.main.models import Inventory, JobTemplate +from awx.main.tests.live.tests.conftest import wait_for_job, wait_to_leave_status + +logger = logging.getLogger(__name__) + + +@pytest.fixture +def concurrent_facts_inventory(default_org): + """Inventory with two hosts for concurrent fact cache testing.""" + inv_name = 'test_concurrent_fact_cache' + Inventory.objects.filter(organization=default_org, name=inv_name).delete() + inv = Inventory.objects.create(organization=default_org, name=inv_name) + inv.hosts.create(name='cc_host_0') + inv.hosts.create(name='cc_host_1') + return inv + + +@pytest.fixture +def concurrent_facts_jt(concurrent_facts_inventory, live_tmp_folder, post, admin, project_factory): + """Job template configured for concurrent fact-cached runs.""" + proj = project_factory(scm_url=f'file://{live_tmp_folder}/facts') + if proj.current_job: + wait_for_job(proj.current_job) + assert 'gather_slow.yml' in proj.playbooks, f'gather_slow.yml not in {proj.playbooks}' + + jt_name = 'test_concurrent_fact_cache JT' + existing_jt = JobTemplate.objects.filter(name=jt_name).first() + if existing_jt: + existing_jt.delete() + result = post( + reverse('api:job_template_list'), + { + 'name': jt_name, + 'project': proj.id, + 'playbook': 'gather_slow.yml', + 'inventory': concurrent_facts_inventory.id, + 'use_fact_cache': True, + 'allow_simultaneous': True, + }, + admin, + expect=201, + ) + return JobTemplate.objects.get(id=result.data['id']) + + +def test_concurrent_limit_does_not_clear_facts(concurrent_facts_inventory, concurrent_facts_jt): + """Concurrent jobs with different --limit must not clear each other's facts. + + Generated by Claude Opus 4.6 (claude-opus-4-6). + + Scenario: + - Inventory has cc_host_0 and cc_host_1, neither has prior facts + - Job A runs gather_slow.yml with limit=cc_host_0 + - While Job A is still running (sleeping), Job B launches with limit=cc_host_1 + - Both jobs set cacheable facts, but only for their respective limited host + - After both complete, BOTH hosts should have populated facts + + The bug: get_hosts_for_fact_cache() returns ALL inventory hosts regardless + of --limit. start_fact_cache records them all in hosts_cached but writes + no fact files (no prior facts). When the later-finishing job runs + finish_fact_cache, it sees a missing fact file for the other job's host + and clears that host's facts. + """ + inv = concurrent_facts_inventory + jt = concurrent_facts_jt + + # Launch Job A targeting cc_host_0 + job_a = jt.create_unified_job() + job_a.limit = 'cc_host_0' + job_a.save(update_fields=['limit']) + job_a.signal_start() + + # Wait for Job A to reach running (it will sleep inside the playbook) + wait_to_leave_status(job_a, 'pending') + wait_to_leave_status(job_a, 'waiting') + logger.info(f'Job A (id={job_a.id}) is now running with limit=cc_host_0') + + # Launch Job B targeting cc_host_1 while Job A is still running + job_b = jt.create_unified_job() + job_b.limit = 'cc_host_1' + job_b.save(update_fields=['limit']) + job_b.signal_start() + + # Verify that Job A is still running when Job B starts, + # otherwise the overlap that triggers the bug did not happen. + wait_to_leave_status(job_b, 'pending') + wait_to_leave_status(job_b, 'waiting') + job_a.refresh_from_db() + if job_a.status != 'running': + pytest.skip('Job A finished before Job B started running; overlap did not occur') + logger.info(f'Job B (id={job_b.id}) is now running with limit=cc_host_1 (concurrent with Job A)') + + # Wait for both to complete + wait_for_job(job_a) + wait_for_job(job_b) + + # Verify facts survived concurrent execution + host_0 = inv.hosts.get(name='cc_host_0') + host_1 = inv.hosts.get(name='cc_host_1') + + # sanity + job_a.refresh_from_db() + job_b.refresh_from_db() + assert job_a.limit == "cc_host_0" + assert job_b.limit == "cc_host_1" + + discovered_foos = [host_0.ansible_facts.get('foo'), host_1.ansible_facts.get('foo')] + assert discovered_foos == ['bar'] * 2, f'Unexpected facts on cc_host_0 or _1: {discovered_foos} after job a,b {job_a.id}, {job_b.id}' + + +def test_concurrent_limit_does_not_revert_facts(live_tmp_folder, run_job_from_playbook, concurrent_facts_inventory): + """Concurrent jobs must not revert facts that a prior concurrent job just set. + + Generated by Claude Opus 4.6 (claude-opus-4-6). + + Scenario: + - First, populate both hosts with initial facts (foo=bar) via a + non-concurrent gather run + - Then run two concurrent jobs with different limits, each setting + a new value (foo=bar_v2 via extra_vars) + - After both complete, BOTH hosts should have foo=bar_v2 + + The bug: start_fact_cache writes the OLD facts (foo=bar) into each job's + artifact dir for ALL hosts. If ansible's cache plugin rewrites a non-limited + host's fact file with the stale content (updating the mtime), finish_fact_cache + treats it as a legitimate update and overwrites the DB with old values. + """ + # --- Seed both hosts with initial facts via a non-concurrent run --- + inv = concurrent_facts_inventory + + scm_url = f'file://{live_tmp_folder}/facts' + res = run_job_from_playbook( + 'seed_facts_for_revert_test', + 'gather_slow.yml', + scm_url=scm_url, + jt_params={'use_fact_cache': True, 'allow_simultaneous': True, 'inventory': inv.id}, + ) + for host in inv.hosts.all(): + assert host.ansible_facts.get('foo') == 'bar', f'Seed run failed to set facts on {host.name}: {host.ansible_facts}' + + job = res['job'] + wait_for_job(job) + + # sanity, jobs should be set up to both have facts with just bar + host_0 = inv.hosts.get(name='cc_host_0') + host_1 = inv.hosts.get(name='cc_host_1') + discovered_foos = [host_0.ansible_facts.get('foo'), host_1.ansible_facts.get('foo')] + assert discovered_foos == ['bar'] * 2, f'Facts did not get expected initial values: {discovered_foos}' + + jt = job.job_template + assert jt.allow_simultaneous is True + assert jt.use_fact_cache is True + + # Sanity assertion, sometimes this would give problems from the Django rel cache + assert jt.project + + # --- Run two concurrent jobs that write a new value --- + # Update the JT to pass extra_vars that change the fact value + jt.extra_vars = '{"extra_value": "_v2"}' + jt.save(update_fields=['extra_vars']) + + job_a = jt.create_unified_job() + job_a.limit = 'cc_host_0' + job_a.save(update_fields=['limit']) + job_a.signal_start() + + wait_to_leave_status(job_a, 'pending') + wait_to_leave_status(job_a, 'waiting') + + job_b = jt.create_unified_job() + job_b.limit = 'cc_host_1' + job_b.save(update_fields=['limit']) + job_b.signal_start() + + wait_to_leave_status(job_b, 'pending') + wait_to_leave_status(job_b, 'waiting') + job_a.refresh_from_db() + if job_a.status != 'running': + pytest.skip('Job A finished before Job B started running; overlap did not occur') + + wait_for_job(job_a) + wait_for_job(job_b) + + host_0 = inv.hosts.get(name='cc_host_0') + host_1 = inv.hosts.get(name='cc_host_1') + + # Both hosts should have the UPDATED value, not the old seed value + discovered_foos = [host_0.ansible_facts.get('foo'), host_1.ansible_facts.get('foo')] + assert discovered_foos == ['bar_v2'] * 2, f'Facts were reverted to stale values by concurrent job cc_host_0 or cc_host_1: {discovered_foos}' + + +def test_fact_cache_scoped_to_inventory(live_tmp_folder, default_org, run_job_from_playbook): + """finish_fact_cache must not modify hosts in other inventories. + + Generated by Claude Opus 4.6 (claude-opus-4-6). + + Bug: finish_fact_cache queries Host.objects.filter(name__in=host_names) + without an inventory_id filter, so hosts with the same name in different + inventories get their facts cross-contaminated. + """ + shared_name = 'scope_shared_host' + + # Prepare for test by deleting junk from last run + for inv_name in ('test_fact_scope_inv1', 'test_fact_scope_inv2'): + inv = Inventory.objects.filter(name=inv_name).first() + if inv: + inv.delete() + + inv1 = Inventory.objects.create(organization=default_org, name='test_fact_scope_inv1') + inv1.hosts.create(name=shared_name) + + inv2 = Inventory.objects.create(organization=default_org, name='test_fact_scope_inv2') + host2 = inv2.hosts.create(name=shared_name) + + # Give inv2's host distinct facts that should not be touched + original_facts = {'source': 'inventory_2', 'untouched': True} + host2.ansible_facts = original_facts + host2.ansible_facts_modified = now() + host2.save(update_fields=['ansible_facts', 'ansible_facts_modified']) + + # Run a fact-gathering job against inv1 only + run_job_from_playbook( + 'test_fact_scope', + 'gather.yml', + scm_url=f'file://{live_tmp_folder}/facts', + jt_params={'use_fact_cache': True, 'inventory': inv1.id}, + ) + + # inv1's host should have facts + host1 = inv1.hosts.get(name=shared_name) + assert host1.ansible_facts, f'inv1 host should have facts after gather: {host1.ansible_facts}' + + # inv2's host must NOT have been touched + host2.refresh_from_db() + assert host2.ansible_facts == original_facts, ( + f'Host in a different inventory was modified by a fact cache operation ' + f'on another inventory sharing the same hostname. ' + f'Expected {original_facts!r}, got {host2.ansible_facts!r}' + ) + + +def test_constructed_inventory_facts_saved_to_source_host(live_tmp_folder, default_org, run_job_from_playbook): + """Facts from a constructed inventory job must be saved to the source host. + + Generated by Claude Opus 4.6 (claude-opus-4-6). + + Constructed inventories contain hosts that are references (via instance_id) + to 'real' hosts in input inventories. start_fact_cache correctly resolves + source hosts via get_hosts_for_fact_cache(), but finish_fact_cache must also + write facts back to the source hosts, not the constructed inventory's copies. + + Scenario: + - Two input inventories each have a host named 'ci_shared_host' + - A constructed inventory uses both as inputs + - The inventory sync picks one source host (via instance_id) for the + constructed host — which one depends on input processing order + - Both source hosts start with distinct pre-existing facts + - A fact-gathering job runs against the constructed inventory + - After completion, the targeted source host should have the job's facts + - The OTHER source host must retain its original facts untouched + - The constructed host itself must NOT have facts stored on it + (constructed hosts are transient — recreated on each inventory sync) + """ + shared_name = 'ci_shared_host' + + # Cleanup from prior runs + for inv_name in ('test_ci_facts_input1', 'test_ci_facts_input2', 'test_ci_facts_constructed'): + Inventory.objects.filter(name=inv_name).delete() + + # --- Create two input inventories, each with an identically-named host --- + inv1 = Inventory.objects.create(organization=default_org, name='test_ci_facts_input1') + source_host1 = inv1.hosts.create(name=shared_name) + + inv2 = Inventory.objects.create(organization=default_org, name='test_ci_facts_input2') + source_host2 = inv2.hosts.create(name=shared_name) + + # Give both hosts distinct pre-existing facts so we can detect cross-contamination + host1_original_facts = {'source': 'inventory_1'} + source_host1.ansible_facts = host1_original_facts + source_host1.ansible_facts_modified = now() + source_host1.save(update_fields=['ansible_facts', 'ansible_facts_modified']) + + host2_original_facts = {'source': 'inventory_2'} + source_host2.ansible_facts = host2_original_facts + source_host2.ansible_facts_modified = now() + source_host2.save(update_fields=['ansible_facts', 'ansible_facts_modified']) + + source_hosts_by_id = {source_host1.id: source_host1, source_host2.id: source_host2} + original_facts_by_id = {source_host1.id: host1_original_facts, source_host2.id: host2_original_facts} + + # --- Create constructed inventory (sync will create hosts from inputs) --- + constructed_inv = Inventory.objects.create( + organization=default_org, + name='test_ci_facts_constructed', + kind='constructed', + ) + constructed_inv.input_inventories.add(inv1) + constructed_inv.input_inventories.add(inv2) + + # --- Run a fact-gathering job against the constructed inventory --- + # The job launch triggers an inventory sync which creates the constructed + # host with instance_id pointing to one of the source hosts. + scm_url = f'file://{live_tmp_folder}/facts' + run_job_from_playbook( + 'test_ci_facts', + 'gather.yml', + scm_url=scm_url, + jt_params={'use_fact_cache': True, 'inventory': constructed_inv.id}, + ) + + # --- Determine which source host the constructed host points to --- + constructed_host = constructed_inv.hosts.get(name=shared_name) + target_id = int(constructed_host.instance_id) + other_id = (set(source_hosts_by_id.keys()) - {target_id}).pop() + + target_host = source_hosts_by_id[target_id] + other_host = source_hosts_by_id[other_id] + + target_host.refresh_from_db() + other_host.refresh_from_db() + constructed_host.refresh_from_db() + + actual = [target_host.ansible_facts.get('foo'), other_host.ansible_facts, constructed_host.ansible_facts] + expected = ['bar', original_facts_by_id[other_id], {}] + assert actual == expected, ( + f'Constructed inventory fact cache wrote to wrong host(s). ' + f'target source host (id={target_id}) foo={actual[0]!r}, ' + f'other source host (id={other_id}) facts={actual[1]!r}, ' + f'constructed host facts={actual[2]!r}; expected {expected!r}' + ) diff --git a/awx/main/tests/unit/models/test_jobs.py b/awx/main/tests/unit/models/test_jobs.py index ff1887f34e..bd3422d7b3 100644 --- a/awx/main/tests/unit/models/test_jobs.py +++ b/awx/main/tests/unit/models/test_jobs.py @@ -2,6 +2,7 @@ import json import os import pytest +from unittest import mock from awx.main.models import ( Inventory, @@ -99,52 +100,55 @@ def test_start_job_fact_cache_within_timeout(hosts, tmpdir): def test_finish_job_fact_cache_clear(hosts, mocker, ref_time, tmpdir): - fact_cache = os.path.join(tmpdir, 'facts') - start_fact_cache(hosts, fact_cache, timeout=0) + artifacts_dir = str(tmpdir.mkdir("artifacts")) + inventory_id = 5 - bulk_update = mocker.patch('awx.main.tasks.facts.bulk_update_sorted_by_id') + start_fact_cache(hosts, artifacts_dir=artifacts_dir, timeout=0, inventory_id=inventory_id) - # Mock the os.path.exists behavior for host deletion - # Let's assume the fact file for hosts[1] is missing. - mocker.patch('os.path.exists', side_effect=lambda path: hosts[1].name not in path) + mocker.patch('awx.main.tasks.facts.bulk_update_sorted_by_id') - # Simulate one host's fact file getting deleted manually - host_to_delete_filepath = os.path.join(fact_cache, hosts[1].name) + # Remove the fact file for hosts[1] to simulate ansible's clear_facts + fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache') + os.remove(os.path.join(fact_cache_dir, hosts[1].name)) - # Simulate the file being removed by checking existence first, to avoid FileNotFoundError - if os.path.exists(host_to_delete_filepath): - os.remove(host_to_delete_filepath) + hosts_qs = mock.MagicMock() + hosts_qs.filter.return_value.order_by.return_value.iterator.return_value = iter(hosts) - finish_fact_cache(fact_cache) + finish_fact_cache(hosts_qs, artifacts_dir=artifacts_dir, inventory_id=inventory_id) - # Simulate side effects that would normally be applied during bulk update - hosts[1].ansible_facts = {} - hosts[1].ansible_facts_modified = now() + # hosts[1] should have had its facts cleared (file was missing, job_created=None) + assert hosts[1].ansible_facts == {} + assert hosts[1].ansible_facts_modified > ref_time - # Verify facts are preserved for hosts with valid cache files + # Other hosts should be unmodified (fact files exist but weren't changed by ansible) for host in (hosts[0], hosts[2], hosts[3]): assert host.ansible_facts == {"a": 1, "b": 2} assert host.ansible_facts_modified == ref_time - assert hosts[1].ansible_facts_modified > ref_time - - # Current implementation skips the call entirely if hosts_to_update == [] - bulk_update.assert_not_called() def test_finish_job_fact_cache_with_bad_data(hosts, mocker, tmpdir): - fact_cache = os.path.join(tmpdir, 'facts') - start_fact_cache(hosts, fact_cache, timeout=0) + artifacts_dir = str(tmpdir.mkdir("artifacts")) + inventory_id = 5 - bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update') + start_fact_cache(hosts, artifacts_dir=artifacts_dir, timeout=0, inventory_id=inventory_id) + bulk_update = mocker.patch('awx.main.tasks.facts.bulk_update_sorted_by_id') + + # Overwrite fact files with invalid JSON and set future mtime + fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache') for h in hosts: - filepath = os.path.join(fact_cache, h.name) + filepath = os.path.join(fact_cache_dir, h.name) with open(filepath, 'w') as f: f.write('not valid json!') f.flush() new_modification_time = time.time() + 3600 os.utime(filepath, (new_modification_time, new_modification_time)) - finish_fact_cache(fact_cache) + hosts_qs = mock.MagicMock() + hosts_qs.filter.return_value.order_by.return_value.iterator.return_value = iter(hosts) - bulk_update.assert_not_called() + finish_fact_cache(hosts_qs, artifacts_dir=artifacts_dir, inventory_id=inventory_id) + + # Invalid JSON should be skipped — no hosts updated + updated_hosts = bulk_update.call_args[0][1] + assert updated_hosts == [] diff --git a/awx/main/tests/unit/tasks/test_jobs.py b/awx/main/tests/unit/tasks/test_jobs.py index 058e54ed2f..c57403e1c6 100644 --- a/awx/main/tests/unit/tasks/test_jobs.py +++ b/awx/main/tests/unit/tasks/test_jobs.py @@ -84,7 +84,7 @@ def job_template_with_credentials(): @mock.patch('awx.main.tasks.jobs.create_partition', return_value=True) def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, private_data_dir, execution_environment): # Create mocked inventory and host queryset - inventory = mock.MagicMock(spec=Inventory, pk=1) + inventory = mock.MagicMock(spec=Inventory, pk=1, kind='') host1 = mock.MagicMock(spec=Host, id=1, name='host1', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory) host2 = mock.MagicMock(spec=Host, id=2, name='host2', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory) @@ -107,6 +107,8 @@ def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, pri job_slice_number=1, job_slice_count=1, inventory=inventory, + inventory_id=inventory.pk, + created=now(), execution_environment=execution_environment, ) job.get_hosts_for_fact_cache = Job.get_hosts_for_fact_cache.__get__(job) @@ -140,7 +142,7 @@ def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, pri @mock.patch('awx.main.tasks.jobs.create_partition', return_value=True) def test_pre_post_run_hook_facts_deleted_sliced(mock_create_partition, mock_facts_settings, private_data_dir, execution_environment): # Fully mocked inventory - mock_inventory = mock.MagicMock(spec=Inventory) + mock_inventory = mock.MagicMock(spec=Inventory, pk=1, kind='') # Create 999 mocked Host instances hosts = [] @@ -173,6 +175,8 @@ def test_pre_post_run_hook_facts_deleted_sliced(mock_create_partition, mock_fact job.job_slice_count = 3 job.execution_environment = execution_environment job.inventory = mock_inventory + job.inventory_id = mock_inventory.pk + job.created = now() job.job_env.get.return_value = private_data_dir # Bind actual method for host filtering