AAP-65054 Fix bugs where concurrent jobs would clear facts of unrelated hosts (#16318)

* Add new tests for bug saving concurrent facts

* Fix first bug and improve tests

* Fix new bug where concurrent job clears facts from other job in unwanted way

* minor test fixes

* Add in missing playbook

* Fix host reference for constructed inventory

* Increase speed for concurrent fact tests

* Make test a bit faster

* Fix linters

* Add some functional tests

* Remove the sanity test

* Agent markers added

* Address SonarCloud

* Use backdating method, resolving stricter assertions

* Address coderabbit comments

* Address review comment with qs only method

* Delete missed sleep statement

* Add more coverage
This commit is contained in:
Alan Rominger
2026-03-05 07:58:32 -05:00
committed by GitHub
parent 0995f7c5fe
commit be5fbf365e
9 changed files with 691 additions and 42 deletions

View File

@@ -845,6 +845,21 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
def get_notification_friendly_name(self): def get_notification_friendly_name(self):
return "Job" return "Job"
def get_source_hosts_for_constructed_inventory(self):
    """Return a QuerySet of the source (input inventory) hosts for a constructed inventory.

    Constructed inventory hosts have an instance_id pointing to the real
    host in the input inventory. This resolves those references and returns
    a proper QuerySet (never a list), suitable for use with finish_fact_cache.
    """
    # NOTE(review): Host is resolved through the JobHostSummary relation rather
    # than imported directly — presumably to avoid an import cycle; confirm.
    Host = JobHostSummary._meta.get_field('host').related_model
    if not self.inventory_id:
        # No inventory attached: return an empty queryset (not None) so
        # callers can chain queryset methods unconditionally.
        return Host.objects.none()
    # instance_id is stored as text; Cast it to the Host pk field type so the
    # id__in subquery compares like with like. Hosts with an empty
    # instance_id are not references and are excluded.
    id_field = Host._meta.get_field('id')
    return Host.objects.filter(
        id__in=self.inventory.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field))
    ).only(*HOST_FACTS_FIELDS)
def get_hosts_for_fact_cache(self): def get_hosts_for_fact_cache(self):
""" """
Builds the queryset to use for writing or finalizing the fact cache Builds the queryset to use for writing or finalizing the fact cache
@@ -852,17 +867,15 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
For constructed inventories, that means the original (input inventory) hosts For constructed inventories, that means the original (input inventory) hosts
when slicing, that means only returning hosts in that slice when slicing, that means only returning hosts in that slice
""" """
Host = JobHostSummary._meta.get_field('host').related_model
if not self.inventory_id: if not self.inventory_id:
Host = JobHostSummary._meta.get_field('host').related_model
return Host.objects.none() return Host.objects.none()
if self.inventory.kind == 'constructed': if self.inventory.kind == 'constructed':
id_field = Host._meta.get_field('id') host_qs = self.get_source_hosts_for_constructed_inventory()
host_qs = Host.objects.filter(id__in=self.inventory.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field)))
else: else:
host_qs = self.inventory.hosts host_qs = self.inventory.hosts.only(*HOST_FACTS_FIELDS)
host_qs = host_qs.only(*HOST_FACTS_FIELDS)
host_qs = self.inventory.get_sliced_hosts(host_qs, self.job_slice_number, self.job_slice_count) host_qs = self.inventory.get_sliced_hosts(host_qs, self.job_slice_number, self.job_slice_count)
return host_qs return host_qs

View File

@@ -51,7 +51,14 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
os.chmod(f.name, 0o600) os.chmod(f.name, 0o600)
json.dump(host.ansible_facts, f) json.dump(host.ansible_facts, f)
log_data['written_ct'] += 1 log_data['written_ct'] += 1
last_write_time = os.path.getmtime(filepath) # Backdate the file by 2 seconds so finish_fact_cache can reliably
# distinguish these reference files from files updated by ansible.
# This guarantees fact file mtime < summary file mtime even with
# zipfile's 2-second timestamp rounding during artifact transfer.
mtime = os.path.getmtime(filepath)
backdated = mtime - 2
os.utime(filepath, (backdated, backdated))
last_write_time = backdated
except IOError: except IOError:
logger.error(f'facts for host {smart_str(host.name)} could not be cached') logger.error(f'facts for host {smart_str(host.name)} could not be cached')
continue continue
@@ -74,7 +81,7 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
msg='Inventory {inventory_id} host facts: updated {updated_ct}, cleared {cleared_ct}, unchanged {unmodified_ct}, took {delta:.3f} s', msg='Inventory {inventory_id} host facts: updated {updated_ct}, cleared {cleared_ct}, unchanged {unmodified_ct}, took {delta:.3f} s',
add_log_data=True, add_log_data=True,
) )
def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=None): def finish_fact_cache(host_qs, artifacts_dir, job_id=None, inventory_id=None, job_created=None, log_data=None):
log_data = log_data or {} log_data = log_data or {}
log_data['inventory_id'] = inventory_id log_data['inventory_id'] = inventory_id
log_data['updated_ct'] = 0 log_data['updated_ct'] = 0
@@ -95,7 +102,7 @@ def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=No
return return
host_names = summary.get('hosts_cached', []) host_names = summary.get('hosts_cached', [])
hosts_cached = Host.objects.filter(name__in=host_names).order_by('id').iterator() hosts_cached = host_qs.filter(name__in=host_names).order_by('id').iterator()
# Path where individual fact files were written # Path where individual fact files were written
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache') fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
hosts_to_update = [] hosts_to_update = []
@@ -138,14 +145,25 @@ def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=No
else: else:
# if the file goes missing, ansible removed it (likely via clear_facts) # if the file goes missing, ansible removed it (likely via clear_facts)
# if the file goes missing, but the host has not started facts, then we should not clear the facts # if the file goes missing, but the host has not started facts, then we should not clear the facts
host.ansible_facts = {} if job_created and host.ansible_facts_modified and host.ansible_facts_modified > job_created:
host.ansible_facts_modified = now() logger.warning(
hosts_to_update.append(host) f'Skipping fact clear for host {smart_str(host.name)} in job {job_id} '
logger.info(f'Facts cleared for inventory {smart_str(host.inventory.name)} host {smart_str(host.name)}') f'inventory {inventory_id}: host ansible_facts_modified '
log_data['cleared_ct'] += 1 f'({host.ansible_facts_modified.isoformat()}) is after this job\'s '
f'created time ({job_created.isoformat()}). '
f'A concurrent job likely updated this host\'s facts while this job was running.'
)
log_data['unmodified_ct'] += 1
else:
host.ansible_facts = {}
host.ansible_facts_modified = now()
hosts_to_update.append(host)
logger.info(f'Facts cleared for inventory {smart_str(host.inventory.name)} host {smart_str(host.name)}')
log_data['cleared_ct'] += 1
if len(hosts_to_update) >= 100: if len(hosts_to_update) >= 100:
bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified']) bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified'])
hosts_to_update = [] hosts_to_update = []
bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified']) bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified'])
logger.debug(f'Updated {log_data["updated_ct"]} host facts for inventory {inventory_id} in job {job_id}')

View File

@@ -1302,10 +1302,16 @@ class RunJob(SourceControlMixin, BaseTask):
return return
if self.should_use_fact_cache() and self.runner_callback.artifacts_processed: if self.should_use_fact_cache() and self.runner_callback.artifacts_processed:
job.log_lifecycle("finish_job_fact_cache") job.log_lifecycle("finish_job_fact_cache")
if job.inventory.kind == 'constructed':
hosts_qs = job.get_source_hosts_for_constructed_inventory()
else:
hosts_qs = job.inventory.hosts
finish_fact_cache( finish_fact_cache(
hosts_qs,
artifacts_dir=os.path.join(private_data_dir, 'artifacts', str(job.id)), artifacts_dir=os.path.join(private_data_dir, 'artifacts', str(job.id)),
job_id=job.id, job_id=job.id,
inventory_id=job.inventory_id, inventory_id=job.inventory_id,
job_created=job.created,
) )
def final_run_hook(self, job, status, private_data_dir): def final_run_hook(self, job, status, private_data_dir):

View File

@@ -0,0 +1,21 @@
---
# Generated by Claude Opus 4.6 (claude-opus-4-6).
# Sets a cacheable custom fact on every targeted host, then waits briefly so
# that two concurrent jobs running this playbook overlap in time.
- hosts: all
  vars:
    # Suffix appended to the 'foo' fact value; override via extra_vars
    # (e.g. "_v2") to distinguish one run's facts from another's.
    extra_value: ""
  gather_facts: false
  connection: local
  tasks:
    - name: set a custom fact
      set_fact:
        foo: "bar{{ extra_value }}"
        # Nested structure exercises fact serialization beyond flat scalars.
        bar:
          a:
            b:
              - "c"
              - "d"
        # cacheable: true pushes the fact into the fact cache plugin so it
        # survives the play and reaches the controller's fact cache.
        cacheable: true
    - name: sleep to create overlap window for concurrent job testing
      wait_for:
        timeout: 2

View File

@@ -1,6 +1,6 @@
import pytest import pytest
from awx.main.models import JobTemplate, Job, JobHostSummary, WorkflowJob, Inventory, Project, Organization from awx.main.models import JobTemplate, Job, JobHostSummary, WorkflowJob, Inventory, Host, Project, Organization
@pytest.mark.django_db @pytest.mark.django_db
@@ -87,3 +87,47 @@ class TestSlicingModels:
unified_job = job_template.create_unified_job(job_slice_count=2) unified_job = job_template.create_unified_job(job_slice_count=2)
assert isinstance(unified_job, Job) assert isinstance(unified_job, Job)
@pytest.mark.django_db
class TestGetSourceHostsForConstructedInventory:
    """Tests for Job.get_source_hosts_for_constructed_inventory"""

    def test_returns_source_hosts_via_instance_id(self):
        """Constructed hosts with instance_id pointing to source hosts are resolved correctly."""
        org = Organization.objects.create(name='test-org')
        inv_input = Inventory.objects.create(organization=org, name='input-inv')
        source_host1 = inv_input.hosts.create(name='host1')
        source_host2 = inv_input.hosts.create(name='host2')
        inv_constructed = Inventory.objects.create(organization=org, name='constructed-inv', kind='constructed')
        inv_constructed.input_inventories.add(inv_input)
        # Constructed hosts reference the real hosts by id, stored as text in instance_id.
        Host.objects.create(inventory=inv_constructed, name='host1', instance_id=str(source_host1.id))
        Host.objects.create(inventory=inv_constructed, name='host2', instance_id=str(source_host2.id))
        job = Job.objects.create(name='test-job', inventory=inv_constructed)
        result = job.get_source_hosts_for_constructed_inventory()
        # Compare as a set: ordering of the queryset is not part of the contract.
        assert set(result.values_list('id', flat=True)) == {source_host1.id, source_host2.id}

    def test_no_inventory_returns_empty(self):
        """A job with no inventory returns an empty queryset."""
        job = Job.objects.create(name='test-job')
        result = job.get_source_hosts_for_constructed_inventory()
        assert result.count() == 0

    def test_ignores_hosts_without_instance_id(self):
        """Hosts with empty instance_id are excluded from the result."""
        org = Organization.objects.create(name='test-org')
        inv_input = Inventory.objects.create(organization=org, name='input-inv')
        source_host = inv_input.hosts.create(name='host1')
        inv_constructed = Inventory.objects.create(organization=org, name='constructed-inv', kind='constructed')
        inv_constructed.input_inventories.add(inv_input)
        Host.objects.create(inventory=inv_constructed, name='host1', instance_id=str(source_host.id))
        # A constructed host with no instance_id has no source-host reference
        # and must not appear in the resolved queryset.
        Host.objects.create(inventory=inv_constructed, name='host-no-ref', instance_id='')
        job = Job.objects.create(name='test-job', inventory=inv_constructed)
        result = job.get_source_hosts_for_constructed_inventory()
        assert list(result.values_list('id', flat=True)) == [source_host.id]

View File

@@ -0,0 +1,188 @@
"""Functional tests for start_fact_cache / finish_fact_cache.
These tests use real database objects (via pytest-django) and real files
on disk, but do not launch jobs or subprocesses. Fact files are written
by start_fact_cache and then manipulated to simulate ansible output
before calling finish_fact_cache.
Generated by Claude Opus 4.6 (claude-opus-4-6).
"""
import json
import os
import time
from datetime import timedelta
import pytest
from django.utils.timezone import now
from awx.main.models import Host, Inventory
from awx.main.tasks.facts import start_fact_cache, finish_fact_cache
@pytest.fixture
def artifacts_dir(tmp_path):
    # Per-test artifacts directory on disk, returned as str (the fact cache
    # functions under test join paths with os.path, not pathlib).
    d = tmp_path / 'artifacts'
    d.mkdir()
    return str(d)
@pytest.mark.django_db
class TestFinishFactCacheScoping:
    """finish_fact_cache must only update hosts matched by the provided queryset."""

    def test_same_hostname_different_inventories(self, organization, artifacts_dir):
        """Two inventories share a hostname; only the targeted one should be updated.

        Generated by Claude Opus 4.6 (claude-opus-4-6).
        """
        inv1 = Inventory.objects.create(organization=organization, name='scope-inv1')
        inv2 = Inventory.objects.create(organization=organization, name='scope-inv2')
        host1 = inv1.hosts.create(name='shared')
        host2 = inv2.hosts.create(name='shared')
        # Give both hosts initial facts
        for h in (host1, host2):
            h.ansible_facts = {'original': True}
            h.ansible_facts_modified = now()
            h.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
        # start_fact_cache writes reference files for inv1's hosts
        start_fact_cache(inv1.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv1.id)
        # Simulate ansible writing new facts for 'shared'
        fact_file = os.path.join(artifacts_dir, 'fact_cache', 'shared')
        # A future mtime guarantees the file registers as updated relative to
        # the (backdated) reference file start_fact_cache wrote.
        future = time.time() + 60
        with open(fact_file, 'w') as f:
            json.dump({'updated': True}, f)
        os.utime(fact_file, (future, future))
        # finish with inv1's hosts as the queryset
        finish_fact_cache(inv1.hosts, artifacts_dir=artifacts_dir, inventory_id=inv1.id)
        host1.refresh_from_db()
        host2.refresh_from_db()
        assert host1.ansible_facts == {'updated': True}
        assert host2.ansible_facts == {'original': True}, 'Host in a different inventory was modified despite not being in the queryset'
@pytest.mark.django_db
class TestFinishFactCacheConcurrentProtection:
    """finish_fact_cache must not clear facts that a concurrent job updated."""

    def test_skip_clear_when_facts_modified_after_job_created(self, organization, artifacts_dir):
        """If a host's facts were updated after the job was created, do not clear them.

        Generated by Claude Opus 4.6 (claude-opus-4-6).
        """
        inv = Inventory.objects.create(organization=organization, name='concurrent-inv')
        host = inv.hosts.create(name='target')
        # Backdate the job's creation time so the concurrent update (below)
        # clearly lands after it.
        job_created = now() - timedelta(minutes=5)
        # start_fact_cache writes a reference file (host has no facts yet → no file written)
        start_fact_cache(inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv.id)
        # Simulate a concurrent job updating this host's facts AFTER our job was created
        host.ansible_facts = {'from_concurrent_job': True}
        host.ansible_facts_modified = now()
        host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
        # The fact file is missing (our job's ansible didn't target this host),
        # which would normally trigger a clear. But the concurrent protection
        # should skip it because ansible_facts_modified > job_created.
        fact_file = os.path.join(artifacts_dir, 'fact_cache', host.name)
        if os.path.exists(fact_file):
            os.remove(fact_file)
        finish_fact_cache(
            inv.hosts,
            artifacts_dir=artifacts_dir,
            inventory_id=inv.id,
            job_created=job_created,
        )
        host.refresh_from_db()
        assert host.ansible_facts == {'from_concurrent_job': True}, 'Facts set by a concurrent job were cleared despite ansible_facts_modified > job_created'

    def test_clear_when_facts_predate_job(self, organization, artifacts_dir):
        """If facts predate the job, a missing file should still clear them.

        Generated by Claude Opus 4.6 (claude-opus-4-6).
        """
        inv = Inventory.objects.create(organization=organization, name='clear-inv')
        host = inv.hosts.create(name='stale')
        # Facts last modified an hour ago — well before the job's created time.
        old_time = now() - timedelta(hours=1)
        host.ansible_facts = {'stale': True}
        host.ansible_facts_modified = old_time
        host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
        job_created = now() - timedelta(minutes=5)
        start_fact_cache(inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv.id)
        # Remove the fact file to simulate ansible's clear_facts
        os.remove(os.path.join(artifacts_dir, 'fact_cache', host.name))
        finish_fact_cache(
            inv.hosts,
            artifacts_dir=artifacts_dir,
            inventory_id=inv.id,
            job_created=job_created,
        )
        host.refresh_from_db()
        assert host.ansible_facts == {}, 'Stale facts should have been cleared when the fact file is missing ' 'and ansible_facts_modified predates job_created'
@pytest.mark.django_db
class TestConstructedInventoryFactCache:
    """finish_fact_cache with a constructed inventory queryset must target source hosts."""

    def test_facts_resolve_to_source_host(self, organization, artifacts_dir):
        """Facts must be written to the source host, not the constructed copy.

        Generated by Claude Opus 4.6 (claude-opus-4-6).
        """
        from django.db.models.functions import Cast

        inv_input = Inventory.objects.create(organization=organization, name='ci-input')
        source_host = inv_input.hosts.create(name='webserver')
        inv_constructed = Inventory.objects.create(organization=organization, name='ci-constructed', kind='constructed')
        inv_constructed.input_inventories.add(inv_input)
        # The constructed host is a reference to the source host via instance_id.
        constructed_host = Host.objects.create(
            inventory=inv_constructed,
            name='webserver',
            instance_id=str(source_host.id),
        )
        # Build the same queryset that get_hosts_for_fact_cache uses
        id_field = Host._meta.get_field('id')
        source_qs = Host.objects.filter(id__in=inv_constructed.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field)))
        # Give the source host initial facts so start_fact_cache writes a file
        source_host.ansible_facts = {'role': 'web'}
        source_host.ansible_facts_modified = now()
        source_host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
        start_fact_cache(source_qs, artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv_constructed.id)
        # Simulate ansible writing updated facts
        fact_file = os.path.join(artifacts_dir, 'fact_cache', 'webserver')
        # Future mtime marks the file as changed relative to the reference file.
        future = time.time() + 60
        with open(fact_file, 'w') as f:
            json.dump({'role': 'web', 'deployed': True}, f)
        os.utime(fact_file, (future, future))
        finish_fact_cache(source_qs, artifacts_dir=artifacts_dir, inventory_id=inv_constructed.id)
        source_host.refresh_from_db()
        constructed_host.refresh_from_db()
        assert source_host.ansible_facts == {'role': 'web', 'deployed': True}
        assert not constructed_host.ansible_facts, f'Facts were stored on the constructed host: {constructed_host.ansible_facts!r}'

View File

@@ -0,0 +1,351 @@
"""Tests for concurrent fact caching with --limit.
Reproduces bugs where concurrent jobs targeting different hosts via --limit
incorrectly modify (clear or revert) facts for hosts outside their limit.
Customer report: concurrent jobs on the same job template with different limits
cause facts set by an earlier-finishing job to be rolled back when the
later-finishing job completes.
See: https://github.com/jritter/concurrent-aap-fact-caching
Generated by Claude Opus 4.6 (claude-opus-4-6).
"""
import logging
import pytest
from django.utils.timezone import now
from awx.api.versioning import reverse
from awx.main.models import Inventory, JobTemplate
from awx.main.tests.live.tests.conftest import wait_for_job, wait_to_leave_status
logger = logging.getLogger(__name__)
@pytest.fixture
def concurrent_facts_inventory(default_org):
    """Inventory with two hosts for concurrent fact cache testing."""
    inv_name = 'test_concurrent_fact_cache'
    # Delete leftovers from a prior run so the fixture is idempotent on a
    # live (persistent) test system.
    Inventory.objects.filter(organization=default_org, name=inv_name).delete()
    inv = Inventory.objects.create(organization=default_org, name=inv_name)
    inv.hosts.create(name='cc_host_0')
    inv.hosts.create(name='cc_host_1')
    return inv
@pytest.fixture
def concurrent_facts_jt(concurrent_facts_inventory, live_tmp_folder, post, admin, project_factory):
    """Job template configured for concurrent fact-cached runs."""
    proj = project_factory(scm_url=f'file://{live_tmp_folder}/facts')
    # A fresh project may still be syncing; wait so playbooks are listed.
    if proj.current_job:
        wait_for_job(proj.current_job)
    assert 'gather_slow.yml' in proj.playbooks, f'gather_slow.yml not in {proj.playbooks}'
    jt_name = 'test_concurrent_fact_cache JT'
    # Remove any job template left over from a previous run.
    existing_jt = JobTemplate.objects.filter(name=jt_name).first()
    if existing_jt:
        existing_jt.delete()
    # use_fact_cache + allow_simultaneous are the two settings this test
    # suite exercises together.
    result = post(
        reverse('api:job_template_list'),
        {
            'name': jt_name,
            'project': proj.id,
            'playbook': 'gather_slow.yml',
            'inventory': concurrent_facts_inventory.id,
            'use_fact_cache': True,
            'allow_simultaneous': True,
        },
        admin,
        expect=201,
    )
    return JobTemplate.objects.get(id=result.data['id'])
def test_concurrent_limit_does_not_clear_facts(concurrent_facts_inventory, concurrent_facts_jt):
    """Concurrent jobs with different --limit must not clear each other's facts.

    Generated by Claude Opus 4.6 (claude-opus-4-6).

    Scenario:
    - Inventory has cc_host_0 and cc_host_1, neither has prior facts
    - Job A runs gather_slow.yml with limit=cc_host_0
    - While Job A is still running (sleeping), Job B launches with limit=cc_host_1
    - Both jobs set cacheable facts, but only for their respective limited host
    - After both complete, BOTH hosts should have populated facts

    The bug: get_hosts_for_fact_cache() returns ALL inventory hosts regardless
    of --limit. start_fact_cache records them all in hosts_cached but writes
    no fact files (no prior facts). When the later-finishing job runs
    finish_fact_cache, it sees a missing fact file for the other job's host
    and clears that host's facts.
    """
    inv = concurrent_facts_inventory
    jt = concurrent_facts_jt
    # Launch Job A targeting cc_host_0
    job_a = jt.create_unified_job()
    job_a.limit = 'cc_host_0'
    job_a.save(update_fields=['limit'])
    job_a.signal_start()
    # Wait for Job A to reach running (it will sleep inside the playbook)
    wait_to_leave_status(job_a, 'pending')
    wait_to_leave_status(job_a, 'waiting')
    logger.info(f'Job A (id={job_a.id}) is now running with limit=cc_host_0')
    # Launch Job B targeting cc_host_1 while Job A is still running
    job_b = jt.create_unified_job()
    job_b.limit = 'cc_host_1'
    job_b.save(update_fields=['limit'])
    job_b.signal_start()
    # Verify that Job A is still running when Job B starts,
    # otherwise the overlap that triggers the bug did not happen.
    wait_to_leave_status(job_b, 'pending')
    wait_to_leave_status(job_b, 'waiting')
    job_a.refresh_from_db()
    if job_a.status != 'running':
        # Skip (not fail): lack of overlap means the scenario was not
        # reproduced, not that the code is wrong.
        pytest.skip('Job A finished before Job B started running; overlap did not occur')
    logger.info(f'Job B (id={job_b.id}) is now running with limit=cc_host_1 (concurrent with Job A)')
    # Wait for both to complete
    wait_for_job(job_a)
    wait_for_job(job_b)
    # Verify facts survived concurrent execution
    host_0 = inv.hosts.get(name='cc_host_0')
    host_1 = inv.hosts.get(name='cc_host_1')
    # sanity
    job_a.refresh_from_db()
    job_b.refresh_from_db()
    assert job_a.limit == "cc_host_0"
    assert job_b.limit == "cc_host_1"
    discovered_foos = [host_0.ansible_facts.get('foo'), host_1.ansible_facts.get('foo')]
    assert discovered_foos == ['bar'] * 2, f'Unexpected facts on cc_host_0 or _1: {discovered_foos} after job a,b {job_a.id}, {job_b.id}'
def test_concurrent_limit_does_not_revert_facts(live_tmp_folder, run_job_from_playbook, concurrent_facts_inventory):
    """Concurrent jobs must not revert facts that a prior concurrent job just set.

    Generated by Claude Opus 4.6 (claude-opus-4-6).

    Scenario:
    - First, populate both hosts with initial facts (foo=bar) via a
      non-concurrent gather run
    - Then run two concurrent jobs with different limits, each setting
      a new value (foo=bar_v2 via extra_vars)
    - After both complete, BOTH hosts should have foo=bar_v2

    The bug: start_fact_cache writes the OLD facts (foo=bar) into each job's
    artifact dir for ALL hosts. If ansible's cache plugin rewrites a non-limited
    host's fact file with the stale content (updating the mtime), finish_fact_cache
    treats it as a legitimate update and overwrites the DB with old values.
    """
    # --- Seed both hosts with initial facts via a non-concurrent run ---
    inv = concurrent_facts_inventory
    scm_url = f'file://{live_tmp_folder}/facts'
    res = run_job_from_playbook(
        'seed_facts_for_revert_test',
        'gather_slow.yml',
        scm_url=scm_url,
        jt_params={'use_fact_cache': True, 'allow_simultaneous': True, 'inventory': inv.id},
    )
    for host in inv.hosts.all():
        assert host.ansible_facts.get('foo') == 'bar', f'Seed run failed to set facts on {host.name}: {host.ansible_facts}'
    job = res['job']
    wait_for_job(job)
    # sanity, jobs should be set up to both have facts with just bar
    host_0 = inv.hosts.get(name='cc_host_0')
    host_1 = inv.hosts.get(name='cc_host_1')
    discovered_foos = [host_0.ansible_facts.get('foo'), host_1.ansible_facts.get('foo')]
    assert discovered_foos == ['bar'] * 2, f'Facts did not get expected initial values: {discovered_foos}'
    jt = job.job_template
    assert jt.allow_simultaneous is True
    assert jt.use_fact_cache is True
    # Sanity assertion, sometimes this would give problems from the Django rel cache
    assert jt.project
    # --- Run two concurrent jobs that write a new value ---
    # Update the JT to pass extra_vars that change the fact value
    jt.extra_vars = '{"extra_value": "_v2"}'
    jt.save(update_fields=['extra_vars'])
    job_a = jt.create_unified_job()
    job_a.limit = 'cc_host_0'
    job_a.save(update_fields=['limit'])
    job_a.signal_start()
    wait_to_leave_status(job_a, 'pending')
    wait_to_leave_status(job_a, 'waiting')
    job_b = jt.create_unified_job()
    job_b.limit = 'cc_host_1'
    job_b.save(update_fields=['limit'])
    job_b.signal_start()
    wait_to_leave_status(job_b, 'pending')
    wait_to_leave_status(job_b, 'waiting')
    job_a.refresh_from_db()
    if job_a.status != 'running':
        # No overlap means the race condition was not reproduced; skip, don't fail.
        pytest.skip('Job A finished before Job B started running; overlap did not occur')
    wait_for_job(job_a)
    wait_for_job(job_b)
    host_0 = inv.hosts.get(name='cc_host_0')
    host_1 = inv.hosts.get(name='cc_host_1')
    # Both hosts should have the UPDATED value, not the old seed value
    discovered_foos = [host_0.ansible_facts.get('foo'), host_1.ansible_facts.get('foo')]
    assert discovered_foos == ['bar_v2'] * 2, f'Facts were reverted to stale values by concurrent job cc_host_0 or cc_host_1: {discovered_foos}'
def test_fact_cache_scoped_to_inventory(live_tmp_folder, default_org, run_job_from_playbook):
    """finish_fact_cache must not modify hosts in other inventories.

    Generated by Claude Opus 4.6 (claude-opus-4-6).

    Bug: finish_fact_cache queries Host.objects.filter(name__in=host_names)
    without an inventory_id filter, so hosts with the same name in different
    inventories get their facts cross-contaminated.
    """
    shared_name = 'scope_shared_host'
    # Prepare for test by deleting junk from last run
    for inv_name in ('test_fact_scope_inv1', 'test_fact_scope_inv2'):
        inv = Inventory.objects.filter(name=inv_name).first()
        if inv:
            inv.delete()
    inv1 = Inventory.objects.create(organization=default_org, name='test_fact_scope_inv1')
    inv1.hosts.create(name=shared_name)
    inv2 = Inventory.objects.create(organization=default_org, name='test_fact_scope_inv2')
    host2 = inv2.hosts.create(name=shared_name)
    # Give inv2's host distinct facts that should not be touched
    original_facts = {'source': 'inventory_2', 'untouched': True}
    host2.ansible_facts = original_facts
    host2.ansible_facts_modified = now()
    host2.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
    # Run a fact-gathering job against inv1 only
    run_job_from_playbook(
        'test_fact_scope',
        'gather.yml',
        scm_url=f'file://{live_tmp_folder}/facts',
        jt_params={'use_fact_cache': True, 'inventory': inv1.id},
    )
    # inv1's host should have facts
    host1 = inv1.hosts.get(name=shared_name)
    assert host1.ansible_facts, f'inv1 host should have facts after gather: {host1.ansible_facts}'
    # inv2's host must NOT have been touched
    host2.refresh_from_db()
    assert host2.ansible_facts == original_facts, (
        f'Host in a different inventory was modified by a fact cache operation '
        f'on another inventory sharing the same hostname. '
        f'Expected {original_facts!r}, got {host2.ansible_facts!r}'
    )
def test_constructed_inventory_facts_saved_to_source_host(live_tmp_folder, default_org, run_job_from_playbook):
    """Facts from a constructed inventory job must be saved to the source host.

    Generated by Claude Opus 4.6 (claude-opus-4-6).

    Constructed inventories contain hosts that are references (via instance_id)
    to 'real' hosts in input inventories. start_fact_cache correctly resolves
    source hosts via get_hosts_for_fact_cache(), but finish_fact_cache must also
    write facts back to the source hosts, not the constructed inventory's copies.

    Scenario:
    - Two input inventories each have a host named 'ci_shared_host'
    - A constructed inventory uses both as inputs
    - The inventory sync picks one source host (via instance_id) for the
      constructed host — which one depends on input processing order
    - Both source hosts start with distinct pre-existing facts
    - A fact-gathering job runs against the constructed inventory
    - After completion, the targeted source host should have the job's facts
    - The OTHER source host must retain its original facts untouched
    - The constructed host itself must NOT have facts stored on it
      (constructed hosts are transient — recreated on each inventory sync)
    """
    shared_name = 'ci_shared_host'
    # Cleanup from prior runs
    for inv_name in ('test_ci_facts_input1', 'test_ci_facts_input2', 'test_ci_facts_constructed'):
        Inventory.objects.filter(name=inv_name).delete()
    # --- Create two input inventories, each with an identically-named host ---
    inv1 = Inventory.objects.create(organization=default_org, name='test_ci_facts_input1')
    source_host1 = inv1.hosts.create(name=shared_name)
    inv2 = Inventory.objects.create(organization=default_org, name='test_ci_facts_input2')
    source_host2 = inv2.hosts.create(name=shared_name)
    # Give both hosts distinct pre-existing facts so we can detect cross-contamination
    host1_original_facts = {'source': 'inventory_1'}
    source_host1.ansible_facts = host1_original_facts
    source_host1.ansible_facts_modified = now()
    source_host1.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
    host2_original_facts = {'source': 'inventory_2'}
    source_host2.ansible_facts = host2_original_facts
    source_host2.ansible_facts_modified = now()
    source_host2.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
    # Lookup tables keyed by host id, since we don't know in advance which
    # source host the sync will pick as the constructed host's target.
    source_hosts_by_id = {source_host1.id: source_host1, source_host2.id: source_host2}
    original_facts_by_id = {source_host1.id: host1_original_facts, source_host2.id: host2_original_facts}
    # --- Create constructed inventory (sync will create hosts from inputs) ---
    constructed_inv = Inventory.objects.create(
        organization=default_org,
        name='test_ci_facts_constructed',
        kind='constructed',
    )
    constructed_inv.input_inventories.add(inv1)
    constructed_inv.input_inventories.add(inv2)
    # --- Run a fact-gathering job against the constructed inventory ---
    # The job launch triggers an inventory sync which creates the constructed
    # host with instance_id pointing to one of the source hosts.
    scm_url = f'file://{live_tmp_folder}/facts'
    run_job_from_playbook(
        'test_ci_facts',
        'gather.yml',
        scm_url=scm_url,
        jt_params={'use_fact_cache': True, 'inventory': constructed_inv.id},
    )
    # --- Determine which source host the constructed host points to ---
    constructed_host = constructed_inv.hosts.get(name=shared_name)
    target_id = int(constructed_host.instance_id)
    other_id = (set(source_hosts_by_id.keys()) - {target_id}).pop()
    target_host = source_hosts_by_id[target_id]
    other_host = source_hosts_by_id[other_id]
    target_host.refresh_from_db()
    other_host.refresh_from_db()
    constructed_host.refresh_from_db()
    # Single combined assertion so a failure reports all three observations at once.
    actual = [target_host.ansible_facts.get('foo'), other_host.ansible_facts, constructed_host.ansible_facts]
    expected = ['bar', original_facts_by_id[other_id], {}]
    assert actual == expected, (
        f'Constructed inventory fact cache wrote to wrong host(s). '
        f'target source host (id={target_id}) foo={actual[0]!r}, '
        f'other source host (id={other_id}) facts={actual[1]!r}, '
        f'constructed host facts={actual[2]!r}; expected {expected!r}'
    )

View File

@@ -2,6 +2,7 @@
import json import json
import os import os
import pytest import pytest
from unittest import mock
from awx.main.models import ( from awx.main.models import (
Inventory, Inventory,
@@ -99,52 +100,55 @@ def test_start_job_fact_cache_within_timeout(hosts, tmpdir):
def test_finish_job_fact_cache_clear(hosts, mocker, ref_time, tmpdir): def test_finish_job_fact_cache_clear(hosts, mocker, ref_time, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts') artifacts_dir = str(tmpdir.mkdir("artifacts"))
start_fact_cache(hosts, fact_cache, timeout=0) inventory_id = 5
bulk_update = mocker.patch('awx.main.tasks.facts.bulk_update_sorted_by_id') start_fact_cache(hosts, artifacts_dir=artifacts_dir, timeout=0, inventory_id=inventory_id)
# Mock the os.path.exists behavior for host deletion mocker.patch('awx.main.tasks.facts.bulk_update_sorted_by_id')
# Let's assume the fact file for hosts[1] is missing.
mocker.patch('os.path.exists', side_effect=lambda path: hosts[1].name not in path)
# Simulate one host's fact file getting deleted manually # Remove the fact file for hosts[1] to simulate ansible's clear_facts
host_to_delete_filepath = os.path.join(fact_cache, hosts[1].name) fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
os.remove(os.path.join(fact_cache_dir, hosts[1].name))
# Simulate the file being removed by checking existence first, to avoid FileNotFoundError hosts_qs = mock.MagicMock()
if os.path.exists(host_to_delete_filepath): hosts_qs.filter.return_value.order_by.return_value.iterator.return_value = iter(hosts)
os.remove(host_to_delete_filepath)
finish_fact_cache(fact_cache) finish_fact_cache(hosts_qs, artifacts_dir=artifacts_dir, inventory_id=inventory_id)
# Simulate side effects that would normally be applied during bulk update # hosts[1] should have had its facts cleared (file was missing, job_created=None)
hosts[1].ansible_facts = {} assert hosts[1].ansible_facts == {}
hosts[1].ansible_facts_modified = now() assert hosts[1].ansible_facts_modified > ref_time
# Verify facts are preserved for hosts with valid cache files # Other hosts should be unmodified (fact files exist but weren't changed by ansible)
for host in (hosts[0], hosts[2], hosts[3]): for host in (hosts[0], hosts[2], hosts[3]):
assert host.ansible_facts == {"a": 1, "b": 2} assert host.ansible_facts == {"a": 1, "b": 2}
assert host.ansible_facts_modified == ref_time assert host.ansible_facts_modified == ref_time
assert hosts[1].ansible_facts_modified > ref_time
# Current implementation skips the call entirely if hosts_to_update == []
bulk_update.assert_not_called()
def test_finish_job_fact_cache_with_bad_data(hosts, mocker, tmpdir): def test_finish_job_fact_cache_with_bad_data(hosts, mocker, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts') artifacts_dir = str(tmpdir.mkdir("artifacts"))
start_fact_cache(hosts, fact_cache, timeout=0) inventory_id = 5
bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update') start_fact_cache(hosts, artifacts_dir=artifacts_dir, timeout=0, inventory_id=inventory_id)
bulk_update = mocker.patch('awx.main.tasks.facts.bulk_update_sorted_by_id')
# Overwrite fact files with invalid JSON and set future mtime
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
for h in hosts: for h in hosts:
filepath = os.path.join(fact_cache, h.name) filepath = os.path.join(fact_cache_dir, h.name)
with open(filepath, 'w') as f: with open(filepath, 'w') as f:
f.write('not valid json!') f.write('not valid json!')
f.flush() f.flush()
new_modification_time = time.time() + 3600 new_modification_time = time.time() + 3600
os.utime(filepath, (new_modification_time, new_modification_time)) os.utime(filepath, (new_modification_time, new_modification_time))
finish_fact_cache(fact_cache) hosts_qs = mock.MagicMock()
hosts_qs.filter.return_value.order_by.return_value.iterator.return_value = iter(hosts)
bulk_update.assert_not_called() finish_fact_cache(hosts_qs, artifacts_dir=artifacts_dir, inventory_id=inventory_id)
# Invalid JSON should be skipped — no hosts updated
updated_hosts = bulk_update.call_args[0][1]
assert updated_hosts == []

View File

@@ -84,7 +84,7 @@ def job_template_with_credentials():
@mock.patch('awx.main.tasks.jobs.create_partition', return_value=True) @mock.patch('awx.main.tasks.jobs.create_partition', return_value=True)
def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, private_data_dir, execution_environment): def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, private_data_dir, execution_environment):
# Create mocked inventory and host queryset # Create mocked inventory and host queryset
inventory = mock.MagicMock(spec=Inventory, pk=1) inventory = mock.MagicMock(spec=Inventory, pk=1, kind='')
host1 = mock.MagicMock(spec=Host, id=1, name='host1', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory) host1 = mock.MagicMock(spec=Host, id=1, name='host1', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory)
host2 = mock.MagicMock(spec=Host, id=2, name='host2', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory) host2 = mock.MagicMock(spec=Host, id=2, name='host2', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory)
@@ -107,6 +107,8 @@ def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, pri
job_slice_number=1, job_slice_number=1,
job_slice_count=1, job_slice_count=1,
inventory=inventory, inventory=inventory,
inventory_id=inventory.pk,
created=now(),
execution_environment=execution_environment, execution_environment=execution_environment,
) )
job.get_hosts_for_fact_cache = Job.get_hosts_for_fact_cache.__get__(job) job.get_hosts_for_fact_cache = Job.get_hosts_for_fact_cache.__get__(job)
@@ -140,7 +142,7 @@ def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, pri
@mock.patch('awx.main.tasks.jobs.create_partition', return_value=True) @mock.patch('awx.main.tasks.jobs.create_partition', return_value=True)
def test_pre_post_run_hook_facts_deleted_sliced(mock_create_partition, mock_facts_settings, private_data_dir, execution_environment): def test_pre_post_run_hook_facts_deleted_sliced(mock_create_partition, mock_facts_settings, private_data_dir, execution_environment):
# Fully mocked inventory # Fully mocked inventory
mock_inventory = mock.MagicMock(spec=Inventory) mock_inventory = mock.MagicMock(spec=Inventory, pk=1, kind='')
# Create 999 mocked Host instances # Create 999 mocked Host instances
hosts = [] hosts = []
@@ -173,6 +175,8 @@ def test_pre_post_run_hook_facts_deleted_sliced(mock_create_partition, mock_fact
job.job_slice_count = 3 job.job_slice_count = 3
job.execution_environment = execution_environment job.execution_environment = execution_environment
job.inventory = mock_inventory job.inventory = mock_inventory
job.inventory_id = mock_inventory.pk
job.created = now()
job.job_env.get.return_value = private_data_dir job.job_env.get.return_value = private_data_dir
# Bind actual method for host filtering # Bind actual method for host filtering