AAP-65054 Fix bugs where concurrent jobs would clear facts of unrelated hosts (#16318)

* Add new tests for bug saving concurrent facts

* Fix first bug and improve tests

* Fix new bug where concurrent job clears facts from other job in unwanted way

* minor test fixes

* Add in missing playbook

* Fix host reference for constructed inventory

* Increase speed for concurrent fact tests

* Make test a bit faster

* Fix linters

* Add some functional tests

* Remove the sanity test

* Agent markers added

* Address SonarCloud

* Do backdating method, resolving stricter assertions

* Address coderabbit comments

* Address review comment with qs only method

* Delete missed sleep statement

* Add more coverage
This commit is contained in:
Alan Rominger
2026-03-05 07:58:32 -05:00
committed by GitHub
parent 0995f7c5fe
commit be5fbf365e
9 changed files with 691 additions and 42 deletions

View File

@@ -845,6 +845,21 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
def get_notification_friendly_name(self):
return "Job"
def get_source_hosts_for_constructed_inventory(self):
    """Resolve constructed-inventory host references back to their source hosts.

    Every host in a constructed inventory carries an instance_id naming the
    real host in one of the input inventories. This resolves those references
    and returns a proper QuerySet (never a list) restricted to the fact-cache
    fields, suitable for use with finish_fact_cache.
    """
    host_model = JobHostSummary._meta.get_field('host').related_model
    if not self.inventory_id:
        # Job has no inventory attached: nothing to resolve.
        return host_model.objects.none()
    pk_field = host_model._meta.get_field('id')
    # instance_id is stored as text; cast it so it can be matched against the pk.
    source_ids = self.inventory.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=pk_field))
    return host_model.objects.filter(id__in=source_ids).only(*HOST_FACTS_FIELDS)
def get_hosts_for_fact_cache(self):
    """
    Builds the queryset to use for writing or finalizing the fact cache

    For constructed inventories, that means the original (input inventory) hosts
    when slicing, that means only returning hosts in that slice
    """
    # NOTE(review): the rendered diff interleaved pre- and post-change lines here
    # (duplicate Host assignments and conflicting host_qs lines); this body is the
    # reconstructed post-change version implied by the hunk's add/remove counts.
    if not self.inventory_id:
        Host = JobHostSummary._meta.get_field('host').related_model
        return Host.objects.none()
    if self.inventory.kind == 'constructed':
        # Helper resolves instance_id references and already applies .only(*HOST_FACTS_FIELDS)
        host_qs = self.get_source_hosts_for_constructed_inventory()
    else:
        host_qs = self.inventory.hosts.only(*HOST_FACTS_FIELDS)
    # Restrict to this slice when the job is sliced
    host_qs = self.inventory.get_sliced_hosts(host_qs, self.job_slice_number, self.job_slice_count)
    return host_qs

View File

@@ -51,7 +51,14 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
os.chmod(f.name, 0o600)
json.dump(host.ansible_facts, f)
log_data['written_ct'] += 1
last_write_time = os.path.getmtime(filepath)
# Backdate the file by 2 seconds so finish_fact_cache can reliably
# distinguish these reference files from files updated by ansible.
# This guarantees fact file mtime < summary file mtime even with
# zipfile's 2-second timestamp rounding during artifact transfer.
mtime = os.path.getmtime(filepath)
backdated = mtime - 2
os.utime(filepath, (backdated, backdated))
last_write_time = backdated
except IOError:
logger.error(f'facts for host {smart_str(host.name)} could not be cached')
continue
@@ -74,7 +81,7 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
msg='Inventory {inventory_id} host facts: updated {updated_ct}, cleared {cleared_ct}, unchanged {unmodified_ct}, took {delta:.3f} s',
add_log_data=True,
)
def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=None):
def finish_fact_cache(host_qs, artifacts_dir, job_id=None, inventory_id=None, job_created=None, log_data=None):
log_data = log_data or {}
log_data['inventory_id'] = inventory_id
log_data['updated_ct'] = 0
@@ -95,7 +102,7 @@ def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=No
return
host_names = summary.get('hosts_cached', [])
hosts_cached = Host.objects.filter(name__in=host_names).order_by('id').iterator()
hosts_cached = host_qs.filter(name__in=host_names).order_by('id').iterator()
# Path where individual fact files were written
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
hosts_to_update = []
@@ -138,14 +145,25 @@ def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=No
else:
# if the file goes missing, ansible removed it (likely via clear_facts)
# if the file goes missing, but the host has not started facts, then we should not clear the facts
host.ansible_facts = {}
host.ansible_facts_modified = now()
hosts_to_update.append(host)
logger.info(f'Facts cleared for inventory {smart_str(host.inventory.name)} host {smart_str(host.name)}')
log_data['cleared_ct'] += 1
if job_created and host.ansible_facts_modified and host.ansible_facts_modified > job_created:
logger.warning(
f'Skipping fact clear for host {smart_str(host.name)} in job {job_id} '
f'inventory {inventory_id}: host ansible_facts_modified '
f'({host.ansible_facts_modified.isoformat()}) is after this job\'s '
f'created time ({job_created.isoformat()}). '
f'A concurrent job likely updated this host\'s facts while this job was running.'
)
log_data['unmodified_ct'] += 1
else:
host.ansible_facts = {}
host.ansible_facts_modified = now()
hosts_to_update.append(host)
logger.info(f'Facts cleared for inventory {smart_str(host.inventory.name)} host {smart_str(host.name)}')
log_data['cleared_ct'] += 1
if len(hosts_to_update) >= 100:
bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified'])
hosts_to_update = []
bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified'])
logger.debug(f'Updated {log_data["updated_ct"]} host facts for inventory {inventory_id} in job {job_id}')

View File

@@ -1302,10 +1302,16 @@ class RunJob(SourceControlMixin, BaseTask):
return
if self.should_use_fact_cache() and self.runner_callback.artifacts_processed:
job.log_lifecycle("finish_job_fact_cache")
if job.inventory.kind == 'constructed':
hosts_qs = job.get_source_hosts_for_constructed_inventory()
else:
hosts_qs = job.inventory.hosts
finish_fact_cache(
hosts_qs,
artifacts_dir=os.path.join(private_data_dir, 'artifacts', str(job.id)),
job_id=job.id,
inventory_id=job.inventory_id,
job_created=job.created,
)
def final_run_hook(self, job, status, private_data_dir):

View File

@@ -0,0 +1,21 @@
---
# Generated by Claude Opus 4.6 (claude-opus-4-6).
# NOTE(review): indentation was lost in the rendered diff; structure below is
# reconstructed per Ansible playbook conventions (cacheable is a set_fact arg).
- hosts: all
  vars:
    extra_value: ""
  gather_facts: false
  connection: local
  tasks:
    - name: set a custom fact
      set_fact:
        foo: "bar{{ extra_value }}"
        bar:
          a:
            b:
              - "c"
              - "d"
        cacheable: true
    - name: sleep to create overlap window for concurrent job testing
      wait_for:
        timeout: 2

View File

@@ -1,6 +1,6 @@
import pytest
from awx.main.models import JobTemplate, Job, JobHostSummary, WorkflowJob, Inventory, Project, Organization
from awx.main.models import JobTemplate, Job, JobHostSummary, WorkflowJob, Inventory, Host, Project, Organization
@pytest.mark.django_db
@@ -87,3 +87,47 @@ class TestSlicingModels:
unified_job = job_template.create_unified_job(job_slice_count=2)
assert isinstance(unified_job, Job)
@pytest.mark.django_db
class TestGetSourceHostsForConstructedInventory:
    """Tests for Job.get_source_hosts_for_constructed_inventory"""

    def test_returns_source_hosts_via_instance_id(self):
        """Constructed hosts with instance_id pointing to source hosts are resolved correctly."""
        org = Organization.objects.create(name='test-org')
        inv_input = Inventory.objects.create(organization=org, name='input-inv')
        source_host1 = inv_input.hosts.create(name='host1')
        source_host2 = inv_input.hosts.create(name='host2')
        inv_constructed = Inventory.objects.create(organization=org, name='constructed-inv', kind='constructed')
        inv_constructed.input_inventories.add(inv_input)
        # Constructed hosts reference their source hosts via instance_id (a stringified pk)
        Host.objects.create(inventory=inv_constructed, name='host1', instance_id=str(source_host1.id))
        Host.objects.create(inventory=inv_constructed, name='host2', instance_id=str(source_host2.id))
        job = Job.objects.create(name='test-job', inventory=inv_constructed)
        result = job.get_source_hosts_for_constructed_inventory()
        assert set(result.values_list('id', flat=True)) == {source_host1.id, source_host2.id}

    def test_no_inventory_returns_empty(self):
        """A job with no inventory returns an empty queryset."""
        job = Job.objects.create(name='test-job')
        result = job.get_source_hosts_for_constructed_inventory()
        assert result.count() == 0

    def test_ignores_hosts_without_instance_id(self):
        """Hosts with empty instance_id are excluded from the result."""
        org = Organization.objects.create(name='test-org')
        inv_input = Inventory.objects.create(organization=org, name='input-inv')
        source_host = inv_input.hosts.create(name='host1')
        inv_constructed = Inventory.objects.create(organization=org, name='constructed-inv', kind='constructed')
        inv_constructed.input_inventories.add(inv_input)
        Host.objects.create(inventory=inv_constructed, name='host1', instance_id=str(source_host.id))
        # Host without an instance_id cannot be resolved and must be skipped
        Host.objects.create(inventory=inv_constructed, name='host-no-ref', instance_id='')
        job = Job.objects.create(name='test-job', inventory=inv_constructed)
        result = job.get_source_hosts_for_constructed_inventory()
        assert list(result.values_list('id', flat=True)) == [source_host.id]

View File

@@ -0,0 +1,188 @@
"""Functional tests for start_fact_cache / finish_fact_cache.
These tests use real database objects (via pytest-django) and real files
on disk, but do not launch jobs or subprocesses. Fact files are written
by start_fact_cache and then manipulated to simulate ansible output
before calling finish_fact_cache.
Generated by Claude Opus 4.6 (claude-opus-4-6).
"""
import json
import os
import time
from datetime import timedelta
import pytest
from django.utils.timezone import now
from awx.main.models import Host, Inventory
from awx.main.tasks.facts import start_fact_cache, finish_fact_cache
@pytest.fixture
def artifacts_dir(tmp_path):
    # Per-test on-disk artifacts directory for start_fact_cache / finish_fact_cache.
    d = tmp_path / 'artifacts'
    d.mkdir()
    return str(d)
@pytest.mark.django_db
class TestFinishFactCacheScoping:
    """finish_fact_cache must only update hosts matched by the provided queryset."""

    def test_same_hostname_different_inventories(self, organization, artifacts_dir):
        """Two inventories share a hostname; only the targeted one should be updated.

        Generated by Claude Opus 4.6 (claude-opus-4-6).
        """
        inv1 = Inventory.objects.create(organization=organization, name='scope-inv1')
        inv2 = Inventory.objects.create(organization=organization, name='scope-inv2')
        host1 = inv1.hosts.create(name='shared')
        host2 = inv2.hosts.create(name='shared')
        # Give both hosts initial facts
        for h in (host1, host2):
            h.ansible_facts = {'original': True}
            h.ansible_facts_modified = now()
            h.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
        # start_fact_cache writes reference files for inv1's hosts
        start_fact_cache(inv1.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv1.id)
        # Simulate ansible writing new facts for 'shared'
        fact_file = os.path.join(artifacts_dir, 'fact_cache', 'shared')
        # Future mtime ensures the file is read as newer than the reference file
        future = time.time() + 60
        with open(fact_file, 'w') as f:
            json.dump({'updated': True}, f)
        os.utime(fact_file, (future, future))
        # finish with inv1's hosts as the queryset
        finish_fact_cache(inv1.hosts, artifacts_dir=artifacts_dir, inventory_id=inv1.id)
        host1.refresh_from_db()
        host2.refresh_from_db()
        assert host1.ansible_facts == {'updated': True}
        assert host2.ansible_facts == {'original': True}, 'Host in a different inventory was modified despite not being in the queryset'
@pytest.mark.django_db
class TestFinishFactCacheConcurrentProtection:
    """finish_fact_cache must not clear facts that a concurrent job updated."""

    def test_skip_clear_when_facts_modified_after_job_created(self, organization, artifacts_dir):
        """If a host's facts were updated after the job was created, do not clear them.

        Generated by Claude Opus 4.6 (claude-opus-4-6).
        """
        inv = Inventory.objects.create(organization=organization, name='concurrent-inv')
        host = inv.hosts.create(name='target')
        job_created = now() - timedelta(minutes=5)
        # start_fact_cache writes a reference file (host has no facts yet → no file written)
        start_fact_cache(inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv.id)
        # Simulate a concurrent job updating this host's facts AFTER our job was created
        host.ansible_facts = {'from_concurrent_job': True}
        host.ansible_facts_modified = now()
        host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
        # The fact file is missing (our job's ansible didn't target this host),
        # which would normally trigger a clear. But the concurrent protection
        # should skip it because ansible_facts_modified > job_created.
        fact_file = os.path.join(artifacts_dir, 'fact_cache', host.name)
        if os.path.exists(fact_file):
            os.remove(fact_file)
        finish_fact_cache(
            inv.hosts,
            artifacts_dir=artifacts_dir,
            inventory_id=inv.id,
            job_created=job_created,
        )
        host.refresh_from_db()
        assert host.ansible_facts == {'from_concurrent_job': True}, 'Facts set by a concurrent job were cleared despite ansible_facts_modified > job_created'

    def test_clear_when_facts_predate_job(self, organization, artifacts_dir):
        """If facts predate the job, a missing file should still clear them.

        Generated by Claude Opus 4.6 (claude-opus-4-6).
        """
        inv = Inventory.objects.create(organization=organization, name='clear-inv')
        host = inv.hosts.create(name='stale')
        old_time = now() - timedelta(hours=1)
        host.ansible_facts = {'stale': True}
        host.ansible_facts_modified = old_time
        host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
        job_created = now() - timedelta(minutes=5)
        start_fact_cache(inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv.id)
        # Remove the fact file to simulate ansible's clear_facts
        os.remove(os.path.join(artifacts_dir, 'fact_cache', host.name))
        finish_fact_cache(
            inv.hosts,
            artifacts_dir=artifacts_dir,
            inventory_id=inv.id,
            job_created=job_created,
        )
        host.refresh_from_db()
        assert host.ansible_facts == {}, 'Stale facts should have been cleared when the fact file is missing ' 'and ansible_facts_modified predates job_created'
@pytest.mark.django_db
class TestConstructedInventoryFactCache:
    """finish_fact_cache with a constructed inventory queryset must target source hosts."""

    def test_facts_resolve_to_source_host(self, organization, artifacts_dir):
        """Facts must be written to the source host, not the constructed copy.

        Generated by Claude Opus 4.6 (claude-opus-4-6).
        """
        from django.db.models.functions import Cast

        inv_input = Inventory.objects.create(organization=organization, name='ci-input')
        source_host = inv_input.hosts.create(name='webserver')
        inv_constructed = Inventory.objects.create(organization=organization, name='ci-constructed', kind='constructed')
        inv_constructed.input_inventories.add(inv_input)
        constructed_host = Host.objects.create(
            inventory=inv_constructed,
            name='webserver',
            instance_id=str(source_host.id),
        )
        # Build the same queryset that get_hosts_for_fact_cache uses
        id_field = Host._meta.get_field('id')
        source_qs = Host.objects.filter(id__in=inv_constructed.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field)))
        # Give the source host initial facts so start_fact_cache writes a file
        source_host.ansible_facts = {'role': 'web'}
        source_host.ansible_facts_modified = now()
        source_host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
        start_fact_cache(source_qs, artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv_constructed.id)
        # Simulate ansible writing updated facts
        fact_file = os.path.join(artifacts_dir, 'fact_cache', 'webserver')
        future = time.time() + 60
        with open(fact_file, 'w') as f:
            json.dump({'role': 'web', 'deployed': True}, f)
        os.utime(fact_file, (future, future))
        finish_fact_cache(source_qs, artifacts_dir=artifacts_dir, inventory_id=inv_constructed.id)
        source_host.refresh_from_db()
        constructed_host.refresh_from_db()
        assert source_host.ansible_facts == {'role': 'web', 'deployed': True}
        assert not constructed_host.ansible_facts, f'Facts were stored on the constructed host: {constructed_host.ansible_facts!r}'

View File

@@ -0,0 +1,351 @@
"""Tests for concurrent fact caching with --limit.
Reproduces bugs where concurrent jobs targeting different hosts via --limit
incorrectly modify (clear or revert) facts for hosts outside their limit.
Customer report: concurrent jobs on the same job template with different limits
cause facts set by an earlier-finishing job to be rolled back when the
later-finishing job completes.
See: https://github.com/jritter/concurrent-aap-fact-caching
Generated by Claude Opus 4.6 (claude-opus-4-6).
"""
import logging
import pytest
from django.utils.timezone import now
from awx.api.versioning import reverse
from awx.main.models import Inventory, JobTemplate
from awx.main.tests.live.tests.conftest import wait_for_job, wait_to_leave_status
logger = logging.getLogger(__name__)
@pytest.fixture
def concurrent_facts_inventory(default_org):
    """Inventory with two hosts for concurrent fact cache testing."""
    inv_name = 'test_concurrent_fact_cache'
    # Remove leftovers from prior runs so the fixture is idempotent
    Inventory.objects.filter(organization=default_org, name=inv_name).delete()
    inv = Inventory.objects.create(organization=default_org, name=inv_name)
    inv.hosts.create(name='cc_host_0')
    inv.hosts.create(name='cc_host_1')
    return inv
@pytest.fixture
def concurrent_facts_jt(concurrent_facts_inventory, live_tmp_folder, post, admin, project_factory):
    """Job template configured for concurrent fact-cached runs."""
    proj = project_factory(scm_url=f'file://{live_tmp_folder}/facts')
    # Ensure the project sync has finished so the playbook list is populated
    if proj.current_job:
        wait_for_job(proj.current_job)
    assert 'gather_slow.yml' in proj.playbooks, f'gather_slow.yml not in {proj.playbooks}'
    jt_name = 'test_concurrent_fact_cache JT'
    # Remove leftovers from prior runs so the fixture is idempotent
    existing_jt = JobTemplate.objects.filter(name=jt_name).first()
    if existing_jt:
        existing_jt.delete()
    result = post(
        reverse('api:job_template_list'),
        {
            'name': jt_name,
            'project': proj.id,
            'playbook': 'gather_slow.yml',
            'inventory': concurrent_facts_inventory.id,
            'use_fact_cache': True,
            'allow_simultaneous': True,
        },
        admin,
        expect=201,
    )
    return JobTemplate.objects.get(id=result.data['id'])
def test_concurrent_limit_does_not_clear_facts(concurrent_facts_inventory, concurrent_facts_jt):
    """Concurrent jobs with different --limit must not clear each other's facts.

    Generated by Claude Opus 4.6 (claude-opus-4-6).

    Scenario:
    - Inventory has cc_host_0 and cc_host_1, neither has prior facts
    - Job A runs gather_slow.yml with limit=cc_host_0
    - While Job A is still running (sleeping), Job B launches with limit=cc_host_1
    - Both jobs set cacheable facts, but only for their respective limited host
    - After both complete, BOTH hosts should have populated facts

    The bug: get_hosts_for_fact_cache() returns ALL inventory hosts regardless
    of --limit. start_fact_cache records them all in hosts_cached but writes
    no fact files (no prior facts). When the later-finishing job runs
    finish_fact_cache, it sees a missing fact file for the other job's host
    and clears that host's facts.
    """
    inv = concurrent_facts_inventory
    jt = concurrent_facts_jt
    # Launch Job A targeting cc_host_0
    job_a = jt.create_unified_job()
    job_a.limit = 'cc_host_0'
    job_a.save(update_fields=['limit'])
    job_a.signal_start()
    # Wait for Job A to reach running (it will sleep inside the playbook)
    wait_to_leave_status(job_a, 'pending')
    wait_to_leave_status(job_a, 'waiting')
    logger.info(f'Job A (id={job_a.id}) is now running with limit=cc_host_0')
    # Launch Job B targeting cc_host_1 while Job A is still running
    job_b = jt.create_unified_job()
    job_b.limit = 'cc_host_1'
    job_b.save(update_fields=['limit'])
    job_b.signal_start()
    # Verify that Job A is still running when Job B starts,
    # otherwise the overlap that triggers the bug did not happen.
    wait_to_leave_status(job_b, 'pending')
    wait_to_leave_status(job_b, 'waiting')
    job_a.refresh_from_db()
    if job_a.status != 'running':
        pytest.skip('Job A finished before Job B started running; overlap did not occur')
    logger.info(f'Job B (id={job_b.id}) is now running with limit=cc_host_1 (concurrent with Job A)')
    # Wait for both to complete
    wait_for_job(job_a)
    wait_for_job(job_b)
    # Verify facts survived concurrent execution
    host_0 = inv.hosts.get(name='cc_host_0')
    host_1 = inv.hosts.get(name='cc_host_1')
    # sanity
    job_a.refresh_from_db()
    job_b.refresh_from_db()
    assert job_a.limit == "cc_host_0"
    assert job_b.limit == "cc_host_1"
    discovered_foos = [host_0.ansible_facts.get('foo'), host_1.ansible_facts.get('foo')]
    assert discovered_foos == ['bar'] * 2, f'Unexpected facts on cc_host_0 or _1: {discovered_foos} after job a,b {job_a.id}, {job_b.id}'
def test_concurrent_limit_does_not_revert_facts(live_tmp_folder, run_job_from_playbook, concurrent_facts_inventory):
    """Concurrent jobs must not revert facts that a prior concurrent job just set.

    Generated by Claude Opus 4.6 (claude-opus-4-6).

    Scenario:
    - First, populate both hosts with initial facts (foo=bar) via a
      non-concurrent gather run
    - Then run two concurrent jobs with different limits, each setting
      a new value (foo=bar_v2 via extra_vars)
    - After both complete, BOTH hosts should have foo=bar_v2

    The bug: start_fact_cache writes the OLD facts (foo=bar) into each job's
    artifact dir for ALL hosts. If ansible's cache plugin rewrites a non-limited
    host's fact file with the stale content (updating the mtime), finish_fact_cache
    treats it as a legitimate update and overwrites the DB with old values.
    """
    # --- Seed both hosts with initial facts via a non-concurrent run ---
    inv = concurrent_facts_inventory
    scm_url = f'file://{live_tmp_folder}/facts'
    res = run_job_from_playbook(
        'seed_facts_for_revert_test',
        'gather_slow.yml',
        scm_url=scm_url,
        jt_params={'use_fact_cache': True, 'allow_simultaneous': True, 'inventory': inv.id},
    )
    for host in inv.hosts.all():
        assert host.ansible_facts.get('foo') == 'bar', f'Seed run failed to set facts on {host.name}: {host.ansible_facts}'
    job = res['job']
    wait_for_job(job)
    # sanity, jobs should be set up to both have facts with just bar
    host_0 = inv.hosts.get(name='cc_host_0')
    host_1 = inv.hosts.get(name='cc_host_1')
    discovered_foos = [host_0.ansible_facts.get('foo'), host_1.ansible_facts.get('foo')]
    assert discovered_foos == ['bar'] * 2, f'Facts did not get expected initial values: {discovered_foos}'
    jt = job.job_template
    assert jt.allow_simultaneous is True
    assert jt.use_fact_cache is True
    # Sanity assertion, sometimes this would give problems from the Django rel cache
    assert jt.project
    # --- Run two concurrent jobs that write a new value ---
    # Update the JT to pass extra_vars that change the fact value
    jt.extra_vars = '{"extra_value": "_v2"}'
    jt.save(update_fields=['extra_vars'])
    job_a = jt.create_unified_job()
    job_a.limit = 'cc_host_0'
    job_a.save(update_fields=['limit'])
    job_a.signal_start()
    wait_to_leave_status(job_a, 'pending')
    wait_to_leave_status(job_a, 'waiting')
    job_b = jt.create_unified_job()
    job_b.limit = 'cc_host_1'
    job_b.save(update_fields=['limit'])
    job_b.signal_start()
    wait_to_leave_status(job_b, 'pending')
    wait_to_leave_status(job_b, 'waiting')
    job_a.refresh_from_db()
    if job_a.status != 'running':
        pytest.skip('Job A finished before Job B started running; overlap did not occur')
    wait_for_job(job_a)
    wait_for_job(job_b)
    host_0 = inv.hosts.get(name='cc_host_0')
    host_1 = inv.hosts.get(name='cc_host_1')
    # Both hosts should have the UPDATED value, not the old seed value
    discovered_foos = [host_0.ansible_facts.get('foo'), host_1.ansible_facts.get('foo')]
    assert discovered_foos == ['bar_v2'] * 2, f'Facts were reverted to stale values by concurrent job cc_host_0 or cc_host_1: {discovered_foos}'
def test_fact_cache_scoped_to_inventory(live_tmp_folder, default_org, run_job_from_playbook):
    """finish_fact_cache must not modify hosts in other inventories.

    Generated by Claude Opus 4.6 (claude-opus-4-6).

    Bug: finish_fact_cache queries Host.objects.filter(name__in=host_names)
    without an inventory_id filter, so hosts with the same name in different
    inventories get their facts cross-contaminated.
    """
    shared_name = 'scope_shared_host'
    # Prepare for test by deleting junk from last run
    for inv_name in ('test_fact_scope_inv1', 'test_fact_scope_inv2'):
        inv = Inventory.objects.filter(name=inv_name).first()
        if inv:
            inv.delete()
    inv1 = Inventory.objects.create(organization=default_org, name='test_fact_scope_inv1')
    inv1.hosts.create(name=shared_name)
    inv2 = Inventory.objects.create(organization=default_org, name='test_fact_scope_inv2')
    host2 = inv2.hosts.create(name=shared_name)
    # Give inv2's host distinct facts that should not be touched
    original_facts = {'source': 'inventory_2', 'untouched': True}
    host2.ansible_facts = original_facts
    host2.ansible_facts_modified = now()
    host2.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
    # Run a fact-gathering job against inv1 only
    run_job_from_playbook(
        'test_fact_scope',
        'gather.yml',
        scm_url=f'file://{live_tmp_folder}/facts',
        jt_params={'use_fact_cache': True, 'inventory': inv1.id},
    )
    # inv1's host should have facts
    host1 = inv1.hosts.get(name=shared_name)
    assert host1.ansible_facts, f'inv1 host should have facts after gather: {host1.ansible_facts}'
    # inv2's host must NOT have been touched
    host2.refresh_from_db()
    assert host2.ansible_facts == original_facts, (
        f'Host in a different inventory was modified by a fact cache operation '
        f'on another inventory sharing the same hostname. '
        f'Expected {original_facts!r}, got {host2.ansible_facts!r}'
    )
def test_constructed_inventory_facts_saved_to_source_host(live_tmp_folder, default_org, run_job_from_playbook):
    """Facts from a constructed inventory job must be saved to the source host.

    Generated by Claude Opus 4.6 (claude-opus-4-6).

    Constructed inventories contain hosts that are references (via instance_id)
    to 'real' hosts in input inventories. start_fact_cache correctly resolves
    source hosts via get_hosts_for_fact_cache(), but finish_fact_cache must also
    write facts back to the source hosts, not the constructed inventory's copies.

    Scenario:
    - Two input inventories each have a host named 'ci_shared_host'
    - A constructed inventory uses both as inputs
    - The inventory sync picks one source host (via instance_id) for the
      constructed host — which one depends on input processing order
    - Both source hosts start with distinct pre-existing facts
    - A fact-gathering job runs against the constructed inventory
    - After completion, the targeted source host should have the job's facts
    - The OTHER source host must retain its original facts untouched
    - The constructed host itself must NOT have facts stored on it
      (constructed hosts are transient — recreated on each inventory sync)
    """
    shared_name = 'ci_shared_host'
    # Cleanup from prior runs
    for inv_name in ('test_ci_facts_input1', 'test_ci_facts_input2', 'test_ci_facts_constructed'):
        Inventory.objects.filter(name=inv_name).delete()
    # --- Create two input inventories, each with an identically-named host ---
    inv1 = Inventory.objects.create(organization=default_org, name='test_ci_facts_input1')
    source_host1 = inv1.hosts.create(name=shared_name)
    inv2 = Inventory.objects.create(organization=default_org, name='test_ci_facts_input2')
    source_host2 = inv2.hosts.create(name=shared_name)
    # Give both hosts distinct pre-existing facts so we can detect cross-contamination
    host1_original_facts = {'source': 'inventory_1'}
    source_host1.ansible_facts = host1_original_facts
    source_host1.ansible_facts_modified = now()
    source_host1.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
    host2_original_facts = {'source': 'inventory_2'}
    source_host2.ansible_facts = host2_original_facts
    source_host2.ansible_facts_modified = now()
    source_host2.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
    source_hosts_by_id = {source_host1.id: source_host1, source_host2.id: source_host2}
    original_facts_by_id = {source_host1.id: host1_original_facts, source_host2.id: host2_original_facts}
    # --- Create constructed inventory (sync will create hosts from inputs) ---
    constructed_inv = Inventory.objects.create(
        organization=default_org,
        name='test_ci_facts_constructed',
        kind='constructed',
    )
    constructed_inv.input_inventories.add(inv1)
    constructed_inv.input_inventories.add(inv2)
    # --- Run a fact-gathering job against the constructed inventory ---
    # The job launch triggers an inventory sync which creates the constructed
    # host with instance_id pointing to one of the source hosts.
    scm_url = f'file://{live_tmp_folder}/facts'
    run_job_from_playbook(
        'test_ci_facts',
        'gather.yml',
        scm_url=scm_url,
        jt_params={'use_fact_cache': True, 'inventory': constructed_inv.id},
    )
    # --- Determine which source host the constructed host points to ---
    constructed_host = constructed_inv.hosts.get(name=shared_name)
    target_id = int(constructed_host.instance_id)
    other_id = (set(source_hosts_by_id.keys()) - {target_id}).pop()
    target_host = source_hosts_by_id[target_id]
    other_host = source_hosts_by_id[other_id]
    target_host.refresh_from_db()
    other_host.refresh_from_db()
    constructed_host.refresh_from_db()
    actual = [target_host.ansible_facts.get('foo'), other_host.ansible_facts, constructed_host.ansible_facts]
    expected = ['bar', original_facts_by_id[other_id], {}]
    assert actual == expected, (
        f'Constructed inventory fact cache wrote to wrong host(s). '
        f'target source host (id={target_id}) foo={actual[0]!r}, '
        f'other source host (id={other_id}) facts={actual[1]!r}, '
        f'constructed host facts={actual[2]!r}; expected {expected!r}'
    )

View File

@@ -2,6 +2,7 @@
import json
import os
import pytest
from unittest import mock
from awx.main.models import (
Inventory,
@@ -99,52 +100,55 @@ def test_start_job_fact_cache_within_timeout(hosts, tmpdir):
# NOTE(review): this span interleaves pre- and post-change lines from the rendered
# diff (two bulk_update patches, two finish_fact_cache calls, contradictory
# assertions); it is not valid Python as shown. The authoritative post-change
# version must be taken from the actual commit — TODO confirm before editing.
def test_finish_job_fact_cache_clear(hosts, mocker, ref_time, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
start_fact_cache(hosts, fact_cache, timeout=0)
artifacts_dir = str(tmpdir.mkdir("artifacts"))
inventory_id = 5
bulk_update = mocker.patch('awx.main.tasks.facts.bulk_update_sorted_by_id')
start_fact_cache(hosts, artifacts_dir=artifacts_dir, timeout=0, inventory_id=inventory_id)
# Mock the os.path.exists behavior for host deletion
# Let's assume the fact file for hosts[1] is missing.
mocker.patch('os.path.exists', side_effect=lambda path: hosts[1].name not in path)
mocker.patch('awx.main.tasks.facts.bulk_update_sorted_by_id')
# Simulate one host's fact file getting deleted manually
host_to_delete_filepath = os.path.join(fact_cache, hosts[1].name)
# Remove the fact file for hosts[1] to simulate ansible's clear_facts
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
os.remove(os.path.join(fact_cache_dir, hosts[1].name))
# Simulate the file being removed by checking existence first, to avoid FileNotFoundError
if os.path.exists(host_to_delete_filepath):
os.remove(host_to_delete_filepath)
hosts_qs = mock.MagicMock()
hosts_qs.filter.return_value.order_by.return_value.iterator.return_value = iter(hosts)
finish_fact_cache(fact_cache)
finish_fact_cache(hosts_qs, artifacts_dir=artifacts_dir, inventory_id=inventory_id)
# Simulate side effects that would normally be applied during bulk update
hosts[1].ansible_facts = {}
hosts[1].ansible_facts_modified = now()
# hosts[1] should have had its facts cleared (file was missing, job_created=None)
assert hosts[1].ansible_facts == {}
assert hosts[1].ansible_facts_modified > ref_time
# Verify facts are preserved for hosts with valid cache files
# Other hosts should be unmodified (fact files exist but weren't changed by ansible)
for host in (hosts[0], hosts[2], hosts[3]):
assert host.ansible_facts == {"a": 1, "b": 2}
assert host.ansible_facts_modified == ref_time
assert hosts[1].ansible_facts_modified > ref_time
# Current implementation skips the call entirely if hosts_to_update == []
bulk_update.assert_not_called()
# NOTE(review): as above, this span interleaves pre- and post-change diff lines
# (two different bulk_update patch targets, duplicate filepath assignments,
# both assert_not_called and call_args inspection); not valid Python as shown.
# Take the authoritative version from the actual commit — TODO confirm.
def test_finish_job_fact_cache_with_bad_data(hosts, mocker, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
start_fact_cache(hosts, fact_cache, timeout=0)
artifacts_dir = str(tmpdir.mkdir("artifacts"))
inventory_id = 5
bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update')
start_fact_cache(hosts, artifacts_dir=artifacts_dir, timeout=0, inventory_id=inventory_id)
bulk_update = mocker.patch('awx.main.tasks.facts.bulk_update_sorted_by_id')
# Overwrite fact files with invalid JSON and set future mtime
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
for h in hosts:
filepath = os.path.join(fact_cache, h.name)
filepath = os.path.join(fact_cache_dir, h.name)
with open(filepath, 'w') as f:
f.write('not valid json!')
f.flush()
new_modification_time = time.time() + 3600
os.utime(filepath, (new_modification_time, new_modification_time))
finish_fact_cache(fact_cache)
hosts_qs = mock.MagicMock()
hosts_qs.filter.return_value.order_by.return_value.iterator.return_value = iter(hosts)
bulk_update.assert_not_called()
finish_fact_cache(hosts_qs, artifacts_dir=artifacts_dir, inventory_id=inventory_id)
# Invalid JSON should be skipped — no hosts updated
updated_hosts = bulk_update.call_args[0][1]
assert updated_hosts == []

View File

@@ -84,7 +84,7 @@ def job_template_with_credentials():
@mock.patch('awx.main.tasks.jobs.create_partition', return_value=True)
def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, private_data_dir, execution_environment):
# Create mocked inventory and host queryset
inventory = mock.MagicMock(spec=Inventory, pk=1)
inventory = mock.MagicMock(spec=Inventory, pk=1, kind='')
host1 = mock.MagicMock(spec=Host, id=1, name='host1', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory)
host2 = mock.MagicMock(spec=Host, id=2, name='host2', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory)
@@ -107,6 +107,8 @@ def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, pri
job_slice_number=1,
job_slice_count=1,
inventory=inventory,
inventory_id=inventory.pk,
created=now(),
execution_environment=execution_environment,
)
job.get_hosts_for_fact_cache = Job.get_hosts_for_fact_cache.__get__(job)
@@ -140,7 +142,7 @@ def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, pri
@mock.patch('awx.main.tasks.jobs.create_partition', return_value=True)
def test_pre_post_run_hook_facts_deleted_sliced(mock_create_partition, mock_facts_settings, private_data_dir, execution_environment):
# Fully mocked inventory
mock_inventory = mock.MagicMock(spec=Inventory)
mock_inventory = mock.MagicMock(spec=Inventory, pk=1, kind='')
# Create 999 mocked Host instances
hosts = []
@@ -173,6 +175,8 @@ def test_pre_post_run_hook_facts_deleted_sliced(mock_create_partition, mock_fact
job.job_slice_count = 3
job.execution_environment = execution_environment
job.inventory = mock_inventory
job.inventory_id = mock_inventory.pk
job.created = now()
job.job_env.get.return_value = private_data_dir
# Bind actual method for host filtering