mirror of
https://github.com/ansible/awx.git
synced 2026-03-06 03:01:06 -03:30
AAP-65054 Fix bugs where concurrent jobs would clear facts of unrelated hosts (#16318)
* Add new tests for bug saving concurrent facts * Fix first bug and improve tests * Fix new bug where concurrent job clears facts from other job in unwanted way * minor test fixes * Add in missing playbook * Fix host reference for constructed inventory * Increase speed for concurrent fact tests * Make test a bit faster * Fix linters * Add some functional tests * Remove the sanity test * Agent markers added * Address SonarCloud * Do backdating method, resolving stricter assertions * Address coderabbit comments * Address review comment with qs only method * Delete missed sleep statement * Add more coverage
This commit is contained in:
@@ -845,6 +845,21 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
|
||||
def get_notification_friendly_name(self):
|
||||
return "Job"
|
||||
|
||||
def get_source_hosts_for_constructed_inventory(self):
|
||||
"""Return a QuerySet of the source (input inventory) hosts for a constructed inventory.
|
||||
|
||||
Constructed inventory hosts have an instance_id pointing to the real
|
||||
host in the input inventory. This resolves those references and returns
|
||||
a proper QuerySet (never a list), suitable for use with finish_fact_cache.
|
||||
"""
|
||||
Host = JobHostSummary._meta.get_field('host').related_model
|
||||
if not self.inventory_id:
|
||||
return Host.objects.none()
|
||||
id_field = Host._meta.get_field('id')
|
||||
return Host.objects.filter(id__in=self.inventory.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field))).only(
|
||||
*HOST_FACTS_FIELDS
|
||||
)
|
||||
|
||||
def get_hosts_for_fact_cache(self):
|
||||
"""
|
||||
Builds the queryset to use for writing or finalizing the fact cache
|
||||
@@ -852,17 +867,15 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
|
||||
For constructed inventories, that means the original (input inventory) hosts
|
||||
when slicing, that means only returning hosts in that slice
|
||||
"""
|
||||
Host = JobHostSummary._meta.get_field('host').related_model
|
||||
if not self.inventory_id:
|
||||
Host = JobHostSummary._meta.get_field('host').related_model
|
||||
return Host.objects.none()
|
||||
|
||||
if self.inventory.kind == 'constructed':
|
||||
id_field = Host._meta.get_field('id')
|
||||
host_qs = Host.objects.filter(id__in=self.inventory.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field)))
|
||||
host_qs = self.get_source_hosts_for_constructed_inventory()
|
||||
else:
|
||||
host_qs = self.inventory.hosts
|
||||
host_qs = self.inventory.hosts.only(*HOST_FACTS_FIELDS)
|
||||
|
||||
host_qs = host_qs.only(*HOST_FACTS_FIELDS)
|
||||
host_qs = self.inventory.get_sliced_hosts(host_qs, self.job_slice_number, self.job_slice_count)
|
||||
return host_qs
|
||||
|
||||
|
||||
@@ -51,7 +51,14 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
|
||||
os.chmod(f.name, 0o600)
|
||||
json.dump(host.ansible_facts, f)
|
||||
log_data['written_ct'] += 1
|
||||
last_write_time = os.path.getmtime(filepath)
|
||||
# Backdate the file by 2 seconds so finish_fact_cache can reliably
|
||||
# distinguish these reference files from files updated by ansible.
|
||||
# This guarantees fact file mtime < summary file mtime even with
|
||||
# zipfile's 2-second timestamp rounding during artifact transfer.
|
||||
mtime = os.path.getmtime(filepath)
|
||||
backdated = mtime - 2
|
||||
os.utime(filepath, (backdated, backdated))
|
||||
last_write_time = backdated
|
||||
except IOError:
|
||||
logger.error(f'facts for host {smart_str(host.name)} could not be cached')
|
||||
continue
|
||||
@@ -74,7 +81,7 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
|
||||
msg='Inventory {inventory_id} host facts: updated {updated_ct}, cleared {cleared_ct}, unchanged {unmodified_ct}, took {delta:.3f} s',
|
||||
add_log_data=True,
|
||||
)
|
||||
def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=None):
|
||||
def finish_fact_cache(host_qs, artifacts_dir, job_id=None, inventory_id=None, job_created=None, log_data=None):
|
||||
log_data = log_data or {}
|
||||
log_data['inventory_id'] = inventory_id
|
||||
log_data['updated_ct'] = 0
|
||||
@@ -95,7 +102,7 @@ def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=No
|
||||
return
|
||||
|
||||
host_names = summary.get('hosts_cached', [])
|
||||
hosts_cached = Host.objects.filter(name__in=host_names).order_by('id').iterator()
|
||||
hosts_cached = host_qs.filter(name__in=host_names).order_by('id').iterator()
|
||||
# Path where individual fact files were written
|
||||
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
|
||||
hosts_to_update = []
|
||||
@@ -138,14 +145,25 @@ def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=No
|
||||
else:
|
||||
# if the file goes missing, ansible removed it (likely via clear_facts)
|
||||
# if the file goes missing, but the host has not started facts, then we should not clear the facts
|
||||
host.ansible_facts = {}
|
||||
host.ansible_facts_modified = now()
|
||||
hosts_to_update.append(host)
|
||||
logger.info(f'Facts cleared for inventory {smart_str(host.inventory.name)} host {smart_str(host.name)}')
|
||||
log_data['cleared_ct'] += 1
|
||||
if job_created and host.ansible_facts_modified and host.ansible_facts_modified > job_created:
|
||||
logger.warning(
|
||||
f'Skipping fact clear for host {smart_str(host.name)} in job {job_id} '
|
||||
f'inventory {inventory_id}: host ansible_facts_modified '
|
||||
f'({host.ansible_facts_modified.isoformat()}) is after this job\'s '
|
||||
f'created time ({job_created.isoformat()}). '
|
||||
f'A concurrent job likely updated this host\'s facts while this job was running.'
|
||||
)
|
||||
log_data['unmodified_ct'] += 1
|
||||
else:
|
||||
host.ansible_facts = {}
|
||||
host.ansible_facts_modified = now()
|
||||
hosts_to_update.append(host)
|
||||
logger.info(f'Facts cleared for inventory {smart_str(host.inventory.name)} host {smart_str(host.name)}')
|
||||
log_data['cleared_ct'] += 1
|
||||
|
||||
if len(hosts_to_update) >= 100:
|
||||
bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified'])
|
||||
hosts_to_update = []
|
||||
|
||||
bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified'])
|
||||
logger.debug(f'Updated {log_data["updated_ct"]} host facts for inventory {inventory_id} in job {job_id}')
|
||||
|
||||
@@ -1302,10 +1302,16 @@ class RunJob(SourceControlMixin, BaseTask):
|
||||
return
|
||||
if self.should_use_fact_cache() and self.runner_callback.artifacts_processed:
|
||||
job.log_lifecycle("finish_job_fact_cache")
|
||||
if job.inventory.kind == 'constructed':
|
||||
hosts_qs = job.get_source_hosts_for_constructed_inventory()
|
||||
else:
|
||||
hosts_qs = job.inventory.hosts
|
||||
finish_fact_cache(
|
||||
hosts_qs,
|
||||
artifacts_dir=os.path.join(private_data_dir, 'artifacts', str(job.id)),
|
||||
job_id=job.id,
|
||||
inventory_id=job.inventory_id,
|
||||
job_created=job.created,
|
||||
)
|
||||
|
||||
def final_run_hook(self, job, status, private_data_dir):
|
||||
|
||||
21
awx/main/tests/data/projects/facts/gather_slow.yml
Normal file
21
awx/main/tests/data/projects/facts/gather_slow.yml
Normal file
@@ -0,0 +1,21 @@
|
||||
---
|
||||
# Generated by Claude Opus 4.6 (claude-opus-4-6).
|
||||
|
||||
- hosts: all
|
||||
vars:
|
||||
extra_value: ""
|
||||
gather_facts: false
|
||||
connection: local
|
||||
tasks:
|
||||
- name: set a custom fact
|
||||
set_fact:
|
||||
foo: "bar{{ extra_value }}"
|
||||
bar:
|
||||
a:
|
||||
b:
|
||||
- "c"
|
||||
- "d"
|
||||
cacheable: true
|
||||
- name: sleep to create overlap window for concurrent job testing
|
||||
wait_for:
|
||||
timeout: 2
|
||||
@@ -1,6 +1,6 @@
|
||||
import pytest
|
||||
|
||||
from awx.main.models import JobTemplate, Job, JobHostSummary, WorkflowJob, Inventory, Project, Organization
|
||||
from awx.main.models import JobTemplate, Job, JobHostSummary, WorkflowJob, Inventory, Host, Project, Organization
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@@ -87,3 +87,47 @@ class TestSlicingModels:
|
||||
|
||||
unified_job = job_template.create_unified_job(job_slice_count=2)
|
||||
assert isinstance(unified_job, Job)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestGetSourceHostsForConstructedInventory:
|
||||
"""Tests for Job.get_source_hosts_for_constructed_inventory"""
|
||||
|
||||
def test_returns_source_hosts_via_instance_id(self):
|
||||
"""Constructed hosts with instance_id pointing to source hosts are resolved correctly."""
|
||||
org = Organization.objects.create(name='test-org')
|
||||
inv_input = Inventory.objects.create(organization=org, name='input-inv')
|
||||
source_host1 = inv_input.hosts.create(name='host1')
|
||||
source_host2 = inv_input.hosts.create(name='host2')
|
||||
|
||||
inv_constructed = Inventory.objects.create(organization=org, name='constructed-inv', kind='constructed')
|
||||
inv_constructed.input_inventories.add(inv_input)
|
||||
Host.objects.create(inventory=inv_constructed, name='host1', instance_id=str(source_host1.id))
|
||||
Host.objects.create(inventory=inv_constructed, name='host2', instance_id=str(source_host2.id))
|
||||
|
||||
job = Job.objects.create(name='test-job', inventory=inv_constructed)
|
||||
result = job.get_source_hosts_for_constructed_inventory()
|
||||
|
||||
assert set(result.values_list('id', flat=True)) == {source_host1.id, source_host2.id}
|
||||
|
||||
def test_no_inventory_returns_empty(self):
|
||||
"""A job with no inventory returns an empty queryset."""
|
||||
job = Job.objects.create(name='test-job')
|
||||
result = job.get_source_hosts_for_constructed_inventory()
|
||||
assert result.count() == 0
|
||||
|
||||
def test_ignores_hosts_without_instance_id(self):
|
||||
"""Hosts with empty instance_id are excluded from the result."""
|
||||
org = Organization.objects.create(name='test-org')
|
||||
inv_input = Inventory.objects.create(organization=org, name='input-inv')
|
||||
source_host = inv_input.hosts.create(name='host1')
|
||||
|
||||
inv_constructed = Inventory.objects.create(organization=org, name='constructed-inv', kind='constructed')
|
||||
inv_constructed.input_inventories.add(inv_input)
|
||||
Host.objects.create(inventory=inv_constructed, name='host1', instance_id=str(source_host.id))
|
||||
Host.objects.create(inventory=inv_constructed, name='host-no-ref', instance_id='')
|
||||
|
||||
job = Job.objects.create(name='test-job', inventory=inv_constructed)
|
||||
result = job.get_source_hosts_for_constructed_inventory()
|
||||
|
||||
assert list(result.values_list('id', flat=True)) == [source_host.id]
|
||||
|
||||
188
awx/main/tests/functional/tasks/test_fact_cache.py
Normal file
188
awx/main/tests/functional/tasks/test_fact_cache.py
Normal file
@@ -0,0 +1,188 @@
|
||||
"""Functional tests for start_fact_cache / finish_fact_cache.
|
||||
|
||||
These tests use real database objects (via pytest-django) and real files
|
||||
on disk, but do not launch jobs or subprocesses. Fact files are written
|
||||
by start_fact_cache and then manipulated to simulate ansible output
|
||||
before calling finish_fact_cache.
|
||||
|
||||
Generated by Claude Opus 4.6 (claude-opus-4-6).
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from datetime import timedelta
|
||||
|
||||
import pytest
|
||||
|
||||
from django.utils.timezone import now
|
||||
|
||||
from awx.main.models import Host, Inventory
|
||||
from awx.main.tasks.facts import start_fact_cache, finish_fact_cache
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def artifacts_dir(tmp_path):
|
||||
d = tmp_path / 'artifacts'
|
||||
d.mkdir()
|
||||
return str(d)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestFinishFactCacheScoping:
|
||||
"""finish_fact_cache must only update hosts matched by the provided queryset."""
|
||||
|
||||
def test_same_hostname_different_inventories(self, organization, artifacts_dir):
|
||||
"""Two inventories share a hostname; only the targeted one should be updated.
|
||||
|
||||
Generated by Claude Opus 4.6 (claude-opus-4-6).
|
||||
"""
|
||||
inv1 = Inventory.objects.create(organization=organization, name='scope-inv1')
|
||||
inv2 = Inventory.objects.create(organization=organization, name='scope-inv2')
|
||||
|
||||
host1 = inv1.hosts.create(name='shared')
|
||||
host2 = inv2.hosts.create(name='shared')
|
||||
|
||||
# Give both hosts initial facts
|
||||
for h in (host1, host2):
|
||||
h.ansible_facts = {'original': True}
|
||||
h.ansible_facts_modified = now()
|
||||
h.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
|
||||
|
||||
# start_fact_cache writes reference files for inv1's hosts
|
||||
start_fact_cache(inv1.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv1.id)
|
||||
|
||||
# Simulate ansible writing new facts for 'shared'
|
||||
fact_file = os.path.join(artifacts_dir, 'fact_cache', 'shared')
|
||||
future = time.time() + 60
|
||||
with open(fact_file, 'w') as f:
|
||||
json.dump({'updated': True}, f)
|
||||
os.utime(fact_file, (future, future))
|
||||
|
||||
# finish with inv1's hosts as the queryset
|
||||
finish_fact_cache(inv1.hosts, artifacts_dir=artifacts_dir, inventory_id=inv1.id)
|
||||
|
||||
host1.refresh_from_db()
|
||||
host2.refresh_from_db()
|
||||
|
||||
assert host1.ansible_facts == {'updated': True}
|
||||
assert host2.ansible_facts == {'original': True}, 'Host in a different inventory was modified despite not being in the queryset'
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestFinishFactCacheConcurrentProtection:
|
||||
"""finish_fact_cache must not clear facts that a concurrent job updated."""
|
||||
|
||||
def test_skip_clear_when_facts_modified_after_job_created(self, organization, artifacts_dir):
|
||||
"""If a host's facts were updated after the job was created, do not clear them.
|
||||
|
||||
Generated by Claude Opus 4.6 (claude-opus-4-6).
|
||||
"""
|
||||
inv = Inventory.objects.create(organization=organization, name='concurrent-inv')
|
||||
host = inv.hosts.create(name='target')
|
||||
|
||||
job_created = now() - timedelta(minutes=5)
|
||||
|
||||
# start_fact_cache writes a reference file (host has no facts yet → no file written)
|
||||
start_fact_cache(inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv.id)
|
||||
|
||||
# Simulate a concurrent job updating this host's facts AFTER our job was created
|
||||
host.ansible_facts = {'from_concurrent_job': True}
|
||||
host.ansible_facts_modified = now()
|
||||
host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
|
||||
|
||||
# The fact file is missing (our job's ansible didn't target this host),
|
||||
# which would normally trigger a clear. But the concurrent protection
|
||||
# should skip it because ansible_facts_modified > job_created.
|
||||
fact_file = os.path.join(artifacts_dir, 'fact_cache', host.name)
|
||||
if os.path.exists(fact_file):
|
||||
os.remove(fact_file)
|
||||
|
||||
finish_fact_cache(
|
||||
inv.hosts,
|
||||
artifacts_dir=artifacts_dir,
|
||||
inventory_id=inv.id,
|
||||
job_created=job_created,
|
||||
)
|
||||
|
||||
host.refresh_from_db()
|
||||
assert host.ansible_facts == {'from_concurrent_job': True}, 'Facts set by a concurrent job were cleared despite ansible_facts_modified > job_created'
|
||||
|
||||
def test_clear_when_facts_predate_job(self, organization, artifacts_dir):
|
||||
"""If facts predate the job, a missing file should still clear them.
|
||||
|
||||
Generated by Claude Opus 4.6 (claude-opus-4-6).
|
||||
"""
|
||||
inv = Inventory.objects.create(organization=organization, name='clear-inv')
|
||||
host = inv.hosts.create(name='stale')
|
||||
|
||||
old_time = now() - timedelta(hours=1)
|
||||
host.ansible_facts = {'stale': True}
|
||||
host.ansible_facts_modified = old_time
|
||||
host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
|
||||
|
||||
job_created = now() - timedelta(minutes=5)
|
||||
|
||||
start_fact_cache(inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv.id)
|
||||
|
||||
# Remove the fact file to simulate ansible's clear_facts
|
||||
os.remove(os.path.join(artifacts_dir, 'fact_cache', host.name))
|
||||
|
||||
finish_fact_cache(
|
||||
inv.hosts,
|
||||
artifacts_dir=artifacts_dir,
|
||||
inventory_id=inv.id,
|
||||
job_created=job_created,
|
||||
)
|
||||
|
||||
host.refresh_from_db()
|
||||
assert host.ansible_facts == {}, 'Stale facts should have been cleared when the fact file is missing ' 'and ansible_facts_modified predates job_created'
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestConstructedInventoryFactCache:
|
||||
"""finish_fact_cache with a constructed inventory queryset must target source hosts."""
|
||||
|
||||
def test_facts_resolve_to_source_host(self, organization, artifacts_dir):
|
||||
"""Facts must be written to the source host, not the constructed copy.
|
||||
|
||||
Generated by Claude Opus 4.6 (claude-opus-4-6).
|
||||
"""
|
||||
from django.db.models.functions import Cast
|
||||
|
||||
inv_input = Inventory.objects.create(organization=organization, name='ci-input')
|
||||
source_host = inv_input.hosts.create(name='webserver')
|
||||
|
||||
inv_constructed = Inventory.objects.create(organization=organization, name='ci-constructed', kind='constructed')
|
||||
inv_constructed.input_inventories.add(inv_input)
|
||||
constructed_host = Host.objects.create(
|
||||
inventory=inv_constructed,
|
||||
name='webserver',
|
||||
instance_id=str(source_host.id),
|
||||
)
|
||||
|
||||
# Build the same queryset that get_hosts_for_fact_cache uses
|
||||
id_field = Host._meta.get_field('id')
|
||||
source_qs = Host.objects.filter(id__in=inv_constructed.hosts.exclude(instance_id='').values_list(Cast('instance_id', output_field=id_field)))
|
||||
|
||||
# Give the source host initial facts so start_fact_cache writes a file
|
||||
source_host.ansible_facts = {'role': 'web'}
|
||||
source_host.ansible_facts_modified = now()
|
||||
source_host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
|
||||
|
||||
start_fact_cache(source_qs, artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv_constructed.id)
|
||||
|
||||
# Simulate ansible writing updated facts
|
||||
fact_file = os.path.join(artifacts_dir, 'fact_cache', 'webserver')
|
||||
future = time.time() + 60
|
||||
with open(fact_file, 'w') as f:
|
||||
json.dump({'role': 'web', 'deployed': True}, f)
|
||||
os.utime(fact_file, (future, future))
|
||||
|
||||
finish_fact_cache(source_qs, artifacts_dir=artifacts_dir, inventory_id=inv_constructed.id)
|
||||
|
||||
source_host.refresh_from_db()
|
||||
constructed_host.refresh_from_db()
|
||||
|
||||
assert source_host.ansible_facts == {'role': 'web', 'deployed': True}
|
||||
assert not constructed_host.ansible_facts, f'Facts were stored on the constructed host: {constructed_host.ansible_facts!r}'
|
||||
351
awx/main/tests/live/tests/test_concurrent_fact_cache.py
Normal file
351
awx/main/tests/live/tests/test_concurrent_fact_cache.py
Normal file
@@ -0,0 +1,351 @@
|
||||
"""Tests for concurrent fact caching with --limit.
|
||||
|
||||
Reproduces bugs where concurrent jobs targeting different hosts via --limit
|
||||
incorrectly modify (clear or revert) facts for hosts outside their limit.
|
||||
|
||||
Customer report: concurrent jobs on the same job template with different limits
|
||||
cause facts set by an earlier-finishing job to be rolled back when the
|
||||
later-finishing job completes.
|
||||
|
||||
See: https://github.com/jritter/concurrent-aap-fact-caching
|
||||
|
||||
Generated by Claude Opus 4.6 (claude-opus-4-6).
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
|
||||
from django.utils.timezone import now
|
||||
|
||||
from awx.api.versioning import reverse
|
||||
from awx.main.models import Inventory, JobTemplate
|
||||
from awx.main.tests.live.tests.conftest import wait_for_job, wait_to_leave_status
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def concurrent_facts_inventory(default_org):
|
||||
"""Inventory with two hosts for concurrent fact cache testing."""
|
||||
inv_name = 'test_concurrent_fact_cache'
|
||||
Inventory.objects.filter(organization=default_org, name=inv_name).delete()
|
||||
inv = Inventory.objects.create(organization=default_org, name=inv_name)
|
||||
inv.hosts.create(name='cc_host_0')
|
||||
inv.hosts.create(name='cc_host_1')
|
||||
return inv
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def concurrent_facts_jt(concurrent_facts_inventory, live_tmp_folder, post, admin, project_factory):
|
||||
"""Job template configured for concurrent fact-cached runs."""
|
||||
proj = project_factory(scm_url=f'file://{live_tmp_folder}/facts')
|
||||
if proj.current_job:
|
||||
wait_for_job(proj.current_job)
|
||||
assert 'gather_slow.yml' in proj.playbooks, f'gather_slow.yml not in {proj.playbooks}'
|
||||
|
||||
jt_name = 'test_concurrent_fact_cache JT'
|
||||
existing_jt = JobTemplate.objects.filter(name=jt_name).first()
|
||||
if existing_jt:
|
||||
existing_jt.delete()
|
||||
result = post(
|
||||
reverse('api:job_template_list'),
|
||||
{
|
||||
'name': jt_name,
|
||||
'project': proj.id,
|
||||
'playbook': 'gather_slow.yml',
|
||||
'inventory': concurrent_facts_inventory.id,
|
||||
'use_fact_cache': True,
|
||||
'allow_simultaneous': True,
|
||||
},
|
||||
admin,
|
||||
expect=201,
|
||||
)
|
||||
return JobTemplate.objects.get(id=result.data['id'])
|
||||
|
||||
|
||||
def test_concurrent_limit_does_not_clear_facts(concurrent_facts_inventory, concurrent_facts_jt):
|
||||
"""Concurrent jobs with different --limit must not clear each other's facts.
|
||||
|
||||
Generated by Claude Opus 4.6 (claude-opus-4-6).
|
||||
|
||||
Scenario:
|
||||
- Inventory has cc_host_0 and cc_host_1, neither has prior facts
|
||||
- Job A runs gather_slow.yml with limit=cc_host_0
|
||||
- While Job A is still running (sleeping), Job B launches with limit=cc_host_1
|
||||
- Both jobs set cacheable facts, but only for their respective limited host
|
||||
- After both complete, BOTH hosts should have populated facts
|
||||
|
||||
The bug: get_hosts_for_fact_cache() returns ALL inventory hosts regardless
|
||||
of --limit. start_fact_cache records them all in hosts_cached but writes
|
||||
no fact files (no prior facts). When the later-finishing job runs
|
||||
finish_fact_cache, it sees a missing fact file for the other job's host
|
||||
and clears that host's facts.
|
||||
"""
|
||||
inv = concurrent_facts_inventory
|
||||
jt = concurrent_facts_jt
|
||||
|
||||
# Launch Job A targeting cc_host_0
|
||||
job_a = jt.create_unified_job()
|
||||
job_a.limit = 'cc_host_0'
|
||||
job_a.save(update_fields=['limit'])
|
||||
job_a.signal_start()
|
||||
|
||||
# Wait for Job A to reach running (it will sleep inside the playbook)
|
||||
wait_to_leave_status(job_a, 'pending')
|
||||
wait_to_leave_status(job_a, 'waiting')
|
||||
logger.info(f'Job A (id={job_a.id}) is now running with limit=cc_host_0')
|
||||
|
||||
# Launch Job B targeting cc_host_1 while Job A is still running
|
||||
job_b = jt.create_unified_job()
|
||||
job_b.limit = 'cc_host_1'
|
||||
job_b.save(update_fields=['limit'])
|
||||
job_b.signal_start()
|
||||
|
||||
# Verify that Job A is still running when Job B starts,
|
||||
# otherwise the overlap that triggers the bug did not happen.
|
||||
wait_to_leave_status(job_b, 'pending')
|
||||
wait_to_leave_status(job_b, 'waiting')
|
||||
job_a.refresh_from_db()
|
||||
if job_a.status != 'running':
|
||||
pytest.skip('Job A finished before Job B started running; overlap did not occur')
|
||||
logger.info(f'Job B (id={job_b.id}) is now running with limit=cc_host_1 (concurrent with Job A)')
|
||||
|
||||
# Wait for both to complete
|
||||
wait_for_job(job_a)
|
||||
wait_for_job(job_b)
|
||||
|
||||
# Verify facts survived concurrent execution
|
||||
host_0 = inv.hosts.get(name='cc_host_0')
|
||||
host_1 = inv.hosts.get(name='cc_host_1')
|
||||
|
||||
# sanity
|
||||
job_a.refresh_from_db()
|
||||
job_b.refresh_from_db()
|
||||
assert job_a.limit == "cc_host_0"
|
||||
assert job_b.limit == "cc_host_1"
|
||||
|
||||
discovered_foos = [host_0.ansible_facts.get('foo'), host_1.ansible_facts.get('foo')]
|
||||
assert discovered_foos == ['bar'] * 2, f'Unexpected facts on cc_host_0 or _1: {discovered_foos} after job a,b {job_a.id}, {job_b.id}'
|
||||
|
||||
|
||||
def test_concurrent_limit_does_not_revert_facts(live_tmp_folder, run_job_from_playbook, concurrent_facts_inventory):
|
||||
"""Concurrent jobs must not revert facts that a prior concurrent job just set.
|
||||
|
||||
Generated by Claude Opus 4.6 (claude-opus-4-6).
|
||||
|
||||
Scenario:
|
||||
- First, populate both hosts with initial facts (foo=bar) via a
|
||||
non-concurrent gather run
|
||||
- Then run two concurrent jobs with different limits, each setting
|
||||
a new value (foo=bar_v2 via extra_vars)
|
||||
- After both complete, BOTH hosts should have foo=bar_v2
|
||||
|
||||
The bug: start_fact_cache writes the OLD facts (foo=bar) into each job's
|
||||
artifact dir for ALL hosts. If ansible's cache plugin rewrites a non-limited
|
||||
host's fact file with the stale content (updating the mtime), finish_fact_cache
|
||||
treats it as a legitimate update and overwrites the DB with old values.
|
||||
"""
|
||||
# --- Seed both hosts with initial facts via a non-concurrent run ---
|
||||
inv = concurrent_facts_inventory
|
||||
|
||||
scm_url = f'file://{live_tmp_folder}/facts'
|
||||
res = run_job_from_playbook(
|
||||
'seed_facts_for_revert_test',
|
||||
'gather_slow.yml',
|
||||
scm_url=scm_url,
|
||||
jt_params={'use_fact_cache': True, 'allow_simultaneous': True, 'inventory': inv.id},
|
||||
)
|
||||
for host in inv.hosts.all():
|
||||
assert host.ansible_facts.get('foo') == 'bar', f'Seed run failed to set facts on {host.name}: {host.ansible_facts}'
|
||||
|
||||
job = res['job']
|
||||
wait_for_job(job)
|
||||
|
||||
# sanity, jobs should be set up to both have facts with just bar
|
||||
host_0 = inv.hosts.get(name='cc_host_0')
|
||||
host_1 = inv.hosts.get(name='cc_host_1')
|
||||
discovered_foos = [host_0.ansible_facts.get('foo'), host_1.ansible_facts.get('foo')]
|
||||
assert discovered_foos == ['bar'] * 2, f'Facts did not get expected initial values: {discovered_foos}'
|
||||
|
||||
jt = job.job_template
|
||||
assert jt.allow_simultaneous is True
|
||||
assert jt.use_fact_cache is True
|
||||
|
||||
# Sanity assertion, sometimes this would give problems from the Django rel cache
|
||||
assert jt.project
|
||||
|
||||
# --- Run two concurrent jobs that write a new value ---
|
||||
# Update the JT to pass extra_vars that change the fact value
|
||||
jt.extra_vars = '{"extra_value": "_v2"}'
|
||||
jt.save(update_fields=['extra_vars'])
|
||||
|
||||
job_a = jt.create_unified_job()
|
||||
job_a.limit = 'cc_host_0'
|
||||
job_a.save(update_fields=['limit'])
|
||||
job_a.signal_start()
|
||||
|
||||
wait_to_leave_status(job_a, 'pending')
|
||||
wait_to_leave_status(job_a, 'waiting')
|
||||
|
||||
job_b = jt.create_unified_job()
|
||||
job_b.limit = 'cc_host_1'
|
||||
job_b.save(update_fields=['limit'])
|
||||
job_b.signal_start()
|
||||
|
||||
wait_to_leave_status(job_b, 'pending')
|
||||
wait_to_leave_status(job_b, 'waiting')
|
||||
job_a.refresh_from_db()
|
||||
if job_a.status != 'running':
|
||||
pytest.skip('Job A finished before Job B started running; overlap did not occur')
|
||||
|
||||
wait_for_job(job_a)
|
||||
wait_for_job(job_b)
|
||||
|
||||
host_0 = inv.hosts.get(name='cc_host_0')
|
||||
host_1 = inv.hosts.get(name='cc_host_1')
|
||||
|
||||
# Both hosts should have the UPDATED value, not the old seed value
|
||||
discovered_foos = [host_0.ansible_facts.get('foo'), host_1.ansible_facts.get('foo')]
|
||||
assert discovered_foos == ['bar_v2'] * 2, f'Facts were reverted to stale values by concurrent job cc_host_0 or cc_host_1: {discovered_foos}'
|
||||
|
||||
|
||||
def test_fact_cache_scoped_to_inventory(live_tmp_folder, default_org, run_job_from_playbook):
|
||||
"""finish_fact_cache must not modify hosts in other inventories.
|
||||
|
||||
Generated by Claude Opus 4.6 (claude-opus-4-6).
|
||||
|
||||
Bug: finish_fact_cache queries Host.objects.filter(name__in=host_names)
|
||||
without an inventory_id filter, so hosts with the same name in different
|
||||
inventories get their facts cross-contaminated.
|
||||
"""
|
||||
shared_name = 'scope_shared_host'
|
||||
|
||||
# Prepare for test by deleting junk from last run
|
||||
for inv_name in ('test_fact_scope_inv1', 'test_fact_scope_inv2'):
|
||||
inv = Inventory.objects.filter(name=inv_name).first()
|
||||
if inv:
|
||||
inv.delete()
|
||||
|
||||
inv1 = Inventory.objects.create(organization=default_org, name='test_fact_scope_inv1')
|
||||
inv1.hosts.create(name=shared_name)
|
||||
|
||||
inv2 = Inventory.objects.create(organization=default_org, name='test_fact_scope_inv2')
|
||||
host2 = inv2.hosts.create(name=shared_name)
|
||||
|
||||
# Give inv2's host distinct facts that should not be touched
|
||||
original_facts = {'source': 'inventory_2', 'untouched': True}
|
||||
host2.ansible_facts = original_facts
|
||||
host2.ansible_facts_modified = now()
|
||||
host2.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
|
||||
|
||||
# Run a fact-gathering job against inv1 only
|
||||
run_job_from_playbook(
|
||||
'test_fact_scope',
|
||||
'gather.yml',
|
||||
scm_url=f'file://{live_tmp_folder}/facts',
|
||||
jt_params={'use_fact_cache': True, 'inventory': inv1.id},
|
||||
)
|
||||
|
||||
# inv1's host should have facts
|
||||
host1 = inv1.hosts.get(name=shared_name)
|
||||
assert host1.ansible_facts, f'inv1 host should have facts after gather: {host1.ansible_facts}'
|
||||
|
||||
# inv2's host must NOT have been touched
|
||||
host2.refresh_from_db()
|
||||
assert host2.ansible_facts == original_facts, (
|
||||
f'Host in a different inventory was modified by a fact cache operation '
|
||||
f'on another inventory sharing the same hostname. '
|
||||
f'Expected {original_facts!r}, got {host2.ansible_facts!r}'
|
||||
)
|
||||
|
||||
|
||||
def test_constructed_inventory_facts_saved_to_source_host(live_tmp_folder, default_org, run_job_from_playbook):
    """Facts from a constructed inventory job must be saved to the source host.

    Generated by Claude Opus 4.6 (claude-opus-4-6).

    Constructed inventories contain hosts that are references (via instance_id)
    to 'real' hosts in input inventories. start_fact_cache correctly resolves
    source hosts via get_hosts_for_fact_cache(), but finish_fact_cache must also
    write facts back to the source hosts, not the constructed inventory's copies.

    Scenario:
    - Two input inventories each have a host named 'ci_shared_host'
    - A constructed inventory uses both as inputs
    - The inventory sync picks one source host (via instance_id) for the
      constructed host — which one depends on input processing order
    - Both source hosts start with distinct pre-existing facts
    - A fact-gathering job runs against the constructed inventory
    - After completion, the targeted source host should have the job's facts
    - The OTHER source host must retain its original facts untouched
    - The constructed host itself must NOT have facts stored on it
      (constructed hosts are transient — recreated on each inventory sync)
    """
    shared_name = 'ci_shared_host'

    # Cleanup from prior runs
    for inv_name in ('test_ci_facts_input1', 'test_ci_facts_input2', 'test_ci_facts_constructed'):
        Inventory.objects.filter(name=inv_name).delete()

    # --- Create two input inventories, each with an identically-named host ---
    inv1 = Inventory.objects.create(organization=default_org, name='test_ci_facts_input1')
    source_host1 = inv1.hosts.create(name=shared_name)

    inv2 = Inventory.objects.create(organization=default_org, name='test_ci_facts_input2')
    source_host2 = inv2.hosts.create(name=shared_name)

    # Give both hosts distinct pre-existing facts so we can detect cross-contamination
    host1_original_facts = {'source': 'inventory_1'}
    source_host1.ansible_facts = host1_original_facts
    source_host1.ansible_facts_modified = now()
    source_host1.save(update_fields=['ansible_facts', 'ansible_facts_modified'])

    host2_original_facts = {'source': 'inventory_2'}
    source_host2.ansible_facts = host2_original_facts
    source_host2.ansible_facts_modified = now()
    source_host2.save(update_fields=['ansible_facts', 'ansible_facts_modified'])

    source_hosts_by_id = {source_host1.id: source_host1, source_host2.id: source_host2}
    original_facts_by_id = {source_host1.id: host1_original_facts, source_host2.id: host2_original_facts}

    # --- Create constructed inventory (sync will create hosts from inputs) ---
    constructed_inv = Inventory.objects.create(
        organization=default_org,
        name='test_ci_facts_constructed',
        kind='constructed',
    )
    constructed_inv.input_inventories.add(inv1)
    constructed_inv.input_inventories.add(inv2)

    # --- Run a fact-gathering job against the constructed inventory ---
    # The job launch triggers an inventory sync which creates the constructed
    # host with instance_id pointing to one of the source hosts.
    scm_url = f'file://{live_tmp_folder}/facts'
    run_job_from_playbook(
        'test_ci_facts',
        'gather.yml',
        scm_url=scm_url,
        jt_params={'use_fact_cache': True, 'inventory': constructed_inv.id},
    )

    # --- Determine which source host the constructed host points to ---
    constructed_host = constructed_inv.hosts.get(name=shared_name)
    target_id = int(constructed_host.instance_id)
    other_id = (set(source_hosts_by_id.keys()) - {target_id}).pop()

    target_host = source_hosts_by_id[target_id]
    other_host = source_hosts_by_id[other_id]

    target_host.refresh_from_db()
    other_host.refresh_from_db()
    constructed_host.refresh_from_db()

    # Single combined assertion so a failure reports all three observations at once:
    # [facts written to the targeted source host, the untouched sibling's facts,
    #  the constructed host's (expected-empty) facts]
    actual = [target_host.ansible_facts.get('foo'), other_host.ansible_facts, constructed_host.ansible_facts]
    expected = ['bar', original_facts_by_id[other_id], {}]
    assert actual == expected, (
        f'Constructed inventory fact cache wrote to wrong host(s). '
        f'target source host (id={target_id}) foo={actual[0]!r}, '
        f'other source host (id={other_id}) facts={actual[1]!r}, '
        f'constructed host facts={actual[2]!r}; expected {expected!r}'
    )
|
||||
@@ -2,6 +2,7 @@
|
||||
import json
|
||||
import os
|
||||
import pytest
|
||||
from unittest import mock
|
||||
|
||||
from awx.main.models import (
|
||||
Inventory,
|
||||
@@ -99,52 +100,55 @@ def test_start_job_fact_cache_within_timeout(hosts, tmpdir):
|
||||
|
||||
|
||||
def test_finish_job_fact_cache_clear(hosts, mocker, ref_time, tmpdir):
|
||||
fact_cache = os.path.join(tmpdir, 'facts')
|
||||
start_fact_cache(hosts, fact_cache, timeout=0)
|
||||
artifacts_dir = str(tmpdir.mkdir("artifacts"))
|
||||
inventory_id = 5
|
||||
|
||||
bulk_update = mocker.patch('awx.main.tasks.facts.bulk_update_sorted_by_id')
|
||||
start_fact_cache(hosts, artifacts_dir=artifacts_dir, timeout=0, inventory_id=inventory_id)
|
||||
|
||||
# Mock the os.path.exists behavior for host deletion
|
||||
# Let's assume the fact file for hosts[1] is missing.
|
||||
mocker.patch('os.path.exists', side_effect=lambda path: hosts[1].name not in path)
|
||||
mocker.patch('awx.main.tasks.facts.bulk_update_sorted_by_id')
|
||||
|
||||
# Simulate one host's fact file getting deleted manually
|
||||
host_to_delete_filepath = os.path.join(fact_cache, hosts[1].name)
|
||||
# Remove the fact file for hosts[1] to simulate ansible's clear_facts
|
||||
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
|
||||
os.remove(os.path.join(fact_cache_dir, hosts[1].name))
|
||||
|
||||
# Simulate the file being removed by checking existence first, to avoid FileNotFoundError
|
||||
if os.path.exists(host_to_delete_filepath):
|
||||
os.remove(host_to_delete_filepath)
|
||||
hosts_qs = mock.MagicMock()
|
||||
hosts_qs.filter.return_value.order_by.return_value.iterator.return_value = iter(hosts)
|
||||
|
||||
finish_fact_cache(fact_cache)
|
||||
finish_fact_cache(hosts_qs, artifacts_dir=artifacts_dir, inventory_id=inventory_id)
|
||||
|
||||
# Simulate side effects that would normally be applied during bulk update
|
||||
hosts[1].ansible_facts = {}
|
||||
hosts[1].ansible_facts_modified = now()
|
||||
# hosts[1] should have had its facts cleared (file was missing, job_created=None)
|
||||
assert hosts[1].ansible_facts == {}
|
||||
assert hosts[1].ansible_facts_modified > ref_time
|
||||
|
||||
# Verify facts are preserved for hosts with valid cache files
|
||||
# Other hosts should be unmodified (fact files exist but weren't changed by ansible)
|
||||
for host in (hosts[0], hosts[2], hosts[3]):
|
||||
assert host.ansible_facts == {"a": 1, "b": 2}
|
||||
assert host.ansible_facts_modified == ref_time
|
||||
assert hosts[1].ansible_facts_modified > ref_time
|
||||
|
||||
# Current implementation skips the call entirely if hosts_to_update == []
|
||||
bulk_update.assert_not_called()
|
||||
|
||||
|
||||
def test_finish_job_fact_cache_with_bad_data(hosts, mocker, tmpdir):
|
||||
fact_cache = os.path.join(tmpdir, 'facts')
|
||||
start_fact_cache(hosts, fact_cache, timeout=0)
|
||||
artifacts_dir = str(tmpdir.mkdir("artifacts"))
|
||||
inventory_id = 5
|
||||
|
||||
bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update')
|
||||
start_fact_cache(hosts, artifacts_dir=artifacts_dir, timeout=0, inventory_id=inventory_id)
|
||||
|
||||
bulk_update = mocker.patch('awx.main.tasks.facts.bulk_update_sorted_by_id')
|
||||
|
||||
# Overwrite fact files with invalid JSON and set future mtime
|
||||
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
|
||||
for h in hosts:
|
||||
filepath = os.path.join(fact_cache, h.name)
|
||||
filepath = os.path.join(fact_cache_dir, h.name)
|
||||
with open(filepath, 'w') as f:
|
||||
f.write('not valid json!')
|
||||
f.flush()
|
||||
new_modification_time = time.time() + 3600
|
||||
os.utime(filepath, (new_modification_time, new_modification_time))
|
||||
|
||||
finish_fact_cache(fact_cache)
|
||||
hosts_qs = mock.MagicMock()
|
||||
hosts_qs.filter.return_value.order_by.return_value.iterator.return_value = iter(hosts)
|
||||
|
||||
bulk_update.assert_not_called()
|
||||
finish_fact_cache(hosts_qs, artifacts_dir=artifacts_dir, inventory_id=inventory_id)
|
||||
|
||||
# Invalid JSON should be skipped — no hosts updated
|
||||
updated_hosts = bulk_update.call_args[0][1]
|
||||
assert updated_hosts == []
|
||||
|
||||
@@ -84,7 +84,7 @@ def job_template_with_credentials():
|
||||
@mock.patch('awx.main.tasks.jobs.create_partition', return_value=True)
|
||||
def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, private_data_dir, execution_environment):
|
||||
# Create mocked inventory and host queryset
|
||||
inventory = mock.MagicMock(spec=Inventory, pk=1)
|
||||
inventory = mock.MagicMock(spec=Inventory, pk=1, kind='')
|
||||
host1 = mock.MagicMock(spec=Host, id=1, name='host1', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory)
|
||||
host2 = mock.MagicMock(spec=Host, id=2, name='host2', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory)
|
||||
|
||||
@@ -107,6 +107,8 @@ def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, pri
|
||||
job_slice_number=1,
|
||||
job_slice_count=1,
|
||||
inventory=inventory,
|
||||
inventory_id=inventory.pk,
|
||||
created=now(),
|
||||
execution_environment=execution_environment,
|
||||
)
|
||||
job.get_hosts_for_fact_cache = Job.get_hosts_for_fact_cache.__get__(job)
|
||||
@@ -140,7 +142,7 @@ def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, pri
|
||||
@mock.patch('awx.main.tasks.jobs.create_partition', return_value=True)
|
||||
def test_pre_post_run_hook_facts_deleted_sliced(mock_create_partition, mock_facts_settings, private_data_dir, execution_environment):
|
||||
# Fully mocked inventory
|
||||
mock_inventory = mock.MagicMock(spec=Inventory)
|
||||
mock_inventory = mock.MagicMock(spec=Inventory, pk=1, kind='')
|
||||
|
||||
# Create 999 mocked Host instances
|
||||
hosts = []
|
||||
@@ -173,6 +175,8 @@ def test_pre_post_run_hook_facts_deleted_sliced(mock_create_partition, mock_fact
|
||||
job.job_slice_count = 3
|
||||
job.execution_environment = execution_environment
|
||||
job.inventory = mock_inventory
|
||||
job.inventory_id = mock_inventory.pk
|
||||
job.created = now()
|
||||
job.job_env.get.return_value = private_data_dir
|
||||
|
||||
# Bind actual method for host filtering
|
||||
|
||||
Reference in New Issue
Block a user