Record whether a file was written for fact cache (#16361)

This commit is contained in:
Alan Rominger
2026-03-20 12:53:34 -04:00
committed by GitHub
parent ff68d6196d
commit 377dfce197
2 changed files with 60 additions and 13 deletions

View File

@@ -25,7 +25,8 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
log_data = log_data or {}
log_data['inventory_id'] = inventory_id
log_data['written_ct'] = 0
hosts_cached = []
# Dict mapping host name -> bool (True if a fact file was written)
hosts_cached = {}
# Create the fact_cache directory inside artifacts_dir
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
@@ -37,13 +38,14 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
last_write_time = None
for host in hosts:
hosts_cached.append(host.name)
if not host.ansible_facts_modified or (timeout and host.ansible_facts_modified < now() - datetime.timedelta(seconds=timeout)):
hosts_cached[host.name] = False
continue # facts are expired - do not write them
filepath = os.path.join(fact_cache_dir, host.name)
if not os.path.realpath(filepath).startswith(fact_cache_dir):
logger.error(f'facts for host {smart_str(host.name)} could not be cached')
hosts_cached[host.name] = False
continue
try:
@@ -59,8 +61,10 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
backdated = mtime - 2
os.utime(filepath, (backdated, backdated))
last_write_time = backdated
hosts_cached[host.name] = True
except IOError:
logger.error(f'facts for host {smart_str(host.name)} could not be cached')
hosts_cached[host.name] = False
continue
# Write summary file directly to the artifacts_dir
@@ -69,7 +73,6 @@ def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_
summary_data = {
'last_write_time': last_write_time,
'hosts_cached': hosts_cached,
'written_ct': log_data['written_ct'],
}
with open(summary_file, 'w', encoding='utf-8') as f:
json.dump(summary_data, f, indent=2)
@@ -101,7 +104,8 @@ def finish_fact_cache(host_qs, artifacts_dir, job_id=None, inventory_id=None, jo
logger.error(f'Error reading summary file at {summary_path}: {e}')
return
host_names = summary.get('hosts_cached', [])
hosts_cached_map = summary.get('hosts_cached', {})
host_names = list(hosts_cached_map.keys())
hosts_cached = host_qs.filter(name__in=host_names).order_by('id').iterator()
# Path where individual fact files were written
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
@@ -143,6 +147,14 @@ def finish_fact_cache(host_qs, artifacts_dir, job_id=None, inventory_id=None, jo
else:
log_data['unmodified_ct'] += 1
else:
# File is missing. Only interpret this as "ansible cleared facts" if
# start_fact_cache actually wrote a file for this host (i.e. the host
# had valid, non-expired facts before the job ran). If no file was
# ever written, the missing file is expected and not a clear signal.
if not hosts_cached_map.get(host.name):
log_data['unmodified_ct'] += 1
continue
# if the file goes missing, ansible removed it (likely via clear_facts)
            # but if no fact file was ever written for the host at job start, the missing file is expected and we should not clear the facts
if job_created and host.ansible_facts_modified and host.ansible_facts_modified > job_created:

View File

@@ -73,17 +73,21 @@ class TestFinishFactCacheScoping:
class TestFinishFactCacheConcurrentProtection:
"""finish_fact_cache must not clear facts that a concurrent job updated."""
def test_skip_clear_when_facts_modified_after_job_created(self, organization, artifacts_dir):
"""If a host's facts were updated after the job was created, do not clear them.
def test_no_clear_when_no_file_was_written(self, organization, artifacts_dir):
"""Host with no prior facts should not have facts cleared when file is missing.
Generated by Claude Opus 4.6 (claude-opus-4-6).
start_fact_cache records hosts_cached[host] = False for hosts with no
prior facts (no file written). finish_fact_cache should skip the clear
for these hosts because the missing file is expected, not a clear signal.
"""
inv = Inventory.objects.create(organization=organization, name='concurrent-inv')
host = inv.hosts.create(name='target')
job_created = now() - timedelta(minutes=5)
# start_fact_cache writes a reference file (host has no facts yet → no file written)
# start_fact_cache records host with False (no facts → no file written)
start_fact_cache(inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv.id)
# Simulate a concurrent job updating this host's facts AFTER our job was created
@@ -91,12 +95,43 @@ class TestFinishFactCacheConcurrentProtection:
host.ansible_facts_modified = now()
host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
# The fact file is missing (our job's ansible didn't target this host),
# which would normally trigger a clear. But the concurrent protection
# should skip it because ansible_facts_modified > job_created.
fact_file = os.path.join(artifacts_dir, 'fact_cache', host.name)
if os.path.exists(fact_file):
os.remove(fact_file)
# The fact file is missing because start_fact_cache never wrote one.
# finish_fact_cache should skip this host entirely.
finish_fact_cache(
inv.hosts,
artifacts_dir=artifacts_dir,
inventory_id=inv.id,
job_created=job_created,
)
host.refresh_from_db()
assert host.ansible_facts == {'from_concurrent_job': True}, 'Facts were cleared for a host that never had a fact file written'
def test_skip_clear_when_facts_modified_after_job_created(self, organization, artifacts_dir):
"""If a file was written and then deleted, but facts were concurrently updated, skip clear.
Generated by Claude Opus 4.6 (claude-opus-4-6).
"""
inv = Inventory.objects.create(organization=organization, name='concurrent-written-inv')
host = inv.hosts.create(name='target')
old_time = now() - timedelta(hours=1)
host.ansible_facts = {'original': True}
host.ansible_facts_modified = old_time
host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
job_created = now() - timedelta(minutes=5)
# start_fact_cache writes a file (host has facts → True in map)
start_fact_cache(inv.hosts.all(), artifacts_dir=artifacts_dir, timeout=0, inventory_id=inv.id)
# Remove the fact file (ansible didn't target this host via --limit)
os.remove(os.path.join(artifacts_dir, 'fact_cache', host.name))
# Simulate a concurrent job updating this host's facts AFTER our job was created
host.ansible_facts = {'from_concurrent_job': True}
host.ansible_facts_modified = now()
host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
finish_fact_cache(
inv.hosts,