AAP-39365: Facts are unintentionally deleted when the inventory is modified during a job execution (#15910) (#6905)

* Added test_jobs.py to the model unit test folder in order to show the undesired behaviour with the fact cache



* Fixed the undesired behaviour with fact_cache



* Fixed a bug with job slices



* Remove unused imports

Remove now unused line of code which was commented out by the contributor

Revert "Remove now unused line of code which was commented out by the contributor"

This reverts commit f1a056a2356d56bc7256957a18503bd14dcfd8aa.

* Add back the line that had been commented out, as it makes hosts specific to the particular slice when applicable

Revise private_data_dir fixture to see if it improves code coverage

Checked out awx/main/tests/unit/models/test_jobs.py in devel to see if it resolves the git diff issue

* Fix formatting in awx/main/tests/unit/models/test_jobs.py

Rename the for loop variable from host in hosts to host in hosts_cached and remove an unneeded continue

Revise finish_fact_cache to utilize inventory rather than hosts

Remove the local variable hosts that was assigned but unused

Revert the change in start_fact_cache: hosts_cached back to hosts

Revise the way we are handling hosts_cached and joining the file

Revert "Revise the way we are handling hosts_cached and joining the file"

This reverts commit e6e3d2f09c1b79a9bce3647a72e3dad97fe0aed8.

Reapply "Revise the way we are handling hosts_cached and joining the file"

This reverts commit a42b7ae69133fee24d3a5f1b456d9c343d111df9.

Revert some of my changes to get back to a better working state

Rename the for loop variable to host in hosts_cached and remove the unneeded continue

Remove job.get_hosts_for_fact_cache() from the post run hook, fix the if statement after the continue block, and revise how hosts are referenced in the finish_fact_cache for loop

Add test_invalid_host_facts to test_jobs to increase code coverage

Update the method signature to use hosts_cached and update the other references to hosts in finish_fact_cache

Rename hosts iterator to hosts_cached to agree with naming elsewhere

Revise test_invalid_host_facts to get more code coverage

Revise test_invalid_host_facts to increase code coverage

Revise test_pre_post_run_hook_facts_deleted_sliced to ensure we are hitting the AssertionError for code coverage

Revise mock_inventory.hosts to hit the assert failure

Revise hosts and facts to force a failure and satisfy code coverage

Fix failure in test_pre_post_run_hook_facts_deleted_sliced

Add back the for loop to create failures and add an assert to hit them

Remove hosts.iterator() from both start_fact_cache and finish_fact_cache

Remove unused import of QuerySet to satisfy api-lint

Fix typo in docstring: "hasnot" to "has not"

Move hosts_cached.append(host) to outer loop in start_fact_cache

Add a class to help support cached hosts, resolving the host.name issue with hosts_cached

* Add live tests for Ansible facts

Remove fixture that may only have been needed for local work

Revert "Add class to help support cached hosts resolving host.name issue with hosts_cached"

This reverts commit 99d998cfb9960baafe887de80bd9b01af50513ec.

* Move hosts_cached.append(host) outside of the try/except block

* Move hosts_cached.append(host) to the beginning of start_fact_cache
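
In short, start_fact_cache now reports back exactly which hosts it wrote fact files for, and the post-run step consumes that list instead of re-querying the inventory, so hosts added to or removed from the inventory while the job ran no longer get their facts cleared. A minimal sketch of the revised call pattern, condensed from the RunJob hunks below (the fact-cache path is shortened to fact_cache_dir here for readability):

    # pre_run_hook: write cached facts and remember which hosts were written
    self.facts_write_time, self.hosts_with_facts_cached = start_fact_cache(
        job.get_hosts_for_fact_cache(), fact_cache_dir, inventory_id=job.inventory_id
    )

    # post_run_hook: read facts back only for the hosts written above
    finish_fact_cache(
        self.hosts_with_facts_cached, fact_cache_dir,
        facts_write_time=self.facts_write_time, job_id=job.id,
    )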

---------

Signed-off-by: onetti7 <davonebat@gmail.com>
Co-authored-by: onetti7 <davonebat@gmail.com>
Co-authored-by: Alan Rominger <arominge@redhat.com>
Lila Yasin 2025-04-11 11:08:26 -04:00 committed by GitHub
parent bbcdef18a7
commit 87cb6dc0b9
9 changed files with 285 additions and 23 deletions

View File

@@ -6,7 +6,6 @@ import logging
# Django
from django.conf import settings
from django.db.models.query import QuerySet
from django.utils.encoding import smart_str
from django.utils.timezone import now
from django.db import OperationalError
@@ -24,6 +23,7 @@ system_tracking_logger = logging.getLogger('awx.analytics.system_tracking')
def start_fact_cache(hosts, destination, log_data, timeout=None, inventory_id=None):
log_data['inventory_id'] = inventory_id
log_data['written_ct'] = 0
hosts_cached = list()
try:
os.makedirs(destination, mode=0o700)
except FileExistsError:
@@ -32,17 +32,17 @@ def start_fact_cache(hosts, destination, log_data, timeout=None, inventory_id=None):
if timeout is None:
timeout = settings.ANSIBLE_FACT_CACHE_TIMEOUT
if isinstance(hosts, QuerySet):
hosts = hosts.iterator()
last_filepath_written = None
for host in hosts:
if (not host.ansible_facts_modified) or (timeout and host.ansible_facts_modified < now() - datetime.timedelta(seconds=timeout)):
hosts_cached.append(host)
if not host.ansible_facts_modified or (timeout and host.ansible_facts_modified < now() - datetime.timedelta(seconds=timeout)):
continue # facts are expired - do not write them
filepath = os.sep.join(map(str, [destination, host.name]))
if not os.path.realpath(filepath).startswith(destination):
system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
continue
try:
with codecs.open(filepath, 'w', encoding='utf-8') as f:
os.chmod(f.name, 0o600)
@@ -52,10 +52,11 @@ def start_fact_cache(hosts, destination, log_data, timeout=None, inventory_id=None):
except IOError:
system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
continue
# make note of the time we wrote the last file so we can check if any file changed later
if last_filepath_written:
return os.path.getmtime(last_filepath_written)
return None
return os.path.getmtime(last_filepath_written), hosts_cached
return None, hosts_cached
def raw_update_hosts(host_list):
@@ -86,17 +87,14 @@ def update_hosts(host_list, max_tries=5):
msg='Inventory {inventory_id} host facts: updated {updated_ct}, cleared {cleared_ct}, unchanged {unmodified_ct}, took {delta:.3f} s',
add_log_data=True,
)
def finish_fact_cache(hosts, destination, facts_write_time, log_data, job_id=None, inventory_id=None):
def finish_fact_cache(hosts_cached, destination, facts_write_time, log_data, job_id=None, inventory_id=None):
log_data['inventory_id'] = inventory_id
log_data['updated_ct'] = 0
log_data['unmodified_ct'] = 0
log_data['cleared_ct'] = 0
if isinstance(hosts, QuerySet):
hosts = hosts.iterator()
hosts_to_update = []
for host in hosts:
for host in hosts_cached:
filepath = os.sep.join(map(str, [destination, host.name]))
if not os.path.realpath(filepath).startswith(destination):
system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
@@ -128,6 +126,7 @@ def finish_fact_cache(hosts, destination, facts_write_time, log_data, job_id=None, inventory_id=None):
log_data['unmodified_ct'] += 1
else:
# if the file goes missing, ansible removed it (likely via clear_facts)
# if the file goes missing, but the host has not started facts, then we should not clear the facts
host.ansible_facts = {}
host.ansible_facts_modified = now()
hosts_to_update.append(host)
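
Because start_fact_cache now returns a two-tuple instead of a bare timestamp, every caller has to unpack it; a caller that only needs the write time can discard the host list, as the updated unit tests further down do:

    last_modified, _ = start_fact_cache(hosts, fact_cache, timeout=0)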

View File

@@ -1091,7 +1091,7 @@ class RunJob(SourceControlMixin, BaseTask):
# where ansible expects to find it
if self.should_use_fact_cache():
job.log_lifecycle("start_job_fact_cache")
self.facts_write_time = start_fact_cache(
self.facts_write_time, self.hosts_with_facts_cached = start_fact_cache(
job.get_hosts_for_fact_cache(), os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'), inventory_id=job.inventory_id
)
@@ -1110,7 +1110,7 @@
if self.should_use_fact_cache() and self.runner_callback.artifacts_processed:
job.log_lifecycle("finish_job_fact_cache")
finish_fact_cache(
job.get_hosts_for_fact_cache(),
self.hosts_with_facts_cached,
os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'),
facts_write_time=self.facts_write_time,
job_id=job.id,

View File

@@ -0,0 +1,7 @@
---
- hosts: all
gather_facts: false
connection: local
tasks:
- meta: clear_facts

View File

@@ -0,0 +1,17 @@
---
- hosts: all
vars:
extra_value: ""
gather_facts: false
connection: local
tasks:
- name: set a custom fact
set_fact:
foo: "bar{{ extra_value }}"
bar:
a:
b:
- "c"
- "d"
cacheable: true

View File

@@ -0,0 +1,9 @@
---
- hosts: all
gather_facts: false
connection: local
vars:
msg: 'hello'
tasks:
- debug: var=msg

View File

@@ -128,7 +128,7 @@ def podman_image_generator():
@pytest.fixture
def run_job_from_playbook(default_org, demo_inv, post, admin):
def _rf(test_name, playbook, local_path=None, scm_url=None):
def _rf(test_name, playbook, local_path=None, scm_url=None, jt_params=None):
project_name = f'{test_name} project'
jt_name = f'{test_name} JT: {playbook}'
@@ -165,9 +165,13 @@ def run_job_from_playbook(default_org, demo_inv, post, admin):
assert proj.get_project_path()
assert playbook in proj.playbooks
jt_data = {'name': jt_name, 'project': proj.id, 'playbook': playbook, 'inventory': demo_inv.id}
if jt_params:
jt_data.update(jt_params)
result = post(
reverse('api:job_template_list'),
{'name': jt_name, 'project': proj.id, 'playbook': playbook, 'inventory': demo_inv.id},
jt_data,
admin,
expect=201,
)
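
The new jt_params argument lets a live test pass extra job template fields through to the created JT; the fact-cache tests below use it to turn on fact caching, roughly like this (argument values taken from the test file that follows):

    run_job_from_playbook(
        'test_gather_ansible_facts_basic', 'gather.yml',
        scm_url=f'file://{live_tmp_folder}/facts',
        jt_params={'use_fact_cache': True},
    )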

View File

@@ -0,0 +1,64 @@
import pytest
from awx.main.tests.live.tests.conftest import wait_for_events
from awx.main.models import Job, Inventory
def assert_facts_populated(name):
job = Job.objects.filter(name__icontains=name).order_by('-created').first()
assert job is not None
wait_for_events(job)
inventory = job.inventory
assert inventory.hosts.count() > 0 # sanity
for host in inventory.hosts.all():
assert host.ansible_facts
@pytest.fixture
def general_facts_test(live_tmp_folder, run_job_from_playbook):
def _rf(slug, jt_params):
jt_params['use_fact_cache'] = True
standard_kwargs = dict(scm_url=f'file://{live_tmp_folder}/facts', jt_params=jt_params)
# GATHER FACTS
name = f'test_gather_ansible_facts_{slug}'
run_job_from_playbook(name, 'gather.yml', **standard_kwargs)
assert_facts_populated(name)
# KEEP FACTS
name = f'test_clear_ansible_facts_{slug}'
run_job_from_playbook(name, 'no_op.yml', **standard_kwargs)
assert_facts_populated(name)
# CLEAR FACTS
name = f'test_clear_ansible_facts_{slug}'
run_job_from_playbook(name, 'clear.yml', **standard_kwargs)
job = Job.objects.filter(name__icontains=name).order_by('-created').first()
assert job is not None
wait_for_events(job)
inventory = job.inventory
assert inventory.hosts.count() > 0 # sanity
for host in inventory.hosts.all():
assert not host.ansible_facts
return _rf
def test_basic_ansible_facts(general_facts_test):
general_facts_test('basic', {})
@pytest.fixture
def sliced_inventory():
inv, _ = Inventory.objects.get_or_create(name='inventory-to-slice')
if not inv.hosts.exists():
for i in range(10):
inv.hosts.create(name=f'sliced_host_{i}')
return inv
def test_slicing_with_facts(general_facts_test, sliced_inventory):
general_facts_test('sliced', {'job_slice_count': 3, 'inventory': sliced_inventory.id})

View File

@@ -34,7 +34,7 @@ def hosts(ref_time):
def test_start_job_fact_cache(hosts, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
last_modified = start_fact_cache(hosts, fact_cache, timeout=0)
last_modified, _ = start_fact_cache(hosts, fact_cache, timeout=0)
for host in hosts:
filepath = os.path.join(fact_cache, host.name)
@@ -61,7 +61,7 @@ def test_fact_cache_with_invalid_path_traversal(tmpdir):
def test_start_job_fact_cache_past_timeout(hosts, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
# the hosts fixture was modified 5s ago, which is more than 2s
last_modified = start_fact_cache(hosts, fact_cache, timeout=2)
last_modified, _ = start_fact_cache(hosts, fact_cache, timeout=2)
assert last_modified is None
for host in hosts:
@@ -71,7 +71,7 @@ def test_start_job_fact_cache_past_timeout(hosts, tmpdir):
def test_start_job_fact_cache_within_timeout(hosts, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
# the hosts fixture was modified 5s ago, which is less than 7s
last_modified = start_fact_cache(hosts, fact_cache, timeout=7)
last_modified, _ = start_fact_cache(hosts, fact_cache, timeout=7)
assert last_modified
for host in hosts:
@@ -80,7 +80,7 @@ def test_start_job_fact_cache_within_timeout(hosts, tmpdir):
def test_finish_job_fact_cache_with_existing_data(hosts, mocker, tmpdir, ref_time):
fact_cache = os.path.join(tmpdir, 'facts')
last_modified = start_fact_cache(hosts, fact_cache, timeout=0)
last_modified, _ = start_fact_cache(hosts, fact_cache, timeout=0)
bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update')
@@ -108,7 +108,7 @@ def test_finish_job_fact_cache_with_existing_data(hosts, mocker, tmpdir, ref_time):
def test_finish_job_fact_cache_with_bad_data(hosts, mocker, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
last_modified = start_fact_cache(hosts, fact_cache, timeout=0)
last_modified, _ = start_fact_cache(hosts, fact_cache, timeout=0)
bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update')
@@ -127,7 +127,7 @@ def test_finish_job_fact_cache_with_bad_data(hosts, mocker, tmpdir):
def test_finish_job_fact_cache_clear(hosts, mocker, ref_time, tmpdir):
fact_cache = os.path.join(tmpdir, 'facts')
last_modified = start_fact_cache(hosts, fact_cache, timeout=0)
last_modified, _ = start_fact_cache(hosts, fact_cache, timeout=0)
bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update')

View File

@@ -0,0 +1,162 @@
# -*- coding: utf-8 -*-
import os
import tempfile
import shutil
import pytest
from unittest import mock
from awx.main.models import (
Inventory,
Host,
)
from django.utils.timezone import now
from django.db.models.query import QuerySet
from awx.main.models import (
Job,
Organization,
Project,
)
from awx.main.tasks import jobs
@pytest.fixture
def private_data_dir():
private_data = tempfile.mkdtemp(prefix='awx_')
for subfolder in ('inventory', 'env'):
runner_subfolder = os.path.join(private_data, subfolder)
os.makedirs(runner_subfolder, exist_ok=True)
yield private_data
shutil.rmtree(private_data, True)
@mock.patch('awx.main.tasks.facts.update_hosts')
@mock.patch('awx.main.tasks.facts.settings')
@mock.patch('awx.main.tasks.jobs.create_partition', return_value=True)
def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, update_hosts, private_data_dir, execution_environment):
# creates inventory_object with two hosts
inventory = Inventory(pk=1)
mock_inventory = mock.MagicMock(spec=Inventory, wraps=inventory)
mock_inventory._state = mock.MagicMock()
qs_hosts = QuerySet()
hosts = [
Host(id=1, name='host1', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=mock_inventory),
Host(id=2, name='host2', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=mock_inventory),
]
qs_hosts._result_cache = hosts
qs_hosts.only = mock.MagicMock(return_value=hosts)
mock_inventory.hosts = qs_hosts
assert mock_inventory.hosts.count() == 2
# creates job object with fact_cache enabled
org = Organization(pk=1)
proj = Project(pk=1, organization=org)
job = mock.MagicMock(spec=Job, use_fact_cache=True, project=proj, organization=org, job_slice_number=1, job_slice_count=1)
job.inventory = mock_inventory
job.execution_environment = execution_environment
job.get_hosts_for_fact_cache = Job.get_hosts_for_fact_cache.__get__(job) # to run original method
job.job_env.get = mock.MagicMock(return_value=private_data_dir)
# creates the task object with job object as instance
mock_facts_settings.ANSIBLE_FACT_CACHE_TIMEOUT = False # defines timeout to false
task = jobs.RunJob()
task.instance = job
task.update_model = mock.Mock(return_value=job)
task.model.objects.get = mock.Mock(return_value=job)
# run pre_run_hook
task.facts_write_time = task.pre_run_hook(job, private_data_dir)
# updates inventory with one more host
hosts.append(Host(id=3, name='host3', ansible_facts={"added": True}, ansible_facts_modified=now(), inventory=mock_inventory))
assert mock_inventory.hosts.count() == 3
# run post_run_hook
task.runner_callback.artifacts_processed = mock.MagicMock(return_value=True)
task.post_run_hook(job, "success")
assert mock_inventory.hosts[2].ansible_facts == {"added": True}
@mock.patch('awx.main.tasks.facts.update_hosts')
@mock.patch('awx.main.tasks.facts.settings')
@mock.patch('awx.main.tasks.jobs.create_partition', return_value=True)
def test_pre_post_run_hook_facts_deleted_sliced(mock_create_partition, mock_facts_settings, update_hosts, private_data_dir, execution_environment):
# creates inventory_object with two hosts
inventory = Inventory(pk=1)
mock_inventory = mock.MagicMock(spec=Inventory, wraps=inventory)
mock_inventory._state = mock.MagicMock()
qs_hosts = QuerySet()
hosts = [Host(id=num, name=f'host{num}', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=mock_inventory) for num in range(999)]
qs_hosts._result_cache = hosts
qs_hosts.only = mock.MagicMock(return_value=hosts)
mock_inventory.hosts = qs_hosts
assert mock_inventory.hosts.count() == 999
# creates job object with fact_cache enabled
org = Organization(pk=1)
proj = Project(pk=1, organization=org)
job = mock.MagicMock(spec=Job, use_fact_cache=True, project=proj, organization=org, job_slice_number=1, job_slice_count=3)
job.inventory = mock_inventory
job.execution_environment = execution_environment
job.get_hosts_for_fact_cache = Job.get_hosts_for_fact_cache.__get__(job) # to run original method
job.job_env.get = mock.MagicMock(return_value=private_data_dir)
# creates the task object with job object as instance
mock_facts_settings.ANSIBLE_FACT_CACHE_TIMEOUT = False
task = jobs.RunJob()
task.instance = job
task.update_model = mock.Mock(return_value=job)
task.model.objects.get = mock.Mock(return_value=job)
# run pre_run_hook
task.facts_write_time = task.pre_run_hook(job, private_data_dir)
hosts.pop(1)
assert mock_inventory.hosts.count() == 998
# run post_run_hook
task.runner_callback.artifacts_processed = mock.MagicMock(return_value=True)
task.post_run_hook(job, "success")
for host in hosts:
assert host.ansible_facts == {"a": 1, "b": 2}
failures = []
for host in hosts:
try:
assert host.ansible_facts == {"a": 1, "b": 2, "unexpected_key": "bad"}
except AssertionError:
failures.append("Host named {} has facts {}".format(host.name, host.ansible_facts))
assert len(failures) > 0, f"Failures occurred for the following hosts: {failures}"
@mock.patch('awx.main.tasks.facts.update_hosts')
@mock.patch('awx.main.tasks.facts.settings')
def test_invalid_host_facts(mock_facts_settings, update_hosts, private_data_dir, execution_environment):
inventory = Inventory(pk=1)
mock_inventory = mock.MagicMock(spec=Inventory, wraps=inventory)
mock_inventory._state = mock.MagicMock()
hosts = [
Host(id=0, name='host0', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=mock_inventory),
Host(id=1, name='host1', ansible_facts={"a": 1, "b": 2, "unexpected_key": "bad"}, ansible_facts_modified=now(), inventory=mock_inventory),
]
mock_inventory.hosts = hosts
failures = []
for host in mock_inventory.hosts:
assert "a" in host.ansible_facts
if "unexpected_key" in host.ansible_facts:
failures.append(host.name)
mock_facts_settings.SOME_SETTING = True
update_hosts(mock_inventory.hosts)
with pytest.raises(pytest.fail.Exception):
if failures:
pytest.fail(f" {len(failures)} facts cleared failures : {','.join(failures)}")