mirror of
https://github.com/ansible/awx.git
synced 2026-05-01 06:35:26 -02:30
[Devel] Optimize host_list_rbac query (#16408)
* Defer ansible_facts in HostManager to avoid fetching large JSON column in host list queries (AAP-68023) The host list endpoint (GET /api/v2/hosts/) fetches the ansible_facts JSON column unnecessarily, contributing to the 7.8s median query time at scale. This column can be very large and is not used by the list serializer. Changes: - HostManager.get_queryset() now defers ansible_facts - finish_fact_cache call site uses .only(*HOST_FACTS_FIELDS) to eagerly load ansible_facts when actually needed, avoiding N+1 queries - Unit test mocks updated to support .only() queryset chaining - Points DAB dependency at the RBAC query optimization branch for combined testing Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> ---------
This commit is contained in:
@@ -112,7 +112,7 @@ class HostManager(models.Manager.from_queryset(HostLatestSummaryQuerySet)):
|
||||
"""When the parent instance of the host query set has a `kind=smart` and a `host_filter`
|
||||
set. Use the `host_filter` to generate the queryset for the hosts.
|
||||
"""
|
||||
qs = super().get_queryset()
|
||||
qs = super().get_queryset().defer('ansible_facts')
|
||||
|
||||
if hasattr(self, 'instance') and hasattr(self.instance, 'host_filter') and hasattr(self.instance, 'kind'):
|
||||
if self.instance.kind == 'smart' and self.instance.host_filter is not None:
|
||||
|
||||
@@ -1331,6 +1331,7 @@ class RunJob(SourceControlMixin, BaseTask):
|
||||
hosts_qs = job.get_source_hosts_for_constructed_inventory()
|
||||
else:
|
||||
hosts_qs = job.inventory.hosts
|
||||
hosts_qs = hosts_qs.only(*HOST_FACTS_FIELDS)
|
||||
finish_fact_cache(
|
||||
hosts_qs,
|
||||
artifacts_dir=os.path.join(private_data_dir, 'artifacts', str(job.id)),
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
"""
|
||||
Tests for AAP-68023: host_list_rbac performance optimization.
|
||||
|
||||
The host list endpoint fetches the large ansible_facts JSON column
|
||||
unnecessarily. The HostManager now defers it by default so that
|
||||
list queries avoid transferring this data from PostgreSQL.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from awx.main.models import Host
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AAP-68023: Verify ansible_facts column is deferred by HostManager
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestHostManagerDeferral:
|
||||
"""AAP-68023: The host list fetches 200+ columns unnecessarily.
|
||||
|
||||
The ansible_facts JSON column is large and not used by the list
|
||||
serializer. HostManager.get_queryset() must defer it so that
|
||||
every query through Host.objects avoids fetching it by default.
|
||||
"""
|
||||
|
||||
def test_ansible_facts_deferred_by_default(self):
|
||||
"""ansible_facts should be in the deferred set for default Host queries."""
|
||||
qs = Host.objects.all()
|
||||
deferred = qs.query.deferred_loading[0]
|
||||
assert 'ansible_facts' in deferred, f'ansible_facts should be deferred by the HostManager. ' f'Deferred fields: {deferred}'
|
||||
|
||||
def test_ansible_facts_accessible_when_needed(self, inventory):
|
||||
"""Deferred fields are still accessible — Django fetches on access."""
|
||||
host = Host.objects.create(
|
||||
name='facts-host',
|
||||
inventory=inventory,
|
||||
ansible_facts={'os': 'linux'},
|
||||
)
|
||||
loaded = Host.objects.get(pk=host.pk)
|
||||
assert loaded.ansible_facts == {'os': 'linux'}
|
||||
@@ -83,11 +83,15 @@ def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, pri
|
||||
host1 = mock.MagicMock(spec=Host, id=1, name='host1', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory)
|
||||
host2 = mock.MagicMock(spec=Host, id=2, name='host2', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory)
|
||||
|
||||
# Mock hosts queryset
|
||||
# Mock hosts queryset — must support .only().filter().order_by().iterator() chain
|
||||
hosts = [host1, host2]
|
||||
qs_hosts = mock.MagicMock(spec=QuerySet)
|
||||
qs_hosts._result_cache = hosts
|
||||
qs_hosts.only.return_value = hosts
|
||||
qs_hosts.__iter__ = lambda self: iter(self._result_cache)
|
||||
qs_hosts.only.return_value = qs_hosts
|
||||
qs_hosts.filter.return_value = qs_hosts
|
||||
qs_hosts.order_by.return_value = qs_hosts
|
||||
qs_hosts.iterator.side_effect = lambda: iter(qs_hosts._result_cache)
|
||||
qs_hosts.count.side_effect = lambda: len(qs_hosts._result_cache)
|
||||
inventory.hosts = qs_hosts
|
||||
|
||||
@@ -154,9 +158,12 @@ def test_pre_post_run_hook_facts_deleted_sliced(
|
||||
host.inventory = mock_inventory
|
||||
hosts.append(host)
|
||||
|
||||
# Mock inventory.hosts behavior
|
||||
# Mock inventory.hosts behavior — must support .only().filter().order_by().iterator() chain
|
||||
mock_qs_hosts = mock.MagicMock()
|
||||
mock_qs_hosts.only.return_value = hosts
|
||||
mock_qs_hosts.only.return_value = mock_qs_hosts
|
||||
mock_qs_hosts.filter.return_value = mock_qs_hosts
|
||||
mock_qs_hosts.order_by.return_value = mock_qs_hosts
|
||||
mock_qs_hosts.iterator.side_effect = lambda: iter(hosts)
|
||||
mock_qs_hosts.count.return_value = 999
|
||||
mock_inventory.hosts = mock_qs_hosts
|
||||
|
||||
|
||||
Reference in New Issue
Block a user