[Devel] Optimize host_list_rbac query (#16408)

* Defer ansible_facts in HostManager to avoid fetching large JSON column in host list queries (AAP-68023)

The host list endpoint (GET /api/v2/hosts/) fetches the ansible_facts
JSON column unnecessarily, contributing to the 7.8s median query time
at scale. This column can be very large and is not used by the list
serializer.

Changes:
- HostManager.get_queryset() now defers ansible_facts
- finish_fact_cache call site uses .only(*HOST_FACTS_FIELDS) to eagerly
  load ansible_facts when actually needed, avoiding N+1 queries
- Unit test mocks updated to support .only() queryset chaining
- Points DAB dependency at the RBAC query optimization branch for
  combined testing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---------
This commit is contained in:
Lila Yasin
2026-04-28 14:00:13 -04:00
committed by GitHub
parent b66c0105ae
commit 1213ea6f62
4 changed files with 54 additions and 5 deletions

View File

@@ -112,7 +112,7 @@ class HostManager(models.Manager.from_queryset(HostLatestSummaryQuerySet)):
"""When the parent instance of the host query set has a `kind=smart` and a `host_filter` """When the parent instance of the host query set has a `kind=smart` and a `host_filter`
set. Use the `host_filter` to generate the queryset for the hosts. set. Use the `host_filter` to generate the queryset for the hosts.
""" """
qs = super().get_queryset() qs = super().get_queryset().defer('ansible_facts')
if hasattr(self, 'instance') and hasattr(self.instance, 'host_filter') and hasattr(self.instance, 'kind'): if hasattr(self, 'instance') and hasattr(self.instance, 'host_filter') and hasattr(self.instance, 'kind'):
if self.instance.kind == 'smart' and self.instance.host_filter is not None: if self.instance.kind == 'smart' and self.instance.host_filter is not None:

View File

@@ -1331,6 +1331,7 @@ class RunJob(SourceControlMixin, BaseTask):
hosts_qs = job.get_source_hosts_for_constructed_inventory() hosts_qs = job.get_source_hosts_for_constructed_inventory()
else: else:
hosts_qs = job.inventory.hosts hosts_qs = job.inventory.hosts
hosts_qs = hosts_qs.only(*HOST_FACTS_FIELDS)
finish_fact_cache( finish_fact_cache(
hosts_qs, hosts_qs,
artifacts_dir=os.path.join(private_data_dir, 'artifacts', str(job.id)), artifacts_dir=os.path.join(private_data_dir, 'artifacts', str(job.id)),

View File

@@ -0,0 +1,41 @@
"""
Tests for AAP-68023: host_list_rbac performance optimization.
The host list endpoint fetches the large ansible_facts JSON column
unnecessarily. The HostManager now defers it by default so that
list queries avoid transferring this data from PostgreSQL.
"""
import pytest
from awx.main.models import Host
# ---------------------------------------------------------------------------
# AAP-68023: Verify ansible_facts column is deferred by HostManager
# ---------------------------------------------------------------------------
@pytest.mark.django_db
class TestHostManagerDeferral:
"""AAP-68023: The host list fetches 200+ columns unnecessarily.
The ansible_facts JSON column is large and not used by the list
serializer. HostManager.get_queryset() must defer it so that
every query through Host.objects avoids fetching it by default.
"""
def test_ansible_facts_deferred_by_default(self):
"""ansible_facts should be in the deferred set for default Host queries."""
qs = Host.objects.all()
deferred = qs.query.deferred_loading[0]
assert 'ansible_facts' in deferred, f'ansible_facts should be deferred by the HostManager. ' f'Deferred fields: {deferred}'
def test_ansible_facts_accessible_when_needed(self, inventory):
"""Deferred fields are still accessible — Django fetches on access."""
host = Host.objects.create(
name='facts-host',
inventory=inventory,
ansible_facts={'os': 'linux'},
)
loaded = Host.objects.get(pk=host.pk)
assert loaded.ansible_facts == {'os': 'linux'}

View File

@@ -83,11 +83,15 @@ def test_pre_post_run_hook_facts(mock_create_partition, mock_facts_settings, pri
host1 = mock.MagicMock(spec=Host, id=1, name='host1', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory) host1 = mock.MagicMock(spec=Host, id=1, name='host1', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory)
host2 = mock.MagicMock(spec=Host, id=2, name='host2', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory) host2 = mock.MagicMock(spec=Host, id=2, name='host2', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=now(), inventory=inventory)
# Mock hosts queryset # Mock hosts queryset — must support .only().filter().order_by().iterator() chain
hosts = [host1, host2] hosts = [host1, host2]
qs_hosts = mock.MagicMock(spec=QuerySet) qs_hosts = mock.MagicMock(spec=QuerySet)
qs_hosts._result_cache = hosts qs_hosts._result_cache = hosts
qs_hosts.only.return_value = hosts qs_hosts.__iter__ = lambda self: iter(self._result_cache)
qs_hosts.only.return_value = qs_hosts
qs_hosts.filter.return_value = qs_hosts
qs_hosts.order_by.return_value = qs_hosts
qs_hosts.iterator.side_effect = lambda: iter(qs_hosts._result_cache)
qs_hosts.count.side_effect = lambda: len(qs_hosts._result_cache) qs_hosts.count.side_effect = lambda: len(qs_hosts._result_cache)
inventory.hosts = qs_hosts inventory.hosts = qs_hosts
@@ -154,9 +158,12 @@ def test_pre_post_run_hook_facts_deleted_sliced(
host.inventory = mock_inventory host.inventory = mock_inventory
hosts.append(host) hosts.append(host)
# Mock inventory.hosts behavior # Mock inventory.hosts behavior — must support .only().filter().order_by().iterator() chain
mock_qs_hosts = mock.MagicMock() mock_qs_hosts = mock.MagicMock()
mock_qs_hosts.only.return_value = hosts mock_qs_hosts.only.return_value = mock_qs_hosts
mock_qs_hosts.filter.return_value = mock_qs_hosts
mock_qs_hosts.order_by.return_value = mock_qs_hosts
mock_qs_hosts.iterator.side_effect = lambda: iter(hosts)
mock_qs_hosts.count.return_value = 999 mock_qs_hosts.count.return_value = 999
mock_inventory.hosts = mock_qs_hosts mock_inventory.hosts = mock_qs_hosts