[Devel] Performance Optimization for Select Hosts Query (#16413)

* Fixed black reformating

* Make test simulate 500k hosts in real world scenario
This commit is contained in:
Stevenson Michel
2026-04-22 12:05:36 -04:00
committed by GitHub
parent 3fd3b741b6
commit 55ad29ac68
2 changed files with 149 additions and 2 deletions

View File

@@ -2078,9 +2078,17 @@ class BulkHostCreateSerializer(serializers.Serializer):
if request and not request.user.is_superuser:
if request.user not in inv.admin_role:
raise serializers.ValidationError(_(f'Inventory with id {inv.id} not found or lack permissions to add hosts.'))
current_hostnames = set(inv.hosts.values_list('name', flat=True))
# Performance optimization (AAP-67978): Instead of loading ALL host names from
# the inventory, only check if the specific new names already exist in the database.
new_names = [host['name'] for host in attrs['hosts']]
duplicate_new_names = [n for n in new_names if n in current_hostnames or new_names.count(n) > 1]
new_name_counts = Counter(new_names)
duplicates_in_new = [name for name, count in new_name_counts.items() if count > 1]
unique_new_names = list(new_name_counts.keys())
existing_duplicates = list(Host.objects.filter(inventory=inv, name__in=unique_new_names).values_list('name', flat=True))
duplicate_new_names = list(set(duplicates_in_new + existing_duplicates))
if duplicate_new_names:
raise serializers.ValidationError(_(f'Hostnames must be unique in an inventory. Duplicates found: {duplicate_new_names}'))

View File

@@ -445,3 +445,142 @@ def get_inventory_hosts(get, inv_id, use_user):
data = get(reverse('api:inventory_hosts_list', kwargs={'pk': inv_id}), use_user, expect=200).data
results = [host['id'] for host in data['results']]
return results
# Tests for BulkHostCreateSerializer duplicate detection optimization
@pytest.mark.django_db
def test_bulk_host_create_duplicate_within_batch(organization, inventory, post, user):
"""
Test that duplicate hostnames within the same batch are detected.
This tests the Counter-based duplicate detection logic.
"""
inventory.organization = organization
inv_admin = user('inventory_admin', False)
organization.member_role.members.add(inv_admin)
inventory.admin_role.members.add(inv_admin)
# Try to create hosts where 'duplicate-host' appears twice in the same batch
hosts = [
{'name': 'unique-host-1'},
{'name': 'duplicate-host'},
{'name': 'unique-host-2'},
{'name': 'duplicate-host'}, # Duplicate within batch
]
response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': hosts}, inv_admin, expect=400)
assert 'Hostnames must be unique in an inventory' in response.data['__all__'][0]
assert 'duplicate-host' in response.data['__all__'][0]
assert Host.objects.filter(inventory=inventory).count() == 0
@pytest.mark.django_db
def test_bulk_host_create_duplicate_against_existing(organization, inventory, post, user):
"""
Test that duplicate hostnames against existing inventory hosts are detected.
This tests the database query-based duplicate detection.
"""
inventory.organization = organization
inv_admin = user('inventory_admin', False)
organization.member_role.members.add(inv_admin)
inventory.admin_role.members.add(inv_admin)
Host.objects.create(name='existing-host-1', inventory=inventory)
Host.objects.create(name='existing-host-2', inventory=inventory)
# Try to create hosts where one already exists
hosts = [
{'name': 'new-host-1'},
{'name': 'existing-host-1'},
{'name': 'new-host-2'},
]
response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': hosts}, inv_admin, expect=400)
assert 'Hostnames must be unique in an inventory' in response.data['__all__'][0]
assert 'existing-host-1' in response.data['__all__'][0]
assert Host.objects.filter(inventory=inventory).count() == 2
@pytest.mark.django_db
def test_bulk_host_create_combined_duplicates(organization, inventory, post, user):
"""
Test detection of both batch-internal duplicates and duplicates against existing hosts.
"""
inventory.organization = organization
inventory_admin = user('inventory_admin', False)
organization.member_role.members.add(inventory_admin)
inventory.admin_role.members.add(inventory_admin)
Host.objects.create(name='existing-host', inventory=inventory)
# Try to create hosts with both types of duplicates
hosts = [
{'name': 'new-host'},
{'name': 'batch-duplicate'},
{'name': 'existing-host'},
{'name': 'batch-duplicate'},
]
response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': hosts}, inventory_admin, expect=400)
error_message = response.data['__all__'][0]
assert 'Hostnames must be unique in an inventory' in error_message
assert 'batch-duplicate' in error_message or 'existing-host' in error_message
@pytest.mark.django_db
def test_bulk_host_create_no_duplicates_success(organization, inventory, post, user):
"""
Test that hosts are created successfully when there are no duplicates.
"""
inventory.organization = organization
inventory_admin = user('inventory_admin', False)
organization.member_role.members.add(inventory_admin)
inventory.admin_role.members.add(inventory_admin)
Host.objects.create(name='existing-host-1', inventory=inventory)
Host.objects.create(name='existing-host-2', inventory=inventory)
# Create new hosts with unique names
hosts = [
{'name': 'new-host-1'},
{'name': 'new-host-2'},
{'name': 'new-host-3'},
]
response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': hosts}, inventory_admin, expect=201)
assert len(response.data['hosts']) == 3
assert Host.objects.filter(inventory=inventory).count() == 5
assert Host.objects.filter(inventory=inventory, name='new-host-1').exists()
assert Host.objects.filter(inventory=inventory, name='new-host-2').exists()
assert Host.objects.filter(inventory=inventory, name='new-host-3').exists()
@pytest.mark.django_db
def test_bulk_host_create_performance_large_inventory(organization, inventory, post, user, django_assert_max_num_queries):
"""
Test that duplicate detection is performant and doesn't load all hosts.
"""
inventory.organization = organization
inventory_admin = user('inventory_admin', False)
organization.member_role.members.add(inventory_admin)
inventory.admin_role.members.add(inventory_admin)
# Create 10k existing hosts to simulate a reasonably large inventory
from django.utils.timezone import now
_now = now()
existing_hosts = [Host(name=f'existing-host-{i}', inventory=inventory, created=_now, modified=_now) for i in range(10000)]
Host.objects.bulk_create(existing_hosts)
new_hosts = [{'name': f'new-host-{i}'} for i in range(10)]
# The number of queries should be bounded and not scale with inventory size
# This should be around 15-20 queries regardless of whether there are 10k or 500k+ existing hosts
with django_assert_max_num_queries(20):
response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': new_hosts}, inventory_admin, expect=201)
assert len(response.data['hosts']) == 10
assert Host.objects.filter(inventory=inventory).count() == 10010