mirror of
https://github.com/ansible/awx.git
synced 2026-04-24 03:05:23 -02:30
[Devel] Performance Optimization for Select Hosts Query (#16413)
* Fixed black reformating * Make test simulate 500k hosts in real world scenario
This commit is contained in:
@@ -2078,9 +2078,17 @@ class BulkHostCreateSerializer(serializers.Serializer):
|
||||
if request and not request.user.is_superuser:
|
||||
if request.user not in inv.admin_role:
|
||||
raise serializers.ValidationError(_(f'Inventory with id {inv.id} not found or lack permissions to add hosts.'))
|
||||
current_hostnames = set(inv.hosts.values_list('name', flat=True))
|
||||
|
||||
# Performance optimization (AAP-67978): Instead of loading ALL host names from
|
||||
# the inventory, only check if the specific new names already exist in the database.
|
||||
new_names = [host['name'] for host in attrs['hosts']]
|
||||
duplicate_new_names = [n for n in new_names if n in current_hostnames or new_names.count(n) > 1]
|
||||
|
||||
new_name_counts = Counter(new_names)
|
||||
duplicates_in_new = [name for name, count in new_name_counts.items() if count > 1]
|
||||
unique_new_names = list(new_name_counts.keys())
|
||||
existing_duplicates = list(Host.objects.filter(inventory=inv, name__in=unique_new_names).values_list('name', flat=True))
|
||||
duplicate_new_names = list(set(duplicates_in_new + existing_duplicates))
|
||||
|
||||
if duplicate_new_names:
|
||||
raise serializers.ValidationError(_(f'Hostnames must be unique in an inventory. Duplicates found: {duplicate_new_names}'))
|
||||
|
||||
|
||||
@@ -445,3 +445,142 @@ def get_inventory_hosts(get, inv_id, use_user):
|
||||
data = get(reverse('api:inventory_hosts_list', kwargs={'pk': inv_id}), use_user, expect=200).data
|
||||
results = [host['id'] for host in data['results']]
|
||||
return results
|
||||
|
||||
|
||||
# Tests for BulkHostCreateSerializer duplicate detection optimization
|
||||
@pytest.mark.django_db
|
||||
def test_bulk_host_create_duplicate_within_batch(organization, inventory, post, user):
|
||||
"""
|
||||
Test that duplicate hostnames within the same batch are detected.
|
||||
This tests the Counter-based duplicate detection logic.
|
||||
"""
|
||||
inventory.organization = organization
|
||||
inv_admin = user('inventory_admin', False)
|
||||
organization.member_role.members.add(inv_admin)
|
||||
inventory.admin_role.members.add(inv_admin)
|
||||
|
||||
# Try to create hosts where 'duplicate-host' appears twice in the same batch
|
||||
hosts = [
|
||||
{'name': 'unique-host-1'},
|
||||
{'name': 'duplicate-host'},
|
||||
{'name': 'unique-host-2'},
|
||||
{'name': 'duplicate-host'}, # Duplicate within batch
|
||||
]
|
||||
|
||||
response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': hosts}, inv_admin, expect=400)
|
||||
|
||||
assert 'Hostnames must be unique in an inventory' in response.data['__all__'][0]
|
||||
assert 'duplicate-host' in response.data['__all__'][0]
|
||||
assert Host.objects.filter(inventory=inventory).count() == 0
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_bulk_host_create_duplicate_against_existing(organization, inventory, post, user):
|
||||
"""
|
||||
Test that duplicate hostnames against existing inventory hosts are detected.
|
||||
This tests the database query-based duplicate detection.
|
||||
"""
|
||||
inventory.organization = organization
|
||||
inv_admin = user('inventory_admin', False)
|
||||
organization.member_role.members.add(inv_admin)
|
||||
inventory.admin_role.members.add(inv_admin)
|
||||
|
||||
Host.objects.create(name='existing-host-1', inventory=inventory)
|
||||
Host.objects.create(name='existing-host-2', inventory=inventory)
|
||||
|
||||
# Try to create hosts where one already exists
|
||||
hosts = [
|
||||
{'name': 'new-host-1'},
|
||||
{'name': 'existing-host-1'},
|
||||
{'name': 'new-host-2'},
|
||||
]
|
||||
|
||||
response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': hosts}, inv_admin, expect=400)
|
||||
|
||||
assert 'Hostnames must be unique in an inventory' in response.data['__all__'][0]
|
||||
assert 'existing-host-1' in response.data['__all__'][0]
|
||||
assert Host.objects.filter(inventory=inventory).count() == 2
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_bulk_host_create_combined_duplicates(organization, inventory, post, user):
|
||||
"""
|
||||
Test detection of both batch-internal duplicates and duplicates against existing hosts.
|
||||
"""
|
||||
inventory.organization = organization
|
||||
inventory_admin = user('inventory_admin', False)
|
||||
organization.member_role.members.add(inventory_admin)
|
||||
inventory.admin_role.members.add(inventory_admin)
|
||||
|
||||
Host.objects.create(name='existing-host', inventory=inventory)
|
||||
|
||||
# Try to create hosts with both types of duplicates
|
||||
hosts = [
|
||||
{'name': 'new-host'},
|
||||
{'name': 'batch-duplicate'},
|
||||
{'name': 'existing-host'},
|
||||
{'name': 'batch-duplicate'},
|
||||
]
|
||||
|
||||
response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': hosts}, inventory_admin, expect=400)
|
||||
|
||||
error_message = response.data['__all__'][0]
|
||||
assert 'Hostnames must be unique in an inventory' in error_message
|
||||
assert 'batch-duplicate' in error_message or 'existing-host' in error_message
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_bulk_host_create_no_duplicates_success(organization, inventory, post, user):
|
||||
"""
|
||||
Test that hosts are created successfully when there are no duplicates.
|
||||
"""
|
||||
inventory.organization = organization
|
||||
inventory_admin = user('inventory_admin', False)
|
||||
organization.member_role.members.add(inventory_admin)
|
||||
inventory.admin_role.members.add(inventory_admin)
|
||||
|
||||
Host.objects.create(name='existing-host-1', inventory=inventory)
|
||||
Host.objects.create(name='existing-host-2', inventory=inventory)
|
||||
|
||||
# Create new hosts with unique names
|
||||
hosts = [
|
||||
{'name': 'new-host-1'},
|
||||
{'name': 'new-host-2'},
|
||||
{'name': 'new-host-3'},
|
||||
]
|
||||
|
||||
response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': hosts}, inventory_admin, expect=201)
|
||||
|
||||
assert len(response.data['hosts']) == 3
|
||||
assert Host.objects.filter(inventory=inventory).count() == 5
|
||||
assert Host.objects.filter(inventory=inventory, name='new-host-1').exists()
|
||||
assert Host.objects.filter(inventory=inventory, name='new-host-2').exists()
|
||||
assert Host.objects.filter(inventory=inventory, name='new-host-3').exists()
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_bulk_host_create_performance_large_inventory(organization, inventory, post, user, django_assert_max_num_queries):
|
||||
"""
|
||||
Test that duplicate detection is performant and doesn't load all hosts.
|
||||
"""
|
||||
inventory.organization = organization
|
||||
inventory_admin = user('inventory_admin', False)
|
||||
organization.member_role.members.add(inventory_admin)
|
||||
inventory.admin_role.members.add(inventory_admin)
|
||||
|
||||
# Create 10k existing hosts to simulate a reasonably large inventory
|
||||
from django.utils.timezone import now
|
||||
|
||||
_now = now()
|
||||
existing_hosts = [Host(name=f'existing-host-{i}', inventory=inventory, created=_now, modified=_now) for i in range(10000)]
|
||||
Host.objects.bulk_create(existing_hosts)
|
||||
|
||||
new_hosts = [{'name': f'new-host-{i}'} for i in range(10)]
|
||||
|
||||
# The number of queries should be bounded and not scale with inventory size
|
||||
# This should be around 15-20 queries regardless of whether there are 10k or 500k+ existing hosts
|
||||
with django_assert_max_num_queries(20):
|
||||
response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': new_hosts}, inventory_admin, expect=201)
|
||||
|
||||
assert len(response.data['hosts']) == 10
|
||||
assert Host.objects.filter(inventory=inventory).count() == 10010
|
||||
|
||||
Reference in New Issue
Block a user