diff --git a/awx/api/serializers.py b/awx/api/serializers.py index 453c6716e7..e916550d82 100644 --- a/awx/api/serializers.py +++ b/awx/api/serializers.py @@ -2078,9 +2078,17 @@ class BulkHostCreateSerializer(serializers.Serializer): if request and not request.user.is_superuser: if request.user not in inv.admin_role: raise serializers.ValidationError(_(f'Inventory with id {inv.id} not found or lack permissions to add hosts.')) - current_hostnames = set(inv.hosts.values_list('name', flat=True)) + + # Performance optimization (AAP-67978): Instead of loading ALL host names from + # the inventory, only check if the specific new names already exist in the database. new_names = [host['name'] for host in attrs['hosts']] - duplicate_new_names = [n for n in new_names if n in current_hostnames or new_names.count(n) > 1] + + new_name_counts = Counter(new_names) + duplicates_in_new = [name for name, count in new_name_counts.items() if count > 1] + unique_new_names = list(new_name_counts.keys()) + existing_duplicates = list(Host.objects.filter(inventory=inv, name__in=unique_new_names).values_list('name', flat=True)) + duplicate_new_names = list(set(duplicates_in_new + existing_duplicates)) + if duplicate_new_names: raise serializers.ValidationError(_(f'Hostnames must be unique in an inventory. Duplicates found: {duplicate_new_names}')) diff --git a/awx/main/tests/functional/test_bulk.py b/awx/main/tests/functional/test_bulk.py index 6b166cdf2b..3d31a91d86 100644 --- a/awx/main/tests/functional/test_bulk.py +++ b/awx/main/tests/functional/test_bulk.py @@ -445,3 +445,142 @@ def get_inventory_hosts(get, inv_id, use_user): data = get(reverse('api:inventory_hosts_list', kwargs={'pk': inv_id}), use_user, expect=200).data results = [host['id'] for host in data['results']] return results + + +# Tests for BulkHostCreateSerializer duplicate detection optimization +@pytest.mark.django_db +def test_bulk_host_create_duplicate_within_batch(organization, inventory, post, user): + """ + Test that duplicate hostnames within the same batch are detected. + This tests the Counter-based duplicate detection logic. + """ + inventory.organization = organization + inv_admin = user('inventory_admin', False) + organization.member_role.members.add(inv_admin) + inventory.admin_role.members.add(inv_admin) + + # Try to create hosts where 'duplicate-host' appears twice in the same batch + hosts = [ + {'name': 'unique-host-1'}, + {'name': 'duplicate-host'}, + {'name': 'unique-host-2'}, + {'name': 'duplicate-host'}, # Duplicate within batch + ] + + response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': hosts}, inv_admin, expect=400) + + assert 'Hostnames must be unique in an inventory' in response.data['__all__'][0] + assert 'duplicate-host' in response.data['__all__'][0] + assert Host.objects.filter(inventory=inventory).count() == 0 + + +@pytest.mark.django_db +def test_bulk_host_create_duplicate_against_existing(organization, inventory, post, user): + """ + Test that duplicate hostnames against existing inventory hosts are detected. + This tests the database query-based duplicate detection. + """ + inventory.organization = organization + inv_admin = user('inventory_admin', False) + organization.member_role.members.add(inv_admin) + inventory.admin_role.members.add(inv_admin) + + Host.objects.create(name='existing-host-1', inventory=inventory) + Host.objects.create(name='existing-host-2', inventory=inventory) + + # Try to create hosts where one already exists + hosts = [ + {'name': 'new-host-1'}, + {'name': 'existing-host-1'}, + {'name': 'new-host-2'}, + ] + + response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': hosts}, inv_admin, expect=400) + + assert 'Hostnames must be unique in an inventory' in response.data['__all__'][0] + assert 'existing-host-1' in response.data['__all__'][0] + assert Host.objects.filter(inventory=inventory).count() == 2 + + +@pytest.mark.django_db +def test_bulk_host_create_combined_duplicates(organization, inventory, post, user): + """ + Test detection of both batch-internal duplicates and duplicates against existing hosts. + """ + inventory.organization = organization + inventory_admin = user('inventory_admin', False) + organization.member_role.members.add(inventory_admin) + inventory.admin_role.members.add(inventory_admin) + + Host.objects.create(name='existing-host', inventory=inventory) + + # Try to create hosts with both types of duplicates + hosts = [ + {'name': 'new-host'}, + {'name': 'batch-duplicate'}, + {'name': 'existing-host'}, + {'name': 'batch-duplicate'}, + ] + + response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': hosts}, inventory_admin, expect=400) + + error_message = response.data['__all__'][0] + assert 'Hostnames must be unique in an inventory' in error_message + assert 'batch-duplicate' in error_message or 'existing-host' in error_message + + +@pytest.mark.django_db +def test_bulk_host_create_no_duplicates_success(organization, inventory, post, user): + """ + Test that hosts are created successfully when there are no duplicates. + """ + inventory.organization = organization + inventory_admin = user('inventory_admin', False) + organization.member_role.members.add(inventory_admin) + inventory.admin_role.members.add(inventory_admin) + + Host.objects.create(name='existing-host-1', inventory=inventory) + Host.objects.create(name='existing-host-2', inventory=inventory) + + # Create new hosts with unique names + hosts = [ + {'name': 'new-host-1'}, + {'name': 'new-host-2'}, + {'name': 'new-host-3'}, + ] + + response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': hosts}, inventory_admin, expect=201) + + assert len(response.data['hosts']) == 3 + assert Host.objects.filter(inventory=inventory).count() == 5 + assert Host.objects.filter(inventory=inventory, name='new-host-1').exists() + assert Host.objects.filter(inventory=inventory, name='new-host-2').exists() + assert Host.objects.filter(inventory=inventory, name='new-host-3').exists() + + +@pytest.mark.django_db +def test_bulk_host_create_performance_large_inventory(organization, inventory, post, user, django_assert_max_num_queries): + """ + Test that duplicate detection is performant and doesn't load all hosts. + """ + inventory.organization = organization + inventory_admin = user('inventory_admin', False) + organization.member_role.members.add(inventory_admin) + inventory.admin_role.members.add(inventory_admin) + + # Create 10k existing hosts to simulate a reasonably large inventory + from django.utils.timezone import now + + _now = now() + existing_hosts = [Host(name=f'existing-host-{i}', inventory=inventory, created=_now, modified=_now) for i in range(10000)] + Host.objects.bulk_create(existing_hosts) + + new_hosts = [{'name': f'new-host-{i}'} for i in range(10)] + + # The number of queries should be bounded and not scale with inventory size + # This should be around 15-20 queries regardless of whether there are 10k or 500k+ existing hosts + with django_assert_max_num_queries(20): + response = post(reverse('api:bulk_host_create'), {'inventory': inventory.id, 'hosts': new_hosts}, inventory_admin, expect=201) + + assert len(response.data['hosts']) == 10 + assert Host.objects.filter(inventory=inventory).count() == 10010