mirror of
https://github.com/ansible/awx.git
synced 2026-03-24 12:25:01 -02:30
[Bug] AAP 42572 database deadlock (#15953)
* Demo of sorting hosts live test * Sort both bulk updates and add batch size to facts bulk update to resolve deadlock issue * Update tests to expect batch_size to agree with changes * Add utility method to bulk update and sort hosts and apply that to the appropriate locations Remove unused imports Add utility method for sorting bulk updates Remove try except OperationalError for loop Remove unused import of django.db.OperationalError Remove batch size as it is now on the bulk update utility method as 100 Remove batch size here since it is specified in sortedbulkupdate Add transaction.atomic so the entire operation is run as a single transaction before committing to the db Revert change to bulk update as it's not needed here and just sort instead Move bulk_sorted utility method into db.py and update name to not be specific to Hosts Revise to import bulk_update_sorted.. rather than calling it as an argument Fix way I'm importing bulk_update_sorted.. Remove unneeded Host import and remove calls to bulk_update as args Revise calls to bulk_update_sorted.. 
to include Host in the args Remove raw_update_hosts method and replace with bulk_update_sorted_by_id in update_hosts Remove update_hosts function and replace with bulk_update_sorted_by_id Update live tests to use bulk_update_sorted_by_id Fix the fields in bulk_update to agree with test * Update functional tests to use bulk_update_sorted_by_id since update_hosts has been deleted Replace update_hosts with bulk_update_sorted_by_id Remove references to update_hosts Update corresponding fact caching tests to use bulk_update_sorted_by_id Remove import of bulk_sorted_update Add code comment to live test to silence SonarQube hotspot * Add comment NOSONAR to get rid of SonarQube warning since this is just a test and it's not actually a security issue Get test_finish_job_fact_cache_with_existing_data passing Get test_finish_job_fact_cache_clear passing Remove reference to raw_update and replace with new bulk update utility method Add pytest.mark.django_db to appropriate tests Correct which model is called in bulk_update_sorted_by_id Remove now unused Host import Point to where bulk_update_sorted_by_id is actually being used Correct import of bulk_update_sorted_by_id Revert changes in this file to avoid db calls issue Remove @pytest.mark.django_db from unit tests Remove commented out host sorting suggested fix Fix failing tests test_pre_post_run_hook_facts_deleted_sliced & test_pre_post_run_hook_facts Remove atomic transaction line, add return, and add docstring * Fix failing tests test_finish_job_fact_cache_clear & test_finish_job_fact_cache_with_existing_data --------- Co-authored-by: Alan Rominger <arominge@redhat.com>
This commit is contained in:
@@ -8,13 +8,13 @@ import logging
|
||||
from django.conf import settings
|
||||
from django.utils.encoding import smart_str
|
||||
from django.utils.timezone import now
|
||||
from django.db import OperationalError
|
||||
|
||||
# django-ansible-base
|
||||
from ansible_base.lib.logging.runtime import log_excess_runtime
|
||||
|
||||
# AWX
|
||||
from awx.main.models.inventory import Host
|
||||
from awx.main.utils.db import bulk_update_sorted_by_id
|
||||
from awx.main.models import Host
|
||||
|
||||
|
||||
logger = logging.getLogger('awx.main.tasks.facts')
|
||||
@@ -61,28 +61,6 @@ def start_fact_cache(hosts, destination, log_data, timeout=None, inventory_id=No
|
||||
return None, hosts_cached
|
||||
|
||||
|
||||
def raw_update_hosts(host_list):
    """Save the fact columns of the given hosts with a single ``bulk_update`` call.

    Only ``ansible_facts`` and ``ansible_facts_modified`` are written. This
    function does no retrying or error handling of its own.
    """
    fact_fields = ['ansible_facts', 'ansible_facts_modified']
    Host.objects.bulk_update(host_list, fact_fields)
|
||||
|
||||
|
||||
def update_hosts(host_list, max_tries=5):
    """Save host facts via ``raw_update_hosts``, retrying on ``OperationalError``.

    Nothing is done when ``host_list`` is empty. Otherwise the update is
    attempted up to ``max_tries`` times; each failed attempt except the last
    is logged at info level, and the last failure is re-raised to the caller.
    """
    if not host_list:
        return

    last_index = max_tries - 1
    for i in range(max_tries):
        try:
            raw_update_hosts(host_list)
        except OperationalError as exc:
            if i >= last_index:
                # Out of attempts: let the caller see the database error.
                raise
            # Deadlocks can happen if this runs at the same time as another large query
            # inventory updates and updating last_job_host_summary are candidates for conflict
            # but these would resolve easily on a retry
            logger.info(f'OperationalError (suspected deadlock) saving host facts retry {i}, message: {exc}')
        else:
            # The update went through, so no further attempts are needed.
            return
|
||||
|
||||
|
||||
@log_excess_runtime(
|
||||
logger,
|
||||
debug_cutoff=0.01,
|
||||
@@ -95,6 +73,8 @@ def finish_fact_cache(hosts_cached, destination, facts_write_time, log_data, job
|
||||
log_data['unmodified_ct'] = 0
|
||||
log_data['cleared_ct'] = 0
|
||||
|
||||
hosts_cached = sorted((h for h in hosts_cached if h.id is not None), key=lambda h: h.id)
|
||||
|
||||
hosts_to_update = []
|
||||
for host in hosts_cached:
|
||||
filepath = os.sep.join(map(str, [destination, host.name]))
|
||||
@@ -135,6 +115,6 @@ def finish_fact_cache(hosts_cached, destination, facts_write_time, log_data, job
|
||||
system_tracking_logger.info('Facts cleared for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name)))
|
||||
log_data['cleared_ct'] += 1
|
||||
if len(hosts_to_update) > 100:
|
||||
update_hosts(hosts_to_update)
|
||||
bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified'])
|
||||
hosts_to_update = []
|
||||
update_hosts(hosts_to_update)
|
||||
bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified'])
|
||||
|
||||
Reference in New Issue
Block a user