Add a retry to update host facts on deadlocks (#14325)

This commit is contained in:
Alan Rominger 2023-08-11 11:13:56 -04:00 committed by GitHub
parent 5cf93febaa
commit 2c7184f9d2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 74 additions and 4 deletions

View File

@ -9,6 +9,7 @@ from django.conf import settings
from django.db.models.query import QuerySet
from django.utils.encoding import smart_str
from django.utils.timezone import now
from django.db import OperationalError
# AWX
from awx.main.utils.common import log_excess_runtime
@ -57,6 +58,28 @@ def start_fact_cache(hosts, destination, log_data, timeout=None, inventory_id=No
return None
def raw_update_hosts(host_list):
    """Persist the fact columns of the given hosts with a single bulk update.

    Only ``ansible_facts`` and ``ansible_facts_modified`` are written.
    Nothing is caught here, so any exception raised by the ORM call
    propagates to the caller.
    """
    fact_fields = ['ansible_facts', 'ansible_facts_modified']
    Host.objects.bulk_update(host_list, fact_fields)
def update_hosts(host_list, max_tries=5):
    """Save host facts, retrying when the database reports an OperationalError.

    :param host_list: hosts whose fact fields should be written; a falsy
        value (empty list, ``None``) makes this a no-op.
    :param max_tries: total number of attempts, the first one included.
    :raises OperationalError: re-raised unchanged when the final attempt fails.
    """
    if not host_list:
        return

    for i in range(max_tries):
        try:
            raw_update_hosts(host_list)
        except OperationalError as exc:
            # Deadlocks can happen if this runs at the same time as another
            # large query. Inventory updates and updating
            # last_job_host_summary are candidates for conflict, but these
            # would resolve easily on a retry.
            out_of_attempts = i + 1 >= max_tries
            if out_of_attempts:
                raise
            logger.info(f'OperationalError (suspected deadlock) saving host facts retry {i}, message: {exc}')
        else:
            # The write went through, so no further attempts are needed.
            return
@log_excess_runtime(
logger,
debug_cutoff=0.01,
@ -111,7 +134,5 @@ def finish_fact_cache(hosts, destination, facts_write_time, log_data, job_id=Non
system_tracking_logger.info('Facts cleared for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name)))
log_data['cleared_ct'] += 1
if len(hosts_to_update) > 100:
Host.objects.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])
hosts_to_update = []
if hosts_to_update:
Host.objects.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])
update_hosts(hosts_to_update)
update_hosts(hosts_to_update)

View File

@ -6,6 +6,7 @@ import json
from awx.main.models import (
Job,
Instance,
Host,
JobHostSummary,
InventoryUpdate,
InventorySource,
@ -18,6 +19,9 @@ from awx.main.models import (
ExecutionEnvironment,
)
from awx.main.tasks.system import cluster_node_heartbeat
from awx.main.tasks.facts import update_hosts
from django.db import OperationalError
from django.test.utils import override_settings
@ -112,6 +116,51 @@ def test_job_notification_host_data(inventory, machine_credential, project, job_
}
@pytest.mark.django_db
class TestAnsibleFactsSave:
    """Exercise ``update_hosts`` against deleted rows and simulated deadlocks."""

    # Number of simulated failures raised so far by fake_bulk_update.
    current_call = 0

    @staticmethod
    def _hosts_with_pending_facts(inventory):
        """Create three hosts and set (but do not save) facts on each of them."""
        created = []
        for i in range(3):
            new_host = Host.objects.create(inventory=inventory, name=f'foo{i}')
            new_host.ansible_facts = {'foo': 'bar'}
            created.append(new_host)
        return created

    def test_update_hosts_deleted_host(self, inventory):
        """A host deleted before the save is not re-created; the others get facts."""
        hosts = self._hosts_with_pending_facts(inventory)
        assert inventory.hosts.count() == 3

        # Remove the last host through a separate instance so the in-memory
        # list still contains an object for the deleted row.
        Host.objects.get(pk=hosts[-1].pk).delete()
        assert inventory.hosts.count() == 2

        update_hosts(hosts)

        assert inventory.hosts.count() == 2
        for remaining in inventory.hosts.all():
            remaining.refresh_from_db()
            assert remaining.ansible_facts == {'foo': 'bar'}

    def test_update_hosts_forever_deadlock(self, inventory, mocker):
        """If every attempt fails, the OperationalError reaches the caller."""
        hosts = self._hosts_with_pending_facts(inventory)
        mocker.patch(
            'awx.main.tasks.facts.Host.objects.bulk_update',
            side_effect=OperationalError('deadlock detected'),
        )
        with pytest.raises(OperationalError):
            update_hosts(hosts)

    def fake_bulk_update(self, host_list):
        """Stand-in for ``raw_update_hosts``: fail three times, then really save."""
        if self.current_call <= 2:
            self.current_call += 1
            raise OperationalError('deadlock detected')
        return Host.objects.bulk_update(host_list, ['ansible_facts', 'ansible_facts_modified'])

    def test_update_hosts_resolved_deadlock(self, inventory, mocker):
        """Facts are saved once the simulated deadlock stops occurring."""
        hosts = self._hosts_with_pending_facts(inventory)
        self.current_call = 0
        mocker.patch('awx.main.tasks.facts.raw_update_hosts', new=self.fake_bulk_update)

        update_hosts(hosts)

        for saved in inventory.hosts.all():
            saved.refresh_from_db()
            assert saved.ansible_facts == {'foo': 'bar'}
@pytest.mark.django_db
class TestLaunchConfig:
def test_null_creation_from_prompts(self):