drastically optimize job host summary creation

see: https://github.com/ansible/awx/issues/6991
This commit is contained in:
Ryan Petrello 2020-05-11 16:58:13 -04:00 committed by Elijah DeLee
parent cdfc9e05d4
commit b6d3c3c1a3
3 changed files with 67 additions and 16 deletions

View File

@ -7,7 +7,7 @@ from collections import defaultdict
from django.db import models, DatabaseError, connection
from django.utils.dateparse import parse_datetime
from django.utils.text import Truncator
from django.utils.timezone import utc
from django.utils.timezone import utc, now
from django.utils.translation import ugettext_lazy as _
from django.utils.encoding import force_text
@ -407,11 +407,14 @@ class BasePlaybookEvent(CreatedModifiedModel):
except (KeyError, ValueError):
kwargs.pop('created', None)
host_map = kwargs.pop('host_map', {})
sanitize_event_keys(kwargs, cls.VALID_KEYS)
workflow_job_id = kwargs.pop('workflow_job_id', None)
event = cls(**kwargs)
if workflow_job_id:
setattr(event, 'workflow_job_id', workflow_job_id)
setattr(event, 'host_map', host_map)
event._update_from_event_data()
return event
@ -484,8 +487,10 @@ class JobEvent(BasePlaybookEvent):
if not self.job or not self.job.inventory:
logger.info('Event {} missing job or inventory, host summaries not updated'.format(self.pk))
return
qs = self.job.inventory.hosts.filter(name__in=hostnames)
job = self.job
from awx.main.models.jobs import JobHostSummary # circular import
summaries = dict()
for host in hostnames:
host_stats = {}
for stat in ('changed', 'dark', 'failures', 'ignored', 'ok', 'processed', 'rescued', 'skipped'):
@ -493,20 +498,18 @@ class JobEvent(BasePlaybookEvent):
host_stats[stat] = self.event_data.get(stat, {}).get(host, 0)
except AttributeError: # in case event_data[stat] isn't a dict.
pass
if qs.filter(name=host).exists():
host_actual = qs.get(name=host)
host_summary, created = job.job_host_summaries.get_or_create(host=host_actual, host_name=host_actual.name, defaults=host_stats)
else:
host_summary, created = job.job_host_summaries.get_or_create(host_name=host, defaults=host_stats)
host_id = self.host_map.get(host, None)
summaries.setdefault(
(host_id, host),
JobHostSummary(created=now(), modified=now(), job_id=job.id, host_id=host_id, host_name=host)
)
host_summary = summaries[(host_id, host)]
if not created:
update_fields = []
for stat, value in host_stats.items():
if getattr(host_summary, stat) != value:
setattr(host_summary, stat, value)
update_fields.append(stat)
if update_fields:
host_summary.save(update_fields=update_fields)
for stat, value in host_stats.items():
if getattr(host_summary, stat) != value:
setattr(host_summary, stat, value)
JobHostSummary.objects.bulk_create(summaries.values())
@property
def job_verbosity(self):

View File

@ -1215,6 +1215,8 @@ class BaseTask(object):
else:
event_data['host_name'] = ''
event_data['host_id'] = ''
if event_data.get('event') == 'playbook_on_stats':
event_data['host_map'] = self.host_map
if isinstance(self, RunProjectUpdate):
# it's common for Ansible's SCM modules to print

View File

@ -1,7 +1,9 @@
from unittest import mock
import pytest
from awx.main.models import Job, JobEvent
from django.utils.timezone import now
from awx.main.models import Job, JobEvent, Inventory, Host
@pytest.mark.django_db
@ -61,3 +63,47 @@ def test_parent_failed(emit, event):
assert events.count() == 2
for e in events.all():
assert e.failed is True
@pytest.mark.django_db
def test_host_summary_generation():
    """A single ``playbook_on_stats`` event yields one summary per host.

    Builds an inventory with 5000 hosts, saves one stats event that
    reports an ``ok`` count for every host (and empty dicts for every
    other stat), then checks that each host received exactly one job
    host summary carrying the expected host id and counters.
    """
    # Stats that the event reports as empty dicts; each is expected to
    # come out as 0 on every resulting summary.
    zero_stats = ('changed', 'dark', 'failures', 'ignored', 'processed', 'rescued', 'skipped')

    hostnames = [f'Host {i}' for i in range(5000)]

    inventory = Inventory()
    inventory.save()
    new_hosts = [
        Host(created=now(), modified=now(), name=name, inventory_id=inventory.id)
        for name in hostnames
    ]
    Host.objects.bulk_create(new_hosts)

    job = Job(inventory=inventory)
    job.save()

    # Host name -> host primary key, passed alongside the stats event.
    host_map = {host.name: host.id for host in inventory.hosts.all()}

    # The per-host 'ok' count is the length of the host's name, which
    # lets the assertions below recompute the expected value per row.
    event_data = {'ok': {name: len(name) for name in hostnames}}
    for stat in zero_stats:
        event_data[stat] = {}

    stats_event = JobEvent.create_from_data(
        job_id=job.pk,
        parent_uuid='abc123',
        event='playbook_on_stats',
        event_data=event_data,
        host_map=host_map,
    )
    stats_event.save()

    assert job.job_host_summaries.count() == len(hostnames)
    summarized_names = sorted(summary.host_name for summary in job.job_host_summaries.all())
    assert summarized_names == sorted(hostnames)

    for summary in job.job_host_summaries.all():
        assert host_map[summary.host_name] == summary.host_id
        assert summary.ok == len(summary.host_name)
        for stat in zero_stats:
            assert getattr(summary, stat) == 0