Merge pull request #6992 from ryanpetrello/job-host-summary-optimization

drastically optimize job host summary creation

Reviewed-by: Ryan Petrello <https://github.com/ryanpetrello>
softwarefactory-project-zuul[bot] authored 2020-05-13 13:13:23 +00:00, committed by GitHub
commit ba4ae7c104
4 changed files with 129 additions and 30 deletions
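The shape of the optimization: the old stats handler issued several statements per host (a get_or_create for each summary, plus a per-host save to refresh the host's last_job bookkeeping), so a large inventory meant thousands of round trips per job; the new handler builds every JobHostSummary in memory, inserts them with one bulk_create, and fixes up the hosts with one bulk_update. A toy, self-contained sketch of the difference in statement counts (illustrative only; CountingDB and the SQL fragments are invented, not AWX code):

class CountingDB:
    """Stand-in connection that only counts the statements it is asked to run."""
    def __init__(self):
        self.statements = 0

    def execute(self, sql):
        self.statements += 1


def old_path(db, hostnames):
    # a summary lookup/insert and a host update for every host in the play
    for _ in hostnames:
        db.execute('SELECT ... FROM jobhostsummary WHERE ...')   # get_or_create lookup
        db.execute('INSERT INTO jobhostsummary ...')             # per-host summary row
        db.execute('UPDATE host SET last_job_id = ...')          # per-host bookkeeping


def new_path(db, hostnames):
    # a fixed number of statements no matter how many hosts the play touched
    db.execute('INSERT INTO jobhostsummary ...')                 # bulk_create of all summaries
    db.execute('SELECT id, host_id FROM jobhostsummary ...')     # summary-id mapping
    db.execute('UPDATE host SET ...')                            # bulk_update (batched)


if __name__ == '__main__':
    for path in (old_path, new_path):
        db = CountingDB()
        path(db, [f'Host {i}' for i in range(1000)])
        print(path.__name__, db.statements)   # 3000 vs. 3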


@@ -7,7 +7,7 @@ from collections import defaultdict
 from django.db import models, DatabaseError, connection
 from django.utils.dateparse import parse_datetime
 from django.utils.text import Truncator
-from django.utils.timezone import utc
+from django.utils.timezone import utc, now
 from django.utils.translation import ugettext_lazy as _
 from django.utils.encoding import force_text
@@ -407,11 +407,14 @@ class BasePlaybookEvent(CreatedModifiedModel):
         except (KeyError, ValueError):
             kwargs.pop('created', None)

+        host_map = kwargs.pop('host_map', {})
         sanitize_event_keys(kwargs, cls.VALID_KEYS)
         workflow_job_id = kwargs.pop('workflow_job_id', None)
         event = cls(**kwargs)
         if workflow_job_id:
             setattr(event, 'workflow_job_id', workflow_job_id)
+        setattr(event, 'host_map', host_map)
         event._update_from_event_data()
         return event
@@ -484,29 +487,45 @@ class JobEvent(BasePlaybookEvent):
         if not self.job or not self.job.inventory:
             logger.info('Event {} missing job or inventory, host summaries not updated'.format(self.pk))
             return
-        qs = self.job.inventory.hosts.filter(name__in=hostnames)
         job = self.job
+        from awx.main.models import Host, JobHostSummary  # circular import
+        all_hosts = Host.objects.filter(
+            pk__in=self.host_map.values()
+        ).only('id')
+        existing_host_ids = set(h.id for h in all_hosts)
+
+        summaries = dict()
         for host in hostnames:
+            host_id = self.host_map.get(host, None)
+            if host_id not in existing_host_ids:
+                host_id = None
             host_stats = {}
             for stat in ('changed', 'dark', 'failures', 'ignored', 'ok', 'processed', 'rescued', 'skipped'):
                 try:
                     host_stats[stat] = self.event_data.get(stat, {}).get(host, 0)
                 except AttributeError:  # in case event_data[stat] isn't a dict.
                     pass
-            if qs.filter(name=host).exists():
-                host_actual = qs.get(name=host)
-                host_summary, created = job.job_host_summaries.get_or_create(host=host_actual, host_name=host_actual.name, defaults=host_stats)
-            else:
-                host_summary, created = job.job_host_summaries.get_or_create(host_name=host, defaults=host_stats)
+            summary = JobHostSummary(
+                created=now(), modified=now(), job_id=job.id, host_id=host_id, host_name=host, **host_stats
+            )
+            summary.failed = bool(summary.dark or summary.failures)
+            summaries[(host_id, host)] = summary
+
+        JobHostSummary.objects.bulk_create(summaries.values())
+
+        # update the last_job_id and last_job_host_summary_id
+        # in single queries
+        host_mapping = dict(
+            (summary['host_id'], summary['id'])
+            for summary in JobHostSummary.objects.filter(job_id=job.id).values('id', 'host_id')
+        )
+        for h in all_hosts:
+            h.last_job_id = job.id
+            if h.id in host_mapping:
+                h.last_job_host_summary_id = host_mapping[h.id]
+        Host.objects.bulk_update(all_hosts, ['last_job_id', 'last_job_host_summary_id'])
-            if not created:
-                update_fields = []
-                for stat, value in host_stats.items():
-                    if getattr(host_summary, stat) != value:
-                        setattr(host_summary, stat, value)
-                        update_fields.append(stat)
-                if update_fields:
-                    host_summary.save(update_fields=update_fields)

     @property
     def job_verbosity(self):
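A side effect of bulk_create() is that Django does not call each model's save() method or send the save-time signals, so anything save() would normally derive has to be filled in before the insert; that is presumably why the new code stamps created and modified and computes failed by hand. A sketch of a check for the derived flag, written in the style of the new tests at the bottom of this diff (a hypothetical test, not part of the commit):

import pytest
from django.utils.timezone import now

from awx.main.models import Host, Inventory, Job, JobEvent


@pytest.mark.django_db
def test_bulk_created_summary_failed_flag():
    inv = Inventory()
    inv.save()
    Host.objects.bulk_create([
        Host(created=now(), modified=now(), name='Host 0', inventory_id=inv.id)
    ])
    j = Job(inventory=inv)
    j.save()
    host_map = {'Host 0': inv.hosts.first().id}
    JobEvent.create_from_data(
        job_id=j.pk,
        parent_uuid='abc123',
        event='playbook_on_stats',
        event_data={
            # one unreachable host: a non-zero 'dark' count should flip the derived flag
            'ok': {}, 'changed': {}, 'dark': {'Host 0': 1}, 'failures': {},
            'ignored': {}, 'processed': {}, 'rescued': {}, 'skipped': {},
        },
        host_map=host_map,
    ).save()
    summary = j.job_host_summaries.get()
    assert summary.dark == 1
    assert summary.failed is True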


@@ -1129,20 +1129,6 @@ class JobHostSummary(CreatedModifiedModel):
             self.failed = bool(self.dark or self.failures)
             update_fields.append('failed')
         super(JobHostSummary, self).save(*args, **kwargs)
-        self.update_host_last_job_summary()
-
-    def update_host_last_job_summary(self):
-        update_fields = []
-        if self.host is None:
-            return
-        if self.host.last_job_id != self.job_id:
-            self.host.last_job_id = self.job_id
-            update_fields.append('last_job_id')
-        if self.host.last_job_host_summary_id != self.id:
-            self.host.last_job_host_summary_id = self.id
-            update_fields.append('last_job_host_summary_id')
-        if update_fields:
-            self.host.save(update_fields=update_fields)


 class SystemJobOptions(BaseModel):


@@ -1215,6 +1215,8 @@ class BaseTask(object):
             else:
                 event_data['host_name'] = ''
                 event_data['host_id'] = ''
+        if event_data.get('event') == 'playbook_on_stats':
+            event_data['host_map'] = self.host_map
         if isinstance(self, RunProjectUpdate):
             # it's common for Ansible's SCM modules to print
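For context on the new host_map key: it is a plain {hostname: Host.pk} dict attached only to the final playbook_on_stats event, which lets the event model resolve callback hostnames to primary keys without querying by name; hosts deleted while the play ran drop out of the surviving-pk set and keep only their name on the summary. A tiny standalone illustration (the hostnames and pks are invented):

# host_map as shipped with the stats event: callback hostname -> Host primary key
host_map = {'web1.example.org': 42, 'db1.example.org': 43}

# pretend db1 was deleted while the play ran, so only pk 42 still exists
surviving_pks = {42}

for name, pk in host_map.items():
    host_id = pk if pk in surviving_pks else None   # mirrors the existing_host_ids check above
    print(f'{name}: host_id={host_id}')             # db1 keeps its name but loses the FK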


@@ -1,7 +1,9 @@
 from unittest import mock
 import pytest

-from awx.main.models import Job, JobEvent
+from django.utils.timezone import now
+
+from awx.main.models import Job, JobEvent, Inventory, Host


 @pytest.mark.django_db
@@ -61,3 +63,93 @@ def test_parent_failed(emit, event):
     assert events.count() == 2
     for e in events.all():
         assert e.failed is True
+
+
+@pytest.mark.django_db
+def test_host_summary_generation():
+    hostnames = [f'Host {i}' for i in range(100)]
+    inv = Inventory()
+    inv.save()
+    Host.objects.bulk_create([
+        Host(created=now(), modified=now(), name=h, inventory_id=inv.id)
+        for h in hostnames
+    ])
+    j = Job(inventory=inv)
+    j.save()
+    host_map = dict((host.name, host.id) for host in inv.hosts.all())
+    JobEvent.create_from_data(
+        job_id=j.pk,
+        parent_uuid='abc123',
+        event='playbook_on_stats',
+        event_data={
+            'ok': dict((hostname, len(hostname)) for hostname in hostnames),
+            'changed': {},
+            'dark': {},
+            'failures': {},
+            'ignored': {},
+            'processed': {},
+            'rescued': {},
+            'skipped': {},
+        },
+        host_map=host_map
+    ).save()
+
+    assert j.job_host_summaries.count() == len(hostnames)
+    assert sorted([s.host_name for s in j.job_host_summaries.all()]) == sorted(hostnames)
+
+    for s in j.job_host_summaries.all():
+        assert host_map[s.host_name] == s.host_id
+        assert s.ok == len(s.host_name)
+        assert s.changed == 0
+        assert s.dark == 0
+        assert s.failures == 0
+        assert s.ignored == 0
+        assert s.processed == 0
+        assert s.rescued == 0
+        assert s.skipped == 0
+
+    for host in Host.objects.all():
+        assert host.last_job_id == j.id
+        assert host.last_job_host_summary.host == host
+
+
+@pytest.mark.django_db
+def test_host_summary_generation_with_deleted_hosts():
+    hostnames = [f'Host {i}' for i in range(10)]
+    inv = Inventory()
+    inv.save()
+    Host.objects.bulk_create([
+        Host(created=now(), modified=now(), name=h, inventory_id=inv.id)
+        for h in hostnames
+    ])
+    j = Job(inventory=inv)
+    j.save()
+    host_map = dict((host.name, host.id) for host in inv.hosts.all())
+
+    # delete half of the hosts during the playbook run
+    for h in inv.hosts.all()[:5]:
+        h.delete()
+
+    JobEvent.create_from_data(
+        job_id=j.pk,
+        parent_uuid='abc123',
+        event='playbook_on_stats',
+        event_data={
+            'ok': dict((hostname, len(hostname)) for hostname in hostnames),
+            'changed': {},
+            'dark': {},
+            'failures': {},
+            'ignored': {},
+            'processed': {},
+            'rescued': {},
+            'skipped': {},
+        },
+        host_map=host_map
+    ).save()
+
+    ids = sorted([s.host_id or -1 for s in j.job_host_summaries.order_by('id').all()])
+    names = sorted([s.host_name for s in j.job_host_summaries.all()])
+    assert ids == [-1, -1, -1, -1, -1, 6, 7, 8, 9, 10]
+    assert names == ['Host 0', 'Host 1', 'Host 2', 'Host 3', 'Host 4', 'Host 5',
+                     'Host 6', 'Host 7', 'Host 8', 'Host 9']
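A possible follow-up, not part of this commit: pin the now-constant query count so a regression back to per-host writes fails loudly. pytest-django's django_assert_max_num_queries fixture can express that, assuming the installed pytest-django is new enough to provide it; the ceiling of 20 below is a guess rather than a measured number:

import pytest
from django.utils.timezone import now

from awx.main.models import Host, Inventory, Job, JobEvent


@pytest.mark.django_db
def test_host_summary_generation_query_count(django_assert_max_num_queries):
    hostnames = [f'Host {i}' for i in range(100)]
    inv = Inventory()
    inv.save()
    Host.objects.bulk_create([
        Host(created=now(), modified=now(), name=h, inventory_id=inv.id)
        for h in hostnames
    ])
    j = Job(inventory=inv)
    j.save()
    host_map = dict((host.name, host.id) for host in inv.hosts.all())
    with django_assert_max_num_queries(20):   # guessed ceiling; tune after measuring
        JobEvent.create_from_data(
            job_id=j.pk,
            parent_uuid='abc123',
            event='playbook_on_stats',
            event_data={
                'ok': dict((hostname, len(hostname)) for hostname in hostnames),
                'changed': {}, 'dark': {}, 'failures': {}, 'ignored': {},
                'processed': {}, 'rescued': {}, 'skipped': {},
            },
            host_map=host_map,
        ).save()
    assert j.job_host_summaries.count() == len(hostnames)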