optimize the callback receiver to buffer writes on high throughput

additionally, optimize away several per-event host lookups and
changed/failed propagation lookups

we've always performed these (fairly expensive) queries *on every event
save* - if you're processing tens of thousands of events in short
bursts, this is way too slow

this commit also introduces a new command for profiling the insertion
rate of events, `awx-manage callback_stats`

see: https://github.com/ansible/awx/issues/5514
This commit is contained in:
Ryan Petrello
2020-01-08 16:14:47 -05:00
parent 862fafab86
commit 306f504fb7
17 changed files with 370 additions and 495 deletions

View File

@@ -0,0 +1,37 @@
import time
import sys
from django.db import connection
from django.core.management.base import BaseCommand
class Command(BaseCommand):
    """Continuously print event-insertion rates for each event table.

    Intended for profiling callback receiver throughput: every quarter
    second it prints, for each event relation, an estimate of how many
    rows were inserted in the trailing minute / 5 minutes / hour, then
    redraws in place using ANSI escape codes.
    """

    def handle(self, *args, **options):
        with connection.cursor() as cursor:
            # only start clearing the screen after the first full print
            clear = False
            while True:
                lines = []
                for relation in (
                    'main_jobevent', 'main_inventoryupdateevent',
                    'main_projectupdateevent', 'main_adhoccommandevent'
                ):
                    lines.append(relation)
                    for label, interval in (
                        ('last minute: ', '1 minute'),
                        ('last 5 minutes:', '5 minutes'),
                        ('last hour: ', '1 hour'),
                    ):
                        # MAX(id) - MIN(id) approximates insert volume over the
                        # window far more cheaply than COUNT(*); interval values
                        # are hard-coded constants, not user input
                        cursor.execute(
                            f"SELECT MAX(id) - MIN(id) FROM {relation} WHERE modified > now() - '{interval}'::interval;"
                        )
                        events = cursor.fetchone()[0] or 0
                        lines.append(f'{label} {events}')
                    lines.append('')
                if clear:
                    # cursor-up + erase-line, once per line printed last pass
                    # (was hard-coded to 20, which would drift out of sync if
                    # the relation or interval tuples ever change)
                    for _ in range(len(lines)):
                        sys.stdout.write('\x1b[1A\x1b[2K')
                for line in lines:
                    print(line)
                clear = True
                time.sleep(0.25)

View File

@@ -9,6 +9,7 @@ import random
from django.utils import timezone
from django.core.management.base import BaseCommand
from awx.main.models.events import emit_event_detail
from awx.main.models import (
UnifiedJob,
Job,
@@ -17,14 +18,6 @@ from awx.main.models import (
InventoryUpdate,
SystemJob
)
from awx.main.consumers import emit_channel_notification
from awx.api.serializers import (
JobEventWebSocketSerializer,
AdHocCommandEventWebSocketSerializer,
ProjectUpdateEventWebSocketSerializer,
InventoryUpdateEventWebSocketSerializer,
SystemJobEventWebSocketSerializer
)
class JobStatusLifeCycle():
@@ -96,21 +89,6 @@ class ReplayJobEvents(JobStatusLifeCycle):
raise RuntimeError("No events for job id {}".format(job.id))
return job_events, count
def get_serializer(self, job):
    """Return the websocket serializer class for *job*'s concrete type.

    Uses exact type() comparison (not isinstance) so that each unified
    job subclass maps only to its own serializer.

    Raises:
        RuntimeError: if the job type has no replay support yet.
    """
    serializer_by_type = {
        Job: JobEventWebSocketSerializer,
        AdHocCommand: AdHocCommandEventWebSocketSerializer,
        ProjectUpdate: ProjectUpdateEventWebSocketSerializer,
        InventoryUpdate: InventoryUpdateEventWebSocketSerializer,
        SystemJob: SystemJobEventWebSocketSerializer,
    }
    try:
        return serializer_by_type[type(job)]
    except KeyError:
        # the original followed this raise with an unreachable sys.exit(1);
        # that dead statement has been removed
        raise RuntimeError("Job is of type {} and replay is not yet supported.".format(type(job)))
def run(self, job_id, speed=1.0, verbosity=0, skip_range=[], random_seed=0, final_status_delay=0, debug=False):
stats = {
'events_ontime': {
@@ -136,7 +114,6 @@ class ReplayJobEvents(JobStatusLifeCycle):
try:
job = self.get_job(job_id)
job_events, job_event_count = self.get_job_events(job)
serializer = self.get_serializer(job)
except RuntimeError as e:
print("{}".format(e.message))
sys.exit(1)
@@ -162,8 +139,7 @@ class ReplayJobEvents(JobStatusLifeCycle):
stats['replay_start'] = self.replay_start
je_previous = je_current
je_serialized = serializer(je_current).data
emit_channel_notification('{}-{}'.format(je_serialized['group_name'], job.id), je_serialized)
emit_event_detail(je_current)
replay_offset = self.replay_offset(je_previous.created, speed)
recording_diff = (je_current.created - je_previous.created).total_seconds() * (1.0 / speed)