mirror of
https://github.com/ansible/awx.git
synced 2026-02-26 07:26:03 -03:30
add a tool for discovering which tasks/hosts are slow for a JT
This tool looks at the most recent jobs for a specific job template and attempts to discover the _slowest_ tasks and hosts:
$ awx-manage bottleneck --template N
$ awx-manage bottleneck --template N --threshold 1 --ignore yum
$ awx-manage bottleneck --template N --ignore pause --ignore yum
This commit is contained in:
88
awx/main/management/commands/bottleneck.py
Normal file
88
awx/main/management/commands/bottleneck.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
from django.db import connection
|
||||||
|
|
||||||
|
from awx.main.models import JobTemplate
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Report the slowest tasks and hosts across a Job Template's recent jobs.

    Usage:
        awx-manage bottleneck --template N
        awx-manage bottleneck --template N --threshold 1 --ignore yum
        awx-manage bottleneck --template N --ignore pause --ignore yum
    """

    help = 'Discover the slowest tasks and hosts for a specific Job Template.'

    # How many of the template's most recent jobs are inspected.
    RECENT_JOBS = 25

    def add_arguments(self, parser):
        """Register the --template, --threshold and --ignore options."""
        # --template is mandatory: without it there is nothing to profile.
        parser.add_argument('--template', dest='jt', type=int, required=True,
                            help='ID of the Job Template to profile')
        parser.add_argument('--threshold', dest='threshold', type=float, default=5,
                            help='Only show tasks that took at least this many seconds (defaults to 5)')
        parser.add_argument('--ignore', action='append', help='ignore a specific action (e.g., --ignore git)')

    def _fetch_events(self, jt):
        """Return timing rows for the template's recent jobs, slowest first.

        Each row is ``(event id, job id, host name, delta, task_action,
        task_path)``. ``delta`` is ``b.created - a.created`` for a
        ``runner_on_start`` event ``a`` and a non-start, non-skipped,
        non-failed event ``b`` that shares its ``parent_uuid`` and
        ``host_name``.
        """
        with connection.cursor() as cursor:
            # Values are bound as query parameters instead of being
            # interpolated into the SQL text.
            cursor.execute(
                '''
                SELECT
                    b.id, b.job_id, b.host_name, b.created - a.created delta,
                    b.event_data::json->'task_action' task_action,
                    b.event_data::json->'task_path' task_path
                FROM main_jobevent a JOIN main_jobevent b
                    ON b.parent_uuid = a.parent_uuid AND a.host_name = b.host_name
                WHERE
                    a.event = 'runner_on_start' AND
                    b.event != 'runner_on_start' AND
                    b.event != 'runner_on_skipped' AND
                    b.failed = false AND
                    a.job_id IN (
                        SELECT unifiedjob_ptr_id FROM main_job
                        WHERE job_template_id = %s
                        ORDER BY unifiedjob_ptr_id DESC
                        LIMIT %s
                    )
                ORDER BY delta DESC;
                ''',
                [jt, self.RECENT_JOBS],
            )
            return cursor.fetchall()

    def handle(self, *args, **options):
        """Print the slowest tasks, then the hosts ranked by total slow-task time.

        Raises:
            CommandError: if no Job Template with the requested ID exists.
        """
        # Imported locally so this block does not depend on edits to the
        # module-level import list.
        from django.core.management.base import CommandError

        jt = options['jt']
        threshold = options['threshold']
        ignore = options['ignore'] or []

        try:
            template = JobTemplate.objects.get(pk=jt)
        except JobTemplate.DoesNotExist:
            raise CommandError(f'Job Template {jt} does not exist') from None

        print('## ' + template.name + f' (last {self.RECENT_JOBS} runs)\n')

        # Normalise the rows once: apply --ignore, tolerate missing
        # action/path/host values, and reduce the task path to its last
        # component.
        events = []
        for _id, job_id, host, duration, action, task_path in self._fetch_events(jt):
            action = '' if action is None else str(action)
            if action in ignore:
                continue
            playbook = (task_path or '').rsplit('/', 1)[-1]
            events.append((_id, job_id, host or '', duration, action, playbook))

        # Rows arrive slowest first, so the last write per key is the fastest
        # run of that (action, playbook) pair.
        fastest = {}
        for _id, _job_id, host, duration, action, playbook in events:
            if host:
                fastest[(action, playbook)] = (_id, host, duration)

        host_durations = {}
        warned = set()
        print(f'slowest tasks (--threshold={threshold})\n---')
        for _id, job_id, host, duration, action, playbook in events:
            if duration.total_seconds() < threshold:
                # Sorted by duration descending: every later row is faster too.
                break

            fastest_summary = ''
            fastest_match = fastest.get((action, playbook))
            warn_key = (host, action, playbook)
            # Point at the fastest run only when one exists and it differs
            # from this row, and only once per host/action/playbook.
            if fastest_match is not None and fastest_match[2] != duration and warn_key not in warned:
                warned.add(warn_key)
                fastest_id, fastest_host, fastest_duration = fastest_match
                fastest_human = str(fastest_duration).split('.')[0]
                fastest_summary = f' \033[93m{fastest_host} ran this in {fastest_human}s at /api/v2/job_events/{fastest_id}/\033[0m'

            url = f'/api/v2/jobs/{job_id}/'
            human_duration = str(duration).split('.')[0]
            print(' -- '.join([url, host, human_duration, action, playbook]) + fastest_summary)
            host_durations.setdefault(host, []).append(duration)

        # Rank hosts by the same total that is printed for them.
        host_totals = [
            (host, len(durations), sum(d.total_seconds() for d in durations))
            for host, durations in host_durations.items()
        ]
        host_totals.sort(key=lambda entry: entry[2], reverse=True)

        print('\nslowest hosts\n---')
        for h, total, total_seconds in host_totals:
            print(f'{h} had {total} tasks that ran longer than {threshold} second(s) for a total of {total_seconds}')

        print('')
|
||||||
Reference in New Issue
Block a user