diff --git a/awx/main/dispatch/worker/callback.py b/awx/main/dispatch/worker/callback.py index 79033e329a..fd96a4f04e 100644 --- a/awx/main/dispatch/worker/callback.py +++ b/awx/main/dispatch/worker/callback.py @@ -1,10 +1,7 @@ -import cProfile import json import logging import os -import pstats import signal -import tempfile import time import traceback @@ -23,6 +20,7 @@ from awx.main.models import (JobEvent, AdHocCommandEvent, ProjectUpdateEvent, Job) from awx.main.tasks import handle_success_and_failure_notifications from awx.main.models.events import emit_event_detail +from awx.main.utils.profiling import AWXProfiler from .base import BaseWorker @@ -48,6 +46,7 @@ class CallbackBrokerWorker(BaseWorker): self.buff = {} self.pid = os.getpid() self.redis = redis.Redis.from_url(settings.BROKER_URL) + self.prof = AWXProfiler("CallbackBrokerWorker") for key in self.redis.keys('awx_callback_receiver_statistics_*'): self.redis.delete(key) @@ -87,19 +86,12 @@ class CallbackBrokerWorker(BaseWorker): ) def toggle_profiling(self, *args): - if self.prof: - self.prof.disable() - filename = f'callback-{self.pid}.pstats' - filepath = os.path.join(tempfile.gettempdir(), filename) - with open(filepath, 'w') as f: - pstats.Stats(self.prof, stream=f).sort_stats('cumulative').print_stats() - pstats.Stats(self.prof).dump_stats(filepath + '.raw') - self.prof = False - logger.error(f'profiling is disabled, wrote {filepath}') - else: - self.prof = cProfile.Profile() - self.prof.enable() + if not self.prof.is_started(): + self.prof.start() logger.error('profiling is enabled') + else: + filepath = self.prof.stop() + logger.error(f'profiling is disabled, wrote {filepath}') def work_loop(self, *args, **kw): if settings.AWX_CALLBACK_PROFILE: diff --git a/awx/main/middleware.py b/awx/main/middleware.py index 781266e8dd..759c2daa98 100644 --- a/awx/main/middleware.py +++ b/awx/main/middleware.py @@ -1,13 +1,9 @@ # Copyright (c) 2015 Ansible, Inc. # All Rights Reserved. -import uuid import logging import threading import time -import cProfile -import pstats -import os import urllib.parse from django.conf import settings @@ -22,6 +18,7 @@ from django.urls import reverse, resolve from awx.main.utils.named_url_graph import generate_graph, GraphNode from awx.conf import fields, register +from awx.main.utils.profiling import AWXProfiler logger = logging.getLogger('awx.main.middleware') @@ -32,11 +29,14 @@ class TimingMiddleware(threading.local, MiddlewareMixin): dest = '/var/log/tower/profile' + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.prof = AWXProfiler("TimingMiddleware") + def process_request(self, request): self.start_time = time.time() if settings.AWX_REQUEST_PROFILE: - self.prof = cProfile.Profile() - self.prof.enable() + self.prof.start() def process_response(self, request, response): if not hasattr(self, 'start_time'): # some tools may not invoke process_request @@ -44,33 +44,10 @@ class TimingMiddleware(threading.local, MiddlewareMixin): total_time = time.time() - self.start_time response['X-API-Total-Time'] = '%0.3fs' % total_time if settings.AWX_REQUEST_PROFILE: - self.prof.disable() - cprofile_file = self.save_profile_file(request) - response['cprofile_file'] = cprofile_file + response['X-API-Profile-File'] = self.prof.stop() perf_logger.info('api response times', extra=dict(python_objects=dict(request=request, response=response))) return response - def save_profile_file(self, request): - if not os.path.isdir(self.dest): - os.makedirs(self.dest) - filename = '%.3fs-%s.pstats' % (pstats.Stats(self.prof).total_tt, uuid.uuid4()) - filepath = os.path.join(self.dest, filename) - with open(filepath, 'w') as f: - f.write('%s %s\n' % (request.method, request.get_full_path())) - pstats.Stats(self.prof, stream=f).sort_stats('cumulative').print_stats() - - if settings.AWX_REQUEST_PROFILE_WITH_DOT: - from gprof2dot import main as generate_dot - raw = os.path.join(self.dest, filename) + '.raw' - pstats.Stats(self.prof).dump_stats(raw) - generate_dot([ - '-n', '2.5', '-f', 'pstats', '-o', - os.path.join( self.dest, filename).replace('.pstats', '.dot'), - raw - ]) - os.remove(raw) - return filepath - class SessionTimeoutMiddleware(MiddlewareMixin): """ diff --git a/awx/main/utils/profiling.py b/awx/main/utils/profiling.py new file mode 100644 index 0000000000..c550175d7b --- /dev/null +++ b/awx/main/utils/profiling.py @@ -0,0 +1,151 @@ +import cProfile +import functools +import pstats +import os +import uuid +import datetime +import json +import sys + + +class AWXProfileBase: + def __init__(self, name, dest): + self.name = name + self.dest = dest + self.results = {} + + def generate_results(self): + raise RuntimeError("define me") + + def output_results(self, fname=None): + if not os.path.isdir(self.dest): + os.makedirs(self.dest) + + if fname: + fpath = os.path.join(self.dest, fname) + with open(fpath, 'w') as f: + f.write(json.dumps(self.results, indent=2)) + + +class AWXTiming(AWXProfileBase): + def __init__(self, name, dest='/var/log/tower/timing'): + super().__init__(name, dest) + + self.time_start = None + self.time_end = None + + def start(self): + self.time_start = datetime.datetime.now() + + def stop(self): + self.time_end = datetime.datetime.now() + + self.generate_results() + self.output_results() + + def generate_results(self): + diff = (self.time_end - self.time_start).total_seconds() + self.results = { + 'name': self.name, + 'diff': f'{diff}-seconds', + } + + def output_results(self): + fname = f"{self.results['diff']}-{self.name}-{uuid.uuid4()}.time" + super().output_results(fname) + + +def timing(name, *init_args, **init_kwargs): + def decorator_profile(func): + @functools.wraps(func) + def wrapper_profile(*args, **kwargs): + timing = AWXTiming(name, *init_args, **init_kwargs) + timing.start() + res = func(*args, **kwargs) + timing.stop() + return res + return wrapper_profile + return decorator_profile + + +class AWXProfiler(AWXProfileBase): + def __init__(self, name, dest='/var/log/tower/profile', dot_enabled=True): + ''' + Try to do as little as possible in init. Instead, do the init + only when the profiling is started. + ''' + super().__init__(name, dest) + self.started = False + self.dot_enabled = dot_enabled + self.results = { + 'total_time_seconds': 0, + } + + def generate_results(self): + self.results['total_time_seconds'] = pstats.Stats(self.prof).total_tt + + def output_results(self): + super().output_results() + + filename_base = '%.3fs-%s-%s-%s' % (self.results['total_time_seconds'], self.name, self.pid, uuid.uuid4()) + pstats_filepath = os.path.join(self.dest, f"{filename_base}.pstats") + extra_data = "" + + if self.dot_enabled: + try: + from gprof2dot import main as generate_dot + except ImportError: + extra_data = 'Dot graph generation failed due to package "gprof2dot" being unavailable.' + else: + raw_filepath = os.path.join(self.dest, f"{filename_base}.raw") + dot_filepath = os.path.join(self.dest, f"{filename_base}.dot") + + pstats.Stats(self.prof).dump_stats(raw_filepath) + generate_dot([ + '-n', '2.5', '-f', 'pstats', '-o', + dot_filepath, + raw_filepath + ]) + os.remove(raw_filepath) + + with open(pstats_filepath, 'w') as f: + print(f"{self.name}, {extra_data}", file=f) + pstats.Stats(self.prof, stream=f).sort_stats('cumulative').print_stats() + return pstats_filepath + + + def start(self): + self.prof = cProfile.Profile() + self.pid = os.getpid() + + self.prof.enable() + self.started = True + + def is_started(self): + return self.started + + def stop(self): + if self.started: + self.prof.disable() + + self.generate_results() + res = self.output_results() + self.started = False + return res + else: + print("AWXProfiler::stop() called without calling start() first", file=sys.stderr) + return None + + +def profile(name, *init_args, **init_kwargs): + def decorator_profile(func): + @functools.wraps(func) + def wrapper_profile(*args, **kwargs): + prof = AWXProfiler(name, *init_args, **init_kwargs) + prof.start() + res = func(*args, **kwargs) + prof.stop() + return res + return wrapper_profile + return decorator_profile + diff --git a/docs/debugging.md b/docs/debugging.md index 1452c3c4bb..b6f2652962 100644 --- a/docs/debugging.md +++ b/docs/debugging.md @@ -124,3 +124,102 @@ optional arguments: Below is an example run with 200 Jobs flowing through the system. [![asciicast](https://asciinema.org/a/xnfzMQ30xWPdhwORiISz0wcEw.svg)](https://asciinema.org/a/xnfzMQ30xWPdhwORiISz0wcEw) +======= +Code Instrumentation Profiling +------------------------------ +Decorate a function to generate profiling data that will tell you the percentage +of time spent in branches of a code path. This comes at an absolute performance +cost. However, the relative numbers are still very helpful. + +Requirements for `dot_enabled=True` +**Note:** The profiling code will run as if `dot_enabled=False` when `gprof2dot` +package is not found +``` +/var/lib/awx/venv/awx/bin/pip3 install gprof2dot +``` + +Below is the signature of the `@profile` decorator. +``` +@profile(name, dest='/var/log/tower/profile', dot_enabled=True) +``` + +``` +from awx.main.utils.profiling import profile + +@profile(name="task_manager_profile") +def task_manager(): + ... +``` + +Now, invoke the function being profiled. Each run of the profiled function +will result in a file output to `dest` containing the profile data summary as +well as a dot graph if enabled. The profile data summary can be viewed in a +text editor. The dot graph can be viewed using `xdot`. + +``` +bash-4.4$ ls -aln /var/log/tower/profile/ +total 24 +drwxr-xr-x 2 awx root 4096 Oct 15 13:23 . +drwxrwxr-x 1 root root 4096 Oct 15 13:23 .. +-rw-r--r-- 1 awx root 635 Oct 15 13:23 2.001s-task_manager_profile-2303-272858af-3bda-45ec-af9e-7067aa86e4f3.dot +-rw-r--r-- 1 awx root 587 Oct 15 13:23 2.001s-task_manager_profile-2303-272858af-3bda-45ec-af9e-7067aa86e4f3.pstats +-rw-r--r-- 1 awx root 632 Oct 15 13:23 2.002s-task_manager_profile-2303-4cdf4660-3ef4-4238-8164-33611822d9e3.dot +-rw-r--r-- 1 awx root 587 Oct 15 13:23 2.002s-task_manager_profile-2303-4cdf4660-3ef4-4238-8164-33611822d9e3.pstats +``` + +``` +xdot /var/log/tower/profile/2.001s-task_manager_profile-2303-272858af-3bda-45ec-af9e-7067aa86e4f3.dot +``` + + +Code Instrumentation Timing +--------------------------- +Similar to the profiling decorator, there is a timing decorator. This is useful +when you do not want to incur the overhead of profiling and want to know the +accurate absolute timing of a code path. + +Below is the signature of the `@timing` decorator. +``` +@timing(name, dest='/var/log/tower/timing') +``` + +``` +from awx.main.utils.profiling import timing + +@timing(name="my_task_manager_timing") +def task_manager(): + ... +``` + +Now, invoke the function being timed. Each run of the timed function will result +in a file output to `dest`. The timing data will be in the file name. + +``` +bash-4.4# ls -aln +total 16 +drwxr-xr-x 2 0 0 4096 Oct 20 12:43 . +drwxrwxr-x 1 0 0 4096 Oct 20 12:43 .. +-rw-r--r-- 1 0 0 61 Oct 20 12:43 2.002178-seconds-my_task_manager-ab720a2f-4624-47d0-b897-8549fe7e8c99.time +-rw-r--r-- 1 0 0 60 Oct 20 12:43 2.00228-seconds-my_task_manager-e8a901be-9cdb-4ffc-a34a-a6bcb4266e7c.time +``` + +The class behind the decorator can also be used for profiling. + +``` +from awx.main.utils.profiling import AWXProfiler + +prof = AWXProfiler("hello_world") +prof.start() +''' +code to profile here +''' +prof.stop() + + +# Note that start() and stop() can be reused. An new profile file will be output. +prof.start() +''' +more code to profile +''' +prof.stop() +```