mirror of
https://github.com/ansible/awx.git
synced 2026-02-01 09:38:10 -03:30
Merge pull request #4716 from jladdjr/perf_stats
Enable collection of performance stats
This commit is contained in:
@@ -10,8 +10,8 @@ from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
# Django REST Framework
|
||||
from rest_framework.fields import ( # noqa
|
||||
BooleanField, CharField, ChoiceField, DictField, EmailField, IntegerField,
|
||||
ListField, NullBooleanField
|
||||
BooleanField, CharField, ChoiceField, DictField, EmailField,
|
||||
IntegerField, ListField, NullBooleanField
|
||||
)
|
||||
|
||||
logger = logging.getLogger('awx.conf.fields')
|
||||
|
||||
@@ -8,6 +8,7 @@ from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
# Django REST Framework
|
||||
from rest_framework import serializers
|
||||
from rest_framework.fields import FloatField
|
||||
|
||||
# Tower
|
||||
from awx.conf import fields, register, register_validate
|
||||
@@ -345,6 +346,49 @@ register(
|
||||
category_slug='jobs',
|
||||
)
|
||||
|
||||
# Boolean setting in the 'jobs' category that switches resource profiling on
# for all jobs; it is off by default (default=False).
register(
    'AWX_RESOURCE_PROFILING_ENABLED',
    field_class=fields.BooleanField,
    default=False,
    label=_('Enable resource profiling on all tower jobs'),
    help_text=_('If set, resource profiling data will be collected on all jobs.'),  # noqa
    category=_('Jobs'),
    category_slug='jobs',
)
|
||||
|
||||
# Poll-interval settings (in seconds) for the three profiled metrics: cpu
# usage, memory usage and PID count. All three are optional float fields in
# the 'jobs' category.
# NOTE(review): each default is the string '0.25' while field_class is
# FloatField -- confirm the field coerces it as intended.
register(
    'AWX_RESOURCE_PROFILING_CPU_POLL_INTERVAL',
    field_class=FloatField,
    default='0.25',
    label=_('Interval (in seconds) between polls for cpu usage.'),
    help_text=_('Interval (in seconds) between polls for cpu usage.'),
    category=_('Jobs'),
    category_slug='jobs',
    required=False,
)

register(
    'AWX_RESOURCE_PROFILING_MEMORY_POLL_INTERVAL',
    field_class=FloatField,
    default='0.25',
    label=_('Interval (in seconds) between polls for memory usage.'),
    help_text=_('Interval (in seconds) between polls for memory usage.'),
    category=_('Jobs'),
    category_slug='jobs',
    required=False,
)

register(
    'AWX_RESOURCE_PROFILING_PID_POLL_INTERVAL',
    field_class=FloatField,
    default='0.25',
    label=_('Interval (in seconds) between polls for PID count.'),
    help_text=_('Interval (in seconds) between polls for PID count.'),
    category=_('Jobs'),
    category_slug='jobs',
    required=False,
)
|
||||
|
||||
register(
|
||||
'AWX_TASK_ENV',
|
||||
field_class=fields.KeyValueField,
|
||||
|
||||
@@ -908,6 +908,31 @@ class BaseTask(object):
|
||||
process_isolation_params['process_isolation_ro_paths'].append(instance.ansible_virtualenv_path)
|
||||
return process_isolation_params
|
||||
|
||||
def build_params_resource_profiling(self, instance, private_data_dir):
    '''
    Build the resource profiling parameters for this task run.

    Returns an empty dict when should_use_resource_profiling(instance) is
    false. Otherwise makes sure the results directory
    <private_data_dir>/artifacts/playbook_profiling exists and returns a
    dict of resource_profiling* keys whose poll intervals are read from the
    AWX_RESOURCE_PROFILING_{CPU,MEMORY,PID}_POLL_INTERVAL settings.
    '''
    if not self.should_use_resource_profiling(instance):
        logger.debug('Resource profiling not enabled for task')
        return {}

    cpu_poll_interval = settings.AWX_RESOURCE_PROFILING_CPU_POLL_INTERVAL
    mem_poll_interval = settings.AWX_RESOURCE_PROFILING_MEMORY_POLL_INTERVAL
    pid_poll_interval = settings.AWX_RESOURCE_PROFILING_PID_POLL_INTERVAL

    results_dir = os.path.join(private_data_dir, 'artifacts/playbook_profiling')
    # exist_ok=True replaces the former isdir() check followed by makedirs():
    # the directory may appear between the check and the creation, which
    # would make a plain makedirs() raise FileExistsError.
    os.makedirs(results_dir, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC, exist_ok=True)

    logger.debug('Collected the following resource profiling intervals: cpu: %s mem: %s pid: %s',
                 cpu_poll_interval, mem_poll_interval, pid_poll_interval)

    return {
        'resource_profiling': True,
        'resource_profiling_base_cgroup': 'ansible-runner',
        'resource_profiling_cpu_poll_interval': cpu_poll_interval,
        'resource_profiling_memory_poll_interval': mem_poll_interval,
        'resource_profiling_pid_poll_interval': pid_poll_interval,
        'resource_profiling_results_dir': results_dir,
    }
|
||||
|
||||
def _write_extra_vars_file(self, private_data_dir, vars, safe_dict={}):
|
||||
env_path = os.path.join(private_data_dir, 'env')
|
||||
try:
|
||||
@@ -968,6 +993,12 @@ class BaseTask(object):
|
||||
env['AWX_PRIVATE_DATA_DIR'] = private_data_dir
|
||||
return env
|
||||
|
||||
def should_use_resource_profiling(self, job):
    '''
    Report whether resource profiling applies to this task.

    This implementation answers False for every job it is given.
    '''
    profiling_enabled = False
    return profiling_enabled
|
||||
|
||||
def should_use_proot(self, instance):
|
||||
'''
|
||||
Return whether this task should use proot.
|
||||
@@ -1052,6 +1083,12 @@ class BaseTask(object):
|
||||
'''
|
||||
Hook for any steps to run after job/task is marked as complete.
|
||||
'''
|
||||
job_profiling_dir = os.path.join(private_data_dir, 'artifacts/playbook_profiling')
|
||||
awx_profiling_dir = '/var/log/tower/playbook_profiling/'
|
||||
if not os.path.exists(awx_profiling_dir):
|
||||
os.mkdir(awx_profiling_dir)
|
||||
if os.path.isdir(job_profiling_dir):
|
||||
shutil.copytree(job_profiling_dir, os.path.join(awx_profiling_dir, str(instance.pk)))
|
||||
|
||||
def event_handler(self, event_data):
|
||||
#
|
||||
@@ -1205,6 +1242,8 @@ class BaseTask(object):
|
||||
self.build_extra_vars_file(self.instance, private_data_dir)
|
||||
args = self.build_args(self.instance, private_data_dir, passwords)
|
||||
cwd = self.build_cwd(self.instance, private_data_dir)
|
||||
resource_profiling_params = self.build_params_resource_profiling(self.instance,
|
||||
private_data_dir)
|
||||
process_isolation_params = self.build_params_process_isolation(self.instance,
|
||||
private_data_dir,
|
||||
cwd)
|
||||
@@ -1244,6 +1283,7 @@ class BaseTask(object):
|
||||
'pexpect_timeout': getattr(settings, 'PEXPECT_TIMEOUT', 5),
|
||||
'suppress_ansible_output': True,
|
||||
**process_isolation_params,
|
||||
**resource_profiling_params,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1612,6 +1652,12 @@ class RunJob(BaseTask):
|
||||
d[r'Vault password \({}\):\s*?$'.format(vault_id)] = k
|
||||
return d
|
||||
|
||||
def should_use_resource_profiling(self, job):
    '''
    Report whether resource profiling applies to this task.

    The answer is the current value of the AWX_RESOURCE_PROFILING_ENABLED
    setting; the job argument is not consulted.
    '''
    profiling_enabled = settings.AWX_RESOURCE_PROFILING_ENABLED
    return profiling_enabled
|
||||
|
||||
def should_use_proot(self, job):
|
||||
'''
|
||||
Return whether this task should use proot.
|
||||
|
||||
@@ -469,6 +469,36 @@ class TestGenericRun():
|
||||
assert '/AWX_VENV_PATH' in process_isolation_params['process_isolation_ro_paths']
|
||||
assert 2 == len(process_isolation_params['process_isolation_ro_paths'])
|
||||
|
||||
|
||||
@mock.patch('os.makedirs')
def test_build_params_resource_profiling(self, os_makedirs):
    '''
    With profiling forced on, build_params_resource_profiling must return
    every resource_profiling* parameter with the expected value.
    '''
    # os.makedirs is patched by the decorator, so the fake private data
    # directory is never created by a real makedirs call.
    run_job = tasks.RunJob()
    run_job.should_use_resource_profiling = lambda job: True
    run_job.instance = Job(project=Project(), inventory=Inventory())

    params = run_job.build_params_resource_profiling(run_job.instance, '/fake_private_data_dir')

    assert params['resource_profiling'] is True

    expected_values = {
        'resource_profiling_base_cgroup': 'ansible-runner',
        'resource_profiling_cpu_poll_interval': '0.25',
        'resource_profiling_memory_poll_interval': '0.25',
        'resource_profiling_pid_poll_interval': '0.25',
        'resource_profiling_results_dir': '/fake_private_data_dir/artifacts/playbook_profiling',
    }
    for key, expected in expected_values.items():
        assert params[key] == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("scenario, profiling_enabled", [
    ('global_setting', True),
    ('default', False)])
def test_should_use_resource_profiling(self, scenario, profiling_enabled, settings):
    '''
    RunJob.should_use_resource_profiling must follow the
    AWX_RESOURCE_PROFILING_ENABLED setting: True once the setting is turned
    on, and False when the setting is left untouched.
    '''
    run_job = tasks.RunJob()
    run_job.instance = Job(project=Project(), inventory=Inventory())

    # Only the 'global_setting' scenario changes the setting; 'default'
    # leaves it as the settings fixture provides it.
    turn_on_globally = scenario == 'global_setting'
    if turn_on_globally:
        settings.AWX_RESOURCE_PROFILING_ENABLED = True

    outcome = run_job.should_use_resource_profiling(run_job.instance)
    assert outcome == profiling_enabled
|
||||
|
||||
def test_created_by_extra_vars(self):
|
||||
job = Job(created_by=User(pk=123, username='angry-spud'))
|
||||
|
||||
|
||||
@@ -640,6 +640,18 @@ AWX_PROOT_SHOW_PATHS = []
|
||||
# Note: This setting may be overridden by database settings.
|
||||
AWX_PROOT_BASE_PATH = "/tmp"
|
||||
|
||||
# Resource profiling is off by default.
AWX_RESOURCE_PROFILING_ENABLED = False

# Poll intervals, in seconds, for each profiled metric.
# NOTE(review): the intervals are stored as strings here -- confirm that
# every consumer accepts a string value.

# cpu usage
AWX_RESOURCE_PROFILING_CPU_POLL_INTERVAL = '0.25'

# memory usage
AWX_RESOURCE_PROFILING_MEMORY_POLL_INTERVAL = '0.25'

# PID count
AWX_RESOURCE_PROFILING_PID_POLL_INTERVAL = '0.25'
|
||||
|
||||
# User definable ansible callback plugins
|
||||
# Note: This setting may be overridden by database settings.
|
||||
AWX_ANSIBLE_CALLBACK_PLUGINS = ""
|
||||
|
||||
@@ -85,6 +85,9 @@ export default ['i18n', function(i18n) {
|
||||
AWX_ISOLATED_CONNECTION_TIMEOUT: {
|
||||
type: 'text',
|
||||
reset: 'AWX_ISOLATED_CONNECTION_TIMEOUT'
|
||||
},
|
||||
AWX_RESOURCE_PROFILING_ENABLED: {
|
||||
type: 'toggleSwitch',
|
||||
}
|
||||
},
|
||||
buttons: {
|
||||
|
||||
18
docs/performance_data.md
Normal file
18
docs/performance_data.md
Normal file
@@ -0,0 +1,18 @@
|
||||
Performance Data
|
||||
================
|
||||
|
||||
AWX has the ability to collect performance data on job runs.
|
||||
|
||||
The following data is collected periodically (by default, every 0.25 seconds):
|
||||
* CPU usage
|
||||
* Memory usage
|
||||
* PID count
|
||||
|
||||
The data is stored under `/var/log/tower/playbook_profiling`. A new folder is created for each job run. The folder's name is set to the job's ID.
|
||||
|
||||
Performance data collection is not enabled by default. To enable performance data collection on all jobs, set `AWX_RESOURCE_PROFILING_ENABLED` to `True`.
|
||||
|
||||
The interval (in seconds) between data collections can be set separately for each metric using the following settings:
|
||||
* AWX_RESOURCE_PROFILING_CPU_POLL_INTERVAL
|
||||
* AWX_RESOURCE_PROFILING_MEMORY_POLL_INTERVAL
|
||||
* AWX_RESOURCE_PROFILING_PID_POLL_INTERVAL
|
||||
@@ -1,4 +1,4 @@
|
||||
ansible-runner==1.3.4
|
||||
ansible-runner==1.4.1
|
||||
appdirs==1.4.2
|
||||
asgi-amqp==1.1.3
|
||||
azure-keyvault==1.1.0
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
adal==1.2.1 # via msrestazure
|
||||
amqp==2.4.2 # via kombu
|
||||
ansible-runner==1.3.4
|
||||
ansible-runner==1.4.1
|
||||
appdirs==1.4.2
|
||||
argparse==1.4.0 # via uwsgitop
|
||||
asgi-amqp==1.1.3
|
||||
|
||||
@@ -34,6 +34,7 @@ RUN yum -y install acl \
|
||||
libselinux-python \
|
||||
libstdc++.so.6 \
|
||||
libtool-ltdl-devel \
|
||||
libcgroup-tools \
|
||||
libXcomposite \
|
||||
libXcursor \
|
||||
libXdamage \
|
||||
|
||||
Reference in New Issue
Block a user