mirror of
https://github.com/ansible/awx.git
synced 2026-05-19 23:07:42 -02:30
Merge pull request #4716 from jladdjr/perf_stats
Enable collection of performance stats
This commit is contained in:
@@ -10,8 +10,8 @@ from django.utils.translation import ugettext_lazy as _
|
|||||||
|
|
||||||
# Django REST Framework
|
# Django REST Framework
|
||||||
from rest_framework.fields import ( # noqa
|
from rest_framework.fields import ( # noqa
|
||||||
BooleanField, CharField, ChoiceField, DictField, EmailField, IntegerField,
|
BooleanField, CharField, ChoiceField, DictField, EmailField,
|
||||||
ListField, NullBooleanField
|
IntegerField, ListField, NullBooleanField
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger('awx.conf.fields')
|
logger = logging.getLogger('awx.conf.fields')
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from django.utils.translation import ugettext_lazy as _
|
|||||||
|
|
||||||
# Django REST Framework
|
# Django REST Framework
|
||||||
from rest_framework import serializers
|
from rest_framework import serializers
|
||||||
|
from rest_framework.fields import FloatField
|
||||||
|
|
||||||
# Tower
|
# Tower
|
||||||
from awx.conf import fields, register, register_validate
|
from awx.conf import fields, register, register_validate
|
||||||
@@ -345,6 +346,49 @@ register(
|
|||||||
category_slug='jobs',
|
category_slug='jobs',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Master switch for resource profiling; defaults to off.
register(
    'AWX_RESOURCE_PROFILING_ENABLED',
    field_class=fields.BooleanField,
    default=False,
    label=_('Enable resource profiling on all tower jobs'),
    help_text=_('If set, resource profiling data will be collected on all jobs.'),  # noqa
    category=_('Jobs'),
    category_slug='jobs',
)

# The three poll-interval settings below share one shape: an optional
# (required=False) FloatField in the 'jobs' category whose default is
# written as the string '0.25'.

# Seconds between CPU usage polls.
register(
    'AWX_RESOURCE_PROFILING_CPU_POLL_INTERVAL',
    field_class=FloatField,
    default='0.25',
    label=_('Interval (in seconds) between polls for cpu usage.'),
    help_text=_('Interval (in seconds) between polls for cpu usage.'),
    category=_('Jobs'),
    category_slug='jobs',
    required=False,
)

# Seconds between memory usage polls.
register(
    'AWX_RESOURCE_PROFILING_MEMORY_POLL_INTERVAL',
    field_class=FloatField,
    default='0.25',
    label=_('Interval (in seconds) between polls for memory usage.'),
    help_text=_('Interval (in seconds) between polls for memory usage.'),
    category=_('Jobs'),
    category_slug='jobs',
    required=False,
)

# Seconds between PID count polls.
register(
    'AWX_RESOURCE_PROFILING_PID_POLL_INTERVAL',
    field_class=FloatField,
    default='0.25',
    label=_('Interval (in seconds) between polls for PID count.'),
    help_text=_('Interval (in seconds) between polls for PID count.'),
    category=_('Jobs'),
    category_slug='jobs',
    required=False,
)
|
||||||
|
|
||||||
register(
|
register(
|
||||||
'AWX_TASK_ENV',
|
'AWX_TASK_ENV',
|
||||||
field_class=fields.KeyValueField,
|
field_class=fields.KeyValueField,
|
||||||
|
|||||||
@@ -908,6 +908,31 @@ class BaseTask(object):
|
|||||||
process_isolation_params['process_isolation_ro_paths'].append(instance.ansible_virtualenv_path)
|
process_isolation_params['process_isolation_ro_paths'].append(instance.ansible_virtualenv_path)
|
||||||
return process_isolation_params
|
return process_isolation_params
|
||||||
|
|
||||||
|
def build_params_resource_profiling(self, instance, private_data_dir):
    '''
    Build the resource profiling options for a run.

    Returns an empty dict when should_use_resource_profiling(instance) is
    false. Otherwise makes sure the results directory
    <private_data_dir>/artifacts/playbook_profiling exists and returns a
    dict of resource_profiling* options, with the cpu / memory / pid poll
    intervals taken from the AWX_RESOURCE_PROFILING_*_POLL_INTERVAL
    settings.
    '''
    if not self.should_use_resource_profiling(instance):
        logger.debug('Resource profiling not enabled for task')
        return {}

    cpu_poll_interval = settings.AWX_RESOURCE_PROFILING_CPU_POLL_INTERVAL
    mem_poll_interval = settings.AWX_RESOURCE_PROFILING_MEMORY_POLL_INTERVAL
    pid_poll_interval = settings.AWX_RESOURCE_PROFILING_PID_POLL_INTERVAL

    results_dir = os.path.join(private_data_dir, 'artifacts/playbook_profiling')
    # Create the directory with owner-only read/write/execute permissions.
    # exist_ok=True replaces a separate isdir() check, so there is no window
    # between "check" and "create" in which the directory can appear and
    # make makedirs() fail.
    os.makedirs(results_dir,
                mode=stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC,
                exist_ok=True)

    logger.debug('Collected the following resource profiling intervals: cpu: {} mem: {} pid: {}'
                 .format(cpu_poll_interval, mem_poll_interval, pid_poll_interval))

    return {
        'resource_profiling': True,
        'resource_profiling_base_cgroup': 'ansible-runner',
        'resource_profiling_cpu_poll_interval': cpu_poll_interval,
        'resource_profiling_memory_poll_interval': mem_poll_interval,
        'resource_profiling_pid_poll_interval': pid_poll_interval,
        'resource_profiling_results_dir': results_dir,
    }
|
||||||
|
|
||||||
def _write_extra_vars_file(self, private_data_dir, vars, safe_dict={}):
|
def _write_extra_vars_file(self, private_data_dir, vars, safe_dict={}):
|
||||||
env_path = os.path.join(private_data_dir, 'env')
|
env_path = os.path.join(private_data_dir, 'env')
|
||||||
try:
|
try:
|
||||||
@@ -968,6 +993,12 @@ class BaseTask(object):
|
|||||||
env['AWX_PRIVATE_DATA_DIR'] = private_data_dir
|
env['AWX_PRIVATE_DATA_DIR'] = private_data_dir
|
||||||
return env
|
return env
|
||||||
|
|
||||||
|
def should_use_resource_profiling(self, job):
    '''
    Tell the caller whether resource profiling applies to this task.

    This implementation ignores job and always answers False.
    '''
    return False
|
||||||
|
|
||||||
def should_use_proot(self, instance):
|
def should_use_proot(self, instance):
|
||||||
'''
|
'''
|
||||||
Return whether this task should use proot.
|
Return whether this task should use proot.
|
||||||
@@ -1052,6 +1083,12 @@ class BaseTask(object):
|
|||||||
'''
|
'''
|
||||||
Hook for any steps to run after job/task is marked as complete.
|
Hook for any steps to run after job/task is marked as complete.
|
||||||
'''
|
'''
|
||||||
|
job_profiling_dir = os.path.join(private_data_dir, 'artifacts/playbook_profiling')
|
||||||
|
awx_profiling_dir = '/var/log/tower/playbook_profiling/'
|
||||||
|
if not os.path.exists(awx_profiling_dir):
|
||||||
|
os.mkdir(awx_profiling_dir)
|
||||||
|
if os.path.isdir(job_profiling_dir):
|
||||||
|
shutil.copytree(job_profiling_dir, os.path.join(awx_profiling_dir, str(instance.pk)))
|
||||||
|
|
||||||
def event_handler(self, event_data):
|
def event_handler(self, event_data):
|
||||||
#
|
#
|
||||||
@@ -1205,6 +1242,8 @@ class BaseTask(object):
|
|||||||
self.build_extra_vars_file(self.instance, private_data_dir)
|
self.build_extra_vars_file(self.instance, private_data_dir)
|
||||||
args = self.build_args(self.instance, private_data_dir, passwords)
|
args = self.build_args(self.instance, private_data_dir, passwords)
|
||||||
cwd = self.build_cwd(self.instance, private_data_dir)
|
cwd = self.build_cwd(self.instance, private_data_dir)
|
||||||
|
resource_profiling_params = self.build_params_resource_profiling(self.instance,
|
||||||
|
private_data_dir)
|
||||||
process_isolation_params = self.build_params_process_isolation(self.instance,
|
process_isolation_params = self.build_params_process_isolation(self.instance,
|
||||||
private_data_dir,
|
private_data_dir,
|
||||||
cwd)
|
cwd)
|
||||||
@@ -1244,6 +1283,7 @@ class BaseTask(object):
|
|||||||
'pexpect_timeout': getattr(settings, 'PEXPECT_TIMEOUT', 5),
|
'pexpect_timeout': getattr(settings, 'PEXPECT_TIMEOUT', 5),
|
||||||
'suppress_ansible_output': True,
|
'suppress_ansible_output': True,
|
||||||
**process_isolation_params,
|
**process_isolation_params,
|
||||||
|
**resource_profiling_params,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1612,6 +1652,12 @@ class RunJob(BaseTask):
|
|||||||
d[r'Vault password \({}\):\s*?$'.format(vault_id)] = k
|
d[r'Vault password \({}\):\s*?$'.format(vault_id)] = k
|
||||||
return d
|
return d
|
||||||
|
|
||||||
|
def should_use_resource_profiling(self, job):
    '''
    Tell the caller whether resource profiling applies to this task.

    The answer is the value of the AWX_RESOURCE_PROFILING_ENABLED setting;
    the job argument is not consulted.
    '''
    profiling_enabled = settings.AWX_RESOURCE_PROFILING_ENABLED
    return profiling_enabled
|
||||||
|
|
||||||
def should_use_proot(self, job):
|
def should_use_proot(self, job):
|
||||||
'''
|
'''
|
||||||
Return whether this task should use proot.
|
Return whether this task should use proot.
|
||||||
|
|||||||
@@ -469,6 +469,36 @@ class TestGenericRun():
|
|||||||
assert '/AWX_VENV_PATH' in process_isolation_params['process_isolation_ro_paths']
|
assert '/AWX_VENV_PATH' in process_isolation_params['process_isolation_ro_paths']
|
||||||
assert 2 == len(process_isolation_params['process_isolation_ro_paths'])
|
assert 2 == len(process_isolation_params['process_isolation_ro_paths'])
|
||||||
|
|
||||||
|
|
||||||
|
@mock.patch('os.makedirs')
def test_build_params_resource_profiling(self, os_makedirs):
    # os.makedirs is patched out, so the fake results directory is never
    # created on disk by this test.
    job = Job(project=Project(), inventory=Inventory())
    run_job = tasks.RunJob()
    # Force profiling on regardless of the configured setting.
    run_job.should_use_resource_profiling = lambda job: True
    run_job.instance = job

    params = run_job.build_params_resource_profiling(run_job.instance, '/fake_private_data_dir')

    assert params['resource_profiling'] is True
    expected_values = (
        ('resource_profiling_base_cgroup', 'ansible-runner'),
        ('resource_profiling_cpu_poll_interval', '0.25'),
        ('resource_profiling_memory_poll_interval', '0.25'),
        ('resource_profiling_pid_poll_interval', '0.25'),
        ('resource_profiling_results_dir', '/fake_private_data_dir/artifacts/playbook_profiling'),
    )
    for key, expected in expected_values:
        assert params[key] == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("scenario, profiling_enabled", [
    ('global_setting', True),
    ('default', False),
])
def test_should_use_resource_profiling(self, scenario, profiling_enabled, settings):
    # 'global_setting' turns the setting on explicitly; 'default' leaves the
    # settings fixture untouched and expects profiling to be off.
    job = Job(project=Project(), inventory=Inventory())
    run_job = tasks.RunJob()
    run_job.instance = job

    if scenario == 'global_setting':
        settings.AWX_RESOURCE_PROFILING_ENABLED = True

    outcome = run_job.should_use_resource_profiling(run_job.instance)
    assert outcome == profiling_enabled
|
||||||
|
|
||||||
def test_created_by_extra_vars(self):
|
def test_created_by_extra_vars(self):
|
||||||
job = Job(created_by=User(pk=123, username='angry-spud'))
|
job = Job(created_by=User(pk=123, username='angry-spud'))
|
||||||
|
|
||||||
|
|||||||
@@ -640,6 +640,18 @@ AWX_PROOT_SHOW_PATHS = []
|
|||||||
# Note: This setting may be overridden by database settings.
|
# Note: This setting may be overridden by database settings.
|
||||||
AWX_PROOT_BASE_PATH = "/tmp"
|
AWX_PROOT_BASE_PATH = "/tmp"
|
||||||
|
|
||||||
|
# Resource profiling is off by default.
AWX_RESOURCE_PROFILING_ENABLED = False

# Poll intervals, in seconds, for each profiled metric.
# Each value is written as a string.

# CPU usage
AWX_RESOURCE_PROFILING_CPU_POLL_INTERVAL = '0.25'

# Memory usage
AWX_RESOURCE_PROFILING_MEMORY_POLL_INTERVAL = '0.25'

# PID count
AWX_RESOURCE_PROFILING_PID_POLL_INTERVAL = '0.25'
|
||||||
|
|
||||||
# User definable ansible callback plugins
|
# User definable ansible callback plugins
|
||||||
# Note: This setting may be overridden by database settings.
|
# Note: This setting may be overridden by database settings.
|
||||||
AWX_ANSIBLE_CALLBACK_PLUGINS = ""
|
AWX_ANSIBLE_CALLBACK_PLUGINS = ""
|
||||||
|
|||||||
@@ -85,6 +85,9 @@ export default ['i18n', function(i18n) {
|
|||||||
AWX_ISOLATED_CONNECTION_TIMEOUT: {
|
AWX_ISOLATED_CONNECTION_TIMEOUT: {
|
||||||
type: 'text',
|
type: 'text',
|
||||||
reset: 'AWX_ISOLATED_CONNECTION_TIMEOUT'
|
reset: 'AWX_ISOLATED_CONNECTION_TIMEOUT'
|
||||||
|
},
|
||||||
|
AWX_RESOURCE_PROFILING_ENABLED: {
|
||||||
|
type: 'toggleSwitch',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
buttons: {
|
buttons: {
|
||||||
|
|||||||
18
docs/performance_data.md
Normal file
18
docs/performance_data.md
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
Performance Data
|
||||||
|
================
|
||||||
|
|
||||||
|
AWX has the ability to collect performance data on job runs.
|
||||||
|
|
||||||
|
The following data is collected periodically (by default, every 0.25 seconds):
|
||||||
|
* CPU usage
|
||||||
|
* Memory usage
|
||||||
|
* PID count
|
||||||
|
|
||||||
|
The data is stored under `/var/log/tower/playbook_profiling`. A new folder is created for each job run. The folder's name is set to the job's ID.
|
||||||
|
|
||||||
|
Performance data collection is not enabled by default. To enable it for all jobs, set `AWX_RESOURCE_PROFILING_ENABLED` to true.
|
||||||
|
|
||||||
|
The frequency with which data is collected can be set using:
|
||||||
|
* AWX_RESOURCE_PROFILING_CPU_POLL_INTERVAL
|
||||||
|
* AWX_RESOURCE_PROFILING_MEMORY_POLL_INTERVAL
|
||||||
|
* AWX_RESOURCE_PROFILING_PID_POLL_INTERVAL
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
ansible-runner==1.3.4
|
ansible-runner==1.4.1
|
||||||
appdirs==1.4.2
|
appdirs==1.4.2
|
||||||
asgi-amqp==1.1.3
|
asgi-amqp==1.1.3
|
||||||
azure-keyvault==1.1.0
|
azure-keyvault==1.1.0
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
adal==1.2.1 # via msrestazure
|
adal==1.2.1 # via msrestazure
|
||||||
amqp==2.4.2 # via kombu
|
amqp==2.4.2 # via kombu
|
||||||
ansible-runner==1.3.4
|
ansible-runner==1.4.1
|
||||||
appdirs==1.4.2
|
appdirs==1.4.2
|
||||||
argparse==1.4.0 # via uwsgitop
|
argparse==1.4.0 # via uwsgitop
|
||||||
asgi-amqp==1.1.3
|
asgi-amqp==1.1.3
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ RUN yum -y install acl \
|
|||||||
libselinux-python \
|
libselinux-python \
|
||||||
libstdc++.so.6 \
|
libstdc++.so.6 \
|
||||||
libtool-ltdl-devel \
|
libtool-ltdl-devel \
|
||||||
|
libcgroup-tools \
|
||||||
libXcomposite \
|
libXcomposite \
|
||||||
libXcursor \
|
libXcursor \
|
||||||
libXdamage \
|
libXdamage \
|
||||||
|
|||||||
Reference in New Issue
Block a user