Merge pull request #4716 from jladdjr/perf_stats

Enable collection of performance stats
This commit is contained in:
Ryan Petrello
2019-10-04 17:09:30 -04:00
committed by GitHub
10 changed files with 158 additions and 4 deletions

View File

@@ -10,8 +10,8 @@ from django.utils.translation import ugettext_lazy as _
# Django REST Framework
from rest_framework.fields import ( # noqa
BooleanField, CharField, ChoiceField, DictField, EmailField, IntegerField,
ListField, NullBooleanField
BooleanField, CharField, ChoiceField, DictField, EmailField,
IntegerField, ListField, NullBooleanField
)
logger = logging.getLogger('awx.conf.fields')

View File

@@ -8,6 +8,7 @@ from django.utils.translation import ugettext_lazy as _
# Django REST Framework
from rest_framework import serializers
from rest_framework.fields import FloatField
# Tower
from awx.conf import fields, register, register_validate
@@ -345,6 +346,49 @@ register(
category_slug='jobs',
)
# Register the global on/off switch for resource profiling (boolean,
# defaults to False) under the 'jobs' settings category.
register(
'AWX_RESOURCE_PROFILING_ENABLED',
field_class=fields.BooleanField,
default=False,
label=_('Enable resource profiling on all tower jobs'),
help_text=_('If set, resource profiling data will be collected on all jobs.'), # noqa
category=_('Jobs'),
category_slug='jobs',
)
# The three registrations below share the same shape: an optional
# (required=False) FloatField poll interval, expressed in seconds, in the
# 'jobs' category.
# NOTE(review): each default is the string '0.25' even though the field class
# is FloatField -- confirm whether a float literal was intended.

# Poll interval for cpu usage.
register(
'AWX_RESOURCE_PROFILING_CPU_POLL_INTERVAL',
field_class=FloatField,
default='0.25',
label=_('Interval (in seconds) between polls for cpu usage.'),
help_text=_('Interval (in seconds) between polls for cpu usage.'),
category=_('Jobs'),
category_slug='jobs',
required=False,
)
# Poll interval for memory usage.
register(
'AWX_RESOURCE_PROFILING_MEMORY_POLL_INTERVAL',
field_class=FloatField,
default='0.25',
label=_('Interval (in seconds) between polls for memory usage.'),
help_text=_('Interval (in seconds) between polls for memory usage.'),
category=_('Jobs'),
category_slug='jobs',
required=False,
)
# Poll interval for PID count.
register(
'AWX_RESOURCE_PROFILING_PID_POLL_INTERVAL',
field_class=FloatField,
default='0.25',
label=_('Interval (in seconds) between polls for PID count.'),
help_text=_('Interval (in seconds) between polls for PID count.'),
category=_('Jobs'),
category_slug='jobs',
required=False,
)
register(
'AWX_TASK_ENV',
field_class=fields.KeyValueField,

View File

@@ -908,6 +908,31 @@ class BaseTask(object):
process_isolation_params['process_isolation_ro_paths'].append(instance.ansible_virtualenv_path)
return process_isolation_params
def build_params_resource_profiling(self, instance, private_data_dir):
    """Build the resource-profiling options for a run.

    Args:
        instance: The job/task about to run; handed to
            ``should_use_resource_profiling()`` to decide whether
            profiling applies.
        private_data_dir: Path of the run's private data directory. The
            profiling results directory is created beneath it at
            ``artifacts/playbook_profiling``.

    Returns:
        dict: Empty when profiling is not used for ``instance``; otherwise
        the ``resource_profiling*`` options (enabled flag, base cgroup,
        the three poll intervals read from settings, and the results
        directory).

    Raises:
        OSError: If the results directory cannot be created.
    """
    # Guard clause: nothing to build when profiling is off for this task.
    if not self.should_use_resource_profiling(instance):
        logger.debug('Resource profiling not enabled for task')
        return {}

    cpu_poll_interval = settings.AWX_RESOURCE_PROFILING_CPU_POLL_INTERVAL
    mem_poll_interval = settings.AWX_RESOURCE_PROFILING_MEMORY_POLL_INTERVAL
    pid_poll_interval = settings.AWX_RESOURCE_PROFILING_PID_POLL_INTERVAL

    results_dir = os.path.join(private_data_dir, 'artifacts/playbook_profiling')
    # exist_ok=True replaces the former isdir()-then-makedirs() sequence,
    # which could raise FileExistsError if the directory appeared between
    # the check and the create. Directory is owner read/write/execute only.
    os.makedirs(results_dir,
                mode=stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC,
                exist_ok=True)

    logger.debug('Collected the following resource profiling intervals: cpu: {} mem: {} pid: {}'
                 .format(cpu_poll_interval, mem_poll_interval, pid_poll_interval))

    return {
        'resource_profiling': True,
        'resource_profiling_base_cgroup': 'ansible-runner',
        'resource_profiling_cpu_poll_interval': cpu_poll_interval,
        'resource_profiling_memory_poll_interval': mem_poll_interval,
        'resource_profiling_pid_poll_interval': pid_poll_interval,
        'resource_profiling_results_dir': results_dir,
    }
def _write_extra_vars_file(self, private_data_dir, vars, safe_dict={}):
env_path = os.path.join(private_data_dir, 'env')
try:
@@ -968,6 +993,12 @@ class BaseTask(object):
env['AWX_PRIVATE_DATA_DIR'] = private_data_dir
return env
def should_use_resource_profiling(self, job):
    """Tell the caller whether resource profiling applies to this task.

    This implementation ignores ``job`` and always answers ``False``.
    """
    profiling_wanted = False
    return profiling_wanted
def should_use_proot(self, instance):
'''
Return whether this task should use proot.
@@ -1052,6 +1083,12 @@ class BaseTask(object):
'''
Hook for any steps to run after job/task is marked as complete.
'''
job_profiling_dir = os.path.join(private_data_dir, 'artifacts/playbook_profiling')
awx_profiling_dir = '/var/log/tower/playbook_profiling/'
if not os.path.exists(awx_profiling_dir):
os.mkdir(awx_profiling_dir)
if os.path.isdir(job_profiling_dir):
shutil.copytree(job_profiling_dir, os.path.join(awx_profiling_dir, str(instance.pk)))
def event_handler(self, event_data):
#
@@ -1205,6 +1242,8 @@ class BaseTask(object):
self.build_extra_vars_file(self.instance, private_data_dir)
args = self.build_args(self.instance, private_data_dir, passwords)
cwd = self.build_cwd(self.instance, private_data_dir)
resource_profiling_params = self.build_params_resource_profiling(self.instance,
private_data_dir)
process_isolation_params = self.build_params_process_isolation(self.instance,
private_data_dir,
cwd)
@@ -1244,6 +1283,7 @@ class BaseTask(object):
'pexpect_timeout': getattr(settings, 'PEXPECT_TIMEOUT', 5),
'suppress_ansible_output': True,
**process_isolation_params,
**resource_profiling_params,
},
}
@@ -1612,6 +1652,12 @@ class RunJob(BaseTask):
d[r'Vault password \({}\):\s*?$'.format(vault_id)] = k
return d
def should_use_resource_profiling(self, job):
    """Tell the caller whether resource profiling applies to this task.

    The answer is the current value of the
    ``AWX_RESOURCE_PROFILING_ENABLED`` setting; ``job`` is not consulted.
    """
    profiling_enabled = settings.AWX_RESOURCE_PROFILING_ENABLED
    return profiling_enabled
def should_use_proot(self, job):
'''
Return whether this task should use proot.

View File

@@ -469,6 +469,36 @@ class TestGenericRun():
assert '/AWX_VENV_PATH' in process_isolation_params['process_isolation_ro_paths']
assert 2 == len(process_isolation_params['process_isolation_ro_paths'])
@mock.patch('os.makedirs')
def test_build_params_resource_profiling(self, os_makedirs):
    """With profiling forced on, the built params carry the expected values."""
    run_job = tasks.RunJob()
    run_job.instance = Job(project=Project(), inventory=Inventory())
    # Force the profiling branch regardless of the global setting.
    run_job.should_use_resource_profiling = lambda job: True

    params = run_job.build_params_resource_profiling(run_job.instance, '/fake_private_data_dir')

    assert params['resource_profiling'] is True
    expected_values = {
        'resource_profiling_base_cgroup': 'ansible-runner',
        'resource_profiling_cpu_poll_interval': '0.25',
        'resource_profiling_memory_poll_interval': '0.25',
        'resource_profiling_pid_poll_interval': '0.25',
        'resource_profiling_results_dir': '/fake_private_data_dir/artifacts/playbook_profiling',
    }
    for key, expected in expected_values.items():
        assert params[key] == expected
@pytest.mark.parametrize("scenario, profiling_enabled", [
    ('global_setting', True),
    ('default', False)])
def test_should_use_resource_profiling(self, scenario, profiling_enabled, settings):
    """RunJob reports profiling as enabled only when the global setting is on."""
    run_job = tasks.RunJob()
    run_job.instance = Job(project=Project(), inventory=Inventory())

    # Only the 'global_setting' scenario flips the switch; 'default' leaves
    # the setting untouched.
    turn_on_globally = scenario == 'global_setting'
    if turn_on_globally:
        settings.AWX_RESOURCE_PROFILING_ENABLED = True

    outcome = run_job.should_use_resource_profiling(run_job.instance)
    assert outcome == profiling_enabled
def test_created_by_extra_vars(self):
job = Job(created_by=User(pk=123, username='angry-spud'))

View File

@@ -640,6 +640,18 @@ AWX_PROOT_SHOW_PATHS = []
# Note: This setting may be overridden by database settings.
AWX_PROOT_BASE_PATH = "/tmp"
# Resource profiling is disabled by default.
AWX_RESOURCE_PROFILING_ENABLED = False
# Interval (in seconds) between polls for cpu usage.
# NOTE(review): this and the two intervals below are stored as strings, not
# floats -- confirm that consumers expect a string.
AWX_RESOURCE_PROFILING_CPU_POLL_INTERVAL = '0.25'
# Interval (in seconds) between polls for memory usage.
AWX_RESOURCE_PROFILING_MEMORY_POLL_INTERVAL = '0.25'
# Interval (in seconds) between polls for PID count.
AWX_RESOURCE_PROFILING_PID_POLL_INTERVAL = '0.25'
# User definable ansible callback plugins
# Note: This setting may be overridden by database settings.
AWX_ANSIBLE_CALLBACK_PLUGINS = ""

View File

@@ -85,6 +85,9 @@ export default ['i18n', function(i18n) {
AWX_ISOLATED_CONNECTION_TIMEOUT: {
type: 'text',
reset: 'AWX_ISOLATED_CONNECTION_TIMEOUT'
},
AWX_RESOURCE_PROFILING_ENABLED: {
type: 'toggleSwitch',
}
},
buttons: {