collect isolated capacity using a cache plugin, not stdout parsing

Reading capacity values using the jsonfile cache plugin is more robust
in scenarios where ansible-playbook may print non-JSON output (such as
when -vvv is passed or a custom callback plugin like timer is enabled).
This commit is contained in:
Ryan Petrello
2018-11-26 16:28:17 -05:00
parent 9eb2c02e92
commit fc0a039097
2 changed files with 47 additions and 38 deletions

View File

@@ -407,46 +407,50 @@ class IsolatedManager(object):
args = cls._build_args('heartbeat_isolated.yml', hostname_string) args = cls._build_args('heartbeat_isolated.yml', hostname_string)
args.extend(['--forks', str(len(instance_qs))]) args.extend(['--forks', str(len(instance_qs))])
env = cls._base_management_env() env = cls._base_management_env()
env['ANSIBLE_STDOUT_CALLBACK'] = 'json'
buff = StringIO.StringIO()
timeout = max(60, 2 * settings.AWX_ISOLATED_CONNECTION_TIMEOUT)
status, rc = IsolatedManager.run_pexpect(
args, cls.awx_playbook_path(), env, buff,
idle_timeout=timeout, job_timeout=timeout,
pexpect_timeout=5
)
output = buff.getvalue().encode('utf-8')
buff.close()
try: try:
result = json.loads(output) facts_path = tempfile.mkdtemp()
if not isinstance(result, dict): env['ANSIBLE_CACHE_PLUGIN'] = 'jsonfile'
raise TypeError('Expected a dict but received {}.'.format(str(type(result)))) env['ANSIBLE_CACHE_PLUGIN_CONNECTION'] = facts_path
except (ValueError, AssertionError, TypeError):
logger.exception('Failed to read status from isolated instances, output:\n {}'.format(output))
return
for instance in instance_qs: buff = StringIO.StringIO()
try: timeout = max(60, 2 * settings.AWX_ISOLATED_CONNECTION_TIMEOUT)
task_result = result['plays'][0]['tasks'][0]['hosts'][instance.hostname] status, rc = IsolatedManager.run_pexpect(
except (KeyError, IndexError): args, cls.awx_playbook_path(), env, buff,
task_result = {} idle_timeout=timeout, job_timeout=timeout,
if 'capacity_cpu' in task_result and 'capacity_mem' in task_result: pexpect_timeout=5
cls.update_capacity(instance, task_result, awx_application_version) )
logger.debug('Isolated instance {} successful heartbeat'.format(instance.hostname))
elif instance.capacity == 0: for instance in instance_qs:
logger.debug('Isolated instance {} previously marked as lost, could not re-join.'.format( output = buff.getvalue()
instance.hostname)) try:
else: with open(os.path.join(facts_path, instance.hostname), 'r') as facts_data:
logger.warning('Could not update status of isolated instance {}, msg={}'.format( output = facts_data.read()
instance.hostname, task_result.get('msg', 'unknown failure') task_result = json.loads(output)
)) except Exception:
if instance.is_lost(isolated=True): logger.exception('Failed to read status from isolated instances, output:\n {}'.format(output))
instance.capacity = 0 return
instance.save(update_fields=['capacity']) if 'awx_capacity_cpu' in task_result and 'awx_capacity_mem' in task_result:
logger.error('Isolated instance {} last checked in at {}, marked as lost.'.format( task_result = {
instance.hostname, instance.modified)) 'capacity_cpu': task_result['awx_capacity_cpu'],
'capacity_mem': task_result['awx_capacity_mem'],
'version': task_result['awx_capacity_version']
}
cls.update_capacity(instance, task_result, awx_application_version)
logger.debug('Isolated instance {} successful heartbeat'.format(instance.hostname))
elif instance.capacity == 0:
logger.debug('Isolated instance {} previously marked as lost, could not re-join.'.format(
instance.hostname))
else:
logger.warning('Could not update status of isolated instance {}'.format(instance.hostname))
if instance.is_lost(isolated=True):
instance.capacity = 0
instance.save(update_fields=['capacity'])
logger.error('Isolated instance {} last checked in at {}, marked as lost.'.format(
instance.hostname, instance.modified))
finally:
if os.path.exists(facts_path):
shutil.rmtree(facts_path)
@staticmethod @staticmethod
def get_stdout_handle(instance, private_data_dir, event_data_key='job_id'): def get_stdout_handle(instance, private_data_dir, event_data_key='job_id'):

View File

@@ -62,7 +62,12 @@ def main():
# Module never results in a change # Module never results in a change
module.exit_json(changed=False, capacity_cpu=capacity_cpu, module.exit_json(changed=False, capacity_cpu=capacity_cpu,
capacity_mem=capacity_mem, version=version) capacity_mem=capacity_mem, version=version,
ansible_facts=dict(
awx_capacity_cpu=capacity_cpu,
awx_capacity_mem=capacity_mem,
awx_capacity_version=version
))
if __name__ == '__main__': if __name__ == '__main__':