From f371dd71b259908a9f5fd49727e46b24f397a356 Mon Sep 17 00:00:00 2001 From: AlanCoding Date: Mon, 19 Jun 2017 12:07:40 -0400 Subject: [PATCH] Run isolated heartbeat against all hosts at once Previously we were running the playbook on a host-by-host basis, but this changes it to pass in the list of all isolated instances the machine is responsible for. Using the `json` Ansible stdout callback, we are able to parse the output for information on each host. --- awx/main/isolated/isolated_manager.py | 33 ++++++++++++++++----------- awx/main/tasks.py | 7 +++--- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/awx/main/isolated/isolated_manager.py b/awx/main/isolated/isolated_manager.py index 44aa11e1a3..1a08b98549 100644 --- a/awx/main/isolated/isolated_manager.py +++ b/awx/main/isolated/isolated_manager.py @@ -305,26 +305,29 @@ class IsolatedManager(object): logger.warning('Cleanup from isolated job encountered error, output:\n{}'.format(buff.getvalue())) @staticmethod - def health_check(instance, cutoff_pk=0): + def health_check(instance_qs, cutoff_pk=0): ''' - :param instance: Django object representing the isolated instance + :param instance_qs: List of Django objects representing the + isolated instances to manage :param cutoff_pk: Job id of the oldest job still in the running state Method logic not yet written. 
returns the instance's capacity or None if it is not reachable ''' - start_delimiter = 'wNqCXG6uul' - end_delimiter = 'n6kmoFyyAP' extra_vars = dict( cutoff_pk=cutoff_pk, - start_delimiter=start_delimiter, - end_delimiter=end_delimiter ) + hostname_string = '' + for instance in instance_qs: + hostname_string += '{},'.format(instance.hostname) args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i', - '%s,' % instance.hostname, 'heartbeat_isolated.yml', '-e', + hostname_string, 'heartbeat_isolated.yml', '-e', json.dumps(extra_vars)] module_path = os.path.join(os.path.dirname(awx.__file__), 'lib', 'management_modules') playbook_path = os.path.join(os.path.dirname(awx.__file__), 'playbooks') - env = {'ANSIBLE_LIBRARY': module_path} + env = { + 'ANSIBLE_LIBRARY': module_path, + 'ANSIBLE_STDOUT_CALLBACK': 'json' + } buff = cStringIO.StringIO() status, rc = run.run_pexpect( args, playbook_path, env, buff, @@ -332,11 +335,15 @@ class IsolatedManager(object): pexpect_timeout=5 ) output = buff.getvalue() - if status != 'successful': - return 0 # recognized by task manager as 'unreachable' - result = re.search('{}(.*){}'.format(start_delimiter, end_delimiter), output) - cap = result.group(1) - return cap + output = output[output.find('{'):] # Remove starting log statements + result = json.loads(output) + for instance in instance_qs: + task_result = result['plays'][0]['tasks'][0]['hosts'][instance.hostname] + if 'capacity' in task_result: + instance.capacity = int(task_result['capacity']) + instance.save(update_fields=['capacity']) + elif 'msg' in task_result: + logger.warning('Could not update capacity of {}, msg={}'.format(instance.hostname, task_result['msg'])) @staticmethod def wrap_stdout_handle(instance, private_data_dir, stdout_handle): diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 27f16a5d38..09e32c25f5 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -218,10 +218,9 @@ def tower_isolated_heartbeat(self): return cutoff_pk = 
UnifiedJob.lowest_running_id() # Slow pass looping over isolated IGs and their isolated instances - for isolated_instance in isolated_instance_qs: - logger.debug("Managing isolated instance {}.".format(isolated_instance.hostname)) - isolated_instance.capacity = isolated_manager.IsolatedManager.health_check(isolated_instance, cutoff_pk=cutoff_pk) - isolated_instance.save(update_fields=['capacity']) + if len(isolated_instance_qs) > 0: + logger.debug("Managing isolated instances {}.".format(','.join([inst.hostname for inst in isolated_instance_qs]))) + isolated_manager.IsolatedManager.health_check(isolated_instance_qs, cutoff_pk=cutoff_pk) @task(bind=True, queue='tower')