Run isolated heartbeat against all hosts at once

Previously we were running the playbook on a host-by-host
basis, but this changes it to pass in the list of all
isolated isntances the machine is responsible for.
Using the `json` Ansible stdout module, we are able to
parse the output for information on each host.
This commit is contained in:
AlanCoding
2017-06-19 12:07:40 -04:00
parent 7eb8fdd0f1
commit f371dd71b2
2 changed files with 23 additions and 17 deletions

View File

@@ -305,26 +305,29 @@ class IsolatedManager(object):
logger.warning('Cleanup from isolated job encountered error, output:\n{}'.format(buff.getvalue())) logger.warning('Cleanup from isolated job encountered error, output:\n{}'.format(buff.getvalue()))
@staticmethod @staticmethod
def health_check(instance, cutoff_pk=0): def health_check(instance_qs, cutoff_pk=0):
''' '''
:param instance: Django object representing the isolated instance :param instance_qs: List of Django objects representing the
isolated instances to manage
:param cutoff_pk: Job id of the oldest job still in the running state :param cutoff_pk: Job id of the oldest job still in the running state
Method logic not yet written. Method logic not yet written.
returns the instance's capacity or None if it is not reachable returns the instance's capacity or None if it is not reachable
''' '''
start_delimiter = 'wNqCXG6uul'
end_delimiter = 'n6kmoFyyAP'
extra_vars = dict( extra_vars = dict(
cutoff_pk=cutoff_pk, cutoff_pk=cutoff_pk,
start_delimiter=start_delimiter,
end_delimiter=end_delimiter
) )
hostname_string = ''
for instance in instance_qs:
hostname_string += '{},'.format(instance.hostname)
args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i', args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i',
'%s,' % instance.hostname, 'heartbeat_isolated.yml', '-e', hostname_string, 'heartbeat_isolated.yml', '-e',
json.dumps(extra_vars)] json.dumps(extra_vars)]
module_path = os.path.join(os.path.dirname(awx.__file__), 'lib', 'management_modules') module_path = os.path.join(os.path.dirname(awx.__file__), 'lib', 'management_modules')
playbook_path = os.path.join(os.path.dirname(awx.__file__), 'playbooks') playbook_path = os.path.join(os.path.dirname(awx.__file__), 'playbooks')
env = {'ANSIBLE_LIBRARY': module_path} env = {
'ANSIBLE_LIBRARY': module_path,
'ANSIBLE_STDOUT_CALLBACK': 'json'
}
buff = cStringIO.StringIO() buff = cStringIO.StringIO()
status, rc = run.run_pexpect( status, rc = run.run_pexpect(
args, playbook_path, env, buff, args, playbook_path, env, buff,
@@ -332,11 +335,15 @@ class IsolatedManager(object):
pexpect_timeout=5 pexpect_timeout=5
) )
output = buff.getvalue() output = buff.getvalue()
if status != 'successful': output = output[output.find('{'):] # Remove starting log statements
return 0 # recognized by task manager as 'unreachable' result = json.loads(output)
result = re.search('{}(.*){}'.format(start_delimiter, end_delimiter), output) for instance in instance_qs:
cap = result.group(1) task_result = result['plays'][0]['tasks'][0]['hosts'][instance.hostname]
return cap if 'capacity' in task_result:
instance.capacity = int(task_result['capacity'])
instance.save(update_fields=['capacity'])
elif 'msg' in task_result:
logger.warning('Could not update capacity of {}, msg={}'.format(instance.hostname, task_result['msg']))
@staticmethod @staticmethod
def wrap_stdout_handle(instance, private_data_dir, stdout_handle): def wrap_stdout_handle(instance, private_data_dir, stdout_handle):

View File

@@ -218,10 +218,9 @@ def tower_isolated_heartbeat(self):
return return
cutoff_pk = UnifiedJob.lowest_running_id() cutoff_pk = UnifiedJob.lowest_running_id()
# Slow pass looping over isolated IGs and their isolated instances # Slow pass looping over isolated IGs and their isolated instances
for isolated_instance in isolated_instance_qs: if len(isolated_instance_qs) > 0:
logger.debug("Managing isolated instance {}.".format(isolated_instance.hostname)) logger.debug("Managing isolated instances {}.".format(','.join([inst.hostname for inst in isolated_instance_qs])))
isolated_instance.capacity = isolated_manager.IsolatedManager.health_check(isolated_instance, cutoff_pk=cutoff_pk) isolated_manager.IsolatedManager.health_check(isolated_instance_qs, cutoff_pk=cutoff_pk)
isolated_instance.save(update_fields=['capacity'])
@task(bind=True, queue='tower') @task(bind=True, queue='tower')