mirror of
https://github.com/ansible/awx.git
synced 2026-05-16 05:47:38 -02:30
multi-host isolated heartbeat w tower-isolated check
* use tower-expect command to determine job status when running the isolated heartbeat playbook * grok JSON output of playbook to obtain result information * run playbook against multiple isolated hosts at the same time (addresses scalability concerns)
This commit is contained in:
@@ -57,9 +57,7 @@ class IsolatedManager(object):
|
||||
self.cwd = cwd
|
||||
self.env = env.copy()
|
||||
# Do not use callbacks for controller's management jobs
|
||||
self.env['ANSIBLE_CALLBACK_PLUGINS'] = ''
|
||||
self.env['CALLBACK_QUEUE'] = ''
|
||||
self.env['CALLBACK_CONNECTION'] = ''
|
||||
self.env.update(self._base_management_env())
|
||||
self.stdout_handle = stdout_handle
|
||||
self.ssh_key_path = ssh_key_path
|
||||
self.expect_passwords = {k.pattern: v for k, v in expect_passwords.items()}
|
||||
@@ -71,8 +69,18 @@ class IsolatedManager(object):
|
||||
self.proot_cmd = proot_cmd
|
||||
self.started_at = None
|
||||
|
||||
@property
|
||||
def awx_playbook_path(self):
|
||||
@staticmethod
|
||||
def _base_management_env():
|
||||
return {
|
||||
'ANSIBLE_CALLBACK_PLUGINS': '',
|
||||
'CALLBACK_QUEUE': '',
|
||||
'CALLBACK_CONNECTION': '',
|
||||
'ANSIBLE_RETRY_FILES_ENABLED': 'False',
|
||||
'ANSIBLE_HOST_KEY_CHECKING': 'False'
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def awx_playbook_path(cls):
|
||||
return os.path.join(
|
||||
os.path.dirname(awx.__file__),
|
||||
'playbooks'
|
||||
@@ -134,7 +142,7 @@ class IsolatedManager(object):
|
||||
buff = StringIO.StringIO()
|
||||
logger.debug('Starting job on isolated host with `run_isolated.yml` playbook.')
|
||||
status, rc = run.run_pexpect(
|
||||
args, self.awx_playbook_path, self.env, buff,
|
||||
args, self.awx_playbook_path(), self.env, buff,
|
||||
expect_passwords={
|
||||
re.compile(r'Secret:\s*?$', re.M): base64.b64encode(json.dumps(secrets))
|
||||
},
|
||||
@@ -244,7 +252,7 @@ class IsolatedManager(object):
|
||||
buff = cStringIO.StringIO()
|
||||
logger.debug('Checking job on isolated host with `check_isolated.yml` playbook.')
|
||||
status, rc = run.run_pexpect(
|
||||
args, self.awx_playbook_path, self.env, buff,
|
||||
args, self.awx_playbook_path(), self.env, buff,
|
||||
cancelled_callback=self.cancelled_callback,
|
||||
idle_timeout=remaining,
|
||||
job_timeout=remaining,
|
||||
@@ -295,7 +303,7 @@ class IsolatedManager(object):
|
||||
logger.debug('Cleaning up job on isolated host with `clean_isolated.yml` playbook.')
|
||||
buff = cStringIO.StringIO()
|
||||
status, rc = run.run_pexpect(
|
||||
args, self.awx_playbook_path, self.env, buff,
|
||||
args, self.awx_playbook_path(), self.env, buff,
|
||||
idle_timeout=60, job_timeout=60,
|
||||
pexpect_timeout=5
|
||||
)
|
||||
@@ -304,46 +312,55 @@ class IsolatedManager(object):
|
||||
# stdout_handle is closed by this point so writing output to logs is our only option
|
||||
logger.warning('Cleanup from isolated job encountered error, output:\n{}'.format(buff.getvalue()))
|
||||
|
||||
@staticmethod
|
||||
def health_check(instance_qs, cutoff_pk=0):
|
||||
@classmethod
|
||||
def health_check(cls, instance_qs):
|
||||
'''
|
||||
:param instance_qs: List of Django objects representing the
|
||||
isolated instances to manage
|
||||
:param cutoff_pk: Job id of the oldest job still in the running state
|
||||
Method logic not yet written.
|
||||
returns the instance's capacity or None if it is not reachable
|
||||
Runs playbook that will
|
||||
- determine if instance is reachable
|
||||
- find the instance capacity
|
||||
- clean up orphaned private files
|
||||
Performs save on each instance to update its capacity.
|
||||
'''
|
||||
extra_vars = dict(
|
||||
cutoff_pk=cutoff_pk,
|
||||
)
|
||||
hostname_string = ''
|
||||
for instance in instance_qs:
|
||||
hostname_string += '{},'.format(instance.hostname)
|
||||
args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i',
|
||||
hostname_string, 'heartbeat_isolated.yml', '-e',
|
||||
json.dumps(extra_vars)]
|
||||
module_path = os.path.join(os.path.dirname(awx.__file__), 'lib', 'management_modules')
|
||||
playbook_path = os.path.join(os.path.dirname(awx.__file__), 'playbooks')
|
||||
env = {
|
||||
'ANSIBLE_LIBRARY': module_path,
|
||||
'ANSIBLE_STDOUT_CALLBACK': 'json'
|
||||
}
|
||||
hostname_string, 'heartbeat_isolated.yml']
|
||||
env = cls._base_management_env()
|
||||
env['ANSIBLE_LIBRARY'] = os.path.join(os.path.dirname(awx.__file__), 'lib', 'management_modules')
|
||||
env['ANSIBLE_STDOUT_CALLBACK'] = 'json'
|
||||
|
||||
buff = cStringIO.StringIO()
|
||||
status, rc = run.run_pexpect(
|
||||
args, playbook_path, env, buff,
|
||||
args, cls.awx_playbook_path(), env, buff,
|
||||
idle_timeout=60, job_timeout=60,
|
||||
pexpect_timeout=5
|
||||
)
|
||||
output = buff.getvalue()
|
||||
output = output[output.find('{'):] # Remove starting log statements
|
||||
result = json.loads(output)
|
||||
buff.close()
|
||||
|
||||
try:
|
||||
result = json.loads(output)
|
||||
if not isinstance(result, dict):
|
||||
raise TypeError('Expected a dict but received {}.'.format(str(type(result))))
|
||||
except (ValueError, AssertionError, TypeError):
|
||||
logger.exception('Failed to read status from isolated instances, output:\n {}'.format(output))
|
||||
return
|
||||
|
||||
for instance in instance_qs:
|
||||
task_result = result['plays'][0]['tasks'][0]['hosts'][instance.hostname]
|
||||
try:
|
||||
task_result = result['plays'][0]['tasks'][0]['hosts'][instance.hostname]
|
||||
except (KeyError, IndexError):
|
||||
logger.exception('Failed to read status from isolated instance {}.'.format(instance.hostname))
|
||||
continue
|
||||
if 'capacity' in task_result:
|
||||
instance.capacity = int(task_result['capacity'])
|
||||
instance.save(update_fields=['capacity'])
|
||||
elif 'msg' in task_result:
|
||||
logger.warning('Could not update capacity of {}, msg={}'.format(instance.hostname, task_result['msg']))
|
||||
else:
|
||||
logger.warning('Could not update capacity of {}, msg={}'.format(
|
||||
instance.hostname, task_result.get('msg', 'unknown failure')))
|
||||
|
||||
@staticmethod
|
||||
def wrap_stdout_handle(instance, private_data_dir, stdout_handle):
|
||||
|
||||
Reference in New Issue
Block a user