mirror of
https://github.com/ansible/awx.git
synced 2026-03-13 15:09:32 -02:30
properly handle import errors in the isolated capacity healthcheck
If the awx_capacity module runs on an isolated node with missing libraries (e.g., psutil) or bad permissions, the runner status will be "failed". In this scenario, we *still* want to react by recording a capacity=0.
This commit is contained in:
@@ -370,33 +370,32 @@ class IsolatedManager(object):
|
|||||||
private_data_dir
|
private_data_dir
|
||||||
)
|
)
|
||||||
|
|
||||||
if runner_obj.status == 'successful':
|
for instance in instance_qs:
|
||||||
for instance in instance_qs:
|
task_result = {}
|
||||||
task_result = {}
|
try:
|
||||||
try:
|
task_result = runner_obj.get_fact_cache(instance.hostname)
|
||||||
task_result = runner_obj.get_fact_cache(instance.hostname)
|
except Exception:
|
||||||
except Exception:
|
logger.exception('Failed to read status from isolated instances')
|
||||||
logger.exception('Failed to read status from isolated instances')
|
if 'awx_capacity_cpu' in task_result and 'awx_capacity_mem' in task_result:
|
||||||
if 'awx_capacity_cpu' in task_result and 'awx_capacity_mem' in task_result:
|
task_result = {
|
||||||
task_result = {
|
'cpu': task_result['awx_cpu'],
|
||||||
'cpu': task_result['awx_cpu'],
|
'mem': task_result['awx_mem'],
|
||||||
'mem': task_result['awx_mem'],
|
'capacity_cpu': task_result['awx_capacity_cpu'],
|
||||||
'capacity_cpu': task_result['awx_capacity_cpu'],
|
'capacity_mem': task_result['awx_capacity_mem'],
|
||||||
'capacity_mem': task_result['awx_capacity_mem'],
|
'version': task_result['awx_capacity_version']
|
||||||
'version': task_result['awx_capacity_version']
|
}
|
||||||
}
|
IsolatedManager.update_capacity(instance, task_result)
|
||||||
IsolatedManager.update_capacity(instance, task_result)
|
logger.debug('Isolated instance {} successful heartbeat'.format(instance.hostname))
|
||||||
logger.debug('Isolated instance {} successful heartbeat'.format(instance.hostname))
|
elif instance.capacity == 0:
|
||||||
elif instance.capacity == 0:
|
logger.debug('Isolated instance {} previously marked as lost, could not re-join.'.format(
|
||||||
logger.debug('Isolated instance {} previously marked as lost, could not re-join.'.format(
|
instance.hostname))
|
||||||
instance.hostname))
|
else:
|
||||||
else:
|
logger.warning('Could not update status of isolated instance {}'.format(instance.hostname))
|
||||||
logger.warning('Could not update status of isolated instance {}'.format(instance.hostname))
|
if instance.is_lost(isolated=True):
|
||||||
if instance.is_lost(isolated=True):
|
instance.capacity = 0
|
||||||
instance.capacity = 0
|
instance.save(update_fields=['capacity'])
|
||||||
instance.save(update_fields=['capacity'])
|
logger.error('Isolated instance {} last checked in at {}, marked as lost.'.format(
|
||||||
logger.error('Isolated instance {} last checked in at {}, marked as lost.'.format(
|
instance.hostname, instance.modified))
|
||||||
instance.hostname, instance.modified))
|
|
||||||
finally:
|
finally:
|
||||||
if os.path.exists(private_data_dir):
|
if os.path.exists(private_data_dir):
|
||||||
shutil.rmtree(private_data_dir)
|
shutil.rmtree(private_data_dir)
|
||||||
|
|||||||
Reference in New Issue
Block a user