From 326ed22efeb625dbcc467e907bad62901f704eed Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Fri, 31 Jan 2020 10:17:20 -0500 Subject: [PATCH] properly handle import errors in the isolated capacity healthcheck. If the awx_capacity module runs on an isolated node with missing libraries (e.g., psutil) or bad permissions, then the runner status will be "failed". In this scenario, we *still* want to react by recording a capacity=0. --- awx/main/isolated/manager.py | 53 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/awx/main/isolated/manager.py b/awx/main/isolated/manager.py index bc4a5201d0..afa7eb57be 100644 --- a/awx/main/isolated/manager.py +++ b/awx/main/isolated/manager.py @@ -370,33 +370,32 @@ class IsolatedManager(object): private_data_dir ) - if runner_obj.status == 'successful': - for instance in instance_qs: - task_result = {} - try: - task_result = runner_obj.get_fact_cache(instance.hostname) - except Exception: - logger.exception('Failed to read status from isolated instances') - if 'awx_capacity_cpu' in task_result and 'awx_capacity_mem' in task_result: - task_result = { - 'cpu': task_result['awx_cpu'], - 'mem': task_result['awx_mem'], - 'capacity_cpu': task_result['awx_capacity_cpu'], - 'capacity_mem': task_result['awx_capacity_mem'], - 'version': task_result['awx_capacity_version'] - } - IsolatedManager.update_capacity(instance, task_result) - logger.debug('Isolated instance {} successful heartbeat'.format(instance.hostname)) - elif instance.capacity == 0: - logger.debug('Isolated instance {} previously marked as lost, could not re-join.'.format( - instance.hostname)) - else: - logger.warning('Could not update status of isolated instance {}'.format(instance.hostname)) - if instance.is_lost(isolated=True): - instance.capacity = 0 - instance.save(update_fields=['capacity']) - logger.error('Isolated instance {} last checked in at {}, marked as lost.'.format( - instance.hostname, instance.modified)) + for
instance in instance_qs: + task_result = {} + try: + task_result = runner_obj.get_fact_cache(instance.hostname) + except Exception: + logger.exception('Failed to read status from isolated instances') + if 'awx_capacity_cpu' in task_result and 'awx_capacity_mem' in task_result: + task_result = { + 'cpu': task_result['awx_cpu'], + 'mem': task_result['awx_mem'], + 'capacity_cpu': task_result['awx_capacity_cpu'], + 'capacity_mem': task_result['awx_capacity_mem'], + 'version': task_result['awx_capacity_version'] + } + IsolatedManager.update_capacity(instance, task_result) + logger.debug('Isolated instance {} successful heartbeat'.format(instance.hostname)) + elif instance.capacity == 0: + logger.debug('Isolated instance {} previously marked as lost, could not re-join.'.format( + instance.hostname)) + else: + logger.warning('Could not update status of isolated instance {}'.format(instance.hostname)) + if instance.is_lost(isolated=True): + instance.capacity = 0 + instance.save(update_fields=['capacity']) + logger.error('Isolated instance {} last checked in at {}, marked as lost.'.format( + instance.hostname, instance.modified)) finally: if os.path.exists(private_data_dir): shutil.rmtree(private_data_dir)