mirror of
https://github.com/ansible/awx.git
synced 2026-01-11 18:09:57 -03:30
Error handling when node is missing from mesh for jobs and checks
This commit is contained in:
parent
206c85778e
commit
f34c96ecf5
@ -36,3 +36,7 @@ class PostRunError(Exception):
|
||||
self.status = status
|
||||
self.tb = tb
|
||||
super(PostRunError, self).__init__(msg)
|
||||
|
||||
|
||||
class ReceptorNodeNotFound(RuntimeError):
    """Raised when a requested node is not present in the receptor mesh.

    Subclasses RuntimeError so that callers catching RuntimeError (or a
    broader Exception) still handle it, while callers that care about the
    missing-node case specifically can catch this type and report the
    message (e.g. as a job explanation) instead of a full traceback.
    """
|
||||
|
||||
@ -85,7 +85,7 @@ from awx.main.models import (
|
||||
build_safe_env,
|
||||
)
|
||||
from awx.main.constants import ACTIVE_STATES
|
||||
from awx.main.exceptions import AwxTaskError, PostRunError
|
||||
from awx.main.exceptions import AwxTaskError, PostRunError, ReceptorNodeNotFound
|
||||
from awx.main.queue import CallbackQueueDispatcher
|
||||
from awx.main.dispatch.publish import task
|
||||
from awx.main.dispatch import get_local_queuename, reaper
|
||||
@ -1546,6 +1546,8 @@ class BaseTask(object):
|
||||
# ensure failure notification sends even if playbook_on_stats event is not triggered
|
||||
handle_success_and_failure_notifications.apply_async([self.instance.job.id])
|
||||
|
||||
except ReceptorNodeNotFound as exc:
|
||||
extra_update_fields['job_explanation'] = str(exc)
|
||||
except Exception:
|
||||
# this could catch programming or file system errors
|
||||
extra_update_fields['result_traceback'] = traceback.format_exc()
|
||||
@ -3069,7 +3071,7 @@ class AWXReceptorJob:
|
||||
receptor_ctl.simple_command(f"work release {self.unit_id}")
|
||||
# If an error occurred without the job itself failing, it could be a broken instance
|
||||
if self.work_type == 'ansible-runner' and ((res is None) or (getattr(res, 'rc', None) is None)):
|
||||
execution_node_health_check(self.task.instance.execution_node)
|
||||
execution_node_health_check.delay(self.task.instance.execution_node)
|
||||
|
||||
@property
|
||||
def sign_work(self):
|
||||
|
||||
@ -1,12 +1,14 @@
|
||||
import logging
|
||||
import yaml
|
||||
import time
|
||||
from enum import Enum, unique
|
||||
|
||||
from receptorctl.socket_interface import ReceptorControl
|
||||
|
||||
from awx.main.exceptions import ReceptorNodeNotFound
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from enum import Enum, unique
|
||||
|
||||
logger = logging.getLogger('awx.main.utils.receptor')
|
||||
|
||||
@ -63,6 +65,7 @@ def get_conn_type(node_name, receptor_ctl):
|
||||
for node in all_nodes:
|
||||
if node.get('NodeID') == node_name:
|
||||
return ReceptorConnectionType(node.get('ConnType'))
|
||||
raise ReceptorNodeNotFound(f'Instance {node_name} is not in the receptor mesh')
|
||||
|
||||
|
||||
def administrative_workunit_reaper(work_list=None):
|
||||
@ -183,6 +186,9 @@ def worker_info(node_name, work_type='ansible-runner'):
|
||||
else:
|
||||
error_list.append(details)
|
||||
|
||||
except ReceptorNodeNotFound as exc:
|
||||
error_list.append(str(exc))
|
||||
|
||||
# If we have a connection error, missing keys would be a trivial consequence of that
|
||||
if not data['errors']:
|
||||
# see tasks.py usage of keys
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user