mirror of
https://github.com/ansible/awx.git
synced 2026-01-12 10:30:03 -03:30
Reduce the number of triggers for execution node health checks
This commit is contained in:
parent
6f20a798ab
commit
77076dbd67
@ -534,7 +534,7 @@ def inspect_execution_nodes(instance_list):
|
||||
# check
|
||||
logger.warn(f'Execution node attempting to rejoin as instance {hostname}.')
|
||||
execution_node_health_check.apply_async([hostname])
|
||||
elif instance.capacity == 0:
|
||||
elif instance.capacity == 0 and instance.enabled:
|
||||
# nodes with a proven connection that still need remediation get health checks at a reduced frequency
|
||||
if not instance.last_health_check or (nowtime - instance.last_health_check).total_seconds() >= settings.EXECUTION_NODE_REMEDIATION_CHECKS:
|
||||
# Periodically re-run the health check of errored nodes, in case someone fixed it
|
||||
@ -3069,9 +3069,6 @@ class AWXReceptorJob:
|
||||
# Make sure to always release the work unit if we established it
|
||||
if self.unit_id is not None and settings.RECEPTOR_RELEASE_WORK:
|
||||
receptor_ctl.simple_command(f"work release {self.unit_id}")
|
||||
# If an error occurred without the job itself failing, it could be a broken instance
|
||||
if self.work_type == 'ansible-runner' and ((res is None) or (getattr(res, 'rc', None) is None)):
|
||||
execution_node_health_check.delay(self.task.instance.execution_node)
|
||||
|
||||
@property
|
||||
def sign_work(self):
|
||||
|
||||
@ -425,7 +425,7 @@ os.environ.setdefault('DJANGO_LIVE_TEST_SERVER_ADDRESS', 'localhost:9013-9199')
|
||||
# heartbeat period can factor into some forms of logic, so it is maintained as a setting here
|
||||
CLUSTER_NODE_HEARTBEAT_PERIOD = 60
|
||||
RECEPTOR_SERVICE_ADVERTISEMENT_PERIOD = 60 # https://github.com/ansible/receptor/blob/aa1d589e154d8a0cb99a220aff8f98faf2273be6/pkg/netceptor/netceptor.go#L34
|
||||
EXECUTION_NODE_REMEDIATION_CHECKS = 60 * 10 # once every 10 minutes, check whether an execution node's errors have been resolved
|
||||
EXECUTION_NODE_REMEDIATION_CHECKS = 60 * 30 # once every 30 minutes, check whether an execution node's errors have been resolved
|
||||
|
||||
BROKER_URL = 'unix:///var/run/redis/redis.sock'
|
||||
CELERYBEAT_SCHEDULE = {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user