mirror of
https://github.com/ansible/awx.git
synced 2026-01-19 13:41:28 -03:30
Add setting for missed heartbeats before marking node offline
This commit is contained in:
parent
3c51cb130f
commit
c5976e2584
@ -207,7 +207,7 @@ class Instance(HasPolicyEditsMixin, BaseModel):
|
||||
return True
|
||||
if ref_time is None:
|
||||
ref_time = now()
|
||||
grace_period = settings.CLUSTER_NODE_HEARTBEAT_PERIOD * 2
|
||||
grace_period = settings.CLUSTER_NODE_HEARTBEAT_PERIOD * settings.CLUSTER_NODE_MISSED_HEARTBEAT_TOLERANCE
|
||||
if self.node_type in ('execution', 'hop'):
|
||||
grace_period += settings.RECEPTOR_SERVICE_ADVERTISEMENT_PERIOD
|
||||
return self.last_seen < ref_time - timedelta(seconds=grace_period)
|
||||
|
||||
@ -432,6 +432,10 @@ os.environ.setdefault('DJANGO_LIVE_TEST_SERVER_ADDRESS', 'localhost:9013-9199')
|
||||
|
||||
# heartbeat period can factor into some forms of logic, so it is maintained as a setting here
|
||||
CLUSTER_NODE_HEARTBEAT_PERIOD = 60
|
||||
|
||||
# Number of missed heartbeats until a node gets marked as lost
|
||||
CLUSTER_NODE_MISSED_HEARTBEAT_TOLERANCE = 2
|
||||
|
||||
RECEPTOR_SERVICE_ADVERTISEMENT_PERIOD = 60 # https://github.com/ansible/receptor/blob/aa1d589e154d8a0cb99a220aff8f98faf2273be6/pkg/netceptor/netceptor.go#L34
|
||||
EXECUTION_NODE_REMEDIATION_CHECKS = 60 * 30 # once every 30 minutes check if an execution node errors have been resolved
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user