From 43a53f41dd25f23e2f18c5cab1d903c2facb8bfa Mon Sep 17 00:00:00 2001 From: Alan Rominger Date: Thu, 14 Jul 2022 12:39:30 -0400 Subject: [PATCH] Add logs about heartbeat skew Co-authored-by: Shane McDonald --- awx/main/tasks/system.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/awx/main/tasks/system.py b/awx/main/tasks/system.py index 932c8abe5b..7f9e157d8c 100644 --- a/awx/main/tasks/system.py +++ b/awx/main/tasks/system.py @@ -506,10 +506,15 @@ def cluster_node_heartbeat(): if this_inst: startup_event = this_inst.is_lost(ref_time=nowtime) + last_last_seen = this_inst.last_seen this_inst.local_health_check() if startup_event and this_inst.capacity != 0: - logger.warning('Rejoining the cluster as instance {}.'.format(this_inst.hostname)) + logger.warning(f'Rejoining the cluster as instance {this_inst.hostname}. Prior last_seen {last_last_seen}') return + elif not last_last_seen: + logger.warning(f'Instance does not have recorded last_seen, updating to {nowtime}') + elif (nowtime - last_last_seen) > timedelta(seconds=settings.CLUSTER_NODE_HEARTBEAT_PERIOD + 2): + logger.warning(f'Heartbeat skew - interval={(nowtime - last_last_seen).total_seconds():.4f}, expected={settings.CLUSTER_NODE_HEARTBEAT_PERIOD}') else: if settings.AWX_AUTO_DEPROVISION_INSTANCES: (changed, this_inst) = Instance.objects.register(ip_address=os.environ.get('MY_POD_IP'), node_type='control', uuid=settings.SYSTEM_UUID)