Merge pull request #12959 from ansible/new-health-check-started

Add a new Instance.health_check_started field
This commit is contained in:
Jeff Bradberry
2022-09-28 10:58:43 -04:00
committed by GitHub
5 changed files with 46 additions and 4 deletions

View File

@@ -0,0 +1,18 @@
# Generated by Django 3.2.13 on 2022-09-26 20:54
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0170_node_and_link_state'),
]
operations = [
migrations.AddField(
model_name='instance',
name='health_check_started',
field=models.DateTimeField(editable=False, help_text='The last time a health check was initiated on this instance.', null=True),
),
]

View File

@@ -114,6 +114,11 @@ class Instance(HasPolicyEditsMixin, BaseModel):
editable=False,
help_text=_('Last time instance ran its heartbeat task for main cluster nodes. Last known connection to receptor mesh for execution nodes.'),
)
health_check_started = models.DateTimeField(
null=True,
editable=False,
help_text=_("The last time a health check was initiated on this instance."),
)
last_health_check = models.DateTimeField(
null=True,
editable=False,
@@ -207,6 +212,14 @@ class Instance(HasPolicyEditsMixin, BaseModel):
def jobs_total(self):
return UnifiedJob.objects.filter(execution_node=self.hostname).count()
@property
def health_check_pending(self):
if self.health_check_started is None:
return False
if self.last_health_check is None:
return True
return self.health_check_started > self.last_health_check
def get_cleanup_task_kwargs(self, **kwargs):
"""
Produce options to use for the command: ansible-runner worker cleanup

View File

@@ -464,7 +464,7 @@ def inspect_execution_nodes(instance_list):
continue
# Control-plane nodes are dealt with via local_health_check instead.
if instance.node_type in ('control', 'hybrid'):
if instance.node_type in (Instance.Types.CONTROL, Instance.Types.HYBRID):
continue
last_seen = parse_date(ad['Time'])
@@ -474,7 +474,7 @@ def inspect_execution_nodes(instance_list):
instance.save(update_fields=['last_seen'])
# Only execution nodes should be dealt with by execution_node_health_check
if instance.node_type == 'hop':
if instance.node_type == Instance.Types.HOP:
if instance.node_state in (Instance.States.UNAVAILABLE, Instance.States.INSTALLED):
logger.warning(f'Hop node {hostname}, has rejoined the receptor mesh')
instance.save_health_data(errors='')