mirror of
https://github.com/ansible/awx.git
synced 2026-05-06 17:07:36 -02:30
Merge pull request #12959 from ansible/new-health-check-started
Add a new Instance.health_check_started field
This commit is contained in:
@@ -4878,6 +4878,7 @@ class InstanceSerializer(BaseSerializer):
|
|||||||
percent_capacity_remaining = serializers.SerializerMethodField()
|
percent_capacity_remaining = serializers.SerializerMethodField()
|
||||||
jobs_running = serializers.IntegerField(help_text=_('Count of jobs in the running or waiting state that are targeted for this instance'), read_only=True)
|
jobs_running = serializers.IntegerField(help_text=_('Count of jobs in the running or waiting state that are targeted for this instance'), read_only=True)
|
||||||
jobs_total = serializers.IntegerField(help_text=_('Count of all jobs that target this instance'), read_only=True)
|
jobs_total = serializers.IntegerField(help_text=_('Count of all jobs that target this instance'), read_only=True)
|
||||||
|
health_check_pending = serializers.SerializerMethodField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Instance
|
model = Instance
|
||||||
@@ -4893,6 +4894,8 @@ class InstanceSerializer(BaseSerializer):
|
|||||||
'created',
|
'created',
|
||||||
'modified',
|
'modified',
|
||||||
'last_seen',
|
'last_seen',
|
||||||
|
'health_check_started',
|
||||||
|
'health_check_pending',
|
||||||
'last_health_check',
|
'last_health_check',
|
||||||
'errors',
|
'errors',
|
||||||
'capacity_adjustment',
|
'capacity_adjustment',
|
||||||
@@ -4948,6 +4951,9 @@ class InstanceSerializer(BaseSerializer):
|
|||||||
else:
|
else:
|
||||||
return float("{0:.2f}".format(((float(obj.capacity) - float(obj.consumed_capacity)) / (float(obj.capacity))) * 100))
|
return float("{0:.2f}".format(((float(obj.capacity) - float(obj.consumed_capacity)) / (float(obj.capacity))) * 100))
|
||||||
|
|
||||||
|
def get_health_check_pending(self, obj):
    """Return the value for the serialized ``health_check_pending`` field.

    The value is read straight from ``obj.health_check_pending``; no
    additional computation happens at the serializer level.

    :param obj: the object being serialized; must expose a
        ``health_check_pending`` attribute.
    :returns: whatever ``obj.health_check_pending`` evaluates to.
    """
    return obj.health_check_pending
|
||||||
|
|
||||||
def validate(self, data):
|
def validate(self, data):
|
||||||
if self.instance:
|
if self.instance:
|
||||||
if self.instance.node_type == Instance.Types.HOP:
|
if self.instance.node_type == Instance.Types.HOP:
|
||||||
|
|||||||
@@ -451,8 +451,13 @@ class InstanceHealthCheck(GenericAPIView):
|
|||||||
|
|
||||||
def post(self, request, *args, **kwargs):
|
def post(self, request, *args, **kwargs):
|
||||||
obj = self.get_object()
|
obj = self.get_object()
|
||||||
|
if obj.health_check_pending:
|
||||||
|
return Response({'msg': f"Health check was already in progress for {obj.hostname}."}, status=status.HTTP_200_OK)
|
||||||
|
|
||||||
# Note: hop nodes are already excluded by the get_queryset method
|
# Note: hop nodes are already excluded by the get_queryset method
|
||||||
if obj.node_type == 'execution':
|
obj.health_check_started = now()
|
||||||
|
obj.save(update_fields=['health_check_started'])
|
||||||
|
if obj.node_type == models.Instance.Types.EXECUTION:
|
||||||
from awx.main.tasks.system import execution_node_health_check
|
from awx.main.tasks.system import execution_node_health_check
|
||||||
|
|
||||||
execution_node_health_check.apply_async([obj.hostname])
|
execution_node_health_check.apply_async([obj.hostname])
|
||||||
@@ -460,7 +465,7 @@ class InstanceHealthCheck(GenericAPIView):
|
|||||||
from awx.main.tasks.system import cluster_node_health_check
|
from awx.main.tasks.system import cluster_node_health_check
|
||||||
|
|
||||||
cluster_node_health_check.apply_async([obj.hostname], queue=obj.hostname)
|
cluster_node_health_check.apply_async([obj.hostname], queue=obj.hostname)
|
||||||
return Response(dict(msg=f"Health check is running for {obj.hostname}."), status=status.HTTP_200_OK)
|
return Response({'msg': f"Health check is running for {obj.hostname}."}, status=status.HTTP_200_OK)
|
||||||
|
|
||||||
|
|
||||||
class InstanceGroupList(ListCreateAPIView):
|
class InstanceGroupList(ListCreateAPIView):
|
||||||
|
|||||||
18
awx/main/migrations/0171_add_health_check_started.py
Normal file
18
awx/main/migrations/0171_add_health_check_started.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 3.2.13 on 2022-09-26 20:54
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Add the ``health_check_started`` column to the ``instance`` model.

    The new field is a nullable, non-editable ``DateTimeField``.
    """

    # Must be applied on top of the preceding ``main`` app migration.
    dependencies = [
        ('main', '0170_node_and_link_state'),
    ]

    operations = [
        migrations.AddField(
            model_name='instance',
            name='health_check_started',
            field=models.DateTimeField(editable=False, help_text='The last time a health check was initiated on this instance.', null=True),
        ),
    ]
|
||||||
@@ -114,6 +114,11 @@ class Instance(HasPolicyEditsMixin, BaseModel):
|
|||||||
editable=False,
|
editable=False,
|
||||||
help_text=_('Last time instance ran its heartbeat task for main cluster nodes. Last known connection to receptor mesh for execution nodes.'),
|
help_text=_('Last time instance ran its heartbeat task for main cluster nodes. Last known connection to receptor mesh for execution nodes.'),
|
||||||
)
|
)
|
||||||
|
health_check_started = models.DateTimeField(
|
||||||
|
null=True,
|
||||||
|
editable=False,
|
||||||
|
help_text=_("The last time a health check was initiated on this instance."),
|
||||||
|
)
|
||||||
last_health_check = models.DateTimeField(
|
last_health_check = models.DateTimeField(
|
||||||
null=True,
|
null=True,
|
||||||
editable=False,
|
editable=False,
|
||||||
@@ -207,6 +212,14 @@ class Instance(HasPolicyEditsMixin, BaseModel):
|
|||||||
def jobs_total(self):
|
def jobs_total(self):
|
||||||
return UnifiedJob.objects.filter(execution_node=self.hostname).count()
|
return UnifiedJob.objects.filter(execution_node=self.hostname).count()
|
||||||
|
|
||||||
|
@property
def health_check_pending(self):
    """Tell whether a health check has been started and is still outstanding.

    Decision table, based only on the two timestamps stored on ``self``:

    * ``health_check_started`` is ``None``  -> ``False`` (nothing was started)
    * ``last_health_check`` is ``None``     -> ``True``  (started, no check recorded yet)
    * otherwise                             -> ``True`` only when the start
      timestamp is strictly later than ``last_health_check``

    :returns: ``bool``
    """
    started = self.health_check_started
    if started is None:
        return False

    # Only consulted once we know a check was started at some point.
    finished = self.last_health_check
    if finished is None:
        return True

    # Equal timestamps count as "not pending" (strict comparison).
    return started > finished
|
||||||
|
|
||||||
def get_cleanup_task_kwargs(self, **kwargs):
|
def get_cleanup_task_kwargs(self, **kwargs):
|
||||||
"""
|
"""
|
||||||
Produce options to use for the command: ansible-runner worker cleanup
|
Produce options to use for the command: ansible-runner worker cleanup
|
||||||
|
|||||||
@@ -464,7 +464,7 @@ def inspect_execution_nodes(instance_list):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Control-plane nodes are dealt with via local_health_check instead.
|
# Control-plane nodes are dealt with via local_health_check instead.
|
||||||
if instance.node_type in ('control', 'hybrid'):
|
if instance.node_type in (Instance.Types.CONTROL, Instance.Types.HYBRID):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
last_seen = parse_date(ad['Time'])
|
last_seen = parse_date(ad['Time'])
|
||||||
@@ -474,7 +474,7 @@ def inspect_execution_nodes(instance_list):
|
|||||||
instance.save(update_fields=['last_seen'])
|
instance.save(update_fields=['last_seen'])
|
||||||
|
|
||||||
# Only execution nodes should be dealt with by execution_node_health_check
|
# Only execution nodes should be dealt with by execution_node_health_check
|
||||||
if instance.node_type == 'hop':
|
if instance.node_type == Instance.Types.HOP:
|
||||||
if instance.node_state in (Instance.States.UNAVAILABLE, Instance.States.INSTALLED):
|
if instance.node_state in (Instance.States.UNAVAILABLE, Instance.States.INSTALLED):
|
||||||
logger.warning(f'Hop node {hostname}, has rejoined the receptor mesh')
|
logger.warning(f'Hop node {hostname}, has rejoined the receptor mesh')
|
||||||
instance.save_health_data(errors='')
|
instance.save_health_data(errors='')
|
||||||
|
|||||||
Reference in New Issue
Block a user