diff --git a/awx/api/views/__init__.py b/awx/api/views/__init__.py index 7865527a2b..a7803bca4e 100644 --- a/awx/api/views/__init__.py +++ b/awx/api/views/__init__.py @@ -460,41 +460,16 @@ class InstanceHealthCheck(GenericAPIView): def post(self, request, *args, **kwargs): obj = self.get_object() - # Note: hop nodes are already excluded by the get_queryset method if obj.node_type == 'execution': from awx.main.tasks.system import execution_node_health_check - runner_data = execution_node_health_check(obj.hostname) - obj.refresh_from_db() - data = self.get_serializer(data=request.data).to_representation(obj) - # Add in some extra unsaved fields - for extra_field in ('transmit_timing', 'run_timing'): - if extra_field in runner_data: - data[extra_field] = runner_data[extra_field] + execution_node_health_check.apply_async([obj.hostname]) else: from awx.main.tasks.system import cluster_node_health_check - if settings.CLUSTER_HOST_ID == obj.hostname: - cluster_node_health_check(obj.hostname) - else: - cluster_node_health_check.apply_async([obj.hostname], queue=obj.hostname) - start_time = time.time() - prior_check_time = obj.last_health_check - while time.time() - start_time < 50.0: - obj.refresh_from_db(fields=['last_health_check']) - if obj.last_health_check != prior_check_time: - break - if time.time() - start_time < 1.0: - time.sleep(0.1) - else: - time.sleep(1.0) - else: - obj.mark_offline(errors=_('Health check initiated by user determined this instance to be unresponsive')) - obj.refresh_from_db() - data = self.get_serializer(data=request.data).to_representation(obj) - - return Response(data, status=status.HTTP_200_OK) + cluster_node_health_check.apply_async([obj.hostname], queue=obj.hostname) + return Response(dict(msg=f"Health check is running for {obj.hostname}."), status=status.HTTP_200_OK) class InstanceGroupList(ListCreateAPIView): diff --git a/awx/main/models/ha.py b/awx/main/models/ha.py index cd6313ecaa..9ecaece5de 100644 --- a/awx/main/models/ha.py +++ b/awx/main/models/ha.py @@ -243,20 +243,21 @@ class Instance(HasPolicyEditsMixin, BaseModel): def mark_offline(self, update_last_seen=False, perform_save=True, errors=''): if self.node_state not in (Instance.States.READY, Instance.States.UNAVAILABLE, Instance.States.INSTALLED): - return + return [] if self.node_state == Instance.States.UNAVAILABLE and self.errors == errors and (not update_last_seen): - return + return [] self.node_state = Instance.States.UNAVAILABLE self.cpu_capacity = self.mem_capacity = self.capacity = 0 self.errors = errors if update_last_seen: self.last_seen = now() + update_fields = ['node_state', 'capacity', 'cpu_capacity', 'mem_capacity', 'errors'] + if update_last_seen: + update_fields += ['last_seen'] if perform_save: - update_fields = ['node_state', 'capacity', 'cpu_capacity', 'mem_capacity', 'errors'] - if update_last_seen: - update_fields += ['last_seen'] self.save(update_fields=update_fields) + return update_fields def set_capacity_value(self): """Sets capacity according to capacity adjustment rule (no save)""" @@ -314,7 +315,8 @@ class Instance(HasPolicyEditsMixin, BaseModel): self.node_state = Instance.States.READY update_fields.append('node_state') else: - self.mark_offline(perform_save=False, errors=errors) + fields_to_update = self.mark_offline(perform_save=False, errors=errors) + update_fields.extend(fields_to_update) update_fields.extend(['cpu_capacity', 'mem_capacity', 'capacity']) # disabling activity stream will avoid extra queries, which is important for heatbeat actions