From 940c189c12bdc0843372ce1d88051d05869860f2 Mon Sep 17 00:00:00 2001 From: Alan Rominger Date: Tue, 24 Aug 2021 08:30:55 -0400 Subject: [PATCH] Corresponding AWX changes for runner --worker-info schema update (#10926) --- awx/main/models/ha.py | 11 ++++++++++- awx/main/tasks.py | 13 +++++++------ awx/main/utils/receptor.py | 9 +++++++-- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/awx/main/models/ha.py b/awx/main/models/ha.py index ebd1ad44b0..00475254bd 100644 --- a/awx/main/models/ha.py +++ b/awx/main/models/ha.py @@ -3,6 +3,7 @@ from decimal import Decimal import random +import logging from django.core.validators import MinValueValidator from django.db import models, connection @@ -26,6 +27,8 @@ from awx.main.models.mixins import RelatedJobsMixin __all__ = ('Instance', 'InstanceGroup', 'TowerScheduleState') +logger = logging.getLogger('awx.main.models.ha') + class HasPolicyEditsMixin(HasEditsMixin): class Meta: @@ -174,13 +177,19 @@ class Instance(HasPolicyEditsMixin, BaseModel): self.mem_capacity = get_mem_effective_capacity(self.memory) self.set_capacity_value() - def save_health_data(self, version, cpu, memory, last_seen=None, has_error=False): + def save_health_data(self, version, cpu, memory, uuid=None, last_seen=None, has_error=False): update_fields = [] if last_seen is not None and self.last_seen != last_seen: self.last_seen = last_seen update_fields.append('last_seen') + if uuid is not None and self.uuid != uuid: + if self.uuid is not None: + logger.warn(f'Self-reported uuid of {self.hostname} changed from {self.uuid} to {uuid}') + self.uuid = uuid + update_fields.append('uuid') + if self.version != version: self.version = version update_fields.append('version') diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 9dc0065459..c577a426e8 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -411,14 +411,15 @@ def execution_node_health_check(node): prior_capacity = instance.capacity instance.save_health_data( - 'ansible-runner-' + data.get('Version', '???'), - data.get('CPU Capacity', 0), # TODO: rename field on runner side to not say "Capacity" - data.get('Memory Capacity', 0) * 1000, # TODO: double-check the multiplier here - has_error=bool(data.get('Errors')), + version='ansible-runner-' + data.get('runner_version', '???'), + cpu=data.get('cpu_count', 0), + memory=data.get('mem_in_bytes', 0), + uuid=data.get('uuid'), + has_error=bool(data.get('errors')), ) - if data['Errors']: - formatted_error = "\n".join(data["Errors"]) + if data['errors']: + formatted_error = "\n".join(data["errors"]) if prior_capacity: logger.warn(f'Health check marking execution node {node} as lost, errors:\n{formatted_error}') else: diff --git a/awx/main/utils/receptor.py b/awx/main/utils/receptor.py index 8a205334c1..b792561cf8 100644 --- a/awx/main/utils/receptor.py +++ b/awx/main/utils/receptor.py @@ -16,7 +16,7 @@ def worker_info(node_name): receptor_ctl = get_receptor_ctl() transmit_start = time.time() error_list = [] - data = {'Errors': error_list, 'transmit_timing': 0.0} + data = {'errors': error_list, 'transmit_timing': 0.0} result = receptor_ctl.submit_work(worktype='ansible-runner', payload='', params={"params": f"--worker-info"}, ttl='20s', node=node_name) @@ -71,7 +71,12 @@ def worker_info(node_name): if not isinstance(remote_data, dict): error_list.append(f'Remote node {node_name} --worker-info output is not a YAML dict, output:{stdout}') else: - error_list.extend(remote_data.pop('Errors')) # merge both error lists + error_list.extend(remote_data.pop('errors', [])) # merge both error lists data.update(remote_data) + # see tasks.py usage of keys + missing_keys = set(('runner_version', 'mem_in_bytes', 'cpu_count')) - set(data.keys()) + if missing_keys: + data['errors'].append('Worker failed to return keys {}'.format(' '.join(missing_keys))) + return data