Corresponding AWX changes for runner --worker-info schema update (#10926)

This commit is contained in:
Alan Rominger
2021-08-24 08:30:55 -04:00
parent c3ad479fc6
commit 940c189c12
3 changed files with 24 additions and 9 deletions

View File

@@ -3,6 +3,7 @@
from decimal import Decimal from decimal import Decimal
import random import random
import logging
from django.core.validators import MinValueValidator from django.core.validators import MinValueValidator
from django.db import models, connection from django.db import models, connection
@@ -26,6 +27,8 @@ from awx.main.models.mixins import RelatedJobsMixin
__all__ = ('Instance', 'InstanceGroup', 'TowerScheduleState') __all__ = ('Instance', 'InstanceGroup', 'TowerScheduleState')
logger = logging.getLogger('awx.main.models.ha')
class HasPolicyEditsMixin(HasEditsMixin): class HasPolicyEditsMixin(HasEditsMixin):
class Meta: class Meta:
@@ -174,13 +177,19 @@ class Instance(HasPolicyEditsMixin, BaseModel):
self.mem_capacity = get_mem_effective_capacity(self.memory) self.mem_capacity = get_mem_effective_capacity(self.memory)
self.set_capacity_value() self.set_capacity_value()
def save_health_data(self, version, cpu, memory, last_seen=None, has_error=False): def save_health_data(self, version, cpu, memory, uuid=None, last_seen=None, has_error=False):
update_fields = [] update_fields = []
if last_seen is not None and self.last_seen != last_seen: if last_seen is not None and self.last_seen != last_seen:
self.last_seen = last_seen self.last_seen = last_seen
update_fields.append('last_seen') update_fields.append('last_seen')
if uuid is not None and self.uuid != uuid:
if self.uuid is not None:
logger.warn(f'Self-reported uuid of {self.hostname} changed from {self.uuid} to {uuid}')
self.uuid = uuid
update_fields.append('uuid')
if self.version != version: if self.version != version:
self.version = version self.version = version
update_fields.append('version') update_fields.append('version')

View File

@@ -411,14 +411,15 @@ def execution_node_health_check(node):
prior_capacity = instance.capacity prior_capacity = instance.capacity
instance.save_health_data( instance.save_health_data(
'ansible-runner-' + data.get('Version', '???'), version='ansible-runner-' + data.get('runner_version', '???'),
data.get('CPU Capacity', 0), # TODO: rename field on runner side to not say "Capacity" cpu=data.get('cpu_count', 0),
data.get('Memory Capacity', 0) * 1000, # TODO: double-check the multiplier here memory=data.get('mem_in_bytes', 0),
has_error=bool(data.get('Errors')), uuid=data.get('uuid'),
has_error=bool(data.get('errors')),
) )
if data['Errors']: if data['errors']:
formatted_error = "\n".join(data["Errors"]) formatted_error = "\n".join(data["errors"])
if prior_capacity: if prior_capacity:
logger.warn(f'Health check marking execution node {node} as lost, errors:\n{formatted_error}') logger.warn(f'Health check marking execution node {node} as lost, errors:\n{formatted_error}')
else: else:

View File

@@ -16,7 +16,7 @@ def worker_info(node_name):
receptor_ctl = get_receptor_ctl() receptor_ctl = get_receptor_ctl()
transmit_start = time.time() transmit_start = time.time()
error_list = [] error_list = []
data = {'Errors': error_list, 'transmit_timing': 0.0} data = {'errors': error_list, 'transmit_timing': 0.0}
result = receptor_ctl.submit_work(worktype='ansible-runner', payload='', params={"params": f"--worker-info"}, ttl='20s', node=node_name) result = receptor_ctl.submit_work(worktype='ansible-runner', payload='', params={"params": f"--worker-info"}, ttl='20s', node=node_name)
@@ -71,7 +71,12 @@ def worker_info(node_name):
if not isinstance(remote_data, dict): if not isinstance(remote_data, dict):
error_list.append(f'Remote node {node_name} --worker-info output is not a YAML dict, output:{stdout}') error_list.append(f'Remote node {node_name} --worker-info output is not a YAML dict, output:{stdout}')
else: else:
error_list.extend(remote_data.pop('Errors')) # merge both error lists error_list.extend(remote_data.pop('errors', [])) # merge both error lists
data.update(remote_data) data.update(remote_data)
# see tasks.py usage of keys
missing_keys = set(('runner_version', 'mem_in_bytes', 'cpu_count')) - set(data.keys())
if missing_keys:
data['errors'].append('Worker failed to return keys {}'.format(' '.join(missing_keys)))
return data return data