Corresponding AWX changes for runner --worker-info schema update (#10926)

This commit is contained in:
Alan Rominger 2021-08-24 08:30:55 -04:00
parent c3ad479fc6
commit 940c189c12
No known key found for this signature in database
GPG Key ID: C2D7EAAA12B63559
3 changed files with 24 additions and 9 deletions

View File

@ -3,6 +3,7 @@
from decimal import Decimal
import random
import logging
from django.core.validators import MinValueValidator
from django.db import models, connection
@ -26,6 +27,8 @@ from awx.main.models.mixins import RelatedJobsMixin
__all__ = ('Instance', 'InstanceGroup', 'TowerScheduleState')
logger = logging.getLogger('awx.main.models.ha')
class HasPolicyEditsMixin(HasEditsMixin):
class Meta:
@ -174,13 +177,19 @@ class Instance(HasPolicyEditsMixin, BaseModel):
self.mem_capacity = get_mem_effective_capacity(self.memory)
self.set_capacity_value()
def save_health_data(self, version, cpu, memory, last_seen=None, has_error=False):
def save_health_data(self, version, cpu, memory, uuid=None, last_seen=None, has_error=False):
update_fields = []
if last_seen is not None and self.last_seen != last_seen:
self.last_seen = last_seen
update_fields.append('last_seen')
if uuid is not None and self.uuid != uuid:
if self.uuid is not None:
logger.warn(f'Self-reported uuid of {self.hostname} changed from {self.uuid} to {uuid}')
self.uuid = uuid
update_fields.append('uuid')
if self.version != version:
self.version = version
update_fields.append('version')

View File

@ -411,14 +411,15 @@ def execution_node_health_check(node):
prior_capacity = instance.capacity
instance.save_health_data(
'ansible-runner-' + data.get('Version', '???'),
data.get('CPU Capacity', 0), # TODO: rename field on runner side to not say "Capacity"
data.get('Memory Capacity', 0) * 1000, # TODO: double-check the multiplier here
has_error=bool(data.get('Errors')),
version='ansible-runner-' + data.get('runner_version', '???'),
cpu=data.get('cpu_count', 0),
memory=data.get('mem_in_bytes', 0),
uuid=data.get('uuid'),
has_error=bool(data.get('errors')),
)
if data['Errors']:
formatted_error = "\n".join(data["Errors"])
if data['errors']:
formatted_error = "\n".join(data["errors"])
if prior_capacity:
logger.warn(f'Health check marking execution node {node} as lost, errors:\n{formatted_error}')
else:

View File

@ -16,7 +16,7 @@ def worker_info(node_name):
receptor_ctl = get_receptor_ctl()
transmit_start = time.time()
error_list = []
data = {'Errors': error_list, 'transmit_timing': 0.0}
data = {'errors': error_list, 'transmit_timing': 0.0}
result = receptor_ctl.submit_work(worktype='ansible-runner', payload='', params={"params": f"--worker-info"}, ttl='20s', node=node_name)
@ -71,7 +71,12 @@ def worker_info(node_name):
if not isinstance(remote_data, dict):
error_list.append(f'Remote node {node_name} --worker-info output is not a YAML dict, output:{stdout}')
else:
error_list.extend(remote_data.pop('Errors')) # merge both error lists
error_list.extend(remote_data.pop('errors', [])) # merge both error lists
data.update(remote_data)
# see tasks.py usage of keys
missing_keys = set(('runner_version', 'mem_in_bytes', 'cpu_count')) - set(data.keys())
if missing_keys:
data['errors'].append('Worker failed to return keys {}'.format(' '.join(missing_keys)))
return data