mirror of
https://github.com/ansible/awx.git
synced 2026-05-11 03:17:38 -02:30
Model changes for instance last_seen field to replace modified (#10870)
* Model changes for instance last_seen field to replace modified * Break up refresh_capacity into smaller units * Rename execution node methods, fix last_seen clustering * Use update_fields to make it clear save only affects capacity * Restructing to pass unit tests * Fix bug where a PATCH did not update capacity value
This commit is contained in:
@@ -21,7 +21,7 @@ from awx.main.managers import InstanceManager, InstanceGroupManager
|
||||
from awx.main.fields import JSONField
|
||||
from awx.main.models.base import BaseModel, HasEditsMixin, prevent_search
|
||||
from awx.main.models.unified_jobs import UnifiedJob
|
||||
from awx.main.utils import get_cpu_capacity, get_mem_capacity, get_system_task_capacity
|
||||
from awx.main.utils.common import measure_cpu, get_corrected_cpu, get_cpu_effective_capacity, measure_memory, get_corrected_memory, get_mem_effective_capacity
|
||||
from awx.main.models.mixins import RelatedJobsMixin
|
||||
|
||||
__all__ = ('Instance', 'InstanceGroup', 'TowerScheduleState')
|
||||
@@ -52,6 +52,7 @@ class Instance(HasPolicyEditsMixin, BaseModel):
|
||||
|
||||
objects = InstanceManager()
|
||||
|
||||
# Fields set in instance registration
|
||||
uuid = models.CharField(max_length=40)
|
||||
hostname = models.CharField(max_length=250, unique=True)
|
||||
ip_address = models.CharField(
|
||||
@@ -61,16 +62,11 @@ class Instance(HasPolicyEditsMixin, BaseModel):
|
||||
max_length=50,
|
||||
unique=True,
|
||||
)
|
||||
# Auto-fields, implementation is different from BaseModel
|
||||
created = models.DateTimeField(auto_now_add=True)
|
||||
modified = models.DateTimeField(auto_now=True)
|
||||
# Fields defined in health check or heartbeat
|
||||
version = models.CharField(max_length=120, blank=True)
|
||||
capacity = models.PositiveIntegerField(
|
||||
default=100,
|
||||
editable=False,
|
||||
)
|
||||
capacity_adjustment = models.DecimalField(default=Decimal(1.0), max_digits=3, decimal_places=2, validators=[MinValueValidator(0)])
|
||||
enabled = models.BooleanField(default=True)
|
||||
managed_by_policy = models.BooleanField(default=True)
|
||||
cpu = models.IntegerField(
|
||||
default=0,
|
||||
editable=False,
|
||||
@@ -78,7 +74,22 @@ class Instance(HasPolicyEditsMixin, BaseModel):
|
||||
memory = models.BigIntegerField(
|
||||
default=0,
|
||||
editable=False,
|
||||
help_text=_('Total system memory of this instance in bytes.'),
|
||||
)
|
||||
last_seen = models.DateTimeField(
|
||||
null=True,
|
||||
editable=False,
|
||||
help_text=_('Last time instance ran its heartbeat task for main cluster nodes. Last known connection to receptor mesh for execution nodes.'),
|
||||
)
|
||||
# Capacity management
|
||||
capacity = models.PositiveIntegerField(
|
||||
default=100,
|
||||
editable=False,
|
||||
)
|
||||
capacity_adjustment = models.DecimalField(default=Decimal(1.0), max_digits=3, decimal_places=2, validators=[MinValueValidator(0)])
|
||||
enabled = models.BooleanField(default=True)
|
||||
managed_by_policy = models.BooleanField(default=True)
|
||||
|
||||
cpu_capacity = models.IntegerField(
|
||||
default=0,
|
||||
editable=False,
|
||||
@@ -126,39 +137,83 @@ class Instance(HasPolicyEditsMixin, BaseModel):
|
||||
return random.choice(Instance.objects.filter(enabled=True).filter(node_type__in=['control', 'hybrid']).values_list('hostname', flat=True))
|
||||
|
||||
def is_lost(self, ref_time=None):
|
||||
if self.last_seen is None:
|
||||
return True
|
||||
if ref_time is None:
|
||||
ref_time = now()
|
||||
grace_period = 120
|
||||
return self.modified < ref_time - timedelta(seconds=grace_period)
|
||||
grace_period = settings.CLUSTER_NODE_HEARTBEAT_PERIOD * 2
|
||||
if self.node_type == 'execution':
|
||||
grace_period += settings.RECEPTOR_SERVICE_ADVERTISEMENT_PERIOD
|
||||
return self.last_seen < ref_time - timedelta(seconds=grace_period)
|
||||
|
||||
def mark_offline(self, on_good_terms=False):
|
||||
self.cpu = self.cpu_capacity = self.memory = self.mem_capacity = self.capacity = 0
|
||||
update_fields = ['capacity', 'cpu', 'memory', 'cpu_capacity', 'mem_capacity']
|
||||
if on_good_terms:
|
||||
update_fields.append('modified')
|
||||
self.save()
|
||||
def mark_offline(self, update_last_seen=False, perform_save=True):
|
||||
if self.cpu_capacity == 0 and self.mem_capacity == 0 and self.capacity == 0 and (not update_last_seen):
|
||||
return
|
||||
self.cpu_capacity = self.mem_capacity = self.capacity = 0
|
||||
if update_last_seen:
|
||||
self.last_seen = now()
|
||||
|
||||
def refresh_capacity(self):
|
||||
cpu = get_cpu_capacity()
|
||||
mem = get_mem_capacity()
|
||||
if perform_save:
|
||||
update_fields = ['capacity', 'cpu_capacity', 'mem_capacity']
|
||||
if update_last_seen:
|
||||
update_fields += ['last_seen']
|
||||
self.save(update_fields=update_fields)
|
||||
|
||||
def set_capacity_value(self):
|
||||
"""Sets capacity according to capacity adjustment rule (no save)"""
|
||||
if self.enabled:
|
||||
self.capacity = get_system_task_capacity(self.capacity_adjustment)
|
||||
lower_cap = min(self.mem_capacity, self.cpu_capacity)
|
||||
higher_cap = max(self.mem_capacity, self.cpu_capacity)
|
||||
self.capacity = lower_cap + (higher_cap - lower_cap) * self.capacity_adjustment
|
||||
else:
|
||||
self.capacity = 0
|
||||
|
||||
def refresh_capacity_fields(self):
|
||||
"""Update derived capacity fields from cpu and memory (no save)"""
|
||||
self.cpu_capacity = get_cpu_effective_capacity(self.cpu)
|
||||
self.mem_capacity = get_mem_effective_capacity(self.memory)
|
||||
self.set_capacity_value()
|
||||
|
||||
def save_health_data(self, version, cpu, memory, last_seen=None, has_error=False):
|
||||
update_fields = []
|
||||
|
||||
if last_seen is not None and self.last_seen != last_seen:
|
||||
self.last_seen = last_seen
|
||||
update_fields.append('last_seen')
|
||||
|
||||
if self.version != version:
|
||||
self.version = version
|
||||
update_fields.append('version')
|
||||
|
||||
new_cpu = get_corrected_cpu(cpu)
|
||||
if new_cpu != self.cpu:
|
||||
self.cpu = new_cpu
|
||||
update_fields.append('cpu')
|
||||
|
||||
new_memory = get_corrected_memory(memory)
|
||||
if new_memory != self.memory:
|
||||
self.memory = new_memory
|
||||
update_fields.append('memory')
|
||||
|
||||
if not has_error:
|
||||
self.refresh_capacity_fields()
|
||||
else:
|
||||
self.mark_offline(perform_save=False)
|
||||
update_fields.extend(['cpu_capacity', 'mem_capacity', 'capacity'])
|
||||
|
||||
self.save(update_fields=update_fields)
|
||||
|
||||
def local_health_check(self):
|
||||
"""Only call this method on the instance that this record represents"""
|
||||
has_error = False
|
||||
try:
|
||||
# if redis is down for some reason, that means we can't persist
|
||||
# playbook event data; we should consider this a zero capacity event
|
||||
redis.Redis.from_url(settings.BROKER_URL).ping()
|
||||
except redis.ConnectionError:
|
||||
self.capacity = 0
|
||||
has_error = True
|
||||
|
||||
self.cpu = cpu[0]
|
||||
self.memory = mem[0]
|
||||
self.cpu_capacity = cpu[1]
|
||||
self.mem_capacity = mem[1]
|
||||
self.version = awx_application_version
|
||||
self.save(update_fields=['capacity', 'version', 'modified', 'cpu', 'memory', 'cpu_capacity', 'mem_capacity'])
|
||||
self.save_health_data(awx_application_version, measure_cpu(), measure_memory(), last_seen=now(), has_error=has_error)
|
||||
|
||||
|
||||
class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
|
||||
|
||||
Reference in New Issue
Block a user