Incorrect capacity for remote execution nodes 14051 (#14315)

This commit is contained in:
Lila Yasin 2023-09-05 11:20:36 -04:00 committed by GitHub
parent dc81aa46d0
commit 6ce5799689
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 24 additions and 17 deletions

View File

@ -309,8 +309,8 @@ class Instance(HasPolicyEditsMixin, BaseModel):
self.cpu_capacity = 0
self.mem_capacity = 0 # formula has a non-zero offset, so we make sure it is 0 for hop nodes
else:
self.cpu_capacity = get_cpu_effective_capacity(self.cpu)
self.mem_capacity = get_mem_effective_capacity(self.memory)
self.cpu_capacity = get_cpu_effective_capacity(self.cpu, is_control_node=bool(self.node_type in (Instance.Types.CONTROL, Instance.Types.HYBRID)))
self.mem_capacity = get_mem_effective_capacity(self.memory, is_control_node=bool(self.node_type in (Instance.Types.CONTROL, Instance.Types.HYBRID)))
self.set_capacity_value()
def save_health_data(self, version=None, cpu=0, memory=0, uuid=None, update_last_seen=False, errors=''):
@ -333,12 +333,17 @@ class Instance(HasPolicyEditsMixin, BaseModel):
self.version = version
update_fields.append('version')
new_cpu = get_corrected_cpu(cpu)
if self.node_type == Instance.Types.EXECUTION:
new_cpu = cpu
new_memory = memory
else:
new_cpu = get_corrected_cpu(cpu)
new_memory = get_corrected_memory(memory)
if new_cpu != self.cpu:
self.cpu = new_cpu
update_fields.append('cpu')
new_memory = get_corrected_memory(memory)
if new_memory != self.memory:
self.memory = new_memory
update_fields.append('memory')

View File

@ -466,7 +466,6 @@ def execution_node_health_check(node):
data = worker_info(node)
prior_capacity = instance.capacity
instance.save_health_data(
version='ansible-runner-' + data.get('runner_version', '???'),
cpu=data.get('cpu_count', 0),

View File

@ -38,8 +38,8 @@ def test_orphan_unified_job_creation(instance, inventory):
@pytest.mark.django_db
@mock.patch('awx.main.tasks.system.inspect_execution_and_hop_nodes', lambda *args, **kwargs: None)
@mock.patch('awx.main.models.ha.get_cpu_effective_capacity', lambda cpu: 8)
@mock.patch('awx.main.models.ha.get_mem_effective_capacity', lambda mem: 62)
@mock.patch('awx.main.models.ha.get_cpu_effective_capacity', lambda cpu, is_control_node: 8)
@mock.patch('awx.main.models.ha.get_mem_effective_capacity', lambda mem, is_control_node: 62)
def test_job_capacity_and_with_inactive_node():
i = Instance.objects.create(hostname='test-1')
i.save_health_data('18.0.1', 2, 8000)

View File

@ -36,7 +36,9 @@ def test_SYSTEM_TASK_ABS_MEM_conversion(value, converted_value, mem_capacity):
mock_settings.IS_K8S = True
assert convert_mem_str_to_bytes(value) == converted_value
assert get_corrected_memory(-1) == converted_value
assert get_mem_effective_capacity(-1) == mem_capacity
assert get_mem_effective_capacity(1, is_control_node=True) == mem_capacity
# SYSTEM_TASK_ABS_MEM should not affect memory and capacity for execution nodes
assert get_mem_effective_capacity(2147483648, is_control_node=False) == 20
@pytest.mark.parametrize(
@ -58,4 +60,6 @@ def test_SYSTEM_TASK_ABS_CPU_conversion(value, converted_value, cpu_capacity):
mock_settings.SYSTEM_TASK_FORKS_CPU = 4
assert convert_cpu_str_to_decimal_cpu(value) == converted_value
assert get_corrected_cpu(-1) == converted_value
assert get_cpu_effective_capacity(-1) == cpu_capacity
assert get_cpu_effective_capacity(-1, is_control_node=True) == cpu_capacity
# SYSTEM_TASK_ABS_CPU should not affect cpu count and capacity for execution nodes
assert get_cpu_effective_capacity(2.0, is_control_node=False) == 8

View File

@ -768,14 +768,13 @@ def get_corrected_cpu(cpu_count): # formerly get_cpu_capacity
return cpu_count # no correction
def get_cpu_effective_capacity(cpu_count):
def get_cpu_effective_capacity(cpu_count, is_control_node=False):
from django.conf import settings
cpu_count = get_corrected_cpu(cpu_count)
settings_forkcpu = getattr(settings, 'SYSTEM_TASK_FORKS_CPU', None)
env_forkcpu = os.getenv('SYSTEM_TASK_FORKS_CPU', None)
if is_control_node:
cpu_count = get_corrected_cpu(cpu_count)
if env_forkcpu:
forkcpu = int(env_forkcpu)
elif settings_forkcpu:
@ -834,6 +833,7 @@ def get_corrected_memory(memory):
# Runner returns memory in bytes
# so we convert memory from settings to bytes as well.
if env_absmem is not None:
return convert_mem_str_to_bytes(env_absmem)
elif settings_absmem is not None:
@ -842,14 +842,13 @@ def get_corrected_memory(memory):
return memory
def get_mem_effective_capacity(mem_bytes):
def get_mem_effective_capacity(mem_bytes, is_control_node=False):
from django.conf import settings
mem_bytes = get_corrected_memory(mem_bytes)
settings_mem_mb_per_fork = getattr(settings, 'SYSTEM_TASK_FORKS_MEM', None)
env_mem_mb_per_fork = os.getenv('SYSTEM_TASK_FORKS_MEM', None)
if is_control_node:
mem_bytes = get_corrected_memory(mem_bytes)
if env_mem_mb_per_fork:
mem_mb_per_fork = int(env_mem_mb_per_fork)
elif settings_mem_mb_per_fork: