From 6ce579968928c1a41cd493f4e8b4a9d768b22cc4 Mon Sep 17 00:00:00 2001 From: Lila Yasin Date: Tue, 5 Sep 2023 11:20:36 -0400 Subject: [PATCH] Incorrect capacity for remote execution nodes 14051 (#14315) --- awx/main/models/ha.py | 13 +++++++++---- awx/main/tasks/system.py | 1 - awx/main/tests/functional/test_jobs.py | 4 ++-- .../unit/settings/test_k8s_resource_setttings.py | 8 ++++++-- awx/main/utils/common.py | 15 +++++++-------- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/awx/main/models/ha.py b/awx/main/models/ha.py index b7cc16421d..96c57ca350 100644 --- a/awx/main/models/ha.py +++ b/awx/main/models/ha.py @@ -309,8 +309,8 @@ class Instance(HasPolicyEditsMixin, BaseModel): self.cpu_capacity = 0 self.mem_capacity = 0 # formula has a non-zero offset, so we make sure it is 0 for hop nodes else: - self.cpu_capacity = get_cpu_effective_capacity(self.cpu) - self.mem_capacity = get_mem_effective_capacity(self.memory) + self.cpu_capacity = get_cpu_effective_capacity(self.cpu, is_control_node=bool(self.node_type in (Instance.Types.CONTROL, Instance.Types.HYBRID))) + self.mem_capacity = get_mem_effective_capacity(self.memory, is_control_node=bool(self.node_type in (Instance.Types.CONTROL, Instance.Types.HYBRID))) self.set_capacity_value() def save_health_data(self, version=None, cpu=0, memory=0, uuid=None, update_last_seen=False, errors=''): @@ -333,12 +333,17 @@ class Instance(HasPolicyEditsMixin, BaseModel): self.version = version update_fields.append('version') - new_cpu = get_corrected_cpu(cpu) + if self.node_type == Instance.Types.EXECUTION: + new_cpu = cpu + new_memory = memory + else: + new_cpu = get_corrected_cpu(cpu) + new_memory = get_corrected_memory(memory) + if new_cpu != self.cpu: self.cpu = new_cpu update_fields.append('cpu') - new_memory = get_corrected_memory(memory) if new_memory != self.memory: self.memory = new_memory update_fields.append('memory') diff --git a/awx/main/tasks/system.py b/awx/main/tasks/system.py index 
9adc288324..da7341af36 100644 --- a/awx/main/tasks/system.py +++ b/awx/main/tasks/system.py @@ -466,7 +466,6 @@ def execution_node_health_check(node): data = worker_info(node) prior_capacity = instance.capacity - instance.save_health_data( version='ansible-runner-' + data.get('runner_version', '???'), cpu=data.get('cpu_count', 0), diff --git a/awx/main/tests/functional/test_jobs.py b/awx/main/tests/functional/test_jobs.py index 26dc291f41..e245638f7f 100644 --- a/awx/main/tests/functional/test_jobs.py +++ b/awx/main/tests/functional/test_jobs.py @@ -38,8 +38,8 @@ def test_orphan_unified_job_creation(instance, inventory): @pytest.mark.django_db @mock.patch('awx.main.tasks.system.inspect_execution_and_hop_nodes', lambda *args, **kwargs: None) -@mock.patch('awx.main.models.ha.get_cpu_effective_capacity', lambda cpu: 8) -@mock.patch('awx.main.models.ha.get_mem_effective_capacity', lambda mem: 62) +@mock.patch('awx.main.models.ha.get_cpu_effective_capacity', lambda cpu, is_control_node: 8) +@mock.patch('awx.main.models.ha.get_mem_effective_capacity', lambda mem, is_control_node: 62) def test_job_capacity_and_with_inactive_node(): i = Instance.objects.create(hostname='test-1') i.save_health_data('18.0.1', 2, 8000) diff --git a/awx/main/tests/unit/settings/test_k8s_resource_setttings.py b/awx/main/tests/unit/settings/test_k8s_resource_setttings.py index a2899a8561..65fa45d95a 100644 --- a/awx/main/tests/unit/settings/test_k8s_resource_setttings.py +++ b/awx/main/tests/unit/settings/test_k8s_resource_setttings.py @@ -36,7 +36,9 @@ def test_SYSTEM_TASK_ABS_MEM_conversion(value, converted_value, mem_capacity): mock_settings.IS_K8S = True assert convert_mem_str_to_bytes(value) == converted_value assert get_corrected_memory(-1) == converted_value - assert get_mem_effective_capacity(-1) == mem_capacity + assert get_mem_effective_capacity(1, is_control_node=True) == mem_capacity + # SYSTEM_TASK_ABS_MEM should not affect memory and capacity for execution nodes + assert 
get_mem_effective_capacity(2147483648, is_control_node=False) == 20 @pytest.mark.parametrize( @@ -58,4 +60,6 @@ def test_SYSTEM_TASK_ABS_CPU_conversion(value, converted_value, cpu_capacity): mock_settings.SYSTEM_TASK_FORKS_CPU = 4 assert convert_cpu_str_to_decimal_cpu(value) == converted_value assert get_corrected_cpu(-1) == converted_value - assert get_cpu_effective_capacity(-1) == cpu_capacity + assert get_cpu_effective_capacity(-1, is_control_node=True) == cpu_capacity + # SYSTEM_TASK_ABS_CPU should not affect cpu count and capacity for execution nodes + assert get_cpu_effective_capacity(2.0, is_control_node=False) == 8 diff --git a/awx/main/utils/common.py b/awx/main/utils/common.py index 896f37d779..1e134b46f8 100644 --- a/awx/main/utils/common.py +++ b/awx/main/utils/common.py @@ -768,14 +768,13 @@ def get_corrected_cpu(cpu_count): # formerlly get_cpu_capacity return cpu_count # no correction -def get_cpu_effective_capacity(cpu_count): +def get_cpu_effective_capacity(cpu_count, is_control_node=False): from django.conf import settings - cpu_count = get_corrected_cpu(cpu_count) - settings_forkcpu = getattr(settings, 'SYSTEM_TASK_FORKS_CPU', None) env_forkcpu = os.getenv('SYSTEM_TASK_FORKS_CPU', None) - + if is_control_node: + cpu_count = get_corrected_cpu(cpu_count) if env_forkcpu: forkcpu = int(env_forkcpu) elif settings_forkcpu: @@ -834,6 +833,7 @@ def get_corrected_memory(memory): # Runner returns memory in bytes # so we convert memory from settings to bytes as well. 
+ if env_absmem is not None: return convert_mem_str_to_bytes(env_absmem) elif settings_absmem is not None: @@ -842,14 +842,13 @@ def get_corrected_memory(memory): return memory -def get_mem_effective_capacity(mem_bytes): +def get_mem_effective_capacity(mem_bytes, is_control_node=False): from django.conf import settings - mem_bytes = get_corrected_memory(mem_bytes) - settings_mem_mb_per_fork = getattr(settings, 'SYSTEM_TASK_FORKS_MEM', None) env_mem_mb_per_fork = os.getenv('SYSTEM_TASK_FORKS_MEM', None) - + if is_control_node: + mem_bytes = get_corrected_memory(mem_bytes) if env_mem_mb_per_fork: mem_mb_per_fork = int(env_mem_mb_per_fork) elif settings_mem_mb_per_fork: