Fixup conversion of memory and cpu settings to support k8s resource request format (#11725)

fix memory and cpu settings to support k8s resource request format

* fix conversion of memory setting to bytes

This setting has not been getting set by default, and needed some fixing
up to be compatible with setting memory the same way we set it in the
operator, as well as with changes from last year which assume that
ansible-runner returns memory in bytes.

This way we can start setting SYSTEM_TASK_ABS_MEM from the operator and
get a more accurate reflection of how much memory is available to the
control pod in k8s.
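
For illustration, a trimmed sketch of the conversion (the real helper,
convert_mem_str_to_bytes in awx.main.utils.common, appears in the diff
below and also handles the T/P/E suffixes and logs a warning on bad
input; the mem_str_to_bytes name here is just for this example):

    # Sketch: convert a k8s-style memory string (e.g. "2Gi", "4G", "2048Mi") to bytes.
    # Unsupported values fall back to 1 byte so the capacity math still returns something.
    def mem_str_to_bytes(mem_str: str) -> int:
        if mem_str.isdigit():  # a bare number is already bytes
            return int(mem_str)
        suffixes = {'Gi': 2**30, 'G': 10**9, 'Mi': 2**20, 'M': 10**6, 'Ki': 2**10, 'K': 10**3}
        for suffix, factor in suffixes.items():
            number = mem_str[: -len(suffix)]
            if mem_str.endswith(suffix) and number.isdigit():
                return int(number) * factor
        return 1  # unsupported format

    print(mem_str_to_bytes('2Gi'))     # 2147483648
    print(mem_str_to_bytes('4G'))      # 4000000000
    print(mem_str_to_bytes('2048Mi'))  # 2147483648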

On platforms where all services share memory, we deduct a penalty from
the available memory. On k8s we don't need to do this because the web,
redis, and task containers each have their own memory allocation.
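
As a rough sketch of the resulting capacity math (the real version is
get_mem_effective_capacity in the diff below; IS_K8S and the 100 MB
per fork SYSTEM_TASK_FORKS_MEM default are the settings shown there):

    # Sketch: forks we can run given total memory in bytes.
    # Outside k8s, 2 GiB is reserved for colocated services (web, redis, etc.);
    # on k8s those containers have their own allocations, so no penalty is taken.
    def mem_effective_capacity(mem_bytes: int, is_k8s: bool, mem_mb_per_fork: int = 100) -> int:
        memory_penalty_bytes = 0 if is_k8s else 2 * 2**30
        mem_mb = (mem_bytes - memory_penalty_bytes) // 2**20
        return max(1, mem_mb // mem_mb_per_fork)

    print(mem_effective_capacity(4 * 2**30, is_k8s=True))   # 40
    print(mem_effective_capacity(4 * 2**30, is_k8s=False))  # 20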

* Support CPU setting expressed in units used by k8s

This setting has not been getting set by default, and needed some fixing
up to be compatible with setting the CPU in the same way we set the CPU
resource requests/limits in the operator.

This way we can start setting SYSTEM_TASK_ABS_CPU from the operator and
get a more accurate reflection of how much CPU is available to the
control pod in k8s.

Because cpu on k8s can be partial cores, migrate cpu field to decimal.

k8s does not allow granularity of less than 100m (equivalent to 0.1 cores), so only
store up to 1 decimal place.
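
For illustration, a condensed sketch of that conversion (the real
helper, convert_cpu_str_to_decimal_cpu, appears in the diff below and
also logs a warning when it falls back):

    # Sketch: convert a k8s-style cpu string ("2", "1.5", "100m") to decimal cores,
    # rounded to one decimal place with a 0.1-core floor; bad values fall back to 1 cpu.
    def cpu_str_to_decimal(cpu_str: str) -> float:
        value, millicores = cpu_str, False
        if cpu_str.endswith('m'):
            value, millicores = cpu_str[:-1], True
        try:
            cpu = float(value)
        except ValueError:
            return 1.0  # unsupported format
        if millicores:
            cpu = cpu / 1000
        return max(0.1, round(cpu, 1))

    print(cpu_str_to_decimal('100m'))   # 0.1
    print(cpu_str_to_decimal('2505m'))  # 2.5
    print(cpu_str_to_decimal('1.55'))   # 1.6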

fix analytics to deal with decimal cpu

We need to use DjangoJSONEncoder when there are Decimal fields in the
data passed to json.dumps.
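
For example (a minimal, standalone illustration; plain json.dumps
raises TypeError on Decimal values, while DjangoJSONEncoder serializes
them as strings):

    import json
    from decimal import Decimal

    from django.core.serializers.json import DjangoJSONEncoder

    data = {'cpu': Decimal('1.5')}
    # json.dumps(data) -> TypeError: Object of type Decimal is not JSON serializable
    print(json.dumps(data, cls=DjangoJSONEncoder))  # {"cpu": "1.5"}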
Elijah DeLee 2022-02-15 14:08:24 -05:00 committed by GitHub
parent 3f08e26881
commit 799968460d
7 changed files with 204 additions and 43 deletions


@@ -211,7 +211,7 @@ def projects_by_scm_type(since, **kwargs):
return counts
@register('instance_info', '1.1', description=_('Cluster topology and capacity'))
@register('instance_info', '1.2', description=_('Cluster topology and capacity'))
def instance_info(since, include_hostnames=False, **kwargs):
info = {}
instances = models.Instance.objects.values_list('hostname').values(


@@ -90,7 +90,7 @@ def package(target, data, timestamp):
if isinstance(item, str):
f.add(item, arcname=f'./{name}')
else:
buf = json.dumps(item).encode('utf-8')
buf = json.dumps(item, cls=DjangoJSONEncoder).encode('utf-8')
info = tarfile.TarInfo(f'./{name}')
info.size = len(buf)
info.mtime = timestamp.timestamp()
@@ -230,7 +230,7 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
try:
last_entry = max(last_entries.get(key) or last_gather, until - timedelta(weeks=4))
results = (func(since or last_entry, collection_type=collection_type, until=until), func.__awx_analytics_version__)
json.dumps(results) # throwaway check to see if the data is json-serializable
json.dumps(results, cls=DjangoJSONEncoder) # throwaway check to see if the data is json-serializable
data[filename] = results
except Exception:
logger.exception("Could not generate metric {}".format(filename))


@@ -22,6 +22,7 @@ import psutil
from awx.main.models import UnifiedJob
from awx.main.dispatch import reaper
from awx.main.utils.common import convert_mem_str_to_bytes
if 'run_callback_receiver' in sys.argv:
logger = logging.getLogger('awx.main.commands.run_callback_receiver')
@@ -319,7 +320,8 @@ class AutoscalePool(WorkerPool):
if self.max_workers is None:
settings_absmem = getattr(settings, 'SYSTEM_TASK_ABS_MEM', None)
if settings_absmem is not None:
total_memory_gb = int(settings_absmem)
# There are 1073741824 bytes in a gigabyte. Convert bytes to gigabytes by dividing by 2**30
total_memory_gb = convert_mem_str_to_bytes(settings_absmem) // 2**30
else:
total_memory_gb = (psutil.virtual_memory().total >> 30) + 1 # noqa: round up
# 5 workers per GB of total memory


@@ -0,0 +1,19 @@
# Generated by Django 2.2.24 on 2022-02-14 17:37
from decimal import Decimal
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0157_inventory_labels'),
]
operations = [
migrations.AlterField(
model_name='instance',
name='cpu',
field=models.DecimalField(decimal_places=1, default=Decimal('0'), editable=False, max_digits=4),
),
]


@@ -82,8 +82,10 @@ class Instance(HasPolicyEditsMixin, BaseModel):
modified = models.DateTimeField(auto_now=True)
# Fields defined in health check or heartbeat
version = models.CharField(max_length=120, blank=True)
cpu = models.IntegerField(
default=0,
cpu = models.DecimalField(
default=Decimal(0.0),
max_digits=4,
decimal_places=1,
editable=False,
)
memory = models.BigIntegerField(


@@ -0,0 +1,61 @@
import pytest
from unittest import mock
from awx.main.utils.common import (
convert_mem_str_to_bytes,
get_mem_effective_capacity,
get_corrected_memory,
convert_cpu_str_to_decimal_cpu,
get_cpu_effective_capacity,
get_corrected_cpu,
)
@pytest.mark.parametrize(
"value,converted_value,mem_capacity",
[
('2G', 2000000000, 19),
('4G', 4000000000, 38),
('2Gi', 2147483648, 20),
('2.1G', 1, 1), # expressing memory with non-integers is not supported, and we'll fall back to 1 fork for memory capacity.
('4Gi', 4294967296, 40),
('2M', 2000000, 1),
('3M', 3000000, 1),
('2Mi', 2097152, 1),
('2048Mi', 2147483648, 20),
('4096Mi', 4294967296, 40),
('64G', 64000000000, 610),
('64Garbage', 1, 1),
],
)
def test_SYSTEM_TASK_ABS_MEM_conversion(value, converted_value, mem_capacity):
with mock.patch('django.conf.settings') as mock_settings:
mock_settings.SYSTEM_TASK_ABS_MEM = value
mock_settings.SYSTEM_TASK_FORKS_MEM = 100
mock_settings.IS_K8S = True
assert convert_mem_str_to_bytes(value) == converted_value
assert get_corrected_memory(-1) == converted_value
assert get_mem_effective_capacity(-1) == mem_capacity
@pytest.mark.parametrize(
"value,converted_value,cpu_capacity",
[
('2', 2.0, 8),
('1.5', 1.5, 6),
('100m', 0.1, 1),
('2000m', 2.0, 8),
('4MillionCPUm', 1.0, 4), # Any suffix other than 'm' is not supported, we fall back to 1 CPU
('Random', 1.0, 4), # Any setting value other than integers, floats millicores (e.g 1, 1.0, or 1000m) is not supported, fall back to 1 CPU
('2505m', 2.5, 10),
('1.55', 1.6, 6),
],
)
def test_SYSTEM_TASK_ABS_CPU_conversion(value, converted_value, cpu_capacity):
with mock.patch('django.conf.settings') as mock_settings:
mock_settings.SYSTEM_TASK_ABS_CPU = value
mock_settings.SYSTEM_TASK_FORKS_CPU = 4
assert convert_cpu_str_to_decimal_cpu(value) == converted_value
assert get_corrected_cpu(-1) == converted_value
assert get_cpu_effective_capacity(-1) == cpu_capacity


@@ -692,28 +692,33 @@ def parse_yaml_or_json(vars_str, silent_failure=True):
return vars_dict
def get_cpu_effective_capacity(cpu_count):
from django.conf import settings
def convert_cpu_str_to_decimal_cpu(cpu_str):
"""Convert a string indicating cpu units to decimal.
settings_abscpu = getattr(settings, 'SYSTEM_TASK_ABS_CPU', None)
env_abscpu = os.getenv('SYSTEM_TASK_ABS_CPU', None)
Useful for dealing with cpu setting that may be expressed in units compatible with
kubernetes.
if env_abscpu is not None:
return int(env_abscpu)
elif settings_abscpu is not None:
return int(settings_abscpu)
See https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/#cpu-units
"""
cpu = cpu_str
millicores = False
settings_forkcpu = getattr(settings, 'SYSTEM_TASK_FORKS_CPU', None)
env_forkcpu = os.getenv('SYSTEM_TASK_FORKS_CPU', None)
if cpu_str[-1] == 'm':
cpu = cpu_str[:-1]
millicores = True
if env_forkcpu:
forkcpu = int(env_forkcpu)
elif settings_forkcpu:
forkcpu = int(settings_forkcpu)
else:
forkcpu = 4
try:
cpu = float(cpu)
except ValueError:
cpu = 1.0
millicores = False
logger.warning(f"Could not convert SYSTEM_TASK_ABS_CPU {cpu_str} to a decimal number, falling back to default of 1 cpu")
return cpu_count * forkcpu
if millicores:
cpu = cpu / 1000
# Per kubernetes docs, fractional CPU less than .1 are not allowed
return max(0.1, round(cpu, 1))
def get_corrected_cpu(cpu_count): # formerlly get_cpu_capacity
@@ -725,34 +730,70 @@ def get_corrected_cpu(cpu_count): # formerlly get_cpu_capacity
settings_abscpu = getattr(settings, 'SYSTEM_TASK_ABS_CPU', None)
env_abscpu = os.getenv('SYSTEM_TASK_ABS_CPU', None)
if env_abscpu is not None or settings_abscpu is not None:
return 0
if env_abscpu is not None:
return convert_cpu_str_to_decimal_cpu(env_abscpu)
elif settings_abscpu is not None:
return convert_cpu_str_to_decimal_cpu(settings_abscpu)
return cpu_count # no correction
def get_mem_effective_capacity(mem_mb):
def get_cpu_effective_capacity(cpu_count):
from django.conf import settings
settings_absmem = getattr(settings, 'SYSTEM_TASK_ABS_MEM', None)
env_absmem = os.getenv('SYSTEM_TASK_ABS_MEM', None)
cpu_count = get_corrected_cpu(cpu_count)
if env_absmem is not None:
return int(env_absmem)
elif settings_absmem is not None:
return int(settings_absmem)
settings_forkcpu = getattr(settings, 'SYSTEM_TASK_FORKS_CPU', None)
env_forkcpu = os.getenv('SYSTEM_TASK_FORKS_CPU', None)
settings_forkmem = getattr(settings, 'SYSTEM_TASK_FORKS_MEM', None)
env_forkmem = os.getenv('SYSTEM_TASK_FORKS_MEM', None)
if env_forkmem:
forkmem = int(env_forkmem)
elif settings_forkmem:
forkmem = int(settings_forkmem)
if env_forkcpu:
forkcpu = int(env_forkcpu)
elif settings_forkcpu:
forkcpu = int(settings_forkcpu)
else:
forkmem = 100
forkcpu = 4
return max(1, ((mem_mb // 1024 // 1024) - 2048) // forkmem)
return max(1, int(cpu_count * forkcpu))
def convert_mem_str_to_bytes(mem_str):
"""Convert string with suffix indicating units to memory in bytes (base 2)
Useful for dealing with memory setting that may be expressed in units compatible with
kubernetes.
See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#meaning-of-memory
"""
# If there is no suffix, the memory sourced from the request is in bytes
if mem_str.isdigit():
return int(mem_str)
conversions = {
'Ei': lambda x: x * 2**60,
'E': lambda x: x * 10**18,
'Pi': lambda x: x * 2**50,
'P': lambda x: x * 10**15,
'Ti': lambda x: x * 2**40,
'T': lambda x: x * 10**12,
'Gi': lambda x: x * 2**30,
'G': lambda x: x * 10**9,
'Mi': lambda x: x * 2**20,
'M': lambda x: x * 10**6,
'Ki': lambda x: x * 2**10,
'K': lambda x: x * 10**3,
}
mem = 0
mem_unit = None
for i, char in enumerate(mem_str):
if not char.isdigit():
mem_unit = mem_str[i:]
mem = int(mem_str[:i])
break
if not mem_unit or mem_unit not in conversions.keys():
error = f"Unsupported value for SYSTEM_TASK_ABS_MEM: {mem_str}, memory must be expressed in bytes or with known suffix: {conversions.keys()}. Falling back to 1 byte"
logger.warning(error)
return 1
return max(1, conversions[mem_unit](mem))
def get_corrected_memory(memory):
@@ -761,12 +802,48 @@ def get_corrected_memory(memory):
settings_absmem = getattr(settings, 'SYSTEM_TASK_ABS_MEM', None)
env_absmem = os.getenv('SYSTEM_TASK_ABS_MEM', None)
if env_absmem is not None or settings_absmem is not None:
return 0
# Runner returns memory in bytes
# so we convert memory from settings to bytes as well.
if env_absmem is not None:
return convert_mem_str_to_bytes(env_absmem)
elif settings_absmem is not None:
return convert_mem_str_to_bytes(settings_absmem)
return memory
def get_mem_effective_capacity(mem_bytes):
from django.conf import settings
mem_bytes = get_corrected_memory(mem_bytes)
settings_mem_mb_per_fork = getattr(settings, 'SYSTEM_TASK_FORKS_MEM', None)
env_mem_mb_per_fork = os.getenv('SYSTEM_TASK_FORKS_MEM', None)
if env_mem_mb_per_fork:
mem_mb_per_fork = int(env_mem_mb_per_fork)
elif settings_mem_mb_per_fork:
mem_mb_per_fork = int(settings_mem_mb_per_fork)
else:
mem_mb_per_fork = 100
# Per docs, deduct 2GB of memory from the available memory
# to cover memory consumption of background tasks when redis/web etc are colocated with
# the other control processes
memory_penalty_bytes = 2147483648
if settings.IS_K8S:
# In k8s, this is dealt with differently because
# redis and the web containers have their own memory allocation
memory_penalty_bytes = 0
# convert memory to megabytes because our setting of how much memory we
# should allocate per fork is in megabytes
mem_mb = (mem_bytes - memory_penalty_bytes) // 2**20
max_forks_based_on_memory = mem_mb // mem_mb_per_fork
return max(1, max_forks_based_on_memory)
_inventory_updates = threading.local()
_task_manager = threading.local()