From 987924cbdac9ad8527df3b56f83126ad7c4566b4 Mon Sep 17 00:00:00 2001 From: Elijah DeLee Date: Mon, 17 Jan 2022 21:16:09 -0500 Subject: [PATCH] Add resource requests to default podspec Extend the pod pending timeout (AWX_CONTAINER_GROUP_POD_PENDING_TIMEOUT) from "5m" to "2h", on the assumption that we want to let the Kubernetes scheduler start containers whenever it is ready to start them. This allows us to make resource requests knowing that jobs which queue up waiting for resources will not be reaped after such a short timeout. Also log state_name instead of unit_status in the "No result details or output" warning in receptor.py. --- awx/main/tasks/receptor.py | 2 +- awx/main/utils/execution_environments.py | 1 + awx/settings/defaults.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/awx/main/tasks/receptor.py b/awx/main/tasks/receptor.py index 0a68800a4d..251356c799 100644 --- a/awx/main/tasks/receptor.py +++ b/awx/main/tasks/receptor.py @@ -381,7 +381,7 @@ class AWXReceptorJob: self.task.instance.result_traceback = detail self.task.instance.save(update_fields=['result_traceback']) else: - logger.warn(f'No result details or output from {self.task.instance.log_format}, status:\n{unit_status}') + logger.warn(f'No result details or output from {self.task.instance.log_format}, status:\n{state_name}') except Exception: raise RuntimeError(detail) diff --git a/awx/main/utils/execution_environments.py b/awx/main/utils/execution_environments.py index 1cb9d8e917..bf85799df1 100644 --- a/awx/main/utils/execution_environments.py +++ b/awx/main/utils/execution_environments.py @@ -38,6 +38,7 @@ def get_default_pod_spec(): "image": ee.image, "name": 'worker', "args": ['ansible-runner', 'worker', '--private-data-dir=/runner'], + "resources": {"requests": {"cpu": "250m", "memory": "100Mi"}}, } ], }, diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index feab6832fa..9c767ec1f6 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -71,7 +71,7 @@ IS_K8S = False AWX_CONTAINER_GROUP_K8S_API_TIMEOUT = 10 AWX_CONTAINER_GROUP_DEFAULT_NAMESPACE = os.getenv('MY_POD_NAMESPACE', 'default') # Timeout when waiting 
for pod to enter running state. If the pod is still in pending state , it will be terminated. Valid time units are "s", "m", "h". Example : "5m" , "10s". -AWX_CONTAINER_GROUP_POD_PENDING_TIMEOUT = "5m" +AWX_CONTAINER_GROUP_POD_PENDING_TIMEOUT = "2h" # Internationalization # https://docs.djangoproject.com/en/dev/topics/i18n/