Update pod reaper to work with receptor launched pods

This commit is contained in:
Shane McDonald
2021-04-05 17:45:15 -04:00
parent 6294ddfded
commit 2d48b24ef2
6 changed files with 52 additions and 76 deletions

View File

@@ -8,6 +8,7 @@ from kubernetes import client, config
from django.utils.functional import cached_property
from awx.main.utils.common import parse_yaml_or_json
from awx.main.utils.execution_environments import get_default_pod_spec
logger = logging.getLogger('awx.main.scheduler')
@@ -33,46 +34,23 @@ class PodManager(object):
def __init__(self, task=None):
self.task = task
def deploy(self):
if not self.credential.kubernetes:
raise RuntimeError('Pod deployment cannot occur without a Kubernetes credential')
self.kube_api.create_namespaced_pod(body=self.pod_definition, namespace=self.namespace, _request_timeout=settings.AWX_CONTAINER_GROUP_K8S_API_TIMEOUT)
num_retries = settings.AWX_CONTAINER_GROUP_POD_LAUNCH_RETRIES
for retry_attempt in range(num_retries - 1):
logger.debug(f"Checking for pod {self.pod_name}. Attempt {retry_attempt + 1} of {num_retries}")
pod = self.kube_api.read_namespaced_pod(name=self.pod_name, namespace=self.namespace, _request_timeout=settings.AWX_CONTAINER_GROUP_K8S_API_TIMEOUT)
if pod.status.phase != 'Pending':
break
else:
logger.debug(f"Pod {self.pod_name} is Pending.")
time.sleep(settings.AWX_CONTAINER_GROUP_POD_LAUNCH_RETRY_DELAY)
continue
if pod.status.phase == 'Running':
logger.debug(f"Pod {self.pod_name} is online.")
return pod
else:
logger.warn(f"Pod {self.pod_name} did not start. Status is {pod.status.phase}.")
@classmethod
def list_active_jobs(self, instance_group):
task = collections.namedtuple('Task', 'id instance_group')(id='', instance_group=instance_group)
pm = PodManager(task)
pods = {}
try:
for pod in pm.kube_api.list_namespaced_pod(pm.namespace, label_selector='ansible-awx={}'.format(settings.INSTALL_UUID)).to_dict().get('items', []):
job = pod['metadata'].get('labels', {}).get('ansible-awx-job-id')
if job:
try:
yield int(job)
pods[int(job)] = pod['metadata']['name']
except ValueError:
pass
except Exception:
logger.exception('Failed to list pods for container group {}'.format(instance_group))
def delete(self):
return self.kube_api.delete_namespaced_pod(name=self.pod_name, namespace=self.namespace, _request_timeout=settings.AWX_CONTAINER_GROUP_K8S_API_TIMEOUT)
return pods
@property
def namespace(self):
@@ -91,10 +69,16 @@ class PodManager(object):
# this feels a little janky, but it's what k8s' own code does
# internally when it reads kube config files from disk:
# https://github.com/kubernetes-client/python-base/blob/0b208334ef0247aad9afcaae8003954423b61a0d/config/kube_config.py#L643
loader = config.kube_config.KubeConfigLoader(config_dict=self.kube_config)
cfg = type.__call__(client.Configuration)
loader.load_and_set(cfg)
return client.CoreV1Api(api_client=client.ApiClient(configuration=cfg))
if self.credential:
loader = config.kube_config.KubeConfigLoader(config_dict=self.kube_config)
cfg = type.__call__(client.Configuration)
loader.load_and_set(cfg)
api = client.CoreV1Api(api_client=client.ApiClient(configuration=cfg))
else:
config.load_incluster_config()
api = client.CoreV1Api()
return api
@property
def pod_name(self):
@@ -102,22 +86,7 @@ class PodManager(object):
@property
def pod_definition(self):
default_pod_spec = {
"apiVersion": "v1",
"kind": "Pod",
"metadata": {"namespace": settings.AWX_CONTAINER_GROUP_DEFAULT_NAMESPACE},
"spec": {
"containers": [
{
"image": settings.AWX_CONTAINER_GROUP_DEFAULT_IMAGE,
"tty": True,
"stdin": True,
"imagePullPolicy": "Always",
"args": ['sleep', 'infinity'],
}
]
},
}
default_pod_spec = get_default_pod_spec()
pod_spec_override = {}
if self.task and self.task.instance_group.pod_spec_override: