mirror of
https://github.com/ansible/awx.git
synced 2026-05-08 18:07:36 -02:30
Update pod reaper to work with receptor launched pods
This commit is contained in:
@@ -8,6 +8,7 @@ from kubernetes import client, config
|
||||
from django.utils.functional import cached_property
|
||||
|
||||
from awx.main.utils.common import parse_yaml_or_json
|
||||
from awx.main.utils.execution_environments import get_default_pod_spec
|
||||
|
||||
logger = logging.getLogger('awx.main.scheduler')
|
||||
|
||||
@@ -33,46 +34,23 @@ class PodManager(object):
|
||||
def __init__(self, task=None):
|
||||
self.task = task
|
||||
|
||||
def deploy(self):
|
||||
if not self.credential.kubernetes:
|
||||
raise RuntimeError('Pod deployment cannot occur without a Kubernetes credential')
|
||||
|
||||
self.kube_api.create_namespaced_pod(body=self.pod_definition, namespace=self.namespace, _request_timeout=settings.AWX_CONTAINER_GROUP_K8S_API_TIMEOUT)
|
||||
|
||||
num_retries = settings.AWX_CONTAINER_GROUP_POD_LAUNCH_RETRIES
|
||||
for retry_attempt in range(num_retries - 1):
|
||||
logger.debug(f"Checking for pod {self.pod_name}. Attempt {retry_attempt + 1} of {num_retries}")
|
||||
pod = self.kube_api.read_namespaced_pod(name=self.pod_name, namespace=self.namespace, _request_timeout=settings.AWX_CONTAINER_GROUP_K8S_API_TIMEOUT)
|
||||
if pod.status.phase != 'Pending':
|
||||
break
|
||||
else:
|
||||
logger.debug(f"Pod {self.pod_name} is Pending.")
|
||||
time.sleep(settings.AWX_CONTAINER_GROUP_POD_LAUNCH_RETRY_DELAY)
|
||||
continue
|
||||
|
||||
if pod.status.phase == 'Running':
|
||||
logger.debug(f"Pod {self.pod_name} is online.")
|
||||
return pod
|
||||
else:
|
||||
logger.warn(f"Pod {self.pod_name} did not start. Status is {pod.status.phase}.")
|
||||
|
||||
@classmethod
|
||||
def list_active_jobs(self, instance_group):
|
||||
task = collections.namedtuple('Task', 'id instance_group')(id='', instance_group=instance_group)
|
||||
pm = PodManager(task)
|
||||
pods = {}
|
||||
try:
|
||||
for pod in pm.kube_api.list_namespaced_pod(pm.namespace, label_selector='ansible-awx={}'.format(settings.INSTALL_UUID)).to_dict().get('items', []):
|
||||
job = pod['metadata'].get('labels', {}).get('ansible-awx-job-id')
|
||||
if job:
|
||||
try:
|
||||
yield int(job)
|
||||
pods[int(job)] = pod['metadata']['name']
|
||||
except ValueError:
|
||||
pass
|
||||
except Exception:
|
||||
logger.exception('Failed to list pods for container group {}'.format(instance_group))
|
||||
|
||||
def delete(self):
|
||||
return self.kube_api.delete_namespaced_pod(name=self.pod_name, namespace=self.namespace, _request_timeout=settings.AWX_CONTAINER_GROUP_K8S_API_TIMEOUT)
|
||||
return pods
|
||||
|
||||
@property
|
||||
def namespace(self):
|
||||
@@ -91,10 +69,16 @@ class PodManager(object):
|
||||
# this feels a little janky, but it's what k8s' own code does
|
||||
# internally when it reads kube config files from disk:
|
||||
# https://github.com/kubernetes-client/python-base/blob/0b208334ef0247aad9afcaae8003954423b61a0d/config/kube_config.py#L643
|
||||
loader = config.kube_config.KubeConfigLoader(config_dict=self.kube_config)
|
||||
cfg = type.__call__(client.Configuration)
|
||||
loader.load_and_set(cfg)
|
||||
return client.CoreV1Api(api_client=client.ApiClient(configuration=cfg))
|
||||
if self.credential:
|
||||
loader = config.kube_config.KubeConfigLoader(config_dict=self.kube_config)
|
||||
cfg = type.__call__(client.Configuration)
|
||||
loader.load_and_set(cfg)
|
||||
api = client.CoreV1Api(api_client=client.ApiClient(configuration=cfg))
|
||||
else:
|
||||
config.load_incluster_config()
|
||||
api = client.CoreV1Api()
|
||||
|
||||
return api
|
||||
|
||||
@property
|
||||
def pod_name(self):
|
||||
@@ -102,22 +86,7 @@ class PodManager(object):
|
||||
|
||||
@property
|
||||
def pod_definition(self):
|
||||
default_pod_spec = {
|
||||
"apiVersion": "v1",
|
||||
"kind": "Pod",
|
||||
"metadata": {"namespace": settings.AWX_CONTAINER_GROUP_DEFAULT_NAMESPACE},
|
||||
"spec": {
|
||||
"containers": [
|
||||
{
|
||||
"image": settings.AWX_CONTAINER_GROUP_DEFAULT_IMAGE,
|
||||
"tty": True,
|
||||
"stdin": True,
|
||||
"imagePullPolicy": "Always",
|
||||
"args": ['sleep', 'infinity'],
|
||||
}
|
||||
]
|
||||
},
|
||||
}
|
||||
default_pod_spec = get_default_pod_spec()
|
||||
|
||||
pod_spec_override = {}
|
||||
if self.task and self.task.instance_group.pod_spec_override:
|
||||
|
||||
Reference in New Issue
Block a user