implement a simple periodic pod reaper for container groups

see: https://github.com/ansible/awx/issues/4911
This commit is contained in:
Ryan Petrello
2019-10-17 16:51:30 -04:00
parent 0bcd1db239
commit 16812542f8
3 changed files with 51 additions and 1 deletions

View File

@@ -458,6 +458,25 @@ def cluster_node_heartbeat():
logger.exception('Error marking {} as lost'.format(other_inst.hostname))
@task(queue=get_local_queuename)
def awx_k8s_reaper():
    """Reap k8s pods that belong to jobs which are no longer active.

    Walks every instance group that has a credential and is containerized,
    takes the job ids reported by ``PodManager.list_active_jobs(group)``, and
    deletes the pod of each of those jobs whose status is not in
    ``ACTIVE_STATES``.

    The sweep is best-effort: a failure to list the pods of one group, or to
    delete the pod of one job, is logged and does not prevent the remaining
    groups and jobs from being processed.
    """
    from awx.main.scheduler.kubernetes import PodManager  # prevent circular import

    for group in InstanceGroup.objects.filter(credential__isnull=False).iterator():
        if not group.is_containerized:
            continue

        logger.debug("Checking for orphaned k8s pods for {}.".format(group))
        try:
            pod_job_ids = list(PodManager.list_active_jobs(group))
        except Exception:
            # Isolate listing failures per group, mirroring the per-job
            # isolation around delete() below; otherwise one failing group
            # would abort the sweep for every group that follows it.
            logger.exception("Failed to list k8s pods for {}".format(group))
            continue

        # Of the jobs that still have a pod, keep only those whose status has
        # already left ACTIVE_STATES.
        orphaned_jobs = UnifiedJob.objects.filter(
            pk__in=pod_job_ids
        ).exclude(status__in=ACTIVE_STATES)
        for job in orphaned_jobs:
            logger.debug('{} is no longer active, reaping orphaned k8s pod'.format(job.log_format))
            try:
                PodManager(job).delete()
            except Exception:
                logger.exception("Failed to delete orphaned pod {} from {}".format(
                    job.log_format, group
                ))
@task(queue=get_local_queuename)
def awx_isolated_heartbeat():
local_hostname = settings.CLUSTER_HOST_ID