From f98b92073da22d5897c019bcfe02036e74709e13 Mon Sep 17 00:00:00 2001 From: Seth Foster Date: Wed, 14 Apr 2021 14:46:51 -0400 Subject: [PATCH 1/9] Add cleanup_images system job template - Removes podman images on the system that are not assigned to an execution environment --- .../management/commands/cleanup_images.py | 79 +++++++++++++++++++ awx/main/migrations/0136_cleanup_ee_images.py | 32 ++++++++ 2 files changed, 111 insertions(+) create mode 100644 awx/main/management/commands/cleanup_images.py create mode 100644 awx/main/migrations/0136_cleanup_ee_images.py diff --git a/awx/main/management/commands/cleanup_images.py b/awx/main/management/commands/cleanup_images.py new file mode 100644 index 0000000000..9d61d353a3 --- /dev/null +++ b/awx/main/management/commands/cleanup_images.py @@ -0,0 +1,79 @@ +# Copyright (c) 2015 Ansible, Inc. +# All Rights Reserved. + +# Python +import subprocess +import logging +import json + +# Django +from django.core.management.base import BaseCommand, CommandError +from django.conf import settings + +# AWX +from awx.main.models import ExecutionEnvironment + + +class Command(BaseCommand): + """ + Management command to cleanup unused execution environment images. + """ + + help = 'Remove unused execution environment images' + + def init_logging(self): + log_levels = dict(enumerate([logging.ERROR, logging.INFO, logging.DEBUG, 0])) + self.logger = logging.getLogger('awx.main.commands.cleanup_sessions') + self.logger.setLevel(log_levels.get(self.verbosity, 0)) + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(message)s')) + self.logger.addHandler(handler) + self.logger.propagate = False + + def add_arguments(self, parser): + parser.add_argument('--dry-run', dest='dry_run', action='store_true', default=False, help='Dry run mode (show items that would ' 'be removed)') + + def delete_images(self, images_json): + for e in images_json: + if 'Names' in e: + image_names = e['Names'] + else: + image_names = [e["Id"]] + image_size = e['Size'] / 1e6 + for i in image_names: + if i not in self.images_in_use and i not in self.deleted: + self.deleted.append(i) + self.logger.info(f"{self.delete_prefix} {i}: {image_size:.0f} MB") + if not self.dry_run: + subprocess.run(['podman', 'rmi', i, '-f'], stdout=subprocess.DEVNULL) + + def cleanup_images(self): + self.images_in_use = [ee.image for ee in ExecutionEnvironment.objects.all()] + self.logger.info(f"Execution environment images in use: {self.images_in_use}") + self.deleted = [] + # find and remove unused images + images_system = subprocess.run("podman images -a --format json".split(" "), capture_output=True) + if len(images_system.stdout) > 0: + images_system = json.loads(images_system.stdout) + + self.delete_images(images_system) + # find and remove dangling images + images_system = subprocess.run('podman images -a --filter "dangling=true" --format json'.split(" "), capture_output=True) + if len(images_system.stdout) > 0: + images_system = json.loads(images_system.stdout) + self.delete_images(images_system) + if not self.deleted: + self.logger.info("Did not find images to remove") + + def handle(self, *args, **options): + self.verbosity = int(options.get('verbosity', 1)) + self.init_logging() + self.dry_run = bool(options.get('dry_run', False)) + if self.dry_run: + self.delete_prefix = "Would delete" + self.logger.info("Dry run enabled, images will not be deleted") + else: + self.delete_prefix = "Deleting" + if settings.IS_K8S: + raise CommandError("Cannot run cleanup tool on k8s installations") + self.cleanup_images() diff --git a/awx/main/migrations/0136_cleanup_ee_images.py b/awx/main/migrations/0136_cleanup_ee_images.py new file mode 100644 index 0000000000..86b81a0a35 --- /dev/null +++ b/awx/main/migrations/0136_cleanup_ee_images.py @@ -0,0 +1,32 @@ +# Generated by Django 2.2.16 on 2021-04-14 16:21 + +from django.db import migrations +from django.utils.timezone import now + + +def create_cleanup_ee_images(apps, schema_editor): + SystemJobTemplate = apps.get_model('main', 'SystemJobTemplate') + ContentType = apps.get_model('contenttypes', 'ContentType') + sjt_ct = ContentType.objects.get_for_model(SystemJobTemplate) + now_dt = now() + sjt, created = SystemJobTemplate.objects.get_or_create( + job_type='cleanup_images', + defaults=dict( + name='Cleanup Execution Environment Images', + description='Remove unused execution environment images', + created=now_dt, + modified=now_dt, + polymorphic_ctype=sjt_ct, + ), + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ('main', '0135_schedule_sort_fallback_to_id'), + ] + + operations = [ + migrations.RunPython(create_cleanup_ee_images), + ] From 1c888ca58b395472e637b4b14d8864481e61fd39 Mon Sep 17 00:00:00 2001 From: Seth Foster Date: Wed, 14 Apr 2021 15:07:18 -0400 Subject: [PATCH 2/9] cleanup stdout --- awx/main/management/commands/cleanup_images.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/awx/main/management/commands/cleanup_images.py b/awx/main/management/commands/cleanup_images.py index 9d61d353a3..153e2e0216 100644 --- a/awx/main/management/commands/cleanup_images.py +++ b/awx/main/management/commands/cleanup_images.py @@ -49,7 +49,10 @@ class Command(BaseCommand): def cleanup_images(self): self.images_in_use = [ee.image for ee in ExecutionEnvironment.objects.all()] - self.logger.info(f"Execution environment images in use: {self.images_in_use}") + if self.images_in_use: + self.logger.info("Execution environment images in use:") + for i in self.images_in_use: + self.logger.info(f"\t{i}") self.deleted = [] # find and remove unused images images_system = subprocess.run("podman images -a --format json".split(" "), capture_output=True) @@ -63,7 +66,7 @@ class Command(BaseCommand): images_system = json.loads(images_system.stdout) self.delete_images(images_system) if not self.deleted: - self.logger.info("Did not find images to remove") + self.logger.info("Did not find unused images to remove") def handle(self, *args, **options): self.verbosity = int(options.get('verbosity', 1)) From 33567f8729196ada46bf4c52ccd6d3d60e7ca6f6 Mon Sep 17 00:00:00 2001 From: Seth Foster Date: Wed, 14 Apr 2021 16:15:39 -0400 Subject: [PATCH 3/9] delete_prefix local --- awx/main/management/commands/cleanup_images.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/awx/main/management/commands/cleanup_images.py b/awx/main/management/commands/cleanup_images.py index 153e2e0216..107209d0cb 100644 --- a/awx/main/management/commands/cleanup_images.py +++ b/awx/main/management/commands/cleanup_images.py @@ -23,7 +23,7 @@ class Command(BaseCommand): def init_logging(self): log_levels = dict(enumerate([logging.ERROR, logging.INFO, logging.DEBUG, 0])) - self.logger = logging.getLogger('awx.main.commands.cleanup_sessions') + self.logger = logging.getLogger('awx.main.commands.cleanup_images') self.logger.setLevel(log_levels.get(self.verbosity, 0)) handler = logging.StreamHandler() handler.setFormatter(logging.Formatter('%(message)s')) @@ -34,6 +34,10 @@ class Command(BaseCommand): parser.add_argument('--dry-run', dest='dry_run', action='store_true', default=False, help='Dry run mode (show items that would ' 'be removed)') def delete_images(self, images_json): + if self.dry_run: + delete_prefix = "Would delete" + else: + delete_prefix = "Deleting" for e in images_json: if 'Names' in e: image_names = e['Names'] @@ -43,7 +47,7 @@ class Command(BaseCommand): for i in image_names: if i not in self.images_in_use and i not in self.deleted: self.deleted.append(i) - self.logger.info(f"{self.delete_prefix} {i}: {image_size:.0f} MB") + self.logger.info(f"{delete_prefix} {i}: {image_size:.0f} MB") if not self.dry_run: subprocess.run(['podman', 'rmi', i, '-f'], stdout=subprocess.DEVNULL) @@ -73,10 +77,7 @@ class Command(BaseCommand): self.init_logging() self.dry_run = bool(options.get('dry_run', False)) if self.dry_run: - self.delete_prefix = "Would delete" self.logger.info("Dry run enabled, images will not be deleted") - else: - self.delete_prefix = "Deleting" if settings.IS_K8S: raise CommandError("Cannot run cleanup tool on k8s installations") self.cleanup_images() From fa61ec6b3ca04d1c84e01736501b9033f7eb30b7 Mon Sep 17 00:00:00 2001 From: Seth Foster Date: Thu, 15 Apr 2021 13:47:37 -0400 Subject: [PATCH 4/9] Remove system job, replace with scheduled task --- .../management/commands/cleanup_images.py | 83 ------------------- awx/main/tasks.py | 19 +++++ awx/settings/defaults.py | 1 + 3 files changed, 20 insertions(+), 83 deletions(-) delete mode 100644 awx/main/management/commands/cleanup_images.py diff --git a/awx/main/management/commands/cleanup_images.py b/awx/main/management/commands/cleanup_images.py deleted file mode 100644 index 107209d0cb..0000000000 --- a/awx/main/management/commands/cleanup_images.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2015 Ansible, Inc. -# All Rights Reserved. - -# Python -import subprocess -import logging -import json - -# Django -from django.core.management.base import BaseCommand, CommandError -from django.conf import settings - -# AWX -from awx.main.models import ExecutionEnvironment - - -class Command(BaseCommand): - """ - Management command to cleanup unused execution environment images. - """ - - help = 'Remove unused execution environment images' - - def init_logging(self): - log_levels = dict(enumerate([logging.ERROR, logging.INFO, logging.DEBUG, 0])) - self.logger = logging.getLogger('awx.main.commands.cleanup_images') - self.logger.setLevel(log_levels.get(self.verbosity, 0)) - handler = logging.StreamHandler() - handler.setFormatter(logging.Formatter('%(message)s')) - self.logger.addHandler(handler) - self.logger.propagate = False - - def add_arguments(self, parser): - parser.add_argument('--dry-run', dest='dry_run', action='store_true', default=False, help='Dry run mode (show items that would ' 'be removed)') - - def delete_images(self, images_json): - if self.dry_run: - delete_prefix = "Would delete" - else: - delete_prefix = "Deleting" - for e in images_json: - if 'Names' in e: - image_names = e['Names'] - else: - image_names = [e["Id"]] - image_size = e['Size'] / 1e6 - for i in image_names: - if i not in self.images_in_use and i not in self.deleted: - self.deleted.append(i) - self.logger.info(f"{delete_prefix} {i}: {image_size:.0f} MB") - if not self.dry_run: - subprocess.run(['podman', 'rmi', i, '-f'], stdout=subprocess.DEVNULL) - - def cleanup_images(self): - self.images_in_use = [ee.image for ee in ExecutionEnvironment.objects.all()] - if self.images_in_use: - self.logger.info("Execution environment images in use:") - for i in self.images_in_use: - self.logger.info(f"\t{i}") - self.deleted = [] - # find and remove unused images - images_system = subprocess.run("podman images -a --format json".split(" "), capture_output=True) - if len(images_system.stdout) > 0: - images_system = json.loads(images_system.stdout) - - self.delete_images(images_system) - # find and remove dangling images - images_system = subprocess.run('podman images -a --filter "dangling=true" --format json'.split(" "), capture_output=True) - if len(images_system.stdout) > 0: - images_system = json.loads(images_system.stdout) - self.delete_images(images_system) - if not self.deleted: - self.logger.info("Did not find unused images to remove") - - def handle(self, *args, **options): - self.verbosity = int(options.get('verbosity', 1)) - self.init_logging() - self.dry_run = bool(options.get('dry_run', False)) - if self.dry_run: - self.logger.info("Dry run enabled, images will not be deleted") - if settings.IS_K8S: - raise CommandError("Cannot run cleanup tool on k8s installations") - self.cleanup_images() diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 7c0dd854d0..2b4961fcc2 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -27,6 +27,7 @@ import socket import threading import concurrent.futures from base64 import b64encode +import subprocess # Django from django.conf import settings @@ -59,6 +60,7 @@ from awx.main.constants import PRIVILEGE_ESCALATION_METHODS, STANDARD_INVENTORY_ from awx.main.access import access_registry from awx.main.redact import UriCleaner from awx.main.models import ( + ExecutionEnvironment, Schedule, TowerScheduleState, Instance, @@ -396,6 +398,23 @@ def purge_old_stdout_files(): logger.debug("Removing {}".format(os.path.join(settings.JOBOUTPUT_ROOT, f))) +@task(queue=get_local_queuename) +def cleanup_execution_environment_images(): + images_in_use = [ee.image for ee in ExecutionEnvironment.objects.all()] + images_system = subprocess.run("podman images -a --format json".split(" "), capture_output=True) + if len(images_system.stdout) > 0: + images_system = json.loads(images_system.stdout) + for e in images_system: + if 'Names' in e: + image_name = e['Names'][0] + else: + image_name = e["Id"] + image_size = e['Size'] / 1e6 + if image_name not in images_in_use: + logger.debug(f"Cleanup execution environment images: deleting {image_name}, {image_size:.0f} MB") + subprocess.run(['podman', 'rmi', image_name, '-f'], stdout=subprocess.DEVNULL) + + @task(queue=get_local_queuename) def cluster_node_heartbeat(): logger.debug("Cluster node heartbeat task.") diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index 39426fbc43..e1b497f1b0 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -439,6 +439,7 @@ CELERYBEAT_SCHEDULE = { 'task_manager': {'task': 'awx.main.scheduler.tasks.run_task_manager', 'schedule': timedelta(seconds=20), 'options': {'expires': 20}}, 'k8s_reaper': {'task': 'awx.main.tasks.awx_k8s_reaper', 'schedule': timedelta(seconds=60), 'options': {'expires': 50}}, 'send_subsystem_metrics': {'task': 'awx.main.analytics.analytics_tasks.send_subsystem_metrics', 'schedule': timedelta(seconds=20)}, + 'cleanup_images': {'task': 'awx.main.tasks.cleanup_execution_environment_images', 'schedule': timedelta(hours=8)}, # 'isolated_heartbeat': set up at the end of production.py and development.py } From 3912f2b57c6a8a4ccf18fc72b9b52830ffc65129 Mon Sep 17 00:00:00 2001 From: Seth Foster Date: Thu, 15 Apr 2021 13:52:37 -0400 Subject: [PATCH 5/9] remove migration file --- awx/main/migrations/0136_cleanup_ee_images.py | 32 ------------------- 1 file changed, 32 deletions(-) delete mode 100644 awx/main/migrations/0136_cleanup_ee_images.py diff --git a/awx/main/migrations/0136_cleanup_ee_images.py b/awx/main/migrations/0136_cleanup_ee_images.py deleted file mode 100644 index 86b81a0a35..0000000000 --- a/awx/main/migrations/0136_cleanup_ee_images.py +++ /dev/null @@ -1,32 +0,0 @@ -# Generated by Django 2.2.16 on 2021-04-14 16:21 - -from django.db import migrations -from django.utils.timezone import now - - -def create_cleanup_ee_images(apps, schema_editor): - SystemJobTemplate = apps.get_model('main', 'SystemJobTemplate') - ContentType = apps.get_model('contenttypes', 'ContentType') - sjt_ct = ContentType.objects.get_for_model(SystemJobTemplate) - now_dt = now() - sjt, created = SystemJobTemplate.objects.get_or_create( - job_type='cleanup_images', - defaults=dict( - name='Cleanup Execution Environment Images', - description='Remove unused execution environment images', - created=now_dt, - modified=now_dt, - polymorphic_ctype=sjt_ct, - ), - ) - - -class Migration(migrations.Migration): - - dependencies = [ - ('main', '0135_schedule_sort_fallback_to_id'), - ] - - operations = [ - migrations.RunPython(create_cleanup_ee_images), - ] From 4566e7a2a675305a90a685e802e1ce46833a8dc9 Mon Sep 17 00:00:00 2001 From: Seth Foster Date: Thu, 15 Apr 2021 14:25:40 -0400 Subject: [PATCH 6/9] check subprocess returncode --- awx/main/tasks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 2b4961fcc2..59e9d7059d 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -412,7 +412,9 @@ def cleanup_execution_environment_images(): image_size = e['Size'] / 1e6 if image_name not in images_in_use: logger.debug(f"Cleanup execution environment images: deleting {image_name}, {image_size:.0f} MB") - subprocess.run(['podman', 'rmi', image_name, '-f'], stdout=subprocess.DEVNULL) + process = subprocess.run(['podman', 'rmi', image_name, '-f'], stdout=subprocess.DEVNULL) + if process.returncode != 0: + logger.debug(f"Unsuccessful deletion of image {image_name}") @task(queue=get_local_queuename) From c64ec6bbf87c37be6b6ea423b20dd4c9685b3a61 Mon Sep 17 00:00:00 2001 From: Seth Foster Date: Thu, 15 Apr 2021 14:45:06 -0400 Subject: [PATCH 7/9] check if is_k8s --- awx/main/tasks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 59e9d7059d..331a00bad9 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -400,6 +400,8 @@ def purge_old_stdout_files(): @task(queue=get_local_queuename) def cleanup_execution_environment_images(): + if settings.IS_K8S: + return images_in_use = [ee.image for ee in ExecutionEnvironment.objects.all()] images_system = subprocess.run("podman images -a --format json".split(" "), capture_output=True) if len(images_system.stdout) > 0: @@ -414,7 +416,7 @@ def cleanup_execution_environment_images(): logger.debug(f"Cleanup execution environment images: deleting {image_name}, {image_size:.0f} MB") process = subprocess.run(['podman', 'rmi', image_name, '-f'], stdout=subprocess.DEVNULL) if process.returncode != 0: - logger.debug(f"Unsuccessful deletion of image {image_name}") + logger.debug(f"Unsuccessfully deleted image {image_name}") @task(queue=get_local_queuename) From 1e3cfdc986586bf18259db3f8aa9c8f01222f5f1 Mon Sep 17 00:00:00 2001 From: Seth Foster Date: Fri, 16 Apr 2021 14:56:59 -0400 Subject: [PATCH 8/9] just remove dangling images --- awx/main/tasks.py | 25 +++++++++++-------------- awx/settings/defaults.py | 2 +- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 331a00bad9..ab8cd293d4 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -402,21 +402,18 @@ def purge_old_stdout_files(): def cleanup_execution_environment_images(): if settings.IS_K8S: return - images_in_use = [ee.image for ee in ExecutionEnvironment.objects.all()] - images_system = subprocess.run("podman images -a --format json".split(" "), capture_output=True) - if len(images_system.stdout) > 0: - images_system = json.loads(images_system.stdout) + process = subprocess.run('podman images --filter="dangling=true" --format json'.split(" "), capture_output=True) + if process.returncode != 0: + logger.debug("Cleanup execution environment images: could not get list of images") + return + if len(process.stdout) > 0: + images_system = json.loads(process.stdout) for e in images_system: - if 'Names' in e: - image_name = e['Names'][0] - else: - image_name = e["Id"] - image_size = e['Size'] / 1e6 - if image_name not in images_in_use: - logger.debug(f"Cleanup execution environment images: deleting {image_name}, {image_size:.0f} MB") - process = subprocess.run(['podman', 'rmi', image_name, '-f'], stdout=subprocess.DEVNULL) - if process.returncode != 0: - logger.debug(f"Unsuccessfully deleted image {image_name}") + image_name = e["Id"] + logger.debug(f"Cleanup execution environment images: deleting {image_name}") + process = subprocess.run(['podman', 'rmi', image_name, '-f'], stdout=subprocess.DEVNULL) + if process.returncode != 0: + logger.debug(f"Failed to delete image {image_name}") @task(queue=get_local_queuename) diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index e1b497f1b0..f750d336b1 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -439,7 +439,7 @@ CELERYBEAT_SCHEDULE = { 'task_manager': {'task': 'awx.main.scheduler.tasks.run_task_manager', 'schedule': timedelta(seconds=20), 'options': {'expires': 20}}, 'k8s_reaper': {'task': 'awx.main.tasks.awx_k8s_reaper', 'schedule': timedelta(seconds=60), 'options': {'expires': 50}}, 'send_subsystem_metrics': {'task': 'awx.main.analytics.analytics_tasks.send_subsystem_metrics', 'schedule': timedelta(seconds=20)}, - 'cleanup_images': {'task': 'awx.main.tasks.cleanup_execution_environment_images', 'schedule': timedelta(hours=8)}, + 'cleanup_images': {'task': 'awx.main.tasks.cleanup_execution_environment_images', 'schedule': timedelta(hours=3)}, # 'isolated_heartbeat': set up at the end of production.py and development.py } From 8ab81216f3d450b263bf6506e40c363bfdfaed6f Mon Sep 17 00:00:00 2001 From: Seth Foster Date: Fri, 16 Apr 2021 15:03:34 -0400 Subject: [PATCH 9/9] unused import --- awx/main/tasks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/awx/main/tasks.py b/awx/main/tasks.py index ab8cd293d4..411e4b27b6 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -60,7 +60,6 @@ from awx.main.constants import PRIVILEGE_ESCALATION_METHODS, STANDARD_INVENTORY_ from awx.main.access import access_registry from awx.main.redact import UriCleaner from awx.main.models import ( - ExecutionEnvironment, Schedule, TowerScheduleState, Instance,