From 56878b49106e778c3371dce9448c31b787c75801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gon=C3=A7alves?= Date: Wed, 11 Oct 2023 22:09:16 +0200 Subject: [PATCH] Add customizable batch_size for cleanup_activitystream and cleanup_jobs (#14412) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Daniel Gonçalves --- awx/main/management/commands/cleanup_activitystream.py | 6 +++++- awx/main/management/commands/cleanup_jobs.py | 10 ++++++---- awx/main/tasks/jobs.py | 2 ++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/awx/main/management/commands/cleanup_activitystream.py b/awx/main/management/commands/cleanup_activitystream.py index b73bb409c3..99042ffb22 100644 --- a/awx/main/management/commands/cleanup_activitystream.py +++ b/awx/main/management/commands/cleanup_activitystream.py @@ -24,6 +24,9 @@ class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument('--days', dest='days', type=int, default=90, metavar='N', help='Remove activity stream events more than N days old') parser.add_argument('--dry-run', dest='dry_run', action='store_true', default=False, help='Dry run mode (show items that would be removed)') + parser.add_argument( + '--batch-size', dest='batch_size', type=int, default=500, metavar='X', help='Remove activity stream events in batch of X events. Defaults to 500.' + ) def init_logging(self): log_levels = dict(enumerate([logging.ERROR, logging.INFO, logging.DEBUG, 0])) @@ -48,7 +51,7 @@ class Command(BaseCommand): else: pks_to_delete.add(asobj.pk) # Cleanup objects in batches instead of deleting each one individually. - if len(pks_to_delete) >= 500: + if len(pks_to_delete) >= self.batch_size: ActivityStream.objects.filter(pk__in=pks_to_delete).delete() n_deleted_items += len(pks_to_delete) pks_to_delete.clear() @@ -63,4 +66,5 @@ class Command(BaseCommand): self.days = int(options.get('days', 30)) self.cutoff = now() - datetime.timedelta(days=self.days) self.dry_run = bool(options.get('dry_run', False)) + self.batch_size = int(options.get('batch_size', 500)) self.cleanup_activitystream() diff --git a/awx/main/management/commands/cleanup_jobs.py b/awx/main/management/commands/cleanup_jobs.py index 321d552503..455e72ad90 100644 --- a/awx/main/management/commands/cleanup_jobs.py +++ b/awx/main/management/commands/cleanup_jobs.py @@ -150,6 +150,9 @@ class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument('--days', dest='days', type=int, default=90, metavar='N', help='Remove jobs/updates executed more than N days ago. Defaults to 90.') parser.add_argument('--dry-run', dest='dry_run', action='store_true', default=False, help='Dry run mode (show items that would be removed)') + parser.add_argument( + '--batch-size', dest='batch_size', type=int, default=100000, metavar='X', help='Remove jobs in batch of X jobs. Defaults to 100000.' + ) parser.add_argument('--jobs', dest='only_jobs', action='store_true', default=False, help='Remove jobs') parser.add_argument('--ad-hoc-commands', dest='only_ad_hoc_commands', action='store_true', default=False, help='Remove ad hoc commands') parser.add_argument('--project-updates', dest='only_project_updates', action='store_true', default=False, help='Remove project updates') @@ -226,8 +229,6 @@ class Command(BaseCommand): cursor.execute(f'DROP TABLE _unpartitioned_{tblname}') def cleanup_jobs(self): - batch_size = 100000 - # Hack to avoid doing N+1 queries as each item in the Job query set does # an individual query to get the underlying UnifiedJob. Job.polymorphic_super_sub_accessors_replaced = True @@ -242,8 +243,8 @@ class Command(BaseCommand): deleted = 0 info = qs.aggregate(min=Min('id'), max=Max('id')) if info['min'] is not None: - for start in range(info['min'], info['max'] + 1, batch_size): - qs_batch = qs.filter(id__gte=start, id__lte=start + batch_size) + for start in range(info['min'], info['max'] + 1, self.batch_size): + qs_batch = qs.filter(id__gte=start, id__lte=start + self.batch_size) pk_list = qs_batch.values_list('id', flat=True) _, results = qs_batch.delete() @@ -402,6 +403,7 @@ class Command(BaseCommand): self.init_logging() self.days = int(options.get('days', 90)) self.dry_run = bool(options.get('dry_run', False)) + self.batch_size = int(options.get('batch_size', 100000)) try: self.cutoff = now() - datetime.timedelta(days=self.days) except OverflowError: diff --git a/awx/main/tasks/jobs.py b/awx/main/tasks/jobs.py index 6e57dfd8e4..31c95fd102 100644 --- a/awx/main/tasks/jobs.py +++ b/awx/main/tasks/jobs.py @@ -1873,6 +1873,8 @@ class RunSystemJob(BaseTask): if system_job.job_type in ('cleanup_jobs', 'cleanup_activitystream'): if 'days' in json_vars: args.extend(['--days', str(json_vars.get('days', 60))]) + if 'batch_size' in json_vars: + args.extend(['--batch-size', str(json_vars['batch_size'])]) if 'dry_run' in json_vars and json_vars['dry_run']: args.extend(['--dry-run']) if system_job.job_type == 'cleanup_jobs':