Improve speed of cleanup_deleted system job. Fixes https://trello.com/c/2wB7DMdt

This commit is contained in:
Chris Church 2015-05-11 21:51:42 -04:00
parent 79c00dc511
commit 1a5823d0b3
2 changed files with 56 additions and 3 deletions

View File

@ -40,6 +40,29 @@ class Command(BaseCommand):
yield submodel
def cleanup_model(self, model):
n_deleted_items = 0
pks_to_delete = set()
for asobj in ActivityStream.objects.iterator():
asobj_disp = '"%s" id: %s' % (unicode(asobj), asobj.id)
if asobj.timestamp >= self.cutoff:
if self.dry_run:
self.logger.info("would skip %s" % asobj_disp)
else:
if self.dry_run:
self.logger.info("would delete %s" % asobj_disp)
else:
pks_to_delete.add(asobj.pk)
# Cleanup objects in batches instead of deleting each one individually.
if len(pks_to_delete) >= 500:
ActivityStream.objects.filter(pk__in=pks_to_delete).delete()
n_deleted_items += len(pks_to_delete)
pks_to_delete.clear()
if len(pks_to_delete):
ActivityStream.objects.filter(pk__in=pks_to_delete).delete()
n_deleted_items += len(pks_to_delete)
print("Removed %s items" % str(n_deleted_items))
name_field = None
active_field = None
n_deleted_items = 0
@ -63,7 +86,8 @@ class Command(BaseCommand):
'%s__startswith' % name_field: name_prefix,
})
self.logger.debug('cleaning up model %s', model)
for instance in qs:
pks_to_delete = set()
for instance in qs.iterator():
dt = parse_datetime(getattr(instance, name_field).split('_')[2])
if not is_aware(dt):
dt = make_aware(dt, self.cutoff.tzinfo)
@ -76,10 +100,17 @@ class Command(BaseCommand):
else:
action_text = 'would delete' if self.dry_run else 'deleting'
self.logger.info('%s %s', action_text, instance)
n_deleted_items += 1
if not self.dry_run:
instance.delete()
pks_to_delete.add(instance.pk)
# Cleanup objects in batches instead of deleting each one individually.
if len(pks_to_delete) >= 500:
model.objects.filter(pk__in=pks_to_delete).delete()
n_deleted_items += len(pks_to_delete)
pks_to_delete.clear()
if len(pks_to_delete):
model.objects.filter(pk__in=pks_to_delete).delete()
n_deleted_items += len(pks_to_delete)
return n_deleted_items
def init_logging(self):

View File

@ -252,6 +252,28 @@ class CleanupDeletedTest(BaseCommandMixin, BaseTest):
self.assertNotEqual(counts_before, counts_after)
self.assertFalse(sum(x[1] for x in counts_after.values()))
# Create lots of hosts already marked as deleted.
t = time.time()
dtnow = now()
for x in xrange(1000):
hostname = "_deleted_%s_host-%d" % (dtnow.isoformat(), x)
host = self.inventories[0].hosts.create(name=hostname, active=False)
create_elapsed = time.time() - t
# Time how long it takes to cleanup deleted items, should be no more
# than the time taken to create them.
counts_before = self.get_model_counts()
self.assertTrue(sum(x[1] for x in counts_before.values()))
t = time.time()
result, stdout, stderr = self.run_command('cleanup_deleted', days=0)
cleanup_elapsed = time.time() - t
self.assertEqual(result, None)
counts_after = self.get_model_counts()
self.assertNotEqual(counts_before, counts_after)
self.assertFalse(sum(x[1] for x in counts_after.values()))
self.assertTrue(cleanup_elapsed < create_elapsed,
'create took %0.3fs, cleanup took %0.3fs, expected < %0.3fs' % (create_elapsed, cleanup_elapsed, create_elapsed))
def get_user_counts(self):
active = User.objects.filter(is_active=True).count()
inactive = User.objects.filter(is_active=False).count()