Files
awx/awx/main/models/schedules.py
John Westcott IV c67f50831b Modifying schedules API to allow for rrulesets #5733 (#12043)
* Added schedule_rruleset lookup plugin for awx.awx
* Added DB migration for rrule size
* Updated schedule docs
* The schedule API endpoint will now return an array of errors on rule validation to try and inform the user of all errors instead of just the first
2022-04-28 15:38:20 -04:00

306 lines
14 KiB
Python

# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved.
import datetime
import logging
import re
import dateutil.rrule
import dateutil.parser
from dateutil.tz import datetime_exists, tzutc
from dateutil.zoneinfo import get_zonefile_instance
# Django
from django.db import models
from django.db.models.query import QuerySet
from django.utils.timezone import now, make_aware
from django.utils.translation import gettext_lazy as _
# AWX
from awx.api.versioning import reverse
from awx.main.models.base import PrimordialModel
from awx.main.models.jobs import LaunchTimeConfig
from awx.main.utils import ignore_inventory_computed_fields
from awx.main.consumers import emit_channel_notification
import pytz
logger = logging.getLogger('awx.main.models.schedule')
__all__ = ['Schedule']
UTC_TIMEZONES = {x: tzutc() for x in dateutil.parser.parserinfo().UTCZONE}
class ScheduleFilterMethods(object):
def enabled(self, enabled=True):
return self.filter(enabled=enabled)
def before(self, dt):
return self.filter(next_run__lt=dt)
def after(self, dt):
return self.filter(next_run__gt=dt)
def between(self, begin, end):
return self.after(begin).before(end)
class ScheduleQuerySet(ScheduleFilterMethods, QuerySet):
pass
class ScheduleManager(ScheduleFilterMethods, models.Manager):
use_for_related_objects = True
def get_queryset(self):
return ScheduleQuerySet(self.model, using=self._db)
class Schedule(PrimordialModel, LaunchTimeConfig):
class Meta:
app_label = 'main'
ordering = [models.F('next_run').desc(nulls_last=True), 'id']
unique_together = ('unified_job_template', 'name')
objects = ScheduleManager()
unified_job_template = models.ForeignKey(
'UnifiedJobTemplate',
related_name='schedules',
on_delete=models.CASCADE,
)
name = models.CharField(
max_length=512,
)
enabled = models.BooleanField(default=True, help_text=_("Enables processing of this schedule."))
dtstart = models.DateTimeField(null=True, default=None, editable=False, help_text=_("The first occurrence of the schedule occurs on or after this time."))
dtend = models.DateTimeField(
null=True, default=None, editable=False, help_text=_("The last occurrence of the schedule occurs before this time, aftewards the schedule expires.")
)
rrule = models.TextField(help_text=_("A value representing the schedules iCal recurrence rule."))
next_run = models.DateTimeField(null=True, default=None, editable=False, help_text=_("The next time that the scheduled action will run."))
@classmethod
def get_zoneinfo(self):
return sorted(get_zonefile_instance().zones)
@property
def timezone(self):
utc = tzutc()
# All rules in a ruleset will have the same dtstart so we can just take the first rule
tzinfo = Schedule.rrulestr(self.rrule)._rrule[0]._dtstart.tzinfo
if tzinfo is utc:
return 'UTC'
all_zones = Schedule.get_zoneinfo()
all_zones.sort(key=lambda x: -len(x))
fname = getattr(tzinfo, '_filename', None)
if fname:
for zone in all_zones:
if fname.endswith(zone):
return zone
logger.warning('Could not detect valid zoneinfo for {}'.format(self.rrule))
return ''
@property
# TODO: How would we handle multiple until parameters? The UI is currently using this on the edit screen of a schedule
def until(self):
# The UNTIL= datestamp (if any) coerced from UTC to the local naive time
# of the DTSTART
for r in Schedule.rrulestr(self.rrule)._rrule:
if r._until:
local_until = r._until.astimezone(r._dtstart.tzinfo)
naive_until = local_until.replace(tzinfo=None)
return naive_until.isoformat()
return ''
@classmethod
def coerce_naive_until(cls, rrule):
#
# RFC5545 specifies that the UNTIL rule part MUST ALWAYS be a date
# with UTC time. This is extra work for API implementers because
# it requires them to perform DTSTART local -> UTC datetime coercion on
# POST and UTC -> DTSTART local coercion on GET.
#
# This block of code is a departure from the RFC. If you send an
# rrule like this to the API (without a Z on the UNTIL):
#
# DTSTART;TZID=America/New_York:20180502T150000 RRULE:FREQ=HOURLY;INTERVAL=1;UNTIL=20180502T180000
#
# ...we'll assume that the naive UNTIL is intended to match the DTSTART
# timezone (America/New_York), and so we'll coerce to UTC _for you_
# automatically.
#
# Find the DTSTART rule or raise an error, its usually the first rule but that is not strictly enforced
start_date_rule = re.sub('^.*(DTSTART[^\s]+)\s.*$', r'\1', rrule)
if not start_date_rule:
raise ValueError('A DTSTART field needs to be in the rrule')
rules = re.split(r'\s+', rrule)
for index in range(0, len(rules)):
rule = rules[index]
if 'until=' in rule.lower():
# if DTSTART;TZID= is used, coerce "naive" UNTIL values
# to the proper UTC date
match_until = re.match(r".*?(?P<until>UNTIL\=[0-9]+T[0-9]+)(?P<utcflag>Z?)", rule)
if not len(match_until.group('utcflag')):
# rule = DTSTART;TZID=America/New_York:20200601T120000 RRULE:...;UNTIL=20200601T170000
# Find the UNTIL=N part of the string
# naive_until = UNTIL=20200601T170000
naive_until = match_until.group('until')
# What is the DTSTART timezone for:
# DTSTART;TZID=America/New_York:20200601T120000 RRULE:...;UNTIL=20200601T170000Z
# local_tz = tzfile('/usr/share/zoneinfo/America/New_York')
# We are going to construct a 'dummy' rule for parsing which will include the DTSTART and the rest of the rule
temp_rule = "{} {}".format(start_date_rule, rule.replace(naive_until, naive_until + 'Z'))
# If the rule is an EX rule we have to add an RRULE to it because an EX rule alone will not manifest into a ruleset
if rule.lower().startswith('ex'):
temp_rule = "{} {}".format(temp_rule, 'RRULE:FREQ=MINUTELY;INTERVAL=1;UNTIL=20380601T170000Z')
local_tz = dateutil.rrule.rrulestr(temp_rule, tzinfos=UTC_TIMEZONES, **{'forceset': True})._rrule[0]._dtstart.tzinfo
# Make a datetime object with tzinfo=<the DTSTART timezone>
# localized_until = datetime.datetime(2020, 6, 1, 17, 0, tzinfo=tzfile('/usr/share/zoneinfo/America/New_York'))
localized_until = make_aware(datetime.datetime.strptime(re.sub('^UNTIL=', '', naive_until), "%Y%m%dT%H%M%S"), local_tz)
# Coerce the datetime to UTC and format it as a string w/ Zulu format
# utc_until = UNTIL=20200601T220000Z
utc_until = 'UNTIL=' + localized_until.astimezone(pytz.utc).strftime('%Y%m%dT%H%M%SZ')
# rule was: DTSTART;TZID=America/New_York:20200601T120000 RRULE:...;UNTIL=20200601T170000
# rule is now: DTSTART;TZID=America/New_York:20200601T120000 RRULE:...;UNTIL=20200601T220000Z
rules[index] = rule.replace(naive_until, utc_until)
return " ".join(rules)
@classmethod
def rrulestr(cls, rrule, fast_forward=True, **kwargs):
"""
Apply our own custom rrule parsing requirements
"""
rrule = Schedule.coerce_naive_until(rrule)
kwargs['forceset'] = True
x = dateutil.rrule.rrulestr(rrule, tzinfos=UTC_TIMEZONES, **kwargs)
for r in x._rrule:
if r._dtstart and r._dtstart.tzinfo is None:
raise ValueError('A valid TZID must be provided (e.g., America/New_York)')
# Fast forward is a way for us to limit the number of events in the rruleset
# If we are fastforwading and we don't have a count limited rule that is minutely or hourley
# We will modify the start date of the rule to last week to prevent a large number of entries
if fast_forward:
try:
# All rules in a ruleset will have the same dtstart value
# so lets compare the first event to now to see if its > 7 days old
first_event = x[0]
if (now() - first_event).days > 7:
for rule in x._rrule:
# If any rule has a minutely or hourly rule without a count...
if rule._freq in [dateutil.rrule.MINUTELY, dateutil.rrule.HOURLY] and not rule._count:
# hourly/minutely rrules with far-past DTSTART values
# are *really* slow to precompute
# start *from* one week ago to speed things up drastically
new_start = (now() - datetime.timedelta(days=7)).strftime('%Y%m%d')
# Now we want to repalce the DTSTART:<value>T with the new date (which includes the T)
new_rrule = re.sub('(DTSTART[^:]*):[^T]+T', r'\1:{0}T'.format(new_start), rrule)
return Schedule.rrulestr(new_rrule, fast_forward=False)
except IndexError:
pass
return x
def __str__(self):
return u'%s_t%s_%s_%s' % (self.name, self.unified_job_template.id, self.id, self.next_run)
def get_absolute_url(self, request=None):
return reverse('api:schedule_detail', kwargs={'pk': self.pk}, request=request)
def get_job_kwargs(self):
config_data = self.prompts_dict()
job_kwargs, rejected, errors = self.unified_job_template._accept_or_ignore_job_kwargs(**config_data)
if errors:
logger.info('Errors creating scheduled job: {}'.format(errors))
job_kwargs['_eager_fields'] = {'launch_type': 'scheduled', 'schedule': self}
return job_kwargs
def get_end_date(ruleset):
# if we have a complex ruleset with a lot of options getting the last index of the ruleset can take some time
# And a ruleset without a count/until can come back as datetime.datetime(9999, 12, 31, 15, 0, tzinfo=tzfile('US/Eastern'))
# So we are going to do a quick scan to make sure we would have an end date
for a_rule in ruleset._rrule:
# if this rule does not have until or count in it then we have no end date
if not a_rule._until and not a_rule._count:
return None
# If we made it this far we should have an end date and can ask the ruleset what the last date is
# However, if the until/count is before dtstart we will get an IndexError when trying to get [-1]
try:
return ruleset[-1].astimezone(pytz.utc)
except IndexError:
return None
def update_computed_fields_no_save(self):
affects_fields = ['next_run', 'dtstart', 'dtend']
starting_values = {}
for field_name in affects_fields:
starting_values[field_name] = getattr(self, field_name)
future_rs = Schedule.rrulestr(self.rrule)
if self.enabled:
next_run_actual = future_rs.after(now())
if next_run_actual is not None:
if not datetime_exists(next_run_actual):
# skip imaginary dates, like 2:30 on DST boundaries
next_run_actual = future_rs.after(next_run_actual)
next_run_actual = next_run_actual.astimezone(pytz.utc)
else:
next_run_actual = None
self.next_run = next_run_actual
try:
self.dtstart = future_rs[0].astimezone(pytz.utc)
except IndexError:
self.dtstart = None
self.dtend = Schedule.get_end_date(future_rs)
changed = any(getattr(self, field_name) != starting_values[field_name] for field_name in affects_fields)
return changed
def update_computed_fields(self):
changed = self.update_computed_fields_no_save()
if not changed:
return
emit_channel_notification('schedules-changed', dict(id=self.id, group_name='schedules'))
# Must save self here before calling unified_job_template computed fields
# in order for that method to be correct
# by adding modified to update fields, we avoid updating modified time
super(Schedule, self).save(update_fields=['next_run', 'dtstart', 'dtend', 'modified'])
with ignore_inventory_computed_fields():
self.unified_job_template.update_computed_fields()
def save(self, *args, **kwargs):
self.rrule = Schedule.coerce_naive_until(self.rrule)
changed = self.update_computed_fields_no_save()
if changed and 'update_fields' in kwargs:
for field_name in ['next_run', 'dtstart', 'dtend']:
if field_name not in kwargs['update_fields']:
kwargs['update_fields'].append(field_name)
super(Schedule, self).save(*args, **kwargs)
if changed:
with ignore_inventory_computed_fields():
self.unified_job_template.update_computed_fields()
def delete(self, *args, **kwargs):
ujt = self.unified_job_template
r = super(Schedule, self).delete(*args, **kwargs)
if ujt:
with ignore_inventory_computed_fields():
ujt.update_computed_fields()
return r