Make rrule fast forwarding stable (#15601)

By stable, we mean future occurrences of the rrule
should be the same before and after the fast forward
operation.

The problem before was that we were fast forwarding to
7 days ago. For some rrules, this does not retain the old
occurrences. Thus, jobs would launch at unexpected times.

This change makes sure we fast forward in increments of
the rrule INTERVAL, thus the new dtstart should be in the
occurrence list of the old rrule.

Additionally, code is updated to fast forward
EXRULE (exclusion rules) in addition to RRULE

---------

Signed-off-by: Seth Foster <fosterbseth@gmail.com>
Co-authored-by: 🇺🇦 Sviatoslav Sydorenko (Святослав Сидоренко) <wk.cvs.github@sydorenko.org.ua>
This commit is contained in:
Seth Foster 2024-11-21 14:05:49 -05:00 committed by GitHub
parent 3ba6e2e394
commit 51896f0e1b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 208 additions and 46 deletions

View File

@ -35,6 +35,64 @@ __all__ = ['Schedule']
UTC_TIMEZONES = {x: tzutc() for x in dateutil.parser.parserinfo().UTCZONE}
def _assert_timezone_id_is_valid(rrules) -> None:
broken_rrules = [str(rrule) for rrule in rrules if rrule._dtstart and rrule._dtstart.tzinfo is None]
if not broken_rrules:
return
raise ValueError(
f'A valid TZID must be provided (e.g., America/New_York). Invalid: {broken_rrules}',
) from None
def _fast_forward_rrules(rrules, ref_dt=None):
for i, rule in enumerate(rrules):
rrules[i] = _fast_forward_rrule(rule, ref_dt=ref_dt)
return rrules
def _fast_forward_rrule(rrule, ref_dt=None):
'''
Utility to fast forward an rrule, maintaining consistency in the resulting
occurrences.
Uses the .replace() method to update the rrule with a newer dtstart
The operation ensures that the original occurrences (based on the original dtstart)
will match the occurrences after changing the dtstart.
Returns a new rrule with a new dtstart
'''
if rrule._freq not in {dateutil.rrule.HOURLY, dateutil.rrule.MINUTELY}:
return rrule
if ref_dt is None:
ref_dt = now()
if rrule._dtstart > ref_dt:
return rrule
interval = rrule._interval if rrule._interval else 1
if rrule._freq == dateutil.rrule.HOURLY:
interval *= 60 * 60
elif rrule._freq == dateutil.rrule.MINUTELY:
interval *= 60
# if after converting to seconds the interval is still a fraction,
# just return original rrule
if isinstance(interval, float) and not interval.is_integer():
return rrule
seconds_since_dtstart = (ref_dt - rrule._dtstart).total_seconds()
# it is important to fast forward by a number that is divisible by
# interval. For example, if interval is 7 hours, we fast forward by 7, 14, 21, etc. hours.
# Otherwise, the occurrences after the fast forward might not match the ones before.
# x // y is integer division, lopping off any remainder, so that we get the outcome we want.
interval_aligned_offset = datetime.timedelta(seconds=(seconds_since_dtstart // interval) * interval)
new_start = rrule._dtstart + interval_aligned_offset
new_rrule = rrule.replace(dtstart=new_start)
return new_rrule
class ScheduleFilterMethods(object):
def enabled(self, enabled=True):
return self.filter(enabled=enabled)
@ -194,41 +252,26 @@ class Schedule(PrimordialModel, LaunchTimeConfig):
return " ".join(rules)
@classmethod
def rrulestr(cls, rrule, fast_forward=True, **kwargs):
def rrulestr(cls, rrule, ref_dt=None, **kwargs):
"""
Apply our own custom rrule parsing requirements
"""
rrule = Schedule.coerce_naive_until(rrule)
kwargs['forceset'] = True
x = dateutil.rrule.rrulestr(rrule, tzinfos=UTC_TIMEZONES, **kwargs)
rruleset = dateutil.rrule.rrulestr(rrule, tzinfos=UTC_TIMEZONES, **kwargs)
for r in x._rrule:
if r._dtstart and r._dtstart.tzinfo is None:
raise ValueError('A valid TZID must be provided (e.g., America/New_York)')
_assert_timezone_id_is_valid(rruleset._rrule)
_assert_timezone_id_is_valid(rruleset._exrule)
# Fast forward is a way for us to limit the number of events in the rruleset
# If we are fastforwading and we don't have a count limited rule that is minutely or hourley
# We will modify the start date of the rule to last week to prevent a large number of entries
if fast_forward:
try:
# All rules in a ruleset will have the same dtstart value
# so lets compare the first event to now to see if its > 7 days old
first_event = x[0]
if (now() - first_event).days > 7:
for rule in x._rrule:
# If any rule has a minutely or hourly rule without a count...
if rule._freq in [dateutil.rrule.MINUTELY, dateutil.rrule.HOURLY] and not rule._count:
# hourly/minutely rrules with far-past DTSTART values
# are *really* slow to precompute
# start *from* one week ago to speed things up drastically
new_start = (now() - datetime.timedelta(days=7)).strftime('%Y%m%d')
# Now we want to repalce the DTSTART:<value>T with the new date (which includes the T)
new_rrule = re.sub('(DTSTART[^:]*):[^T]+T', r'\1:{0}T'.format(new_start), rrule)
return Schedule.rrulestr(new_rrule, fast_forward=False)
except IndexError:
pass
# If we are fast forwarding and we don't have a count limited rule that is minutely or hourly
# We will modify the start date of the rule to bring as close to the current date as possible
# Even though the API restricts each rrule to have the same dtstart, each rrule in the rruleset
# can fast forward to a difference dtstart. This is required in order to get stable occurrences.
rruleset._rrule = _fast_forward_rrules(rruleset._rrule, ref_dt=ref_dt)
rruleset._exrule = _fast_forward_rrules(rruleset._exrule, ref_dt=ref_dt)
return x
return rruleset
def __str__(self):
return u'%s_t%s_%s_%s' % (self.name, self.unified_job_template.id, self.id, self.next_run)
@ -279,10 +322,11 @@ class Schedule(PrimordialModel, LaunchTimeConfig):
next_run_actual = None
self.next_run = next_run_actual
try:
self.dtstart = future_rs[0].astimezone(pytz.utc)
except IndexError:
self.dtstart = None
if not self.dtstart:
try:
self.dtstart = future_rs[0].astimezone(pytz.utc)
except IndexError:
self.dtstart = None
self.dtend = Schedule.get_end_date(future_rs)
changed = any(getattr(self, field_name) != starting_values[field_name] for field_name in affects_fields)

View File

@ -121,30 +121,16 @@ class TestComputedFields:
assert job_template.next_schedule == expected_schedule
@pytest.mark.django_db
@pytest.mark.parametrize('freq, delta', (('MINUTELY', 1), ('HOURLY', 1)))
def test_past_week_rrule(job_template, freq, delta):
# see: https://github.com/ansible/awx/issues/8071
recent = datetime.utcnow() - timedelta(days=3)
recent = recent.replace(hour=0, minute=0, second=0, microsecond=0)
recent_dt = recent.strftime('%Y%m%d')
rrule = f'DTSTART;TZID=America/New_York:{recent_dt}T000000 RRULE:FREQ={freq};INTERVAL={delta};COUNT=5' # noqa
sched = Schedule.objects.create(name='example schedule', rrule=rrule, unified_job_template=job_template)
first_event = sched.rrulestr(sched.rrule)[0]
assert first_event.replace(tzinfo=None) == recent
@pytest.mark.django_db
@pytest.mark.parametrize('freq, delta', (('MINUTELY', 1), ('HOURLY', 1)))
def test_really_old_dtstart(job_template, freq, delta):
# see: https://github.com/ansible/awx/issues/8071
# If an event is per-minute/per-hour and was created a *really long*
# time ago, we should just bump forward to start counting "in the last week"
# time ago, we should just bump forward the dtstart
rrule = f'DTSTART;TZID=America/New_York:20150101T000000 RRULE:FREQ={freq};INTERVAL={delta}' # noqa
sched = Schedule.objects.create(name='example schedule', rrule=rrule, unified_job_template=job_template)
last_week = (datetime.utcnow() - timedelta(days=7)).date()
first_event = sched.rrulestr(sched.rrule)[0]
assert last_week == first_event.date()
assert now() - first_event < timedelta(days=1)
# the next few scheduled events should be the next minute/hour incremented
next_five_events = list(sched.rrulestr(sched.rrule).xafter(now(), count=5))

View File

@ -0,0 +1,132 @@
import pytest
import datetime
import dateutil
from django.utils.timezone import now
from awx.main.models.schedules import _fast_forward_rrule, Schedule
from dateutil.rrule import HOURLY, MINUTELY, MONTHLY
REF_DT = datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc)
@pytest.mark.parametrize(
'rrulestr',
[
pytest.param('DTSTART;TZID=America/New_York:20201118T200000 RRULE:FREQ=MINUTELY;INTERVAL=5', id='every-5-min'),
pytest.param('DTSTART;TZID=America/New_York:20201118T200000 RRULE:FREQ=HOURLY;INTERVAL=5', id='every-5-hours'),
pytest.param('DTSTART;TZID=America/New_York:20201118T200000 RRULE:FREQ=YEARLY;INTERVAL=5', id='every-5-years'),
pytest.param(
'DTSTART;TZID=America/New_York:20201118T200000 RRULE:FREQ=MINUTELY;INTERVAL=5;WKST=SU;BYMONTH=2,3;BYMONTHDAY=18;BYHOUR=5;BYMINUTE=35;BYSECOND=0',
id='every-5-minutes-at-5:35:00-am-on-the-18th-day-of-feb-or-march-with-week-starting-on-sundays',
),
pytest.param(
'DTSTART;TZID=America/New_York:20201118T200000 RRULE:FREQ=HOURLY;INTERVAL=5;WKST=SU;BYMONTH=2,3;BYHOUR=5',
id='every-5-hours-at-5-am-in-feb-or-march-with-week-starting-on-sundays',
),
],
)
def test_fast_forwarded_rrule_matches_original_occurrence(rrulestr):
'''
Assert that the resulting fast forwarded date is included in the original rrule
occurrence list
'''
rruleset = Schedule.rrulestr(rrulestr, ref_dt=REF_DT)
gen = rruleset.xafter(REF_DT, count=200)
occurrences = [i for i in gen]
orig_rruleset = dateutil.rrule.rrulestr(rrulestr, forceset=True)
gen = orig_rruleset.xafter(REF_DT, count=200)
orig_occurrences = [i for i in gen]
assert occurrences == orig_occurrences
def test_fast_forward_rrule_hours():
'''
Generate an rrule for each hour of the day
Assert that the resulting fast forwarded date is included in the original rrule
occurrence list
'''
rrulestr_prefix = 'DTSTART;TZID=America/New_York:20201118T200000 RRULE:FREQ=HOURLY;'
for interval in range(1, 24):
rrulestr = f"{rrulestr_prefix}INTERVAL={interval}"
rruleset = Schedule.rrulestr(rrulestr, ref_dt=REF_DT)
gen = rruleset.xafter(REF_DT, count=200)
occurrences = [i for i in gen]
orig_rruleset = dateutil.rrule.rrulestr(rrulestr, forceset=True)
gen = orig_rruleset.xafter(REF_DT, count=200)
orig_occurrences = [i for i in gen]
assert occurrences == orig_occurrences
def test_multiple_rrules():
'''
Create an rruleset that contains multiple rrules and an exrule
rruleA: freq HOURLY interval 5, dtstart should be fast forwarded
rruleB: freq HOURLY interval 7, dtstart should be fast forwarded
rruleC: freq MONTHLY interval 1, dtstart should not be fast forwarded
exruleA: freq HOURLY interval 5, dtstart should be fast forwarded
'''
rrulestr = '''DTSTART;TZID=America/New_York:20201118T200000
RRULE:FREQ=HOURLY;INTERVAL=5
RRULE:FREQ=HOURLY;INTERVAL=7
RRULE:FREQ=MONTHLY
EXRULE:FREQ=HOURLY;INTERVAL=5;BYDAY=MO,TU,WE'''
rruleset = Schedule.rrulestr(rrulestr, ref_dt=REF_DT)
rruleA, rruleB, rruleC = rruleset._rrule
exruleA = rruleset._exrule[0]
# assert that each rrule has its own dtstart
assert rruleA._dtstart != rruleB._dtstart
assert rruleA._dtstart != rruleC._dtstart
assert exruleA._dtstart == rruleA._dtstart
# the new dtstart should be within INTERVAL amount of hours from REF_DT
assert (REF_DT - rruleA._dtstart) < datetime.timedelta(hours=6)
assert (REF_DT - rruleB._dtstart) < datetime.timedelta(hours=8)
assert (REF_DT - exruleA._dtstart) < datetime.timedelta(hours=6)
# the freq=monthly rrule's dtstart should not have changed
dateutil_rruleset = dateutil.rrule.rrulestr(rrulestr, forceset=True)
assert rruleC._dtstart == dateutil_rruleset._rrule[2]._dtstart
gen = rruleset.xafter(REF_DT, count=200)
occurrences = [i for i in gen]
orig_rruleset = dateutil.rrule.rrulestr(rrulestr, forceset=True)
gen = orig_rruleset.xafter(REF_DT, count=200)
orig_occurrences = [i for i in gen]
assert occurrences == orig_occurrences
def test_future_date_does_not_fast_forward():
dtstart = now() + datetime.timedelta(days=30)
rrule = dateutil.rrule.rrule(freq=HOURLY, interval=7, dtstart=dtstart)
new_rrule = _fast_forward_rrule(rrule, ref_dt=REF_DT)
assert new_rrule == rrule
@pytest.mark.parametrize(
('freq', 'interval'),
[
pytest.param(MINUTELY, 15.5555, id="freq-MINUTELY-interval-15.5555"),
pytest.param(MONTHLY, 1, id="freq-MONTHLY-interval-1"),
],
)
def test_does_not_fast_forward(freq, interval):
'''
Assert a couple of rrules that should not be fast forwarded
'''
dtstart = REF_DT - datetime.timedelta(days=30)
rrule = dateutil.rrule.rrule(freq=freq, interval=interval, dtstart=dtstart)
assert rrule == _fast_forward_rrule(rrule, ref_dt=REF_DT)