From 56a14e5e9e4c1210847548ca8b953f805116a604 Mon Sep 17 00:00:00 2001 From: ndwarshuis Date: Thu, 2 Feb 2023 23:18:36 -0500 Subject: [PATCH] WIP make cron pattern faster --- lib/Internal/Insert.hs | 178 +++++++++++++++++++++++++++++++++-------- lib/Internal/Types.hs | 2 + 2 files changed, 147 insertions(+), 33 deletions(-) diff --git a/lib/Internal/Insert.hs b/lib/Internal/Insert.hs index 95e15d3..dbe07d9 100644 --- a/lib/Internal/Insert.hs +++ b/lib/Internal/Insert.hs @@ -18,6 +18,8 @@ import Internal.Types hiding (sign) import Internal.Utils import RIO hiding (to) -- import qualified RIO.Map as M + +import qualified RIO.List as L import qualified RIO.Text as T import RIO.Time @@ -35,12 +37,12 @@ import RIO.Time -- return (M.insert (d, p) res m, res) expandDatePat :: Bounds -> DatePat -> [Day] --- TODO what happens when I only have one date that matches? total waste... -expandDatePat (a, b) (Cron cp) = - fmap xGregToDay $ - filter (cronPatternMatches cp) $ - take (fromIntegral $ diffDays b a) $ - gregorians a +expandDatePat b (Cron cp) = expandCronPat b cp +-- expandDatePat (a, b) (Cron cp) = +-- fmap xGregToDay $ +-- filter (cronPatternMatches cp) $ +-- take (fromIntegral $ diffDays b a) $ +-- gregorians a expandDatePat i (Mod mp) = expandModPat mp i expandModPat :: ModPat -> Bounds -> [Day] @@ -59,39 +61,149 @@ expandModPat Month -> addGregorianMonthsClip Year -> addGregorianYearsClip +-- nextXGreg_ :: CronPat -> XGregorian -> XGregorian +-- nextXGreg_ c XGregorian {xgYear = y, xgMonth = m, xgDay = d, xgDayOfWeek = w} +-- | m == 12 && d == 31 = XGregorian (y + 1) 1 1 w_ +-- | (m == 2 && (not leap && d == 28 || (leap && d == 29))) +-- || (m `elem` [4, 6, 9, 11] && d == 30) +-- || (d == 31) = +-- XGregorian y (m + 1) 1 w_ +-- | otherwise = XGregorian y m (d + 1) w_ +-- where +-- -- don't use DayOfWeek from Data.Time since this uses mod (which uses a +-- -- division opcode) and thus will be slower than just checking for equality +-- -- and adding +-- w_ = if w == 6 then 0 else w + 1 +-- leap = isLeapYear $ fromIntegral y + +monthLength :: (Integral a, Integral b, Integral c) => a -> b -> c +monthLength y m + | m == 2 && isLeapYear (fromIntegral y) = 29 + | m == 2 = 28 + | m `elem` [4, 6, 9, 11] = 30 + | otherwise = 31 + -- TODO this can be optimized to prevent filtering a bunch of dates for -- one/a few cron patterns -cronPatternMatches :: CronPat -> XGregorian -> Bool -cronPatternMatches CronPat {..} XGregorian {..} = - testYMD xgYear cronYear - && testYMD xgMonth cronMonth - && testYMD xgDay cronDay - && testW (dayOfWeek_ xgDayOfWeek) cronWeekly +-- cronPatternMatches :: CronPat -> XGregorian -> Bool +-- cronPatternMatches CronPat {..} XGregorian {..} = +-- testYMD xgYear cronYear +-- && testYMD xgMonth cronMonth +-- && testYMD xgDay cronDay +-- && testW (dayOfWeek_ xgDayOfWeek) cronWeekly +-- where +-- testYMD z = maybe True (mdyPatternMatches (fromIntegral z)) +-- testW z = maybe True (`weekdayPatternMatches` z) + +expandCronPat :: Bounds -> CronPat -> [Day] +expandCronPat b = L.unfoldr nextCronPat . compileCronPat b + +data CompiledCronPat = CompiledCronPat + { ccpYear :: ![Int] + , ccpMonth :: !(Zipper Int) + , ccpDay :: !(Zipper Int) + , ccpWeekly :: ![Int] + , ccpMonthEnd :: !Int + , ccpDayEnd :: !Int + } + deriving (Show) + +data Zipper a = Zipper ![a] ![a] deriving (Show) + +initZipper :: [a] -> Zipper a +initZipper = Zipper [] + +resetZipper :: Zipper a -> Zipper a +resetZipper (Zipper bs as) = initZipper $ reverse bs ++ as + +shiftZipperWhile :: (a -> Bool) -> Zipper a -> Zipper a +shiftZipperWhile f z@(Zipper bs as) = case as of + [] -> z + x : xs + | f x -> shiftZipperWhile f $ Zipper (x : bs) xs + | otherwise -> z + +zipperCurrent :: Zipper a -> Either (Zipper a) (a, Zipper a) +zipperCurrent z@(Zipper _ []) = Left $ resetZipper z +zipperCurrent (Zipper bs (a : as)) = Right (a, Zipper (a : bs) as) + +compileCronPat :: Bounds -> CronPat -> CompiledCronPat +compileCronPat (x, y) CronPat {..} = + CompiledCronPat + { ccpYear = maybe [y0_ .. y1_] compileMDY_ cronYear + , ccpMonth = compileDY [1 .. 12] m0 cronMonth + , ccpDay = compileDY [1 .. 31] d0 cronDay + , ccpWeekly = maybe [] compileW cronWeekly + , ccpMonthEnd = m1 + , ccpDayEnd = d1 + } where - testYMD z = maybe True (mdyPatternMatches (fromIntegral z)) - testW z = maybe True (`weekdayPatternMatches` z) + (y0, m0, d0) = toGregorian x + (y1, m1, d1) = toGregorian y + y0_ = fromIntegral y0 + y1_ = fromIntegral y1 + compileDY def k = shiftZipperWhile (< k) . initZipper . maybe def compileMDY_ + compileMDY_ (Single z) = [fromIntegral z] + compileMDY_ (Multi zs) = fromIntegral <$> zs + compileMDY_ (Repeat RepeatPat {rpStart = s, rpBy = b, rpRepeats = rs}) = + -- TODO minor perf improvement, filter the repeats before filterng <=31 + let b' = fromIntegral b + xs = takeWhile (<= 31) $ L.iterate (+ b') $ fromIntegral s + in maybe xs (\r -> take (fromIntegral r) xs) rs --- TODO could clean this up by making an enum instance for Weekday -dayOfWeek_ :: Int -> Weekday -dayOfWeek_ d = case d of - 0 -> Sun - 1 -> Mon - 2 -> Tue - 3 -> Wed - 4 -> Thu - 5 -> Fri - _ -> Sat + compileW (OnDay w) = [fromEnum w] + compileW (OnDays ws) = fromEnum <$> ws -weekdayPatternMatches :: WeekdayPat -> Weekday -> Bool -weekdayPatternMatches (OnDay x) = (== x) -weekdayPatternMatches (OnDays xs) = (`elem` xs) +nextCronPat :: CompiledCronPat -> Maybe (Day, CompiledCronPat) +nextCronPat CompiledCronPat {ccpYear = []} = Nothing +nextCronPat c@(CompiledCronPat {..}) = + case zipperCurrent ccpMonth of + Left mz -> nextCronPat $ c {ccpYear = ys, ccpMonth = mz, ccpDay = resetZipper ccpDay} + Right (m, mz) -> case zipperCurrent ccpDay of + Left dz -> nextCronPat $ c {ccpMonth = mz, ccpDay = dz} + Right (d, dz) + | null ys && m >= ccpMonthEnd && d >= ccpDayEnd -> Nothing + | otherwise -> case dayMaybe m d of + Nothing -> nextCronPat $ c {ccpMonth = mz, ccpDay = resetZipper dz} + Just day -> Just (day, c {ccpDay = dz}) + where + y : ys = ccpYear + -- TODO not the most efficient way to check weekdays (most likely) since + -- I have to go through all the trouble of converting to a day and then + -- doing some complex math to figure out which day of the week it is + validWeekday day = + null ccpWeekly + || (not (null ccpWeekly) && dayToWeekday day `elem` ccpWeekly) + dayMaybe m d + | d > monthLength y m = Nothing + | otherwise = + let day = fromGregorian (fromIntegral y) m d + in if validWeekday day then Just day else Nothing -mdyPatternMatches :: Natural -> MDYPat -> Bool -mdyPatternMatches x p = case p of - Single y -> x == y - Multi xs -> x `elem` xs - Repeat (RepeatPat {rpStart = s, rpBy = b, rpRepeats = r}) -> - s >= x && mod x b == 0 && maybe True (\y -> x <= s + b * y) r +dayToWeekday :: Day -> Int +dayToWeekday (ModifiedJulianDay d) = mod (fromIntegral d + 2) 7 + +-- -- TODO could clean this up by making an enum instance for Weekday +-- dayOfWeek_ :: Int -> Weekday +-- dayOfWeek_ d = case d of +-- 0 -> Sun +-- 1 -> Mon +-- 2 -> Tue +-- 3 -> Wed +-- 4 -> Thu +-- 5 -> Fri +-- _ -> Sat + +-- weekdayPatternMatches :: WeekdayPat -> Weekday -> Bool +-- weekdayPatternMatches (OnDay x) = (== x) +-- weekdayPatternMatches (OnDays xs) = (`elem` xs) + +-- mdyPatternMatches :: Natural -> MDYPat -> Bool +-- mdyPatternMatches x p = case p of +-- Single y -> x == y +-- Multi xs -> x `elem` xs +-- Repeat (RepeatPat {rpStart = s, rpBy = b, rpRepeats = r}) -> +-- s >= x && mod x b == 0 && maybe True (\y -> x <= s + b * y) r withDates :: MonadUnliftIO m diff --git a/lib/Internal/Types.hs b/lib/Internal/Types.hs index 9ab0c89..9bc2daf 100644 --- a/lib/Internal/Types.hs +++ b/lib/Internal/Types.hs @@ -162,6 +162,8 @@ deriving instance Show Weekday deriving instance Hashable Weekday +deriving instance Enum Weekday + deriving instance Eq WeekdayPat deriving instance Ord WeekdayPat