From b50f16044fd9d1df61f977ce83c1851ecc0beffb Mon Sep 17 00:00:00 2001 From: ndwarshuis Date: Wed, 1 Feb 2023 20:56:29 -0500 Subject: [PATCH] ENH use faster gregorian iterator (~2x speedup) --- lib/Internal/Database/Model.hs | 1 + lib/Internal/Database/Ops.hs | 2 ++ lib/Internal/Insert.hs | 62 +++++++++++++++++++++------------- lib/Internal/Types.hs | 27 ++++++++++++++- lib/Internal/Utils.hs | 34 +++++++++++++++++++ stack.yaml | 3 +- 6 files changed, 104 insertions(+), 25 deletions(-) diff --git a/lib/Internal/Database/Model.hs b/lib/Internal/Database/Model.hs index ffc69d8..1731145 100644 --- a/lib/Internal/Database/Model.hs +++ b/lib/Internal/Database/Model.hs @@ -84,6 +84,7 @@ data DBState = DBState , kmStatementInterval :: !MaybeBounds , kmNewCommits :: ![Int] , kmConfigDir :: !FilePath + , kmBoundsCache :: !(MVar (M.Map (Bounds, DatePat) [Day])) } type MappingT m a = ReaderT DBState (SqlPersistT m) a diff --git a/lib/Internal/Database/Ops.hs b/lib/Internal/Database/Ops.hs index 178bece..c2627cd 100644 --- a/lib/Internal/Database/Ops.hs +++ b/lib/Internal/Database/Ops.hs @@ -313,6 +313,7 @@ getDBState c = do am <- updateAccounts $ accounts c cm <- updateCurrencies $ currencies c hs <- updateHashes c + v <- newMVar M.empty -- TODO not sure how I feel about this, probably will change this struct alot -- in the future so whatever...for now return $ \f -> @@ -323,4 +324,5 @@ getDBState c = do , kmStatementInterval = intervalMaybeBounds $ statementInterval $ global c , kmNewCommits = hs , kmConfigDir = f + , kmBoundsCache = v } diff --git a/lib/Internal/Insert.hs b/lib/Internal/Insert.hs index 3026899..95e15d3 100644 --- a/lib/Internal/Insert.hs +++ b/lib/Internal/Insert.hs @@ -17,14 +17,30 @@ import Internal.Statement import Internal.Types hiding (sign) import Internal.Utils import RIO hiding (to) +-- import qualified RIO.Map as M import qualified RIO.Text as T import RIO.Time -------------------------------------------------------------------------------- -- intervals +-- expandDatePat :: MonadUnliftIO m => Bounds -> DatePat -> MappingT m [Day] +-- expandDatePat d p = do +-- -- TODO crude memoization +-- v <- asks kmBoundsCache +-- modifyMVar v $ \m -> case M.lookup (d, p) m of +-- Just ds -> return (m, ds) +-- Nothing -> do +-- let res = expandDatePat_ d p +-- return (M.insert (d, p) res m, res) + expandDatePat :: Bounds -> DatePat -> [Day] -expandDatePat (a, b) (Cron cp) = filter (cronPatternMatches cp) [a .. b] +-- TODO what happens when I only have one date that matches? total waste... +expandDatePat (a, b) (Cron cp) = + fmap xGregToDay $ + filter (cronPatternMatches cp) $ + take (fromIntegral $ diffDays b a) $ + gregorians a expandDatePat i (Mod mp) = expandModPat mp i expandModPat :: ModPat -> Bounds -> [Day] @@ -45,28 +61,26 @@ expandModPat -- TODO this can be optimized to prevent filtering a bunch of dates for -- one/a few cron patterns -cronPatternMatches :: CronPat -> Day -> Bool -cronPatternMatches CronPat {..} x = - yMaybe y cronYear - && mdMaybe m cronMonth - && mdMaybe d cronDay - && wdMaybe (dayOfWeek_ x) cronWeekly +cronPatternMatches :: CronPat -> XGregorian -> Bool +cronPatternMatches CronPat {..} XGregorian {..} = + testYMD xgYear cronYear + && testYMD xgMonth cronMonth + && testYMD xgDay cronDay + && testW (dayOfWeek_ xgDayOfWeek) cronWeekly where - testMaybe = maybe True - yMaybe z = testMaybe (mdyPatternMatches (fromIntegral z)) - mdMaybe z = testMaybe (mdyPatternMatches (fromIntegral z)) - wdMaybe z = testMaybe (`weekdayPatternMatches` z) - (y, m, d) = toGregorian x + testYMD z = maybe True (mdyPatternMatches (fromIntegral z)) + testW z = maybe True (`weekdayPatternMatches` z) -dayOfWeek_ :: Day -> Weekday -dayOfWeek_ d = case dayOfWeek d of - Monday -> Mon - Tuesday -> Tue - Wednesday -> Wed - Thursday -> Thu - Friday -> Fri - Saturday -> Sat - Sunday -> Sun +-- TODO could clean this up by making an enum instance for Weekday +dayOfWeek_ :: Int -> Weekday +dayOfWeek_ d = case d of + 0 -> Sun + 1 -> Mon + 2 -> Tue + 3 -> Wed + 4 -> Thu + 5 -> Fri + _ -> Sat weekdayPatternMatches :: WeekdayPat -> Weekday -> Bool weekdayPatternMatches (OnDay x) = (== x) @@ -86,7 +100,8 @@ withDates -> MappingT m [a] withDates dp f = do bounds <- askBounds - mapM f (expandDatePat bounds dp) + let days = expandDatePat bounds dp + mapM f days askBounds :: MonadUnliftIO m => MappingT m Bounds askBounds = (liftIO . resolveBounds) =<< asks kmBudgetInterval @@ -273,7 +288,8 @@ insertManual } = do whenHash CTManual m [] $ \c -> do bounds <- (liftIO . resolveBounds) =<< asks kmStatementInterval - res <- mapM tx $ expandDatePat bounds dp + let days = expandDatePat bounds dp + res <- mapM tx days unlessLefts_ (concatEithersL res) $ lift . mapM_ (insertTx c) where tx day = txPair day from to u (dec2Rat v) e diff --git a/lib/Internal/Types.hs b/lib/Internal/Types.hs index 11ec614..cb8738b 100644 --- a/lib/Internal/Types.hs +++ b/lib/Internal/Types.hs @@ -147,18 +147,24 @@ type AcntID = T.Text deriving instance Eq TimeUnit +deriving instance Ord TimeUnit + deriving instance Show TimeUnit deriving instance Hashable TimeUnit deriving instance Eq Weekday +deriving instance Ord Weekday + deriving instance Show Weekday deriving instance Hashable Weekday deriving instance Eq WeekdayPat +deriving instance Ord WeekdayPat + deriving instance Show WeekdayPat deriving instance Hashable WeekdayPat @@ -167,12 +173,16 @@ deriving instance Show RepeatPat deriving instance Eq RepeatPat +deriving instance Ord RepeatPat + deriving instance Hashable RepeatPat deriving instance Show MDYPat deriving instance Eq MDYPat +deriving instance Ord MDYPat + deriving instance Hashable MDYPat deriving instance Eq Gregorian @@ -203,18 +213,24 @@ instance Ord GregorianM where deriving instance Eq ModPat +deriving instance Ord ModPat + deriving instance Show ModPat deriving instance Hashable ModPat deriving instance Eq CronPat +deriving instance Ord CronPat + deriving instance Show CronPat deriving instance Hashable CronPat deriving instance Eq DatePat +deriving instance Ord DatePat + deriving instance Show DatePat deriving instance Hashable DatePat @@ -296,6 +312,8 @@ data Statement | StmtImport !Import deriving (Generic, FromDhall) +deriving instance Eq Manual + deriving instance Hashable Manual data Split a v c = Split @@ -327,7 +345,7 @@ data Import = Import , impTxOpts :: !TxOpts , impSkipLines :: !Natural } - deriving (Hashable, Generic, FromDhall) + deriving (Eq, Hashable, Generic, FromDhall) deriving instance Eq MatchVal @@ -567,3 +585,10 @@ instance Exception InsertException type EitherErr = Either InsertError type EitherErrs = Either [InsertError] + +data XGregorian = XGregorian + { xgYear :: !Int + , xgMonth :: !Int + , xgDay :: !Int + , xgDayOfWeek :: !Int + } diff --git a/lib/Internal/Utils.hs b/lib/Internal/Utils.hs index 756a554..894407b 100644 --- a/lib/Internal/Utils.hs +++ b/lib/Internal/Utils.hs @@ -27,6 +27,9 @@ module Internal.Utils , acntPath2Text , showT , lookupErr + , gregorians + , uncurry3 + , xGregToDay ) where @@ -43,6 +46,33 @@ import Text.Regex.TDFA -------------------------------------------------------------------------------- -- dates +-- | find the next date +-- this is meant to go in a very tight loop and be very fast (hence no +-- complex date functions, most of which heavily use 'mod' and friends) +nextXGreg :: XGregorian -> XGregorian +nextXGreg XGregorian {xgYear = y, xgMonth = m, xgDay = d, xgDayOfWeek = w} + | m == 12 && d == 31 = XGregorian (y + 1) 1 1 w_ + | (m == 2 && (not leap && d == 28 || (leap && d == 29))) + || (m `elem` [4, 6, 9, 11] && d == 30) + || (d == 31) = + XGregorian y (m + 1) 1 w_ + | otherwise = XGregorian y m (d + 1) w_ + where + -- don't use DayOfWeek from Data.Time since this uses mod (which uses a + -- division opcode) and thus will be slower than just checking for equality + -- and adding + w_ = if w == 6 then 0 else w + 1 + leap = isLeapYear $ fromIntegral y + +gregorians :: Day -> [XGregorian] +gregorians x = L.iterate nextXGreg $ XGregorian (fromIntegral y) m d w + where + (y, m, d) = toGregorian x + w = fromEnum $ dayOfWeek x + +xGregToDay :: XGregorian -> Day +xGregToDay XGregorian {xgYear = y, xgMonth = m, xgDay = d} = fromGregorian (fromIntegral y) m d + gregTup :: Gregorian -> (Integer, Int, Int) gregTup Gregorian {..} = ( fromIntegral gYear @@ -500,5 +530,9 @@ uncurry3 f (a, b, c) = f a b c -- lpadT :: Char -> Int -> T.Text -> T.Text -- lpadT c n s = T.append (T.replicate (n - T.length s) (T.singleton c)) s +-- TODO this regular expression appears to be compiled each time, which is +-- super slow +-- NOTE: see https://github.com/haskell-hvr/regex-tdfa/issues/9 - performance +-- is likely not going to be optimal for text matchMaybe :: RegexContext Regex query b => query -> T.Text -> EitherErr b matchMaybe q re = first (const $ RegexError re) $ pureTry $ q =~ re diff --git a/stack.yaml b/stack.yaml index d9f489b..bcc4d0d 100644 --- a/stack.yaml +++ b/stack.yaml @@ -20,7 +20,7 @@ # this resolver has persistent < version 2.13.3.4 which introduced a nasty # name shadow bug -resolver: lts-19.2 +resolver: lts-20.9 # User packages to be built. # Various formats can be used as shown in the example below. @@ -40,6 +40,7 @@ resolver: lts-19.2 extra-deps: # this version isn't on the LTS yet and has an instance for DayOfWeek - dhall-1.41.2@sha256:556edac8997a5fcf451c9bbb151b1f04996318019799724cc71cc03a9a9122be,16281 +- persistent-2.13.3.3@sha256:49dd5f7dc7bbd62390d95b749df29971ce84e410e1db58bceaef5a175366e840,6762 # - git: https://github.com/commercialhaskell/stack.git # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a #