ENH use faster gregorian iterator (~2x speedup)

This commit is contained in:
Nathan Dwarshuis 2023-02-01 20:56:29 -05:00
parent 54342fbe74
commit b50f16044f
6 changed files with 104 additions and 25 deletions

View File

@ -84,6 +84,7 @@ data DBState = DBState
, kmStatementInterval :: !MaybeBounds , kmStatementInterval :: !MaybeBounds
, kmNewCommits :: ![Int] , kmNewCommits :: ![Int]
, kmConfigDir :: !FilePath , kmConfigDir :: !FilePath
, kmBoundsCache :: !(MVar (M.Map (Bounds, DatePat) [Day]))
} }
type MappingT m a = ReaderT DBState (SqlPersistT m) a type MappingT m a = ReaderT DBState (SqlPersistT m) a

View File

@ -313,6 +313,7 @@ getDBState c = do
am <- updateAccounts $ accounts c am <- updateAccounts $ accounts c
cm <- updateCurrencies $ currencies c cm <- updateCurrencies $ currencies c
hs <- updateHashes c hs <- updateHashes c
v <- newMVar M.empty
-- TODO not sure how I feel about this, probably will change this struct alot -- TODO not sure how I feel about this, probably will change this struct alot
-- in the future so whatever...for now -- in the future so whatever...for now
return $ \f -> return $ \f ->
@ -323,4 +324,5 @@ getDBState c = do
, kmStatementInterval = intervalMaybeBounds $ statementInterval $ global c , kmStatementInterval = intervalMaybeBounds $ statementInterval $ global c
, kmNewCommits = hs , kmNewCommits = hs
, kmConfigDir = f , kmConfigDir = f
, kmBoundsCache = v
} }

View File

@ -17,14 +17,30 @@ import Internal.Statement
import Internal.Types hiding (sign) import Internal.Types hiding (sign)
import Internal.Utils import Internal.Utils
import RIO hiding (to) import RIO hiding (to)
-- import qualified RIO.Map as M
import qualified RIO.Text as T import qualified RIO.Text as T
import RIO.Time import RIO.Time
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- intervals -- intervals
-- expandDatePat :: MonadUnliftIO m => Bounds -> DatePat -> MappingT m [Day]
-- expandDatePat d p = do
-- -- TODO crude memoization
-- v <- asks kmBoundsCache
-- modifyMVar v $ \m -> case M.lookup (d, p) m of
-- Just ds -> return (m, ds)
-- Nothing -> do
-- let res = expandDatePat_ d p
-- return (M.insert (d, p) res m, res)
expandDatePat :: Bounds -> DatePat -> [Day] expandDatePat :: Bounds -> DatePat -> [Day]
expandDatePat (a, b) (Cron cp) = filter (cronPatternMatches cp) [a .. b] -- TODO what happens when I only have one date that matches? total waste...
expandDatePat (a, b) (Cron cp) =
fmap xGregToDay $
filter (cronPatternMatches cp) $
take (fromIntegral $ diffDays b a) $
gregorians a
expandDatePat i (Mod mp) = expandModPat mp i expandDatePat i (Mod mp) = expandModPat mp i
expandModPat :: ModPat -> Bounds -> [Day] expandModPat :: ModPat -> Bounds -> [Day]
@ -45,28 +61,26 @@ expandModPat
-- TODO this can be optimized to prevent filtering a bunch of dates for -- TODO this can be optimized to prevent filtering a bunch of dates for
-- one/a few cron patterns -- one/a few cron patterns
cronPatternMatches :: CronPat -> Day -> Bool cronPatternMatches :: CronPat -> XGregorian -> Bool
cronPatternMatches CronPat {..} x = cronPatternMatches CronPat {..} XGregorian {..} =
yMaybe y cronYear testYMD xgYear cronYear
&& mdMaybe m cronMonth && testYMD xgMonth cronMonth
&& mdMaybe d cronDay && testYMD xgDay cronDay
&& wdMaybe (dayOfWeek_ x) cronWeekly && testW (dayOfWeek_ xgDayOfWeek) cronWeekly
where where
testMaybe = maybe True testYMD z = maybe True (mdyPatternMatches (fromIntegral z))
yMaybe z = testMaybe (mdyPatternMatches (fromIntegral z)) testW z = maybe True (`weekdayPatternMatches` z)
mdMaybe z = testMaybe (mdyPatternMatches (fromIntegral z))
wdMaybe z = testMaybe (`weekdayPatternMatches` z)
(y, m, d) = toGregorian x
dayOfWeek_ :: Day -> Weekday -- TODO could clean this up by making an enum instance for Weekday
dayOfWeek_ d = case dayOfWeek d of dayOfWeek_ :: Int -> Weekday
Monday -> Mon dayOfWeek_ d = case d of
Tuesday -> Tue 0 -> Sun
Wednesday -> Wed 1 -> Mon
Thursday -> Thu 2 -> Tue
Friday -> Fri 3 -> Wed
Saturday -> Sat 4 -> Thu
Sunday -> Sun 5 -> Fri
_ -> Sat
weekdayPatternMatches :: WeekdayPat -> Weekday -> Bool weekdayPatternMatches :: WeekdayPat -> Weekday -> Bool
weekdayPatternMatches (OnDay x) = (== x) weekdayPatternMatches (OnDay x) = (== x)
@ -86,7 +100,8 @@ withDates
-> MappingT m [a] -> MappingT m [a]
withDates dp f = do withDates dp f = do
bounds <- askBounds bounds <- askBounds
mapM f (expandDatePat bounds dp) let days = expandDatePat bounds dp
mapM f days
askBounds :: MonadUnliftIO m => MappingT m Bounds askBounds :: MonadUnliftIO m => MappingT m Bounds
askBounds = (liftIO . resolveBounds) =<< asks kmBudgetInterval askBounds = (liftIO . resolveBounds) =<< asks kmBudgetInterval
@ -273,7 +288,8 @@ insertManual
} = do } = do
whenHash CTManual m [] $ \c -> do whenHash CTManual m [] $ \c -> do
bounds <- (liftIO . resolveBounds) =<< asks kmStatementInterval bounds <- (liftIO . resolveBounds) =<< asks kmStatementInterval
res <- mapM tx $ expandDatePat bounds dp let days = expandDatePat bounds dp
res <- mapM tx days
unlessLefts_ (concatEithersL res) $ lift . mapM_ (insertTx c) unlessLefts_ (concatEithersL res) $ lift . mapM_ (insertTx c)
where where
tx day = txPair day from to u (dec2Rat v) e tx day = txPair day from to u (dec2Rat v) e

View File

@ -147,18 +147,24 @@ type AcntID = T.Text
deriving instance Eq TimeUnit deriving instance Eq TimeUnit
deriving instance Ord TimeUnit
deriving instance Show TimeUnit deriving instance Show TimeUnit
deriving instance Hashable TimeUnit deriving instance Hashable TimeUnit
deriving instance Eq Weekday deriving instance Eq Weekday
deriving instance Ord Weekday
deriving instance Show Weekday deriving instance Show Weekday
deriving instance Hashable Weekday deriving instance Hashable Weekday
deriving instance Eq WeekdayPat deriving instance Eq WeekdayPat
deriving instance Ord WeekdayPat
deriving instance Show WeekdayPat deriving instance Show WeekdayPat
deriving instance Hashable WeekdayPat deriving instance Hashable WeekdayPat
@ -167,12 +173,16 @@ deriving instance Show RepeatPat
deriving instance Eq RepeatPat deriving instance Eq RepeatPat
deriving instance Ord RepeatPat
deriving instance Hashable RepeatPat deriving instance Hashable RepeatPat
deriving instance Show MDYPat deriving instance Show MDYPat
deriving instance Eq MDYPat deriving instance Eq MDYPat
deriving instance Ord MDYPat
deriving instance Hashable MDYPat deriving instance Hashable MDYPat
deriving instance Eq Gregorian deriving instance Eq Gregorian
@ -203,18 +213,24 @@ instance Ord GregorianM where
deriving instance Eq ModPat deriving instance Eq ModPat
deriving instance Ord ModPat
deriving instance Show ModPat deriving instance Show ModPat
deriving instance Hashable ModPat deriving instance Hashable ModPat
deriving instance Eq CronPat deriving instance Eq CronPat
deriving instance Ord CronPat
deriving instance Show CronPat deriving instance Show CronPat
deriving instance Hashable CronPat deriving instance Hashable CronPat
deriving instance Eq DatePat deriving instance Eq DatePat
deriving instance Ord DatePat
deriving instance Show DatePat deriving instance Show DatePat
deriving instance Hashable DatePat deriving instance Hashable DatePat
@ -296,6 +312,8 @@ data Statement
| StmtImport !Import | StmtImport !Import
deriving (Generic, FromDhall) deriving (Generic, FromDhall)
deriving instance Eq Manual
deriving instance Hashable Manual deriving instance Hashable Manual
data Split a v c = Split data Split a v c = Split
@ -327,7 +345,7 @@ data Import = Import
, impTxOpts :: !TxOpts , impTxOpts :: !TxOpts
, impSkipLines :: !Natural , impSkipLines :: !Natural
} }
deriving (Hashable, Generic, FromDhall) deriving (Eq, Hashable, Generic, FromDhall)
deriving instance Eq MatchVal deriving instance Eq MatchVal
@ -567,3 +585,10 @@ instance Exception InsertException
type EitherErr = Either InsertError type EitherErr = Either InsertError
type EitherErrs = Either [InsertError] type EitherErrs = Either [InsertError]
data XGregorian = XGregorian
{ xgYear :: !Int
, xgMonth :: !Int
, xgDay :: !Int
, xgDayOfWeek :: !Int
}

View File

@ -27,6 +27,9 @@ module Internal.Utils
, acntPath2Text , acntPath2Text
, showT , showT
, lookupErr , lookupErr
, gregorians
, uncurry3
, xGregToDay
) )
where where
@ -43,6 +46,33 @@ import Text.Regex.TDFA
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- dates -- dates
-- | find the next date
-- this is meant to go in a very tight loop and be very fast (hence no
-- complex date functions, most of which heavily use 'mod' and friends)
nextXGreg :: XGregorian -> XGregorian
nextXGreg XGregorian {xgYear = y, xgMonth = m, xgDay = d, xgDayOfWeek = w}
| m == 12 && d == 31 = XGregorian (y + 1) 1 1 w_
| (m == 2 && (not leap && d == 28 || (leap && d == 29)))
|| (m `elem` [4, 6, 9, 11] && d == 30)
|| (d == 31) =
XGregorian y (m + 1) 1 w_
| otherwise = XGregorian y m (d + 1) w_
where
-- don't use DayOfWeek from Data.Time since this uses mod (which uses a
-- division opcode) and thus will be slower than just checking for equality
-- and adding
w_ = if w == 6 then 0 else w + 1
leap = isLeapYear $ fromIntegral y
gregorians :: Day -> [XGregorian]
gregorians x = L.iterate nextXGreg $ XGregorian (fromIntegral y) m d w
where
(y, m, d) = toGregorian x
w = fromEnum $ dayOfWeek x
xGregToDay :: XGregorian -> Day
xGregToDay XGregorian {xgYear = y, xgMonth = m, xgDay = d} = fromGregorian (fromIntegral y) m d
gregTup :: Gregorian -> (Integer, Int, Int) gregTup :: Gregorian -> (Integer, Int, Int)
gregTup Gregorian {..} = gregTup Gregorian {..} =
( fromIntegral gYear ( fromIntegral gYear
@ -500,5 +530,9 @@ uncurry3 f (a, b, c) = f a b c
-- lpadT :: Char -> Int -> T.Text -> T.Text -- lpadT :: Char -> Int -> T.Text -> T.Text
-- lpadT c n s = T.append (T.replicate (n - T.length s) (T.singleton c)) s -- lpadT c n s = T.append (T.replicate (n - T.length s) (T.singleton c)) s
-- TODO this regular expression appears to be compiled each time, which is
-- super slow
-- NOTE: see https://github.com/haskell-hvr/regex-tdfa/issues/9 - performance
-- is likely not going to be optimal for text
matchMaybe :: RegexContext Regex query b => query -> T.Text -> EitherErr b matchMaybe :: RegexContext Regex query b => query -> T.Text -> EitherErr b
matchMaybe q re = first (const $ RegexError re) $ pureTry $ q =~ re matchMaybe q re = first (const $ RegexError re) $ pureTry $ q =~ re

View File

@ -20,7 +20,7 @@
# this resolver has persistent < version 2.13.3.4 which introduced a nasty # this resolver has persistent < version 2.13.3.4 which introduced a nasty
# name shadow bug # name shadow bug
resolver: lts-19.2 resolver: lts-20.9
# User packages to be built. # User packages to be built.
# Various formats can be used as shown in the example below. # Various formats can be used as shown in the example below.
@ -40,6 +40,7 @@ resolver: lts-19.2
extra-deps: extra-deps:
# this version isn't on the LTS yet and has an instance for DayOfWeek # this version isn't on the LTS yet and has an instance for DayOfWeek
- dhall-1.41.2@sha256:556edac8997a5fcf451c9bbb151b1f04996318019799724cc71cc03a9a9122be,16281 - dhall-1.41.2@sha256:556edac8997a5fcf451c9bbb151b1f04996318019799724cc71cc03a9a9122be,16281
- persistent-2.13.3.3@sha256:49dd5f7dc7bbd62390d95b749df29971ce84e410e1db58bceaef5a175366e840,6762
# - git: https://github.com/commercialhaskell/stack.git # - git: https://github.com/commercialhaskell/stack.git
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# #