ENH use faster gregorian iterator (~2x speedup)

This commit is contained in:
Nathan Dwarshuis 2023-02-01 20:56:29 -05:00
parent 54342fbe74
commit b50f16044f
6 changed files with 104 additions and 25 deletions

View File

@ -84,6 +84,7 @@ data DBState = DBState
, kmStatementInterval :: !MaybeBounds
, kmNewCommits :: ![Int]
, kmConfigDir :: !FilePath
, kmBoundsCache :: !(MVar (M.Map (Bounds, DatePat) [Day]))
}
type MappingT m a = ReaderT DBState (SqlPersistT m) a

View File

@ -313,6 +313,7 @@ getDBState c = do
am <- updateAccounts $ accounts c
cm <- updateCurrencies $ currencies c
hs <- updateHashes c
v <- newMVar M.empty
-- TODO not sure how I feel about this, probably will change this struct alot
-- in the future so whatever...for now
return $ \f ->
@ -323,4 +324,5 @@ getDBState c = do
, kmStatementInterval = intervalMaybeBounds $ statementInterval $ global c
, kmNewCommits = hs
, kmConfigDir = f
, kmBoundsCache = v
}

View File

@ -17,14 +17,30 @@ import Internal.Statement
import Internal.Types hiding (sign)
import Internal.Utils
import RIO hiding (to)
-- import qualified RIO.Map as M
import qualified RIO.Text as T
import RIO.Time
--------------------------------------------------------------------------------
-- intervals
-- expandDatePat :: MonadUnliftIO m => Bounds -> DatePat -> MappingT m [Day]
-- expandDatePat d p = do
-- -- TODO crude memoization
-- v <- asks kmBoundsCache
-- modifyMVar v $ \m -> case M.lookup (d, p) m of
-- Just ds -> return (m, ds)
-- Nothing -> do
-- let res = expandDatePat_ d p
-- return (M.insert (d, p) res m, res)
expandDatePat :: Bounds -> DatePat -> [Day]
expandDatePat (a, b) (Cron cp) = filter (cronPatternMatches cp) [a .. b]
-- TODO what happens when I only have one date that matches? total waste...
expandDatePat (a, b) (Cron cp) =
fmap xGregToDay $
filter (cronPatternMatches cp) $
take (fromIntegral $ diffDays b a) $
gregorians a
expandDatePat i (Mod mp) = expandModPat mp i
expandModPat :: ModPat -> Bounds -> [Day]
@ -45,28 +61,26 @@ expandModPat
-- TODO this can be optimized to prevent filtering a bunch of dates for
-- one/a few cron patterns
cronPatternMatches :: CronPat -> Day -> Bool
cronPatternMatches CronPat {..} x =
yMaybe y cronYear
&& mdMaybe m cronMonth
&& mdMaybe d cronDay
&& wdMaybe (dayOfWeek_ x) cronWeekly
cronPatternMatches :: CronPat -> XGregorian -> Bool
cronPatternMatches CronPat {..} XGregorian {..} =
testYMD xgYear cronYear
&& testYMD xgMonth cronMonth
&& testYMD xgDay cronDay
&& testW (dayOfWeek_ xgDayOfWeek) cronWeekly
where
testMaybe = maybe True
yMaybe z = testMaybe (mdyPatternMatches (fromIntegral z))
mdMaybe z = testMaybe (mdyPatternMatches (fromIntegral z))
wdMaybe z = testMaybe (`weekdayPatternMatches` z)
(y, m, d) = toGregorian x
testYMD z = maybe True (mdyPatternMatches (fromIntegral z))
testW z = maybe True (`weekdayPatternMatches` z)
dayOfWeek_ :: Day -> Weekday
dayOfWeek_ d = case dayOfWeek d of
Monday -> Mon
Tuesday -> Tue
Wednesday -> Wed
Thursday -> Thu
Friday -> Fri
Saturday -> Sat
Sunday -> Sun
-- TODO could clean this up by making an enum instance for Weekday
dayOfWeek_ :: Int -> Weekday
dayOfWeek_ d = case d of
0 -> Sun
1 -> Mon
2 -> Tue
3 -> Wed
4 -> Thu
5 -> Fri
_ -> Sat
weekdayPatternMatches :: WeekdayPat -> Weekday -> Bool
weekdayPatternMatches (OnDay x) = (== x)
@ -86,7 +100,8 @@ withDates
-> MappingT m [a]
withDates dp f = do
bounds <- askBounds
mapM f (expandDatePat bounds dp)
let days = expandDatePat bounds dp
mapM f days
askBounds :: MonadUnliftIO m => MappingT m Bounds
askBounds = (liftIO . resolveBounds) =<< asks kmBudgetInterval
@ -273,7 +288,8 @@ insertManual
} = do
whenHash CTManual m [] $ \c -> do
bounds <- (liftIO . resolveBounds) =<< asks kmStatementInterval
res <- mapM tx $ expandDatePat bounds dp
let days = expandDatePat bounds dp
res <- mapM tx days
unlessLefts_ (concatEithersL res) $ lift . mapM_ (insertTx c)
where
tx day = txPair day from to u (dec2Rat v) e

View File

@ -147,18 +147,24 @@ type AcntID = T.Text
deriving instance Eq TimeUnit
deriving instance Ord TimeUnit
deriving instance Show TimeUnit
deriving instance Hashable TimeUnit
deriving instance Eq Weekday
deriving instance Ord Weekday
deriving instance Show Weekday
deriving instance Hashable Weekday
deriving instance Eq WeekdayPat
deriving instance Ord WeekdayPat
deriving instance Show WeekdayPat
deriving instance Hashable WeekdayPat
@ -167,12 +173,16 @@ deriving instance Show RepeatPat
deriving instance Eq RepeatPat
deriving instance Ord RepeatPat
deriving instance Hashable RepeatPat
deriving instance Show MDYPat
deriving instance Eq MDYPat
deriving instance Ord MDYPat
deriving instance Hashable MDYPat
deriving instance Eq Gregorian
@ -203,18 +213,24 @@ instance Ord GregorianM where
deriving instance Eq ModPat
deriving instance Ord ModPat
deriving instance Show ModPat
deriving instance Hashable ModPat
deriving instance Eq CronPat
deriving instance Ord CronPat
deriving instance Show CronPat
deriving instance Hashable CronPat
deriving instance Eq DatePat
deriving instance Ord DatePat
deriving instance Show DatePat
deriving instance Hashable DatePat
@ -296,6 +312,8 @@ data Statement
| StmtImport !Import
deriving (Generic, FromDhall)
deriving instance Eq Manual
deriving instance Hashable Manual
data Split a v c = Split
@ -327,7 +345,7 @@ data Import = Import
, impTxOpts :: !TxOpts
, impSkipLines :: !Natural
}
deriving (Hashable, Generic, FromDhall)
deriving (Eq, Hashable, Generic, FromDhall)
deriving instance Eq MatchVal
@ -567,3 +585,10 @@ instance Exception InsertException
type EitherErr = Either InsertError
type EitherErrs = Either [InsertError]
data XGregorian = XGregorian
{ xgYear :: !Int
, xgMonth :: !Int
, xgDay :: !Int
, xgDayOfWeek :: !Int
}

View File

@ -27,6 +27,9 @@ module Internal.Utils
, acntPath2Text
, showT
, lookupErr
, gregorians
, uncurry3
, xGregToDay
)
where
@ -43,6 +46,33 @@ import Text.Regex.TDFA
--------------------------------------------------------------------------------
-- dates
-- | find the next date
-- this is meant to go in a very tight loop and be very fast (hence no
-- complex date functions, most of which heavily use 'mod' and friends)
nextXGreg :: XGregorian -> XGregorian
nextXGreg XGregorian {xgYear = y, xgMonth = m, xgDay = d, xgDayOfWeek = w}
| m == 12 && d == 31 = XGregorian (y + 1) 1 1 w_
| (m == 2 && (not leap && d == 28 || (leap && d == 29)))
|| (m `elem` [4, 6, 9, 11] && d == 30)
|| (d == 31) =
XGregorian y (m + 1) 1 w_
| otherwise = XGregorian y m (d + 1) w_
where
-- don't use DayOfWeek from Data.Time since this uses mod (which uses a
-- division opcode) and thus will be slower than just checking for equality
-- and adding
w_ = if w == 6 then 0 else w + 1
leap = isLeapYear $ fromIntegral y
gregorians :: Day -> [XGregorian]
gregorians x = L.iterate nextXGreg $ XGregorian (fromIntegral y) m d w
where
(y, m, d) = toGregorian x
w = fromEnum $ dayOfWeek x
xGregToDay :: XGregorian -> Day
xGregToDay XGregorian {xgYear = y, xgMonth = m, xgDay = d} = fromGregorian (fromIntegral y) m d
gregTup :: Gregorian -> (Integer, Int, Int)
gregTup Gregorian {..} =
( fromIntegral gYear
@ -500,5 +530,9 @@ uncurry3 f (a, b, c) = f a b c
-- lpadT :: Char -> Int -> T.Text -> T.Text
-- lpadT c n s = T.append (T.replicate (n - T.length s) (T.singleton c)) s
-- TODO this regular expression appears to be compiled each time, which is
-- super slow
-- NOTE: see https://github.com/haskell-hvr/regex-tdfa/issues/9 - performance
-- is likely not going to be optimal for text
matchMaybe :: RegexContext Regex query b => query -> T.Text -> EitherErr b
matchMaybe q re = first (const $ RegexError re) $ pureTry $ q =~ re

View File

@ -20,7 +20,7 @@
# this resolver has persistent < version 2.13.3.4 which introduced a nasty
# name shadow bug
resolver: lts-19.2
resolver: lts-20.9
# User packages to be built.
# Various formats can be used as shown in the example below.
@ -40,6 +40,7 @@ resolver: lts-19.2
extra-deps:
# this version isn't on the LTS yet and has an instance for DayOfWeek
- dhall-1.41.2@sha256:556edac8997a5fcf451c9bbb151b1f04996318019799724cc71cc03a9a9122be,16281
- persistent-2.13.3.3@sha256:49dd5f7dc7bbd62390d95b749df29971ce84e410e1db58bceaef5a175366e840,6762
# - git: https://github.com/commercialhaskell/stack.git
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
#