ENH precompile regular expressions

This commit is contained in:
Nathan Dwarshuis 2023-02-01 23:02:07 -05:00
parent b50f16044f
commit 2af7fed148
3 changed files with 87 additions and 35 deletions

View File

@ -26,8 +26,13 @@ import qualified RIO.Vector as V
readImport :: MonadUnliftIO m => Import -> MappingT m (EitherErrs [BalTx]) readImport :: MonadUnliftIO m => Import -> MappingT m (EitherErrs [BalTx])
readImport Import {..} = do readImport Import {..} = do
res <- mapM (readImport_ impSkipLines impDelim impTxOpts) impPaths let cres = concatEithersL $ compileMatch <$> impMatches
return $ (matchRecords impMatches . L.sort . concat) =<< concatEitherL res ires <- mapM (readImport_ impSkipLines impDelim impTxOpts) impPaths
let res = concatEithers2 cres (concatEitherL ires) (,)
case res of
Left es -> return $ Left es
Right (compiled, records) ->
return $ matchRecords compiled $ L.sort $ concat records
readImport_ readImport_
:: MonadUnliftIO m :: MonadUnliftIO m
@ -60,7 +65,7 @@ parseTxRecord p TxOpts {..} r = do
d' <- parseTimeM True defaultTimeLocale (T.unpack toDateFmt) d d' <- parseTimeM True defaultTimeLocale (T.unpack toDateFmt) d
return $ Just $ TxRecord d' a e os p return $ Just $ TxRecord d' a e os p
matchRecords :: [Match] -> [TxRecord] -> EitherErrs [BalTx] matchRecords :: [MatchRe] -> [TxRecord] -> EitherErrs [BalTx]
matchRecords ms rs = do matchRecords ms rs = do
(matched, unmatched, notfound) <- matchAll (matchPriorities ms) rs (matched, unmatched, notfound) <- matchAll (matchPriorities ms) rs
case (matched, unmatched, notfound) of case (matched, unmatched, notfound) of
@ -70,13 +75,13 @@ matchRecords ms rs = do
Right matched_ Right matched_
(_, us, ns) -> Left [StatementError us ns] (_, us, ns) -> Left [StatementError us ns]
matchPriorities :: [Match] -> [MatchGroup] matchPriorities :: [MatchRe] -> [MatchGroup]
matchPriorities = matchPriorities =
fmap matchToGroup fmap matchToGroup
. L.groupBy (\a b -> mPriority a == mPriority b) . L.groupBy (\a b -> mPriority a == mPriority b)
. L.sortOn (Down . mPriority) . L.sortOn (Down . mPriority)
matchToGroup :: [Match] -> MatchGroup matchToGroup :: [MatchRe] -> MatchGroup
matchToGroup ms = matchToGroup ms =
uncurry MatchGroup $ uncurry MatchGroup $
first (L.sortOn mDate) $ first (L.sortOn mDate) $
@ -84,8 +89,8 @@ matchToGroup ms =
-- TDOO could use a better struct to flatten the maybe date subtype -- TDOO could use a better struct to flatten the maybe date subtype
data MatchGroup = MatchGroup data MatchGroup = MatchGroup
{ mgDate :: ![Match] { mgDate :: ![MatchRe]
, mgNoDate :: ![Match] , mgNoDate :: ![MatchRe]
} }
deriving (Show) deriving (Show)
@ -122,7 +127,7 @@ zipperSlice f x = go
EQ -> goEq $ Unzipped bs (a : cs) as EQ -> goEq $ Unzipped bs (a : cs) as
LT -> z LT -> z
zipperMatch :: Unzipped Match -> TxRecord -> EitherErrs (Zipped Match, MatchRes RawTx) zipperMatch :: Unzipped MatchRe -> TxRecord -> EitherErrs (Zipped MatchRe, MatchRes RawTx)
zipperMatch (Unzipped bs cs as) x = go [] cs zipperMatch (Unzipped bs cs as) x = go [] cs
where where
go _ [] = Right (Zipped bs $ cs ++ as, MatchFail) go _ [] = Right (Zipped bs $ cs ++ as, MatchFail)
@ -135,7 +140,7 @@ zipperMatch (Unzipped bs cs as) x = go [] cs
ms' = maybe ms (: ms) (matchDec m) ms' = maybe ms (: ms) (matchDec m)
in Right (Zipped bs $ ps ++ ms' ++ as, skipOrPass) in Right (Zipped bs $ ps ++ ms' ++ as, skipOrPass)
zipperMatch' :: Zipped Match -> TxRecord -> EitherErrs (Zipped Match, MatchRes RawTx) zipperMatch' :: Zipped MatchRe -> TxRecord -> EitherErrs (Zipped MatchRe, MatchRes RawTx)
zipperMatch' z x = go z zipperMatch' z x = go z
where where
go (Zipped bs (a : as)) = do go (Zipped bs (a : as)) = do
@ -145,13 +150,13 @@ zipperMatch' z x = go z
skipOrPass -> Right (Zipped (maybe bs (: bs) $ matchDec a) as, skipOrPass) skipOrPass -> Right (Zipped (maybe bs (: bs) $ matchDec a) as, skipOrPass)
go z' = Right (z', MatchFail) go z' = Right (z', MatchFail)
matchDec :: Match -> Maybe Match matchDec :: MatchRe -> Maybe MatchRe
matchDec m = case mTimes m of matchDec m = case mTimes m of
Just 1 -> Nothing Just 1 -> Nothing
Just n -> Just $ m {mTimes = Just $ n - 1} Just n -> Just $ m {mTimes = Just $ n - 1}
Nothing -> Just m Nothing -> Just m
matchAll :: [MatchGroup] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [Match]) matchAll :: [MatchGroup] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [MatchRe])
matchAll = go ([], []) matchAll = go ([], [])
where where
go (matched, unused) gs rs = case (gs, rs) of go (matched, unused) gs rs = case (gs, rs) of
@ -161,13 +166,13 @@ matchAll = go ([], [])
(ts, unmatched, us) <- matchGroup g rs (ts, unmatched, us) <- matchGroup g rs
go (ts ++ matched, us ++ unused) gs' unmatched go (ts ++ matched, us ++ unused) gs' unmatched
matchGroup :: MatchGroup -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [Match]) matchGroup :: MatchGroup -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [MatchRe])
matchGroup MatchGroup {mgDate = ds, mgNoDate = ns} rs = do matchGroup MatchGroup {mgDate = ds, mgNoDate = ns} rs = do
(md, rest, ud) <- matchDates ds rs (md, rest, ud) <- matchDates ds rs
(mn, unmatched, un) <- matchNonDates ns rest (mn, unmatched, un) <- matchNonDates ns rest
return (md ++ mn, unmatched, filter ((/= Nothing) . mTimes) $ ud ++ un) return (md ++ mn, unmatched, filter ((/= Nothing) . mTimes) $ ud ++ un)
matchDates :: [Match] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [Match]) matchDates :: [MatchRe] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [MatchRe])
matchDates ms = go ([], [], initZipper ms) matchDates ms = go ([], [], initZipper ms)
where where
go (matched, unmatched, z) [] = go (matched, unmatched, z) [] =
@ -188,7 +193,7 @@ matchDates ms = go ([], [], initZipper ms)
go (m, u, z') rs go (m, u, z') rs
findDate m r = maybe EQ (`compareDate` trDate r) $ mDate m findDate m r = maybe EQ (`compareDate` trDate r) $ mDate m
matchNonDates :: [Match] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [Match]) matchNonDates :: [MatchRe] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [MatchRe])
matchNonDates ms = go ([], [], initZipper ms) matchNonDates ms = go ([], [], initZipper ms)
where where
go (matched, unmatched, z) [] = go (matched, unmatched, z) [] =

View File

@ -18,7 +18,7 @@ module Internal.Types where
import Data.Fix (Fix (..), foldFix) import Data.Fix (Fix (..), foldFix)
import Data.Functor.Foldable (embed) import Data.Functor.Foldable (embed)
import qualified Data.Functor.Foldable.TH as TH import qualified Data.Functor.Foldable.TH as TH
import Database.Persist.Sql hiding (In, Statement) import Database.Persist.Sql hiding (Desc, In, Statement)
import Dhall hiding (embed, maybe) import Dhall hiding (embed, maybe)
import Dhall.TH import Dhall.TH
import Language.Haskell.TH.Syntax (Lift) import Language.Haskell.TH.Syntax (Lift)
@ -26,6 +26,7 @@ import RIO
import qualified RIO.Map as M import qualified RIO.Map as M
import qualified RIO.Text as T import qualified RIO.Text as T
import RIO.Time import RIO.Time
import Text.Regex.TDFA
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
-- DHALL CONFIG -- DHALL CONFIG
@ -340,7 +341,7 @@ instance FromDhall ExpTx
data Import = Import data Import = Import
{ impPaths :: ![FilePath] { impPaths :: ![FilePath]
, impMatches :: ![Match] , impMatches :: ![Match T.Text]
, impDelim :: !Word , impDelim :: !Word
, impTxOpts :: !TxOpts , impTxOpts :: !TxOpts
, impSkipLines :: !Natural , impSkipLines :: !Natural
@ -411,14 +412,19 @@ data Field k v = Field
{ fKey :: !k { fKey :: !k
, fVal :: !v , fVal :: !v
} }
deriving (Show, Eq, Hashable, Generic, FromDhall) deriving (Show, Eq, Hashable, Generic, FromDhall, Foldable, Traversable)
instance Functor (Field f) where
fmap f (Field k v) = Field k $ f v
type FieldMap k v = Field k (M.Map k v) type FieldMap k v = Field k (M.Map k v)
data MatchOther data MatchOther re
= Desc !(Field T.Text T.Text) = Desc !(Field T.Text re)
| Val !(Field T.Text MatchVal) | Val !(Field T.Text MatchVal)
deriving (Show, Eq, Hashable, Generic, FromDhall) deriving (Eq, Hashable, Generic, FromDhall, Functor, Foldable, Traversable)
deriving instance Show (MatchOther T.Text)
data ToTx = ToTx data ToTx = ToTx
{ ttCurrency :: !SplitCur { ttCurrency :: !SplitCur
@ -427,16 +433,18 @@ data ToTx = ToTx
} }
deriving (Eq, Generic, Hashable, Show, FromDhall) deriving (Eq, Generic, Hashable, Show, FromDhall)
data Match = Match data Match re = Match
{ mDate :: !(Maybe MatchDate) { mDate :: !(Maybe MatchDate)
, mVal :: !MatchVal , mVal :: !MatchVal
, mDesc :: !(Maybe Text) , mDesc :: !(Maybe re)
, mOther :: ![MatchOther] , mOther :: ![MatchOther re]
, mTx :: !(Maybe ToTx) , mTx :: !(Maybe ToTx)
, mTimes :: !(Maybe Natural) , mTimes :: !(Maybe Natural)
, mPriority :: !Integer , mPriority :: !Integer
} }
deriving (Eq, Generic, Hashable, Show, FromDhall) deriving (Eq, Generic, Hashable, FromDhall, Functor)
deriving instance Show (Match T.Text)
deriving instance Eq TxOpts deriving instance Eq TxOpts
@ -575,7 +583,7 @@ data InsertError
| LookupError !LookupSuberr !T.Text | LookupError !LookupSuberr !T.Text
| BalanceError !BalanceType !CurID ![RawSplit] | BalanceError !BalanceType !CurID ![RawSplit]
| IncomeError !DatePat | IncomeError !DatePat
| StatementError ![TxRecord] ![Match] | StatementError ![TxRecord] ![MatchRe]
deriving (Show) deriving (Show)
newtype InsertException = InsertException [InsertError] deriving (Show) newtype InsertException = InsertException [InsertError] deriving (Show)
@ -592,3 +600,10 @@ data XGregorian = XGregorian
, xgDay :: !Int , xgDay :: !Int
, xgDayOfWeek :: !Int , xgDayOfWeek :: !Int
} }
type MatchRe = Match (T.Text, Regex)
type MatchOtherRe = MatchOther (T.Text, Regex)
instance Show (Match (T.Text, Regex)) where
show = show . fmap fst

View File

@ -30,6 +30,8 @@ module Internal.Utils
, gregorians , gregorians
, uncurry3 , uncurry3
, xGregToDay , xGregToDay
, plural
, compileMatch
) )
where where
@ -42,6 +44,7 @@ import qualified RIO.Map as M
import qualified RIO.Text as T import qualified RIO.Text as T
import RIO.Time import RIO.Time
import Text.Regex.TDFA import Text.Regex.TDFA
import Text.Regex.TDFA.Text
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- dates -- dates
@ -143,7 +146,7 @@ resolveBounds (s, e) = do
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- matching -- matching
matches :: Match -> TxRecord -> EitherErrs (MatchRes RawTx) matches :: MatchRe -> TxRecord -> EitherErrs (MatchRes RawTx)
matches Match {..} r@TxRecord {..} = do matches Match {..} r@TxRecord {..} = do
res <- concatEither3 val other desc $ \x y z -> x && y && z res <- concatEither3 val other desc $ \x y z -> x && y && z
if date && res if date && res
@ -153,7 +156,7 @@ matches Match {..} r@TxRecord {..} = do
val = valMatches mVal trAmount val = valMatches mVal trAmount
date = maybe True (`dateMatches` trDate) mDate date = maybe True (`dateMatches` trDate) mDate
other = foldM (\a o -> (a &&) <$> otherMatches trOther o) True mOther other = foldM (\a o -> (a &&) <$> otherMatches trOther o) True mOther
desc = maybe (return True) (matchMaybe trDesc) mDesc desc = maybe (return True) (matchMaybe trDesc . snd) mDesc
convert (ToTx cur a ss) = toTx cur a ss r convert (ToTx cur a ss) = toTx cur a ss r
toTx :: SplitCur -> SplitAcnt -> [ExpSplit] -> TxRecord -> EitherErrs RawTx toTx :: SplitCur -> SplitAcnt -> [ExpSplit] -> TxRecord -> EitherErrs RawTx
@ -193,10 +196,10 @@ valMatches MatchVal {..} x
dateMatches :: MatchDate -> Day -> Bool dateMatches :: MatchDate -> Day -> Bool
dateMatches md = (EQ ==) . compareDate md dateMatches md = (EQ ==) . compareDate md
otherMatches :: M.Map T.Text T.Text -> MatchOther -> EitherErr Bool otherMatches :: M.Map T.Text T.Text -> MatchOtherRe -> EitherErr Bool
otherMatches dict m = case m of otherMatches dict m = case m of
Val (Field n mv) -> valMatches mv =<< (readRational =<< lookup_ MatchNumeric n) Val (Field n mv) -> valMatches mv =<< (readRational =<< lookup_ MatchNumeric n)
Desc (Field n md) -> (`matchMaybe` md) =<< lookup_ MatchText n Desc (Field n (_, md)) -> (`matchMaybe` md) =<< lookup_ MatchText n
where where
lookup_ t n = lookupErr (MatchField t) n dict lookup_ t n = lookupErr (MatchField t) n dict
@ -360,14 +363,14 @@ showTx TxRecord {trDate = d, trAmount = v, trDesc = e, trFile = f} =
, ("description", doubleQuote e) , ("description", doubleQuote e)
] ]
showMatch :: Match -> T.Text showMatch :: MatchRe -> T.Text
showMatch Match {mDate = d, mVal = v, mDesc = e, mOther = o, mTimes = n, mPriority = p} = showMatch Match {mDate = d, mVal = v, mDesc = e, mOther = o, mTimes = n, mPriority = p} =
T.append "Unused match: " $ keyVals [(x, y) | (x, Just y) <- kvs] T.append "Unused match: " $ keyVals [(x, y) | (x, Just y) <- kvs]
where where
kvs = kvs =
[ ("date", showMatchDate <$> d) [ ("date", showMatchDate <$> d)
, ("val", showMatchVal v) , ("val", showMatchVal v)
, ("desc", e) , ("desc", fst <$> e)
, ("other", others) , ("other", others)
, ("counter", Just $ maybe "Inf" showT n) , ("counter", Just $ maybe "Inf" showT n)
, ("priority", Just $ showT p) , ("priority", Just $ showT p)
@ -421,8 +424,8 @@ showMatchVal MatchVal {..} = Just $ singleQuote $ keyVals [(k, v) | (k, Just v)
, ("precision", Just $ showT mvPrec) , ("precision", Just $ showT mvPrec)
] ]
showMatchOther :: MatchOther -> T.Text showMatchOther :: MatchOtherRe -> T.Text
showMatchOther (Desc (Field f re)) = showMatchOther (Desc (Field f (re, _))) =
T.unwords ["desc field", singleQuote f, "with re", singleQuote re] T.unwords ["desc field", singleQuote f, "with re", singleQuote re]
showMatchOther (Val (Field f mv)) = showMatchOther (Val (Field f mv)) =
T.unwords T.unwords
@ -534,5 +537,34 @@ uncurry3 f (a, b, c) = f a b c
-- super slow -- super slow
-- NOTE: see https://github.com/haskell-hvr/regex-tdfa/issues/9 - performance -- NOTE: see https://github.com/haskell-hvr/regex-tdfa/issues/9 - performance
-- is likely not going to be optimal for text -- is likely not going to be optimal for text
matchMaybe :: RegexContext Regex query b => query -> T.Text -> EitherErr b -- matchMaybe :: T.Text -> T.Text -> EitherErr Bool
matchMaybe q re = first (const $ RegexError re) $ pureTry $ q =~ re -- matchMaybe q pat = case compres of
-- Right re -> case execute re q of
-- Right res -> Right $ isJust res
-- Left _ -> Left $ RegexError "this should not happen"
-- Left _ -> Left $ RegexError pat
-- where
-- -- these options barely do anything in terms of performance
-- compres = compile (blankCompOpt {newSyntax = True}) (blankExecOpt {captureGroups = False}) pat
compileMatch :: Match T.Text -> EitherErrs MatchRe
compileMatch m@Match {mDesc = d, mOther = os} = do
let dres = plural $ mapM compileRegex d
let ores = concatEitherL $ fmap (mapM compileRegex) os
concatEithers2 dres ores $ \d_ os_ -> m {mDesc = d_, mOther = os_}
compileRegex :: T.Text -> EitherErr (Text, Regex)
compileRegex pat = case res of
Right re -> Right (pat, re)
Left _ -> Left $ RegexError pat
where
res =
compile
(blankCompOpt {newSyntax = True})
(blankExecOpt {captureGroups = False})
pat
matchMaybe :: T.Text -> Regex -> EitherErr Bool
matchMaybe q re = case execute re q of
Right res -> Right $ isJust res
Left _ -> Left $ RegexError "this should not happen"