ENH precompile regular expressions

This commit is contained in:
Nathan Dwarshuis 2023-02-01 23:02:07 -05:00
parent b50f16044f
commit 2af7fed148
3 changed files with 87 additions and 35 deletions

View File

@ -26,8 +26,13 @@ import qualified RIO.Vector as V
readImport :: MonadUnliftIO m => Import -> MappingT m (EitherErrs [BalTx])
readImport Import {..} = do
res <- mapM (readImport_ impSkipLines impDelim impTxOpts) impPaths
return $ (matchRecords impMatches . L.sort . concat) =<< concatEitherL res
let cres = concatEithersL $ compileMatch <$> impMatches
ires <- mapM (readImport_ impSkipLines impDelim impTxOpts) impPaths
let res = concatEithers2 cres (concatEitherL ires) (,)
case res of
Left es -> return $ Left es
Right (compiled, records) ->
return $ matchRecords compiled $ L.sort $ concat records
readImport_
:: MonadUnliftIO m
@ -60,7 +65,7 @@ parseTxRecord p TxOpts {..} r = do
d' <- parseTimeM True defaultTimeLocale (T.unpack toDateFmt) d
return $ Just $ TxRecord d' a e os p
matchRecords :: [Match] -> [TxRecord] -> EitherErrs [BalTx]
matchRecords :: [MatchRe] -> [TxRecord] -> EitherErrs [BalTx]
matchRecords ms rs = do
(matched, unmatched, notfound) <- matchAll (matchPriorities ms) rs
case (matched, unmatched, notfound) of
@ -70,13 +75,13 @@ matchRecords ms rs = do
Right matched_
(_, us, ns) -> Left [StatementError us ns]
matchPriorities :: [Match] -> [MatchGroup]
matchPriorities :: [MatchRe] -> [MatchGroup]
matchPriorities =
fmap matchToGroup
. L.groupBy (\a b -> mPriority a == mPriority b)
. L.sortOn (Down . mPriority)
matchToGroup :: [Match] -> MatchGroup
matchToGroup :: [MatchRe] -> MatchGroup
matchToGroup ms =
uncurry MatchGroup $
first (L.sortOn mDate) $
@ -84,8 +89,8 @@ matchToGroup ms =
-- TDOO could use a better struct to flatten the maybe date subtype
data MatchGroup = MatchGroup
{ mgDate :: ![Match]
, mgNoDate :: ![Match]
{ mgDate :: ![MatchRe]
, mgNoDate :: ![MatchRe]
}
deriving (Show)
@ -122,7 +127,7 @@ zipperSlice f x = go
EQ -> goEq $ Unzipped bs (a : cs) as
LT -> z
zipperMatch :: Unzipped Match -> TxRecord -> EitherErrs (Zipped Match, MatchRes RawTx)
zipperMatch :: Unzipped MatchRe -> TxRecord -> EitherErrs (Zipped MatchRe, MatchRes RawTx)
zipperMatch (Unzipped bs cs as) x = go [] cs
where
go _ [] = Right (Zipped bs $ cs ++ as, MatchFail)
@ -135,7 +140,7 @@ zipperMatch (Unzipped bs cs as) x = go [] cs
ms' = maybe ms (: ms) (matchDec m)
in Right (Zipped bs $ ps ++ ms' ++ as, skipOrPass)
zipperMatch' :: Zipped Match -> TxRecord -> EitherErrs (Zipped Match, MatchRes RawTx)
zipperMatch' :: Zipped MatchRe -> TxRecord -> EitherErrs (Zipped MatchRe, MatchRes RawTx)
zipperMatch' z x = go z
where
go (Zipped bs (a : as)) = do
@ -145,13 +150,13 @@ zipperMatch' z x = go z
skipOrPass -> Right (Zipped (maybe bs (: bs) $ matchDec a) as, skipOrPass)
go z' = Right (z', MatchFail)
matchDec :: Match -> Maybe Match
matchDec :: MatchRe -> Maybe MatchRe
matchDec m = case mTimes m of
Just 1 -> Nothing
Just n -> Just $ m {mTimes = Just $ n - 1}
Nothing -> Just m
matchAll :: [MatchGroup] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [Match])
matchAll :: [MatchGroup] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [MatchRe])
matchAll = go ([], [])
where
go (matched, unused) gs rs = case (gs, rs) of
@ -161,13 +166,13 @@ matchAll = go ([], [])
(ts, unmatched, us) <- matchGroup g rs
go (ts ++ matched, us ++ unused) gs' unmatched
matchGroup :: MatchGroup -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [Match])
matchGroup :: MatchGroup -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [MatchRe])
matchGroup MatchGroup {mgDate = ds, mgNoDate = ns} rs = do
(md, rest, ud) <- matchDates ds rs
(mn, unmatched, un) <- matchNonDates ns rest
return (md ++ mn, unmatched, filter ((/= Nothing) . mTimes) $ ud ++ un)
matchDates :: [Match] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [Match])
matchDates :: [MatchRe] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [MatchRe])
matchDates ms = go ([], [], initZipper ms)
where
go (matched, unmatched, z) [] =
@ -188,7 +193,7 @@ matchDates ms = go ([], [], initZipper ms)
go (m, u, z') rs
findDate m r = maybe EQ (`compareDate` trDate r) $ mDate m
matchNonDates :: [Match] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [Match])
matchNonDates :: [MatchRe] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [MatchRe])
matchNonDates ms = go ([], [], initZipper ms)
where
go (matched, unmatched, z) [] =

View File

@ -18,7 +18,7 @@ module Internal.Types where
import Data.Fix (Fix (..), foldFix)
import Data.Functor.Foldable (embed)
import qualified Data.Functor.Foldable.TH as TH
import Database.Persist.Sql hiding (In, Statement)
import Database.Persist.Sql hiding (Desc, In, Statement)
import Dhall hiding (embed, maybe)
import Dhall.TH
import Language.Haskell.TH.Syntax (Lift)
@ -26,6 +26,7 @@ import RIO
import qualified RIO.Map as M
import qualified RIO.Text as T
import RIO.Time
import Text.Regex.TDFA
-------------------------------------------------------------------------------
-- DHALL CONFIG
@ -340,7 +341,7 @@ instance FromDhall ExpTx
data Import = Import
{ impPaths :: ![FilePath]
, impMatches :: ![Match]
, impMatches :: ![Match T.Text]
, impDelim :: !Word
, impTxOpts :: !TxOpts
, impSkipLines :: !Natural
@ -411,14 +412,19 @@ data Field k v = Field
{ fKey :: !k
, fVal :: !v
}
deriving (Show, Eq, Hashable, Generic, FromDhall)
deriving (Show, Eq, Hashable, Generic, FromDhall, Foldable, Traversable)
instance Functor (Field f) where
fmap f (Field k v) = Field k $ f v
type FieldMap k v = Field k (M.Map k v)
data MatchOther
= Desc !(Field T.Text T.Text)
data MatchOther re
= Desc !(Field T.Text re)
| Val !(Field T.Text MatchVal)
deriving (Show, Eq, Hashable, Generic, FromDhall)
deriving (Eq, Hashable, Generic, FromDhall, Functor, Foldable, Traversable)
deriving instance Show (MatchOther T.Text)
data ToTx = ToTx
{ ttCurrency :: !SplitCur
@ -427,16 +433,18 @@ data ToTx = ToTx
}
deriving (Eq, Generic, Hashable, Show, FromDhall)
data Match = Match
data Match re = Match
{ mDate :: !(Maybe MatchDate)
, mVal :: !MatchVal
, mDesc :: !(Maybe Text)
, mOther :: ![MatchOther]
, mDesc :: !(Maybe re)
, mOther :: ![MatchOther re]
, mTx :: !(Maybe ToTx)
, mTimes :: !(Maybe Natural)
, mPriority :: !Integer
}
deriving (Eq, Generic, Hashable, Show, FromDhall)
deriving (Eq, Generic, Hashable, FromDhall, Functor)
deriving instance Show (Match T.Text)
deriving instance Eq TxOpts
@ -575,7 +583,7 @@ data InsertError
| LookupError !LookupSuberr !T.Text
| BalanceError !BalanceType !CurID ![RawSplit]
| IncomeError !DatePat
| StatementError ![TxRecord] ![Match]
| StatementError ![TxRecord] ![MatchRe]
deriving (Show)
newtype InsertException = InsertException [InsertError] deriving (Show)
@ -592,3 +600,10 @@ data XGregorian = XGregorian
, xgDay :: !Int
, xgDayOfWeek :: !Int
}
type MatchRe = Match (T.Text, Regex)
type MatchOtherRe = MatchOther (T.Text, Regex)
instance Show (Match (T.Text, Regex)) where
show = show . fmap fst

View File

@ -30,6 +30,8 @@ module Internal.Utils
, gregorians
, uncurry3
, xGregToDay
, plural
, compileMatch
)
where
@ -42,6 +44,7 @@ import qualified RIO.Map as M
import qualified RIO.Text as T
import RIO.Time
import Text.Regex.TDFA
import Text.Regex.TDFA.Text
--------------------------------------------------------------------------------
-- dates
@ -143,7 +146,7 @@ resolveBounds (s, e) = do
--------------------------------------------------------------------------------
-- matching
matches :: Match -> TxRecord -> EitherErrs (MatchRes RawTx)
matches :: MatchRe -> TxRecord -> EitherErrs (MatchRes RawTx)
matches Match {..} r@TxRecord {..} = do
res <- concatEither3 val other desc $ \x y z -> x && y && z
if date && res
@ -153,7 +156,7 @@ matches Match {..} r@TxRecord {..} = do
val = valMatches mVal trAmount
date = maybe True (`dateMatches` trDate) mDate
other = foldM (\a o -> (a &&) <$> otherMatches trOther o) True mOther
desc = maybe (return True) (matchMaybe trDesc) mDesc
desc = maybe (return True) (matchMaybe trDesc . snd) mDesc
convert (ToTx cur a ss) = toTx cur a ss r
toTx :: SplitCur -> SplitAcnt -> [ExpSplit] -> TxRecord -> EitherErrs RawTx
@ -193,10 +196,10 @@ valMatches MatchVal {..} x
dateMatches :: MatchDate -> Day -> Bool
dateMatches md = (EQ ==) . compareDate md
otherMatches :: M.Map T.Text T.Text -> MatchOther -> EitherErr Bool
otherMatches :: M.Map T.Text T.Text -> MatchOtherRe -> EitherErr Bool
otherMatches dict m = case m of
Val (Field n mv) -> valMatches mv =<< (readRational =<< lookup_ MatchNumeric n)
Desc (Field n md) -> (`matchMaybe` md) =<< lookup_ MatchText n
Desc (Field n (_, md)) -> (`matchMaybe` md) =<< lookup_ MatchText n
where
lookup_ t n = lookupErr (MatchField t) n dict
@ -360,14 +363,14 @@ showTx TxRecord {trDate = d, trAmount = v, trDesc = e, trFile = f} =
, ("description", doubleQuote e)
]
showMatch :: Match -> T.Text
showMatch :: MatchRe -> T.Text
showMatch Match {mDate = d, mVal = v, mDesc = e, mOther = o, mTimes = n, mPriority = p} =
T.append "Unused match: " $ keyVals [(x, y) | (x, Just y) <- kvs]
where
kvs =
[ ("date", showMatchDate <$> d)
, ("val", showMatchVal v)
, ("desc", e)
, ("desc", fst <$> e)
, ("other", others)
, ("counter", Just $ maybe "Inf" showT n)
, ("priority", Just $ showT p)
@ -421,8 +424,8 @@ showMatchVal MatchVal {..} = Just $ singleQuote $ keyVals [(k, v) | (k, Just v)
, ("precision", Just $ showT mvPrec)
]
showMatchOther :: MatchOther -> T.Text
showMatchOther (Desc (Field f re)) =
showMatchOther :: MatchOtherRe -> T.Text
showMatchOther (Desc (Field f (re, _))) =
T.unwords ["desc field", singleQuote f, "with re", singleQuote re]
showMatchOther (Val (Field f mv)) =
T.unwords
@ -534,5 +537,34 @@ uncurry3 f (a, b, c) = f a b c
-- super slow
-- NOTE: see https://github.com/haskell-hvr/regex-tdfa/issues/9 - performance
-- is likely not going to be optimal for text
matchMaybe :: RegexContext Regex query b => query -> T.Text -> EitherErr b
matchMaybe q re = first (const $ RegexError re) $ pureTry $ q =~ re
-- matchMaybe :: T.Text -> T.Text -> EitherErr Bool
-- matchMaybe q pat = case compres of
-- Right re -> case execute re q of
-- Right res -> Right $ isJust res
-- Left _ -> Left $ RegexError "this should not happen"
-- Left _ -> Left $ RegexError pat
-- where
-- -- these options barely do anything in terms of performance
-- compres = compile (blankCompOpt {newSyntax = True}) (blankExecOpt {captureGroups = False}) pat
compileMatch :: Match T.Text -> EitherErrs MatchRe
compileMatch m@Match {mDesc = d, mOther = os} = do
let dres = plural $ mapM compileRegex d
let ores = concatEitherL $ fmap (mapM compileRegex) os
concatEithers2 dres ores $ \d_ os_ -> m {mDesc = d_, mOther = os_}
compileRegex :: T.Text -> EitherErr (Text, Regex)
compileRegex pat = case res of
Right re -> Right (pat, re)
Left _ -> Left $ RegexError pat
where
res =
compile
(blankCompOpt {newSyntax = True})
(blankExecOpt {captureGroups = False})
pat
matchMaybe :: T.Text -> Regex -> EitherErr Bool
matchMaybe q re = case execute re q of
Right res -> Right $ isJust res
Left _ -> Left $ RegexError "this should not happen"