ENH precompile regular expressions
This commit is contained in:
parent
b50f16044f
commit
2af7fed148
|
@ -26,8 +26,13 @@ import qualified RIO.Vector as V
|
|||
|
||||
readImport :: MonadUnliftIO m => Import -> MappingT m (EitherErrs [BalTx])
|
||||
readImport Import {..} = do
|
||||
res <- mapM (readImport_ impSkipLines impDelim impTxOpts) impPaths
|
||||
return $ (matchRecords impMatches . L.sort . concat) =<< concatEitherL res
|
||||
let cres = concatEithersL $ compileMatch <$> impMatches
|
||||
ires <- mapM (readImport_ impSkipLines impDelim impTxOpts) impPaths
|
||||
let res = concatEithers2 cres (concatEitherL ires) (,)
|
||||
case res of
|
||||
Left es -> return $ Left es
|
||||
Right (compiled, records) ->
|
||||
return $ matchRecords compiled $ L.sort $ concat records
|
||||
|
||||
readImport_
|
||||
:: MonadUnliftIO m
|
||||
|
@ -60,7 +65,7 @@ parseTxRecord p TxOpts {..} r = do
|
|||
d' <- parseTimeM True defaultTimeLocale (T.unpack toDateFmt) d
|
||||
return $ Just $ TxRecord d' a e os p
|
||||
|
||||
matchRecords :: [Match] -> [TxRecord] -> EitherErrs [BalTx]
|
||||
matchRecords :: [MatchRe] -> [TxRecord] -> EitherErrs [BalTx]
|
||||
matchRecords ms rs = do
|
||||
(matched, unmatched, notfound) <- matchAll (matchPriorities ms) rs
|
||||
case (matched, unmatched, notfound) of
|
||||
|
@ -70,13 +75,13 @@ matchRecords ms rs = do
|
|||
Right matched_
|
||||
(_, us, ns) -> Left [StatementError us ns]
|
||||
|
||||
matchPriorities :: [Match] -> [MatchGroup]
|
||||
matchPriorities :: [MatchRe] -> [MatchGroup]
|
||||
matchPriorities =
|
||||
fmap matchToGroup
|
||||
. L.groupBy (\a b -> mPriority a == mPriority b)
|
||||
. L.sortOn (Down . mPriority)
|
||||
|
||||
matchToGroup :: [Match] -> MatchGroup
|
||||
matchToGroup :: [MatchRe] -> MatchGroup
|
||||
matchToGroup ms =
|
||||
uncurry MatchGroup $
|
||||
first (L.sortOn mDate) $
|
||||
|
@ -84,8 +89,8 @@ matchToGroup ms =
|
|||
|
||||
-- TDOO could use a better struct to flatten the maybe date subtype
|
||||
data MatchGroup = MatchGroup
|
||||
{ mgDate :: ![Match]
|
||||
, mgNoDate :: ![Match]
|
||||
{ mgDate :: ![MatchRe]
|
||||
, mgNoDate :: ![MatchRe]
|
||||
}
|
||||
deriving (Show)
|
||||
|
||||
|
@ -122,7 +127,7 @@ zipperSlice f x = go
|
|||
EQ -> goEq $ Unzipped bs (a : cs) as
|
||||
LT -> z
|
||||
|
||||
zipperMatch :: Unzipped Match -> TxRecord -> EitherErrs (Zipped Match, MatchRes RawTx)
|
||||
zipperMatch :: Unzipped MatchRe -> TxRecord -> EitherErrs (Zipped MatchRe, MatchRes RawTx)
|
||||
zipperMatch (Unzipped bs cs as) x = go [] cs
|
||||
where
|
||||
go _ [] = Right (Zipped bs $ cs ++ as, MatchFail)
|
||||
|
@ -135,7 +140,7 @@ zipperMatch (Unzipped bs cs as) x = go [] cs
|
|||
ms' = maybe ms (: ms) (matchDec m)
|
||||
in Right (Zipped bs $ ps ++ ms' ++ as, skipOrPass)
|
||||
|
||||
zipperMatch' :: Zipped Match -> TxRecord -> EitherErrs (Zipped Match, MatchRes RawTx)
|
||||
zipperMatch' :: Zipped MatchRe -> TxRecord -> EitherErrs (Zipped MatchRe, MatchRes RawTx)
|
||||
zipperMatch' z x = go z
|
||||
where
|
||||
go (Zipped bs (a : as)) = do
|
||||
|
@ -145,13 +150,13 @@ zipperMatch' z x = go z
|
|||
skipOrPass -> Right (Zipped (maybe bs (: bs) $ matchDec a) as, skipOrPass)
|
||||
go z' = Right (z', MatchFail)
|
||||
|
||||
matchDec :: Match -> Maybe Match
|
||||
matchDec :: MatchRe -> Maybe MatchRe
|
||||
matchDec m = case mTimes m of
|
||||
Just 1 -> Nothing
|
||||
Just n -> Just $ m {mTimes = Just $ n - 1}
|
||||
Nothing -> Just m
|
||||
|
||||
matchAll :: [MatchGroup] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [Match])
|
||||
matchAll :: [MatchGroup] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [MatchRe])
|
||||
matchAll = go ([], [])
|
||||
where
|
||||
go (matched, unused) gs rs = case (gs, rs) of
|
||||
|
@ -161,13 +166,13 @@ matchAll = go ([], [])
|
|||
(ts, unmatched, us) <- matchGroup g rs
|
||||
go (ts ++ matched, us ++ unused) gs' unmatched
|
||||
|
||||
matchGroup :: MatchGroup -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [Match])
|
||||
matchGroup :: MatchGroup -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [MatchRe])
|
||||
matchGroup MatchGroup {mgDate = ds, mgNoDate = ns} rs = do
|
||||
(md, rest, ud) <- matchDates ds rs
|
||||
(mn, unmatched, un) <- matchNonDates ns rest
|
||||
return (md ++ mn, unmatched, filter ((/= Nothing) . mTimes) $ ud ++ un)
|
||||
|
||||
matchDates :: [Match] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [Match])
|
||||
matchDates :: [MatchRe] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [MatchRe])
|
||||
matchDates ms = go ([], [], initZipper ms)
|
||||
where
|
||||
go (matched, unmatched, z) [] =
|
||||
|
@ -188,7 +193,7 @@ matchDates ms = go ([], [], initZipper ms)
|
|||
go (m, u, z') rs
|
||||
findDate m r = maybe EQ (`compareDate` trDate r) $ mDate m
|
||||
|
||||
matchNonDates :: [Match] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [Match])
|
||||
matchNonDates :: [MatchRe] -> [TxRecord] -> EitherErrs ([RawTx], [TxRecord], [MatchRe])
|
||||
matchNonDates ms = go ([], [], initZipper ms)
|
||||
where
|
||||
go (matched, unmatched, z) [] =
|
||||
|
|
|
@ -18,7 +18,7 @@ module Internal.Types where
|
|||
import Data.Fix (Fix (..), foldFix)
|
||||
import Data.Functor.Foldable (embed)
|
||||
import qualified Data.Functor.Foldable.TH as TH
|
||||
import Database.Persist.Sql hiding (In, Statement)
|
||||
import Database.Persist.Sql hiding (Desc, In, Statement)
|
||||
import Dhall hiding (embed, maybe)
|
||||
import Dhall.TH
|
||||
import Language.Haskell.TH.Syntax (Lift)
|
||||
|
@ -26,6 +26,7 @@ import RIO
|
|||
import qualified RIO.Map as M
|
||||
import qualified RIO.Text as T
|
||||
import RIO.Time
|
||||
import Text.Regex.TDFA
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
-- DHALL CONFIG
|
||||
|
@ -340,7 +341,7 @@ instance FromDhall ExpTx
|
|||
|
||||
data Import = Import
|
||||
{ impPaths :: ![FilePath]
|
||||
, impMatches :: ![Match]
|
||||
, impMatches :: ![Match T.Text]
|
||||
, impDelim :: !Word
|
||||
, impTxOpts :: !TxOpts
|
||||
, impSkipLines :: !Natural
|
||||
|
@ -411,14 +412,19 @@ data Field k v = Field
|
|||
{ fKey :: !k
|
||||
, fVal :: !v
|
||||
}
|
||||
deriving (Show, Eq, Hashable, Generic, FromDhall)
|
||||
deriving (Show, Eq, Hashable, Generic, FromDhall, Foldable, Traversable)
|
||||
|
||||
instance Functor (Field f) where
|
||||
fmap f (Field k v) = Field k $ f v
|
||||
|
||||
type FieldMap k v = Field k (M.Map k v)
|
||||
|
||||
data MatchOther
|
||||
= Desc !(Field T.Text T.Text)
|
||||
data MatchOther re
|
||||
= Desc !(Field T.Text re)
|
||||
| Val !(Field T.Text MatchVal)
|
||||
deriving (Show, Eq, Hashable, Generic, FromDhall)
|
||||
deriving (Eq, Hashable, Generic, FromDhall, Functor, Foldable, Traversable)
|
||||
|
||||
deriving instance Show (MatchOther T.Text)
|
||||
|
||||
data ToTx = ToTx
|
||||
{ ttCurrency :: !SplitCur
|
||||
|
@ -427,16 +433,18 @@ data ToTx = ToTx
|
|||
}
|
||||
deriving (Eq, Generic, Hashable, Show, FromDhall)
|
||||
|
||||
data Match = Match
|
||||
data Match re = Match
|
||||
{ mDate :: !(Maybe MatchDate)
|
||||
, mVal :: !MatchVal
|
||||
, mDesc :: !(Maybe Text)
|
||||
, mOther :: ![MatchOther]
|
||||
, mDesc :: !(Maybe re)
|
||||
, mOther :: ![MatchOther re]
|
||||
, mTx :: !(Maybe ToTx)
|
||||
, mTimes :: !(Maybe Natural)
|
||||
, mPriority :: !Integer
|
||||
}
|
||||
deriving (Eq, Generic, Hashable, Show, FromDhall)
|
||||
deriving (Eq, Generic, Hashable, FromDhall, Functor)
|
||||
|
||||
deriving instance Show (Match T.Text)
|
||||
|
||||
deriving instance Eq TxOpts
|
||||
|
||||
|
@ -575,7 +583,7 @@ data InsertError
|
|||
| LookupError !LookupSuberr !T.Text
|
||||
| BalanceError !BalanceType !CurID ![RawSplit]
|
||||
| IncomeError !DatePat
|
||||
| StatementError ![TxRecord] ![Match]
|
||||
| StatementError ![TxRecord] ![MatchRe]
|
||||
deriving (Show)
|
||||
|
||||
newtype InsertException = InsertException [InsertError] deriving (Show)
|
||||
|
@ -592,3 +600,10 @@ data XGregorian = XGregorian
|
|||
, xgDay :: !Int
|
||||
, xgDayOfWeek :: !Int
|
||||
}
|
||||
|
||||
type MatchRe = Match (T.Text, Regex)
|
||||
|
||||
type MatchOtherRe = MatchOther (T.Text, Regex)
|
||||
|
||||
instance Show (Match (T.Text, Regex)) where
|
||||
show = show . fmap fst
|
||||
|
|
|
@ -30,6 +30,8 @@ module Internal.Utils
|
|||
, gregorians
|
||||
, uncurry3
|
||||
, xGregToDay
|
||||
, plural
|
||||
, compileMatch
|
||||
)
|
||||
where
|
||||
|
||||
|
@ -42,6 +44,7 @@ import qualified RIO.Map as M
|
|||
import qualified RIO.Text as T
|
||||
import RIO.Time
|
||||
import Text.Regex.TDFA
|
||||
import Text.Regex.TDFA.Text
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
-- dates
|
||||
|
@ -143,7 +146,7 @@ resolveBounds (s, e) = do
|
|||
--------------------------------------------------------------------------------
|
||||
-- matching
|
||||
|
||||
matches :: Match -> TxRecord -> EitherErrs (MatchRes RawTx)
|
||||
matches :: MatchRe -> TxRecord -> EitherErrs (MatchRes RawTx)
|
||||
matches Match {..} r@TxRecord {..} = do
|
||||
res <- concatEither3 val other desc $ \x y z -> x && y && z
|
||||
if date && res
|
||||
|
@ -153,7 +156,7 @@ matches Match {..} r@TxRecord {..} = do
|
|||
val = valMatches mVal trAmount
|
||||
date = maybe True (`dateMatches` trDate) mDate
|
||||
other = foldM (\a o -> (a &&) <$> otherMatches trOther o) True mOther
|
||||
desc = maybe (return True) (matchMaybe trDesc) mDesc
|
||||
desc = maybe (return True) (matchMaybe trDesc . snd) mDesc
|
||||
convert (ToTx cur a ss) = toTx cur a ss r
|
||||
|
||||
toTx :: SplitCur -> SplitAcnt -> [ExpSplit] -> TxRecord -> EitherErrs RawTx
|
||||
|
@ -193,10 +196,10 @@ valMatches MatchVal {..} x
|
|||
dateMatches :: MatchDate -> Day -> Bool
|
||||
dateMatches md = (EQ ==) . compareDate md
|
||||
|
||||
otherMatches :: M.Map T.Text T.Text -> MatchOther -> EitherErr Bool
|
||||
otherMatches :: M.Map T.Text T.Text -> MatchOtherRe -> EitherErr Bool
|
||||
otherMatches dict m = case m of
|
||||
Val (Field n mv) -> valMatches mv =<< (readRational =<< lookup_ MatchNumeric n)
|
||||
Desc (Field n md) -> (`matchMaybe` md) =<< lookup_ MatchText n
|
||||
Desc (Field n (_, md)) -> (`matchMaybe` md) =<< lookup_ MatchText n
|
||||
where
|
||||
lookup_ t n = lookupErr (MatchField t) n dict
|
||||
|
||||
|
@ -360,14 +363,14 @@ showTx TxRecord {trDate = d, trAmount = v, trDesc = e, trFile = f} =
|
|||
, ("description", doubleQuote e)
|
||||
]
|
||||
|
||||
showMatch :: Match -> T.Text
|
||||
showMatch :: MatchRe -> T.Text
|
||||
showMatch Match {mDate = d, mVal = v, mDesc = e, mOther = o, mTimes = n, mPriority = p} =
|
||||
T.append "Unused match: " $ keyVals [(x, y) | (x, Just y) <- kvs]
|
||||
where
|
||||
kvs =
|
||||
[ ("date", showMatchDate <$> d)
|
||||
, ("val", showMatchVal v)
|
||||
, ("desc", e)
|
||||
, ("desc", fst <$> e)
|
||||
, ("other", others)
|
||||
, ("counter", Just $ maybe "Inf" showT n)
|
||||
, ("priority", Just $ showT p)
|
||||
|
@ -421,8 +424,8 @@ showMatchVal MatchVal {..} = Just $ singleQuote $ keyVals [(k, v) | (k, Just v)
|
|||
, ("precision", Just $ showT mvPrec)
|
||||
]
|
||||
|
||||
showMatchOther :: MatchOther -> T.Text
|
||||
showMatchOther (Desc (Field f re)) =
|
||||
showMatchOther :: MatchOtherRe -> T.Text
|
||||
showMatchOther (Desc (Field f (re, _))) =
|
||||
T.unwords ["desc field", singleQuote f, "with re", singleQuote re]
|
||||
showMatchOther (Val (Field f mv)) =
|
||||
T.unwords
|
||||
|
@ -534,5 +537,34 @@ uncurry3 f (a, b, c) = f a b c
|
|||
-- super slow
|
||||
-- NOTE: see https://github.com/haskell-hvr/regex-tdfa/issues/9 - performance
|
||||
-- is likely not going to be optimal for text
|
||||
matchMaybe :: RegexContext Regex query b => query -> T.Text -> EitherErr b
|
||||
matchMaybe q re = first (const $ RegexError re) $ pureTry $ q =~ re
|
||||
-- matchMaybe :: T.Text -> T.Text -> EitherErr Bool
|
||||
-- matchMaybe q pat = case compres of
|
||||
-- Right re -> case execute re q of
|
||||
-- Right res -> Right $ isJust res
|
||||
-- Left _ -> Left $ RegexError "this should not happen"
|
||||
-- Left _ -> Left $ RegexError pat
|
||||
-- where
|
||||
-- -- these options barely do anything in terms of performance
|
||||
-- compres = compile (blankCompOpt {newSyntax = True}) (blankExecOpt {captureGroups = False}) pat
|
||||
|
||||
compileMatch :: Match T.Text -> EitherErrs MatchRe
|
||||
compileMatch m@Match {mDesc = d, mOther = os} = do
|
||||
let dres = plural $ mapM compileRegex d
|
||||
let ores = concatEitherL $ fmap (mapM compileRegex) os
|
||||
concatEithers2 dres ores $ \d_ os_ -> m {mDesc = d_, mOther = os_}
|
||||
|
||||
compileRegex :: T.Text -> EitherErr (Text, Regex)
|
||||
compileRegex pat = case res of
|
||||
Right re -> Right (pat, re)
|
||||
Left _ -> Left $ RegexError pat
|
||||
where
|
||||
res =
|
||||
compile
|
||||
(blankCompOpt {newSyntax = True})
|
||||
(blankExecOpt {captureGroups = False})
|
||||
pat
|
||||
|
||||
matchMaybe :: T.Text -> Regex -> EitherErr Bool
|
||||
matchMaybe q re = case execute re q of
|
||||
Right res -> Right $ isJust res
|
||||
Left _ -> Left $ RegexError "this should not happen"
|
||||
|
|
Loading…
Reference in New Issue