From 281d67bfc95044d05c8e60aaabb553b98235a143 Mon Sep 17 00:00:00 2001 From: ndwarshuis Date: Sun, 5 Feb 2023 11:34:37 -0500 Subject: [PATCH] ENH compile rational regex parsers once --- lib/Internal/Statement.hs | 16 +++++++++------- lib/Internal/Types.hs | 21 +++++++++++++-------- lib/Internal/Utils.hs | 34 ++++++++++++++++++++++++---------- 3 files changed, 46 insertions(+), 25 deletions(-) diff --git a/lib/Internal/Statement.hs b/lib/Internal/Statement.hs index 8f135af..5576b73 100644 --- a/lib/Internal/Statement.hs +++ b/lib/Internal/Statement.hs @@ -26,19 +26,21 @@ import qualified RIO.Vector as V readImport :: MonadUnliftIO m => Import -> MappingT m (EitherErrs [BalTx]) readImport Import {..} = do + let ores = plural $ compileOptions impTxOpts let cres = concatEithersL $ compileMatch <$> impMatches - ires <- mapM (readImport_ impSkipLines impDelim impTxOpts) impPaths - let res = concatEithers2 cres (concatEitherL ires) (,) - case res of + case concatEithers2 ores cres (,) of + Right (compiledOptions, compiledMatches) -> do + ires <- mapM (readImport_ impSkipLines impDelim compiledOptions) impPaths + case concatEitherL ires of + Right records -> return $ matchRecords compiledMatches $ L.sort $ concat records + Left es -> return $ Left es Left es -> return $ Left es - Right (compiled, records) -> - return $ matchRecords compiled $ L.sort $ concat records readImport_ :: MonadUnliftIO m => Natural -> Word - -> TxOpts + -> TxOptsRe -> FilePath -> MappingT m (EitherErr [TxRecord]) readImport_ n delim tns p = do @@ -53,7 +55,7 @@ readImport_ n delim tns p = do -- TODO handle this better, this maybe thing is a hack to skip lines with -- blank dates but will likely want to make this more flexible -parseTxRecord :: FilePath -> TxOpts -> NamedRecord -> Parser (Maybe TxRecord) +parseTxRecord :: FilePath -> TxOptsRe -> NamedRecord -> Parser (Maybe TxRecord) parseTxRecord p TxOpts {..} r = do d <- r .: T.encodeUtf8 toDate if d == "" diff --git a/lib/Internal/Types.hs b/lib/Internal/Types.hs index f566297..7e1c2a0 100644 --- a/lib/Internal/Types.hs +++ b/lib/Internal/Types.hs @@ -54,7 +54,6 @@ makeHaskellTypesWith , SingleConstructor "ModPat" "ModPat" "(./dhall/Types.dhall).ModPat.Type" , SingleConstructor "CronPat" "CronPat" "(./dhall/Types.dhall).CronPat.Type" , SingleConstructor "Decimal" "D" "(./dhall/Types.dhall).Decimal" - , SingleConstructor "TxOpts" "TxOpts" "(./dhall/Types.dhall).TxOpts.Type" , SingleConstructor "MatchVal" "MatchVal" "(./dhall/Types.dhall).MatchVal.Type" , SingleConstructor "Manual" "Manual" "(./dhall/Types.dhall).Manual" , SingleConstructor "Tax" "Tax" "(./dhall/Types.dhall).Tax" @@ -341,11 +340,21 @@ type ExpTx = Tx ExpSplit instance FromDhall ExpTx +data TxOpts re = TxOpts + { toDate :: !T.Text + , toAmount :: !T.Text + , toDesc :: !T.Text + , toOther :: ![T.Text] + , toDateFmt :: !T.Text + , toAmountFmt :: !re + } + deriving (Eq, Generic, Hashable, Show, FromDhall) + data Import = Import { impPaths :: ![FilePath] , impMatches :: ![Match T.Text] , impDelim :: !Word - , impTxOpts :: !TxOpts + , impTxOpts :: !(TxOpts T.Text) , impSkipLines :: !Natural } deriving (Eq, Hashable, Generic, FromDhall) @@ -448,12 +457,6 @@ data Match re = Match deriving instance Show (Match T.Text) -deriving instance Eq TxOpts - -deriving instance Hashable TxOpts - -deriving instance Show TxOpts - -------------------------------------------------------------------------------- -- Specialized dhall types @@ -609,6 +612,8 @@ data XGregorian = XGregorian type MatchRe = Match (T.Text, Regex) +type TxOptsRe = TxOpts (T.Text, Regex) + type MatchOtherRe = MatchOther (T.Text, Regex) instance Show (Match (T.Text, Regex)) where diff --git a/lib/Internal/Utils.hs b/lib/Internal/Utils.hs index bb1ae93..7058850 100644 --- a/lib/Internal/Utils.hs +++ b/lib/Internal/Utils.hs @@ -34,6 +34,7 @@ module Internal.Utils , xGregToDay , plural , compileMatch + , compileOptions ) where @@ -241,8 +242,8 @@ lookupErr what k m = case M.lookup k m of Just x -> Right x _ -> Left $ LookupError what $ showT k -parseRational :: MonadFail m => T.Text -> T.Text -> m Rational -parseRational pat s = case ms of +parseRational :: MonadFail m => (T.Text, Regex) -> T.Text -> m Rational +parseRational (pat, re) s = case matchGroupsMaybe s re of [sign, x, ""] -> uncurry (*) <$> readWhole sign x [sign, x, y] -> do d <- readT "decimal" y @@ -251,7 +252,6 @@ parseRational pat s = case ms of return $ k * (w + d % p) _ -> msg "malformed decimal" where - (_, _, _, ms) = (s =~ pat) :: (T.Text, T.Text, T.Text, [T.Text]) readT what t = case readMaybe $ T.unpack t of Just d -> return $ fromInteger d _ -> msg $ T.unwords ["could not parse", what, t] @@ -260,9 +260,9 @@ parseRational pat s = case ms of T.unpack $ T.concat [ m - , "; pattern = " + , "; pattern=" , pat - , "; query = " + , "; query=" , s ] readSign x @@ -568,24 +568,38 @@ uncurry3 f (a, b, c) = f a b c -- -- these options barely do anything in terms of performance -- compres = compile (blankCompOpt {newSyntax = True}) (blankExecOpt {captureGroups = False}) pat +compileOptions :: TxOpts T.Text -> EitherErr TxOptsRe +compileOptions o@TxOpts {toAmountFmt = pat} = do + re <- compileRegex True pat + return $ o {toAmountFmt = re} + compileMatch :: Match T.Text -> EitherErrs MatchRe compileMatch m@Match {mDesc = d, mOther = os} = do - let dres = plural $ mapM compileRegex d - let ores = concatEitherL $ fmap (mapM compileRegex) os + let dres = plural $ mapM go d + let ores = concatEitherL $ fmap (mapM go) os concatEithers2 dres ores $ \d_ os_ -> m {mDesc = d_, mOther = os_} + where + go = compileRegex False -compileRegex :: T.Text -> EitherErr (Text, Regex) -compileRegex pat = case res of +compileRegex :: Bool -> T.Text -> EitherErr (Text, Regex) +compileRegex groups pat = case res of Right re -> Right (pat, re) Left _ -> Left $ RegexError pat where res = compile (blankCompOpt {newSyntax = True}) - (blankExecOpt {captureGroups = False}) + (blankExecOpt {captureGroups = groups}) pat matchMaybe :: T.Text -> Regex -> EitherErr Bool matchMaybe q re = case execute re q of Right res -> Right $ isJust res Left _ -> Left $ RegexError "this should not happen" + +matchGroupsMaybe :: T.Text -> Regex -> [T.Text] +matchGroupsMaybe q re = case regexec re q of + Right Nothing -> [] + Right (Just (_, _, _, xs)) -> xs + -- this should never fail as regexec always returns Right + Left _ -> []