ENH compile rational regex parsers once

This commit is contained in:
Nathan Dwarshuis 2023-02-05 11:34:37 -05:00
parent 159204b47a
commit 281d67bfc9
3 changed files with 46 additions and 25 deletions

View File

@ -26,19 +26,21 @@ import qualified RIO.Vector as V
readImport :: MonadUnliftIO m => Import -> MappingT m (EitherErrs [BalTx]) readImport :: MonadUnliftIO m => Import -> MappingT m (EitherErrs [BalTx])
readImport Import {..} = do readImport Import {..} = do
let ores = plural $ compileOptions impTxOpts
let cres = concatEithersL $ compileMatch <$> impMatches let cres = concatEithersL $ compileMatch <$> impMatches
ires <- mapM (readImport_ impSkipLines impDelim impTxOpts) impPaths case concatEithers2 ores cres (,) of
let res = concatEithers2 cres (concatEitherL ires) (,) Right (compiledOptions, compiledMatches) -> do
case res of ires <- mapM (readImport_ impSkipLines impDelim compiledOptions) impPaths
case concatEitherL ires of
Right records -> return $ matchRecords compiledMatches $ L.sort $ concat records
Left es -> return $ Left es
Left es -> return $ Left es Left es -> return $ Left es
Right (compiled, records) ->
return $ matchRecords compiled $ L.sort $ concat records
readImport_ readImport_
:: MonadUnliftIO m :: MonadUnliftIO m
=> Natural => Natural
-> Word -> Word
-> TxOpts -> TxOptsRe
-> FilePath -> FilePath
-> MappingT m (EitherErr [TxRecord]) -> MappingT m (EitherErr [TxRecord])
readImport_ n delim tns p = do readImport_ n delim tns p = do
@ -53,7 +55,7 @@ readImport_ n delim tns p = do
-- TODO handle this better, this maybe thing is a hack to skip lines with -- TODO handle this better, this maybe thing is a hack to skip lines with
-- blank dates but will likely want to make this more flexible -- blank dates but will likely want to make this more flexible
parseTxRecord :: FilePath -> TxOpts -> NamedRecord -> Parser (Maybe TxRecord) parseTxRecord :: FilePath -> TxOptsRe -> NamedRecord -> Parser (Maybe TxRecord)
parseTxRecord p TxOpts {..} r = do parseTxRecord p TxOpts {..} r = do
d <- r .: T.encodeUtf8 toDate d <- r .: T.encodeUtf8 toDate
if d == "" if d == ""

View File

@ -54,7 +54,6 @@ makeHaskellTypesWith
, SingleConstructor "ModPat" "ModPat" "(./dhall/Types.dhall).ModPat.Type" , SingleConstructor "ModPat" "ModPat" "(./dhall/Types.dhall).ModPat.Type"
, SingleConstructor "CronPat" "CronPat" "(./dhall/Types.dhall).CronPat.Type" , SingleConstructor "CronPat" "CronPat" "(./dhall/Types.dhall).CronPat.Type"
, SingleConstructor "Decimal" "D" "(./dhall/Types.dhall).Decimal" , SingleConstructor "Decimal" "D" "(./dhall/Types.dhall).Decimal"
, SingleConstructor "TxOpts" "TxOpts" "(./dhall/Types.dhall).TxOpts.Type"
, SingleConstructor "MatchVal" "MatchVal" "(./dhall/Types.dhall).MatchVal.Type" , SingleConstructor "MatchVal" "MatchVal" "(./dhall/Types.dhall).MatchVal.Type"
, SingleConstructor "Manual" "Manual" "(./dhall/Types.dhall).Manual" , SingleConstructor "Manual" "Manual" "(./dhall/Types.dhall).Manual"
, SingleConstructor "Tax" "Tax" "(./dhall/Types.dhall).Tax" , SingleConstructor "Tax" "Tax" "(./dhall/Types.dhall).Tax"
@ -341,11 +340,21 @@ type ExpTx = Tx ExpSplit
instance FromDhall ExpTx instance FromDhall ExpTx
data TxOpts re = TxOpts
{ toDate :: !T.Text
, toAmount :: !T.Text
, toDesc :: !T.Text
, toOther :: ![T.Text]
, toDateFmt :: !T.Text
, toAmountFmt :: !re
}
deriving (Eq, Generic, Hashable, Show, FromDhall)
data Import = Import data Import = Import
{ impPaths :: ![FilePath] { impPaths :: ![FilePath]
, impMatches :: ![Match T.Text] , impMatches :: ![Match T.Text]
, impDelim :: !Word , impDelim :: !Word
, impTxOpts :: !TxOpts , impTxOpts :: !(TxOpts T.Text)
, impSkipLines :: !Natural , impSkipLines :: !Natural
} }
deriving (Eq, Hashable, Generic, FromDhall) deriving (Eq, Hashable, Generic, FromDhall)
@ -448,12 +457,6 @@ data Match re = Match
deriving instance Show (Match T.Text) deriving instance Show (Match T.Text)
deriving instance Eq TxOpts
deriving instance Hashable TxOpts
deriving instance Show TxOpts
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- Specialized dhall types -- Specialized dhall types
@ -609,6 +612,8 @@ data XGregorian = XGregorian
type MatchRe = Match (T.Text, Regex) type MatchRe = Match (T.Text, Regex)
type TxOptsRe = TxOpts (T.Text, Regex)
type MatchOtherRe = MatchOther (T.Text, Regex) type MatchOtherRe = MatchOther (T.Text, Regex)
instance Show (Match (T.Text, Regex)) where instance Show (Match (T.Text, Regex)) where

View File

@ -34,6 +34,7 @@ module Internal.Utils
, xGregToDay , xGregToDay
, plural , plural
, compileMatch , compileMatch
, compileOptions
) )
where where
@ -241,8 +242,8 @@ lookupErr what k m = case M.lookup k m of
Just x -> Right x Just x -> Right x
_ -> Left $ LookupError what $ showT k _ -> Left $ LookupError what $ showT k
parseRational :: MonadFail m => T.Text -> T.Text -> m Rational parseRational :: MonadFail m => (T.Text, Regex) -> T.Text -> m Rational
parseRational pat s = case ms of parseRational (pat, re) s = case matchGroupsMaybe s re of
[sign, x, ""] -> uncurry (*) <$> readWhole sign x [sign, x, ""] -> uncurry (*) <$> readWhole sign x
[sign, x, y] -> do [sign, x, y] -> do
d <- readT "decimal" y d <- readT "decimal" y
@ -251,7 +252,6 @@ parseRational pat s = case ms of
return $ k * (w + d % p) return $ k * (w + d % p)
_ -> msg "malformed decimal" _ -> msg "malformed decimal"
where where
(_, _, _, ms) = (s =~ pat) :: (T.Text, T.Text, T.Text, [T.Text])
readT what t = case readMaybe $ T.unpack t of readT what t = case readMaybe $ T.unpack t of
Just d -> return $ fromInteger d Just d -> return $ fromInteger d
_ -> msg $ T.unwords ["could not parse", what, t] _ -> msg $ T.unwords ["could not parse", what, t]
@ -260,9 +260,9 @@ parseRational pat s = case ms of
T.unpack $ T.unpack $
T.concat T.concat
[ m [ m
, "; pattern = " , "; pattern="
, pat , pat
, "; query = " , "; query="
, s , s
] ]
readSign x readSign x
@ -568,24 +568,38 @@ uncurry3 f (a, b, c) = f a b c
-- -- these options barely do anything in terms of performance -- -- these options barely do anything in terms of performance
-- compres = compile (blankCompOpt {newSyntax = True}) (blankExecOpt {captureGroups = False}) pat -- compres = compile (blankCompOpt {newSyntax = True}) (blankExecOpt {captureGroups = False}) pat
compileOptions :: TxOpts T.Text -> EitherErr TxOptsRe
compileOptions o@TxOpts {toAmountFmt = pat} = do
re <- compileRegex True pat
return $ o {toAmountFmt = re}
compileMatch :: Match T.Text -> EitherErrs MatchRe compileMatch :: Match T.Text -> EitherErrs MatchRe
compileMatch m@Match {mDesc = d, mOther = os} = do compileMatch m@Match {mDesc = d, mOther = os} = do
let dres = plural $ mapM compileRegex d let dres = plural $ mapM go d
let ores = concatEitherL $ fmap (mapM compileRegex) os let ores = concatEitherL $ fmap (mapM go) os
concatEithers2 dres ores $ \d_ os_ -> m {mDesc = d_, mOther = os_} concatEithers2 dres ores $ \d_ os_ -> m {mDesc = d_, mOther = os_}
where
go = compileRegex False
compileRegex :: T.Text -> EitherErr (Text, Regex) compileRegex :: Bool -> T.Text -> EitherErr (Text, Regex)
compileRegex pat = case res of compileRegex groups pat = case res of
Right re -> Right (pat, re) Right re -> Right (pat, re)
Left _ -> Left $ RegexError pat Left _ -> Left $ RegexError pat
where where
res = res =
compile compile
(blankCompOpt {newSyntax = True}) (blankCompOpt {newSyntax = True})
(blankExecOpt {captureGroups = False}) (blankExecOpt {captureGroups = groups})
pat pat
matchMaybe :: T.Text -> Regex -> EitherErr Bool matchMaybe :: T.Text -> Regex -> EitherErr Bool
matchMaybe q re = case execute re q of matchMaybe q re = case execute re q of
Right res -> Right $ isJust res Right res -> Right $ isJust res
Left _ -> Left $ RegexError "this should not happen" Left _ -> Left $ RegexError "this should not happen"
matchGroupsMaybe :: T.Text -> Regex -> [T.Text]
matchGroupsMaybe q re = case regexec re q of
Right Nothing -> []
Right (Just (_, _, _, xs)) -> xs
-- this should never fail as regexec always returns Right
Left _ -> []