ADD better parsing for statements

This commit is contained in:
Nathan Dwarshuis 2023-08-16 21:01:06 -04:00
parent 2ebfe7a125
commit 001ca0ff37
4 changed files with 201 additions and 81 deletions

View File

@ -278,54 +278,87 @@ let DatePat =
-}
< Cron : CronPat.Type | Mod : ModPat.Type >
let TxOpts_ =
{- Additional metadata to use when parsing a statement -}
let TxAmount1 =
\(re : Type) ->
{ Type =
{ toDate :
{-
Column title for date
-}
Text
, toAmount :
{-
Column title for amount
-}
Text
, toDesc :
{-
Column title for description
-}
Text
, toOther :
{-
Titles of other columns to include; these will be available in
a map for use in downstream processing (see 'Field')
-}
List Text
, toDateFmt :
{-
Format of the date field as specified in the
Data.Time.Format.formattime Haskell function.
-}
Text
, toAmountFmt :
{- Format of the amount field. Must include three fields for the
sign, numerator, and denominator of the amount.
-}
re
}
, default =
{ toDate = "Date"
, toAmount = "Amount"
, toDesc = "Description"
, toOther = [] : List Text
, toDateFmt = "%0m/%0d/%Y"
, toAmountFmt = "([-+])?([0-9]+)\\.?([0-9]+)?"
}
{ a1Column : Text
, a1Fmt :
{-
Format of the amount field. Must include three fields for the
sign, numerator, and denominator of the amount.
-}
re
}
let TxOpts = TxOpts_ Text
let TxAmount2 =
\(re : Type) ->
{ a2Positive : Text
, a2Negative : Text
, a2Fmt :
{-
Format of the amount field. Must include two fields for the
numerator and denominator of the amount.
-}
re
}
let TxAmountSpec =
\(re : Type) ->
< AmountSingle : TxAmount1 re | AmountDual : TxAmount2 re >
let TxOpts_ =
{-
Additional metadata to use when parsing a statement
-}
\(re : Type) ->
{ toDate :
{-
Column title for date
-}
Text
, toAmount :
{-
Column title for amount
-}
TxAmountSpec re
, toDesc :
{-
Column title for description
-}
Text
, toOther :
{-
Titles of other columns to include; these will be available in
a map for use in downstream processing (see 'Field')
-}
List Text
, toDateFmt :
{-
Format of the date field as specified in the
Data.Time.Format.formattime Haskell function.
-}
Text
, toSkipBlankDate : Bool
, toSkipBlankAmount : Bool
, toSkipBlankDescription : Bool
, toSkipBlankOther : List Text
}
let TxOpts =
{ Type = TxOpts_ Text
, default =
{ toDate = "Date"
, toAmount =
(TxAmountSpec Text).AmountSingle
{ a1Column = "Amount", a1Fmt = "([-+])?([0-9\\.]+)" }
, toDesc = "Description"
, toOther = [] : List Text
, toDateFmt = "%0m/%0d/%Y"
, toSkipBlankDate = False
, toSkipBlankAmount = False
, toSkipBlankDescription = False
, toSkipBlankOther = [] : List Text
}
}
let Field =
{-
@ -1165,4 +1198,7 @@ in { CurID
, SingleAlloAmount
, AcntMatcher_
, AcntMatcher
, TxAmountSpec
, TxAmount1
, TxAmount2
}

View File

@ -32,7 +32,7 @@ readHistoryCRUD
readHistoryCRUD root o@CRUDOps {coCreate = (ts, ss)} = do
-- TODO multithread this for some extra fun :)
ss' <- mapM (readHistStmt root) ss
ss' <- mapErrorsIO (readHistStmt root) ss
fromEitherM $ runExceptT $ do
let sRes = mapErrors (ExceptT . return) ss'
let tRes = mapErrors readHistTransfer ts
@ -112,16 +112,43 @@ readImport_ n delim tns p = do
-- TODO handle this better, this maybe thing is a hack to skip lines with
-- blank dates but will likely want to make this more flexible
parseTxRecord :: FilePath -> TxOptsRe -> NamedRecord -> Parser (Maybe TxRecord)
parseTxRecord p TxOpts {toDate, toAmountFmt, toDesc, toAmount, toOther, toDateFmt} r = do
d <- r .: T.encodeUtf8 toDate
if d == ""
then return Nothing
else do
a <- parseDecimal toAmountFmt =<< r .: T.encodeUtf8 toAmount
e <- r .: T.encodeUtf8 toDesc
os <- M.fromList <$> mapM (\n -> (n,) <$> r .: T.encodeUtf8 n) toOther
d' <- parseTimeM True defaultTimeLocale (T.unpack toDateFmt) d
return $ Just $ TxRecord d' a e os p
parseTxRecord
p
TxOpts
{ toDate
, toDesc
, toAmount
, toOther
, toDateFmt
, toSkipBlankDate
, toSkipBlankAmount
, toSkipBlankDescription
, toSkipBlankOther
}
r = do
d <- r .: T.encodeUtf8 toDate
e <- r .: T.encodeUtf8 toDesc
os <- M.fromList <$> mapM (\n -> (n,) <$> r .: T.encodeUtf8 n) toOther
(af, ax) <- case toAmount of
AmountSingle TxAmount1 {a1Column, a1Fmt} -> do
f <- r .: T.encodeUtf8 a1Column
return (a1Fmt, Right f)
AmountDual TxAmount2 {a2Positive, a2Negative, a2Fmt} -> do
f1 <- r .: T.encodeUtf8 a2Positive
f2 <- r .: T.encodeUtf8 a2Negative
return (a2Fmt, Left (f1, f2))
if (toSkipBlankDate && d == "")
|| (toSkipBlankDescription && e == "")
|| (toSkipBlankAmount && (ax == Right "" || ax == Left ("", "")))
|| elem "" (mapMaybe (`M.lookup` os) toSkipBlankOther)
then return Nothing
else do
a' <- case ax of
Right a -> parseDecimal True af a
Left ("", a) -> ((-1) *) <$> parseDecimal False af a
Left (a, _) -> parseDecimal False af a
d' <- parseTimeM True defaultTimeLocale (T.unpack toDateFmt) d
return $ Just $ TxRecord d' a' e os p
matchRecords :: MonadFinance m => [StatementParserRe] -> [TxRecord] -> AppExceptT m [Tx ()]
matchRecords ms rs = do
@ -468,9 +495,15 @@ readRational s = case T.split (== '.') s of
err = throwError $ AppException [ConversionError s False]
compileOptions :: TxOpts T.Text -> AppExcept TxOptsRe
compileOptions o@TxOpts {toAmountFmt = pat} = do
re <- compileRegex True pat
return $ o {toAmountFmt = re}
compileOptions = mapM (compileRegex True)
-- compileOptions o@TxOpts {toAmount = pat} = case pat of
-- AmountSingle (TxAmount1 {a1Fmt}) -> do
-- re <- compileRegex True a1Fmt
-- return $ o {toAmountFmt = re}
-- AmountDual (TxAmount2 {a2Fmt}) -> do
-- re <- compileRegex True a2Fmt
-- return $ o {toAmountFmt = re}
compileMatch :: StatementParser T.Text -> AppExcept StatementParserRe
compileMatch m@StatementParser {spDesc, spOther} = do
@ -480,19 +513,15 @@ compileMatch m@StatementParser {spDesc, spOther} = do
dres = mapM go spDesc
ores = combineErrors $ fmap (mapM go) spOther
parseDecimal :: MonadFail m => (T.Text, Regex) -> T.Text -> m Decimal
parseDecimal (pat, re) s = case matchGroupsMaybe s re of
[sign, x, ""] -> Decimal 0 . uncurry (*) <$> readWhole sign x
[sign, x, y] -> do
d <- readT "decimal" y
let p = T.length y
(k, w) <- readWhole sign x
return $ Decimal (fromIntegral p) (k * (w * (10 ^ p) + d))
parseDecimal :: MonadFail m => Bool -> (T.Text, Regex) -> T.Text -> m Decimal
parseDecimal wantSign (pat, re) s = case (wantSign, matchGroupsMaybe s re) of
(True, [sign, num]) -> do
k <- readSign sign
x <- readNum num
return $ k * x
(False, [num]) -> readNum num
_ -> msg "malformed decimal"
where
readT what t = case readMaybe $ T.unpack t of
Just d -> return $ fromInteger d
_ -> msg $ T.unwords ["could not parse", what, singleQuote t]
msg :: MonadFail m => T.Text -> m a
msg m =
fail $
@ -502,7 +531,10 @@ parseDecimal (pat, re) s = case matchGroupsMaybe s re of
| x == "-" = return (-1)
| x == "+" || x == "" = return 1
| otherwise = msg $ T.append "invalid sign: " x
readWhole sign x = do
w <- readT "whole number" x
k <- readSign sign
return (k, w)
readNum x =
maybe
(msg $ T.unwords ["could not parse", singleQuote x])
return
$ readMaybe
$ T.unpack
$ T.filter (/= ',') x

View File

@ -102,7 +102,7 @@ newtype EntryIndex = EntryIndex {unEntryIndex :: Int}
deriving newtype (Show, Eq, Ord, Num, Real, Enum, Integral, PersistField, PersistFieldSql)
newtype TxDesc = TxDesc {unTxDesc :: T.Text}
deriving newtype (Show, Eq, Ord, PersistField, PersistFieldSql, FromField)
deriving newtype (Show, Eq, Ord, PersistField, PersistFieldSql, FromField, IsString)
newtype Precision = Precision {unPrecision :: Word8}
deriving newtype (Eq, Ord, Num, Show, Real, Enum, Integral, PersistField, PersistFieldSql)

View File

@ -49,14 +49,12 @@ makeHaskellTypesWith
, SingleConstructor "ModPat" "ModPat" "(./dhall/Types.dhall).ModPat.Type"
, SingleConstructor "CronPat" "CronPat" "(./dhall/Types.dhall).CronPat.Type"
, SingleConstructor "ValMatcher" "ValMatcher" "(./dhall/Types.dhall).ValMatcher.Type"
, SingleConstructor "TxAmount1" "TxAmount1" "(./dhall/Types.dhall).TxAmount1"
, SingleConstructor "TxAmount2" "TxAmount2" "(./dhall/Types.dhall).TxAmount2"
, SingleConstructor
"Amount"
"Amount"
"\\(w : Type) -> \\(v : Type) -> ((./dhall/Types.dhall).Amount w v).Type"
, SingleConstructor
"TxOpts"
"TxOpts"
"\\(re : Type) -> ((./dhall/Types.dhall).TxOpts_ re).Type"
, SingleConstructor
"AcntMatcher_"
"AcntMatcher_"
@ -468,12 +466,44 @@ deriving instance Eq a => Eq (TxOpts a)
deriving instance Generic (TxOpts a)
deriving instance Hashable a => Hashable (TxOpts a)
deriving instance Hashable (TxOpts T.Text)
deriving instance FromDhall a => FromDhall (TxOpts a)
deriving instance FromDhall (TxOpts T.Text)
deriving instance Show a => Show (TxOpts a)
deriving instance Eq re => Eq (TxAmount1 re)
deriving instance Eq re => Eq (TxAmount2 re)
deriving instance Show re => Show (TxAmount1 re)
deriving instance Show re => Show (TxAmount2 re)
deriving instance Generic (TxAmount1 T.Text)
deriving instance Generic (TxAmount2 T.Text)
deriving instance Hashable (TxAmount1 T.Text)
deriving instance Hashable (TxAmount2 T.Text)
deriving instance FromDhall (TxAmount1 T.Text)
deriving instance FromDhall (TxAmount2 T.Text)
deriving instance Functor TxAmount1
deriving instance Functor TxAmount2
deriving instance Foldable TxAmount1
deriving instance Foldable TxAmount2
deriving instance Traversable TxAmount1
deriving instance Traversable TxAmount2
data Statement = Statement
{ stmtPaths :: ![FilePath]
, stmtParsers :: ![StatementParser T.Text]
@ -483,6 +513,28 @@ data Statement = Statement
}
deriving (Eq, Hashable, Generic, FromDhall, Show)
data TxAmountSpec re = AmountSingle (TxAmount1 re) | AmountDual (TxAmount2 re)
deriving (Eq, Show, Functor, Foldable, Traversable)
deriving instance Generic (TxAmountSpec T.Text)
deriving instance FromDhall (TxAmountSpec T.Text)
deriving instance Hashable (TxAmountSpec T.Text)
data TxOpts re = TxOpts
{ toDate :: !T.Text
, toAmount :: !(TxAmountSpec re)
, toDesc :: !T.Text
, toOther :: ![T.Text]
, toDateFmt :: !T.Text
, toSkipBlankDate :: !Bool
, toSkipBlankAmount :: !Bool
, toSkipBlankDescription :: !Bool
, toSkipBlankOther :: ![Text]
}
deriving (Functor, Foldable, Traversable)
-- | the value of a field in entry (text version)
-- can either be a raw (constant) value, a lookup from the record, or a map
-- between the lookup and some other value