From 8e2019ac5b165398ad0ccf33a681716b2c5f6024 Mon Sep 17 00:00:00 2001 From: ndwarshuis Date: Wed, 16 Aug 2023 22:24:20 -0400 Subject: [PATCH] ENH skip lines without all fields --- dhall/Types.dhall | 33 ++++++++++++++-- lib/Internal/History.hs | 75 +++++++++++++++++++++++++------------ lib/Internal/Types/Dhall.hs | 1 + 3 files changed, 81 insertions(+), 28 deletions(-) diff --git a/dhall/Types.dhall b/dhall/Types.dhall index 2982509..29f6e05 100644 --- a/dhall/Types.dhall +++ b/dhall/Types.dhall @@ -351,10 +351,34 @@ let TxOpts_ = Data.Time.Format.formattime Haskell function. -} Text - , toSkipBlankDate : Bool - , toSkipBlankAmount : Bool - , toSkipBlankDescription : Bool - , toSkipBlankOther : List Text + , toSkipBlankDate : + {- + Skip line if date field is a blank + -} + Bool + , toSkipBlankAmount : + {- + Skip line if amount field(s) is(are) a blank + -} + Bool + , toSkipBlankDescription : + {- + Skip line if description field is a blank + -} + Bool + , toSkipBlankOther : + {- + Skip line if any arbitrary fields are blank (these fields must also + be listed in 'toOther' to be considered) + -} + List Text + , toSkipMissingFields : + {- + Skip line if any fields are missing (this is different from blank; + 'missing' means there is no field with name 'X', 'blank' means that + there is a field 'X' and its value is an empty string) + -} + Bool } let TxAmountSpec = TxAmountSpec_ Text @@ -371,6 +395,7 @@ let TxOpts = , toSkipBlankAmount = False , toSkipBlankDescription = False , toSkipBlankOther = [] : List Text + , toSkipMissingFields = False } } diff --git a/lib/Internal/History.hs b/lib/Internal/History.hs index 07927f0..62bc536 100644 --- a/lib/Internal/History.hs +++ b/lib/Internal/History.hs @@ -124,31 +124,58 @@ parseTxRecord , toSkipBlankAmount , toSkipBlankDescription , toSkipBlankOther + , toSkipMissingFields } - r = do - d <- r .: T.encodeUtf8 toDate - e <- r .: T.encodeUtf8 toDesc - os <- M.fromList <$> mapM (\n -> (n,) <$> r .: T.encodeUtf8 n) toOther - (af, ax) <- case toAmount of - AmountSingle TxAmount1 {a1Column, a1Fmt} -> do - f <- r .: T.encodeUtf8 a1Column - return (a1Fmt, Right f) - AmountDual TxAmount2 {a2Positive, a2Negative, a2Fmt} -> do - f1 <- r .: T.encodeUtf8 a2Positive - f2 <- r .: T.encodeUtf8 a2Negative - return (a2Fmt, Left (f1, f2)) - if (toSkipBlankDate && d == "") - || (toSkipBlankDescription && e == "") - || (toSkipBlankAmount && (ax == Right "" || ax == Left ("", ""))) - || elem "" (mapMaybe (`M.lookup` os) toSkipBlankOther) - then return Nothing - else do - a' <- case ax of - Right a -> parseDecimal True af a - Left ("", a) -> ((-1) *) <$> parseDecimal False af a - Left (a, _) -> parseDecimal False af a - d' <- parseTimeM True defaultTimeLocale (T.unpack toDateFmt) d - return $ Just $ TxRecord d' a' e os p + r = + do + -- TODO this is confusing as hell + -- + -- try and parse all fields; if a parse fails, either trip an error + -- or return a Nothing if we want to deliberately skip missing fields + d <- getField toDate + e <- getField toDesc + os <- + fmap M.fromList . sequence + <$> mapM (\n -> fmap (n,) <$> getField n) toOther + (af, ax) <- case toAmount of + -- the amount column is extra confusing because it can either be one + -- or two columns, so keep track of this with a maybe + AmountSingle TxAmount1 {a1Column, a1Fmt} -> do + f <- getField a1Column + return (a1Fmt, Right <$> f) + AmountDual TxAmount2 {a2Positive, a2Negative, a2Fmt} -> do + f1 <- getField a2Positive + f2 <- getField a2Negative + return $ (a2Fmt,) $ case (f1, f2) of + (Just a, Just b) -> Just $ Left (a, b) + _ -> Nothing + case (d, e, os, ax) of + -- If all lookups were successful, check that none of the fields are + -- blank, and if they are return nothing to skip this line + (Just d', Just e', Just os', Just ax') -> + if (toSkipBlankDate && d' == "") + || (toSkipBlankDescription && e' == "") + || (toSkipBlankAmount && (ax' == Right "" || ax' == Left ("", ""))) + || elem "" (mapMaybe (`M.lookup` os') toSkipBlankOther) + then return Nothing + else -- if we are skipping nothing, proceed to parse the date and amount + -- columns + do + a <- case ax' of + Right a -> parseDecimal True af a + Left ("", a) -> ((-1) *) <$> parseDecimal False af a + Left (a, _) -> parseDecimal False af a + d'' <- parseTimeM True defaultTimeLocale (T.unpack toDateFmt) d' + return $ Just $ TxRecord d'' a e' os' p + -- if no lookups succeeded, return nothing to skip this line. Note that + -- a parse fail will trigger a failure error further up, so that case + -- is already dealt with implicitly + _ -> return Nothing + where + getField :: FromField a => T.Text -> Parser (Maybe a) + getField f = case runParser $ r .: T.encodeUtf8 f of + Left err -> if toSkipMissingFields then return Nothing else fail err + Right x -> return $ Just x matchRecords :: MonadFinance m => [StatementParserRe] -> [TxRecord] -> AppExceptT m [Tx ()] matchRecords ms rs = do diff --git a/lib/Internal/Types/Dhall.hs b/lib/Internal/Types/Dhall.hs index 1564693..42fea62 100644 --- a/lib/Internal/Types/Dhall.hs +++ b/lib/Internal/Types/Dhall.hs @@ -532,6 +532,7 @@ data TxOpts re = TxOpts , toSkipBlankAmount :: !Bool , toSkipBlankDescription :: !Bool , toSkipBlankOther :: ![Text] + , toSkipMissingFields :: !Bool } deriving (Functor, Foldable, Traversable)