ENH skip lines without all fields

This commit is contained in:
Nathan Dwarshuis 2023-08-16 22:24:20 -04:00
parent 4835ab15ca
commit 8e2019ac5b
3 changed files with 81 additions and 28 deletions

View File

@ -351,10 +351,34 @@ let TxOpts_ =
Data.Time.Format.formattime Haskell function. Data.Time.Format.formattime Haskell function.
-} -}
Text Text
, toSkipBlankDate : Bool , toSkipBlankDate :
, toSkipBlankAmount : Bool {-
, toSkipBlankDescription : Bool Skip line if date field is a blank
, toSkipBlankOther : List Text -}
Bool
, toSkipBlankAmount :
{-
Skip line if the amount field(s) is (are) blank
-}
Bool
, toSkipBlankDescription :
{-
Skip line if the description field is blank
-}
Bool
, toSkipBlankOther :
{-
Skip line if any arbitrary fields are blank (these fields must also
be listed in 'toOther' to be considered)
-}
List Text
, toSkipMissingFields :
{-
Skip line if any fields are missing (this is different from blank;
'missing' means there is no field with name 'X', 'blank' means that
there is a field 'X' and its value is an empty string)
-}
Bool
} }
let TxAmountSpec = TxAmountSpec_ Text let TxAmountSpec = TxAmountSpec_ Text
@ -371,6 +395,7 @@ let TxOpts =
, toSkipBlankAmount = False , toSkipBlankAmount = False
, toSkipBlankDescription = False , toSkipBlankDescription = False
, toSkipBlankOther = [] : List Text , toSkipBlankOther = [] : List Text
, toSkipMissingFields = False
} }
} }

View File

@ -124,31 +124,58 @@ parseTxRecord
, toSkipBlankAmount , toSkipBlankAmount
, toSkipBlankDescription , toSkipBlankDescription
, toSkipBlankOther , toSkipBlankOther
, toSkipMissingFields
} }
r = do r =
d <- r .: T.encodeUtf8 toDate do
e <- r .: T.encodeUtf8 toDesc -- TODO this is confusing as hell
os <- M.fromList <$> mapM (\n -> (n,) <$> r .: T.encodeUtf8 n) toOther --
(af, ax) <- case toAmount of -- try and parse all fields; if a parse fails, either trip an error
AmountSingle TxAmount1 {a1Column, a1Fmt} -> do -- or return a Nothing if we want to deliberately skip missing fields
f <- r .: T.encodeUtf8 a1Column d <- getField toDate
return (a1Fmt, Right f) e <- getField toDesc
AmountDual TxAmount2 {a2Positive, a2Negative, a2Fmt} -> do os <-
f1 <- r .: T.encodeUtf8 a2Positive fmap M.fromList . sequence
f2 <- r .: T.encodeUtf8 a2Negative <$> mapM (\n -> fmap (n,) <$> getField n) toOther
return (a2Fmt, Left (f1, f2)) (af, ax) <- case toAmount of
if (toSkipBlankDate && d == "") -- the amount column is extra confusing because it can either be one
|| (toSkipBlankDescription && e == "") -- or two columns, so keep track of this with a maybe
|| (toSkipBlankAmount && (ax == Right "" || ax == Left ("", ""))) AmountSingle TxAmount1 {a1Column, a1Fmt} -> do
|| elem "" (mapMaybe (`M.lookup` os) toSkipBlankOther) f <- getField a1Column
then return Nothing return (a1Fmt, Right <$> f)
else do AmountDual TxAmount2 {a2Positive, a2Negative, a2Fmt} -> do
a' <- case ax of f1 <- getField a2Positive
Right a -> parseDecimal True af a f2 <- getField a2Negative
Left ("", a) -> ((-1) *) <$> parseDecimal False af a return $ (a2Fmt,) $ case (f1, f2) of
Left (a, _) -> parseDecimal False af a (Just a, Just b) -> Just $ Left (a, b)
d' <- parseTimeM True defaultTimeLocale (T.unpack toDateFmt) d _ -> Nothing
return $ Just $ TxRecord d' a' e os p case (d, e, os, ax) of
-- If all lookups were successful, check whether any field that is configured
-- to be skipped when blank is in fact blank; if so, return nothing to skip this line
(Just d', Just e', Just os', Just ax') ->
if (toSkipBlankDate && d' == "")
|| (toSkipBlankDescription && e' == "")
|| (toSkipBlankAmount && (ax' == Right "" || ax' == Left ("", "")))
|| elem "" (mapMaybe (`M.lookup` os') toSkipBlankOther)
then return Nothing
else -- if we are skipping nothing, proceed to parse the date and amount
-- columns
do
a <- case ax' of
Right a -> parseDecimal True af a
Left ("", a) -> ((-1) *) <$> parseDecimal False af a
Left (a, _) -> parseDecimal False af a
d'' <- parseTimeM True defaultTimeLocale (T.unpack toDateFmt) d'
return $ Just $ TxRecord d'' a e' os' p
-- if any lookup came back as Nothing (which getField only produces when
-- toSkipMissingFields is set), return nothing to skip this line. Note that
-- a parse failure will trigger an error further up, so that case is already
-- dealt with implicitly
_ -> return Nothing
where
-- Look up the field named 'f' in the record 'r' (the name is UTF-8 encoded
-- before the lookup) and run the resulting parser immediately.
--
--   * success: the value wrapped in 'Just'
--   * failure with 'toSkipMissingFields' set: 'Nothing', so the caller can
--     skip the line
--   * failure otherwise: the original error message is re-raised with 'fail'
--
-- NOTE(review): every 'Left' from 'runParser' is handled the same way, so
-- this presumably also covers failures other than a missing field (e.g. a
-- field conversion error) -- confirm this is intended.
getField :: FromField a => T.Text -> Parser (Maybe a)
getField f = case runParser $ r .: T.encodeUtf8 f of
Left err -> if toSkipMissingFields then return Nothing else fail err
Right x -> return $ Just x
matchRecords :: MonadFinance m => [StatementParserRe] -> [TxRecord] -> AppExceptT m [Tx ()] matchRecords :: MonadFinance m => [StatementParserRe] -> [TxRecord] -> AppExceptT m [Tx ()]
matchRecords ms rs = do matchRecords ms rs = do

View File

@ -532,6 +532,7 @@ data TxOpts re = TxOpts
, toSkipBlankAmount :: !Bool , toSkipBlankAmount :: !Bool
, toSkipBlankDescription :: !Bool , toSkipBlankDescription :: !Bool
, toSkipBlankOther :: ![Text] , toSkipBlankOther :: ![Text]
, toSkipMissingFields :: !Bool
} }
deriving (Functor, Foldable, Traversable) deriving (Functor, Foldable, Traversable)