From 5c3874d4bd905f46015506d5bb4ba1b1760f7a5a Mon Sep 17 00:00:00 2001 From: ndwarshuis Date: Sun, 5 Feb 2023 12:29:43 -0500 Subject: [PATCH] Revert "ENH use bytestring to speed up regex matching" This reverts commit 26c0dff080ffbdccbb79390fa05cb601574fc1ba. --- budget.cabal | 2 -- lib/Internal/Types.hs | 7 +++--- lib/Internal/Utils.hs | 57 ++++++++++++++++--------------------------- package.yaml | 1 - 4 files changed, 24 insertions(+), 43 deletions(-) diff --git a/budget.cabal b/budget.cabal index 80c9495..90ea7b0 100644 --- a/budget.cabal +++ b/budget.cabal @@ -39,7 +39,6 @@ library ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints -Wpartial-fields -Werror -O2 build-depends: base >=4.12 && <10 - , bytestring , cassava , conduit >=1.3.4.2 , containers >=0.6.4.1 @@ -72,7 +71,6 @@ executable pwncash build-depends: base >=4.12 && <10 , budget - , bytestring , cassava , conduit >=1.3.4.2 , containers >=0.6.4.1 diff --git a/lib/Internal/Types.hs b/lib/Internal/Types.hs index 1dcde73..7e1c2a0 100644 --- a/lib/Internal/Types.hs +++ b/lib/Internal/Types.hs @@ -23,7 +23,6 @@ import Dhall hiding (embed, maybe) import Dhall.TH import Language.Haskell.TH.Syntax (Lift) import RIO -import qualified RIO.ByteString as B import qualified RIO.Map as M import qualified RIO.Text as T import RIO.Time @@ -518,8 +517,8 @@ data AcntPath = AcntPath data TxRecord = TxRecord { trDate :: !Day , trAmount :: !Rational - , trDesc :: !B.ByteString - , trOther :: !(M.Map T.Text B.ByteString) + , trDesc :: !T.Text + , trOther :: !(M.Map T.Text T.Text) , trFile :: !FilePath } deriving (Show, Eq, Ord) @@ -587,7 +586,7 @@ data InsertError | MatchValPrecisionError !Natural !Natural | InsertIOError !T.Text | ParseError !T.Text - | ConversionError !B.ByteString + | ConversionError !T.Text | LookupError !LookupSuberr !T.Text | BalanceError !BalanceType !CurID ![RawSplit] | IncomeError !DatePat diff --git a/lib/Internal/Utils.hs b/lib/Internal/Utils.hs index d23b98b..aa3446b 100644 --- a/lib/Internal/Utils.hs +++ b/lib/Internal/Utils.hs @@ -38,18 +38,16 @@ module Internal.Utils ) where -import qualified Data.ByteString.Char8 as B8 import Data.Time.Format.ISO8601 import GHC.Real import Internal.Types import RIO -import qualified RIO.ByteString as B import qualified RIO.List as L import qualified RIO.Map as M import qualified RIO.Text as T import RIO.Time import Text.Regex.TDFA -import Text.Regex.TDFA.ByteString +import Text.Regex.TDFA.Text -------------------------------------------------------------------------------- -- dates @@ -174,7 +172,7 @@ toTx sc sa toSplits r@TxRecord {..} = in Tx { txTags = [] , txDate = trDate - , txDescr = decodeUtf8Lenient trDesc + , txDescr = trDesc , txSplits = fromSplit : ss_ } where @@ -198,7 +196,7 @@ valMatches MatchVal {..} x dateMatches :: MatchDate -> Day -> Bool dateMatches md = (EQ ==) . compareDate md -otherMatches :: M.Map T.Text B.ByteString -> MatchOtherRe -> EitherErr Bool +otherMatches :: M.Map T.Text T.Text -> MatchOtherRe -> EitherErr Bool otherMatches dict m = case m of Val (Field n mv) -> valMatches mv =<< (readRational =<< lookup_ MatchNumeric n) Desc (Field n (_, md)) -> (`matchMaybe` md) =<< lookup_ MatchText n @@ -228,13 +226,13 @@ resolveCurrency = resolveSplitField CurField resolveSplitField :: SplitIDType -> TxRecord -> SplitAcnt -> EitherErrs T.Text resolveSplitField t TxRecord {trOther = o} s = case s of ConstT p -> Right p - LookupT f -> decodeUtf8Lenient <$> plural (lookup_ f o) + LookupT f -> plural $ lookup_ f o MapT (Field f m) -> plural $ do k <- lookup_ f o - lookup_ (decodeUtf8Lenient k) m + lookup_ k m Map2T (Field (f1, f2) m) -> do (k1, k2) <- concatEither2 (lookup_ f1 o) (lookup_ f2 o) (,) - plural $ lookup_ (decodeUtf8Lenient k1, decodeUtf8Lenient k2) m + plural $ lookup_ (k1, k2) m where lookup_ :: (Ord k, Show k) => k -> M.Map k v -> EitherErr v lookup_ = lookupErr (SplitIDField t) @@ -244,58 +242,46 @@ lookupErr what k m = case M.lookup k m of Just x -> Right x _ -> Left $ LookupError what $ showT k -parseRational :: MonadFail m => (T.Text, Regex) -> B.ByteString -> m Rational +parseRational :: MonadFail m => (T.Text, Regex) -> T.Text -> m Rational parseRational (pat, re) s = case matchGroupsMaybe s re of [sign, x, ""] -> uncurry (*) <$> readWhole sign x [sign, x, y] -> do d <- readT "decimal" y - let p = 10 ^ B.length y + let p = 10 ^ T.length y (k, w) <- readWhole sign x return $ k * (w + d % p) _ -> msg "malformed decimal" where - readT what t = case readWholeInteger t of + readT what t = case readMaybe $ T.unpack t of Just d -> return $ fromInteger d - _ -> msg $ T.unwords ["could not parse", what, decodeUtf8Lenient t] + _ -> msg $ T.unwords ["could not parse", what, t] msg m = fail $ T.unpack $ - T.unwords [m, "-", keyVals [("pattern", pat), ("query", decodeUtf8Lenient s)]] + T.unwords [m, "-", keyVals [("pattern", pat), ("query", s)]] readSign x | x == "-" = return (-1) | x == "+" || x == "" = return 1 - | otherwise = msg $ T.append "invalid sign: " $ decodeUtf8Lenient x + | otherwise = msg $ T.append "invalid sign: " x readWhole sign x = do w <- readT "whole number" x k <- readSign sign return (k, w) -readRational :: B.ByteString -> EitherErr Rational -readRational s = case B.split 46 s of -- 46 is '.' +readRational :: T.Text -> EitherErr Rational +readRational s = case T.split (== '.') s of [x] -> maybe err (return . fromInteger) $ readT x [x, y] -> case (readT x, readT y) of (Just x', Just y') -> - let p = 10 ^ B.length y + let p = 10 ^ T.length y k = if x' >= 0 then 1 else -1 in return $ fromInteger x' + k * y' % p _ -> err _ -> err where - readT i = case B8.readInteger i of - Just (x, "") -> Just x - _ -> Nothing + readT = readMaybe . T.unpack err = Left $ ConversionError s -readWholeInteger :: B.ByteString -> Maybe Integer -readWholeInteger s = case B8.readInteger s of - Just (x, "") -> Just x - _ -> Nothing - --- readWholeInt :: B.ByteString -> Maybe Int --- readWholeInt s = case B8.readInt s of --- Just (x, "") -> Just x --- _ -> Nothing - -- TODO smells like a lens -- mapTxSplits :: (a -> b) -> Tx a -> Tx b -- mapTxSplits f t@Tx {txSplits = ss} = t {txSplits = fmap f ss} @@ -346,8 +332,7 @@ showError other = (: []) $ case other of ZeroLength -> "Zero repeat length" ZeroRepeats -> "Zero repeats" (RegexError re) -> T.append "could not make regex from pattern: " re - (ConversionError x) -> - T.append "Could not convert to rational number: " $ decodeUtf8Lenient x + (ConversionError x) -> T.append "Could not convert to rational number: " x (InsertIOError msg) -> T.append "IO Error: " msg (ParseError msg) -> T.append "Parse Error: " msg (MatchValPrecisionError d p) -> @@ -390,7 +375,7 @@ showTx TxRecord {trDate = d, trAmount = v, trDesc = e, trFile = f} = [ ("path", T.pack f) , ("date", T.pack $ iso8601Show d) , ("value", showT (fromRational v :: Float)) - , ("description", doubleQuote $ decodeUtf8Lenient e) + , ("description", doubleQuote e) ] showMatch :: MatchRe -> T.Text @@ -599,14 +584,14 @@ compileRegex groups pat = case res of compile (blankCompOpt {newSyntax = True}) (blankExecOpt {captureGroups = groups}) - (encodeUtf8 pat) + pat -matchMaybe :: B.ByteString -> Regex -> EitherErr Bool +matchMaybe :: T.Text -> Regex -> EitherErr Bool matchMaybe q re = case execute re q of Right res -> Right $ isJust res Left _ -> Left $ RegexError "this should not happen" -matchGroupsMaybe :: B.ByteString -> Regex -> [B.ByteString] +matchGroupsMaybe :: T.Text -> Regex -> [T.Text] matchGroupsMaybe q re = case regexec re q of Right Nothing -> [] Right (Just (_, _, _, xs)) -> xs diff --git a/package.yaml b/package.yaml index 2d68c2b..fe911f1 100644 --- a/package.yaml +++ b/package.yaml @@ -41,7 +41,6 @@ dependencies: - recursion-schemes - data-fix - filepath -- bytestring library: source-dirs: lib/