ENH use bytestring to speed up regex matching
This commit is contained in:
parent
bd72bec920
commit
26c0dff080
|
@ -39,6 +39,7 @@ library
|
|||
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints -Wpartial-fields -Werror -O2
|
||||
build-depends:
|
||||
base >=4.12 && <10
|
||||
, bytestring
|
||||
, cassava
|
||||
, conduit >=1.3.4.2
|
||||
, containers >=0.6.4.1
|
||||
|
@ -71,6 +72,7 @@ executable pwncash
|
|||
build-depends:
|
||||
base >=4.12 && <10
|
||||
, budget
|
||||
, bytestring
|
||||
, cassava
|
||||
, conduit >=1.3.4.2
|
||||
, containers >=0.6.4.1
|
||||
|
|
|
@ -23,6 +23,7 @@ import Dhall hiding (embed, maybe)
|
|||
import Dhall.TH
|
||||
import Language.Haskell.TH.Syntax (Lift)
|
||||
import RIO
|
||||
import qualified RIO.ByteString as B
|
||||
import qualified RIO.Map as M
|
||||
import qualified RIO.Text as T
|
||||
import RIO.Time
|
||||
|
@ -517,8 +518,8 @@ data AcntPath = AcntPath
|
|||
data TxRecord = TxRecord
|
||||
{ trDate :: !Day
|
||||
, trAmount :: !Rational
|
||||
, trDesc :: !T.Text
|
||||
, trOther :: !(M.Map T.Text T.Text)
|
||||
, trDesc :: !B.ByteString
|
||||
, trOther :: !(M.Map T.Text B.ByteString)
|
||||
, trFile :: !FilePath
|
||||
}
|
||||
deriving (Show, Eq, Ord)
|
||||
|
@ -586,7 +587,7 @@ data InsertError
|
|||
| MatchValPrecisionError !Natural !Natural
|
||||
| InsertIOError !T.Text
|
||||
| ParseError !T.Text
|
||||
| ConversionError !T.Text
|
||||
| ConversionError !B.ByteString
|
||||
| LookupError !LookupSuberr !T.Text
|
||||
| BalanceError !BalanceType !CurID ![RawSplit]
|
||||
| IncomeError !DatePat
|
||||
|
|
|
@ -38,16 +38,18 @@ module Internal.Utils
|
|||
)
|
||||
where
|
||||
|
||||
import qualified Data.ByteString.Char8 as B8
|
||||
import Data.Time.Format.ISO8601
|
||||
import GHC.Real
|
||||
import Internal.Types
|
||||
import RIO
|
||||
import qualified RIO.ByteString as B
|
||||
import qualified RIO.List as L
|
||||
import qualified RIO.Map as M
|
||||
import qualified RIO.Text as T
|
||||
import RIO.Time
|
||||
import Text.Regex.TDFA
|
||||
import Text.Regex.TDFA.Text
|
||||
import Text.Regex.TDFA.ByteString
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
-- dates
|
||||
|
@ -172,7 +174,7 @@ toTx sc sa toSplits r@TxRecord {..} =
|
|||
in Tx
|
||||
{ txTags = []
|
||||
, txDate = trDate
|
||||
, txDescr = trDesc
|
||||
, txDescr = decodeUtf8Lenient trDesc
|
||||
, txSplits = fromSplit : ss_
|
||||
}
|
||||
where
|
||||
|
@ -196,7 +198,7 @@ valMatches MatchVal {..} x
|
|||
dateMatches :: MatchDate -> Day -> Bool
|
||||
dateMatches md = (EQ ==) . compareDate md
|
||||
|
||||
otherMatches :: M.Map T.Text T.Text -> MatchOtherRe -> EitherErr Bool
|
||||
otherMatches :: M.Map T.Text B.ByteString -> MatchOtherRe -> EitherErr Bool
|
||||
otherMatches dict m = case m of
|
||||
Val (Field n mv) -> valMatches mv =<< (readRational =<< lookup_ MatchNumeric n)
|
||||
Desc (Field n (_, md)) -> (`matchMaybe` md) =<< lookup_ MatchText n
|
||||
|
@ -226,13 +228,13 @@ resolveCurrency = resolveSplitField CurField
|
|||
resolveSplitField :: SplitIDType -> TxRecord -> SplitAcnt -> EitherErrs T.Text
|
||||
resolveSplitField t TxRecord {trOther = o} s = case s of
|
||||
ConstT p -> Right p
|
||||
LookupT f -> plural $ lookup_ f o
|
||||
LookupT f -> decodeUtf8Lenient <$> plural (lookup_ f o)
|
||||
MapT (Field f m) -> plural $ do
|
||||
k <- lookup_ f o
|
||||
lookup_ k m
|
||||
lookup_ (decodeUtf8Lenient k) m
|
||||
Map2T (Field (f1, f2) m) -> do
|
||||
(k1, k2) <- concatEither2 (lookup_ f1 o) (lookup_ f2 o) (,)
|
||||
plural $ lookup_ (k1, k2) m
|
||||
plural $ lookup_ (decodeUtf8Lenient k1, decodeUtf8Lenient k2) m
|
||||
where
|
||||
lookup_ :: (Ord k, Show k) => k -> M.Map k v -> EitherErr v
|
||||
lookup_ = lookupErr (SplitIDField t)
|
||||
|
@ -242,46 +244,58 @@ lookupErr what k m = case M.lookup k m of
|
|||
Just x -> Right x
|
||||
_ -> Left $ LookupError what $ showT k
|
||||
|
||||
parseRational :: MonadFail m => (T.Text, Regex) -> T.Text -> m Rational
|
||||
parseRational :: MonadFail m => (T.Text, Regex) -> B.ByteString -> m Rational
|
||||
parseRational (pat, re) s = case matchGroupsMaybe s re of
|
||||
[sign, x, ""] -> uncurry (*) <$> readWhole sign x
|
||||
[sign, x, y] -> do
|
||||
d <- readT "decimal" y
|
||||
let p = 10 ^ T.length y
|
||||
let p = 10 ^ B.length y
|
||||
(k, w) <- readWhole sign x
|
||||
return $ k * (w + d % p)
|
||||
_ -> msg "malformed decimal"
|
||||
where
|
||||
readT what t = case readMaybe $ T.unpack t of
|
||||
readT what t = case readWholeInteger t of
|
||||
Just d -> return $ fromInteger d
|
||||
_ -> msg $ T.unwords ["could not parse", what, t]
|
||||
_ -> msg $ T.unwords ["could not parse", what, decodeUtf8Lenient t]
|
||||
msg m =
|
||||
fail $
|
||||
T.unpack $
|
||||
T.unwords [m, "-", keyVals [("pattern", pat), ("query", s)]]
|
||||
T.unwords [m, "-", keyVals [("pattern", pat), ("query", decodeUtf8Lenient s)]]
|
||||
readSign x
|
||||
| x == "-" = return (-1)
|
||||
| x == "+" || x == "" = return 1
|
||||
| otherwise = msg $ T.append "invalid sign: " x
|
||||
| otherwise = msg $ T.append "invalid sign: " $ decodeUtf8Lenient x
|
||||
readWhole sign x = do
|
||||
w <- readT "whole number" x
|
||||
k <- readSign sign
|
||||
return (k, w)
|
||||
|
||||
readRational :: T.Text -> EitherErr Rational
|
||||
readRational s = case T.split (== '.') s of
|
||||
readRational :: B.ByteString -> EitherErr Rational
|
||||
readRational s = case B.split 46 s of -- 46 is '.'
|
||||
[x] -> maybe err (return . fromInteger) $ readT x
|
||||
[x, y] -> case (readT x, readT y) of
|
||||
(Just x', Just y') ->
|
||||
let p = 10 ^ T.length y
|
||||
let p = 10 ^ B.length y
|
||||
k = if x' >= 0 then 1 else -1
|
||||
in return $ fromInteger x' + k * y' % p
|
||||
_ -> err
|
||||
_ -> err
|
||||
where
|
||||
readT = readMaybe . T.unpack
|
||||
readT i = case B8.readInteger i of
|
||||
Just (x, "") -> Just x
|
||||
_ -> Nothing
|
||||
err = Left $ ConversionError s
|
||||
|
||||
readWholeInteger :: B.ByteString -> Maybe Integer
|
||||
readWholeInteger s = case B8.readInteger s of
|
||||
Just (x, "") -> Just x
|
||||
_ -> Nothing
|
||||
|
||||
-- readWholeInt :: B.ByteString -> Maybe Int
|
||||
-- readWholeInt s = case B8.readInt s of
|
||||
-- Just (x, "") -> Just x
|
||||
-- _ -> Nothing
|
||||
|
||||
-- TODO smells like a lens
|
||||
-- mapTxSplits :: (a -> b) -> Tx a -> Tx b
|
||||
-- mapTxSplits f t@Tx {txSplits = ss} = t {txSplits = fmap f ss}
|
||||
|
@ -332,7 +346,8 @@ showError other = (: []) $ case other of
|
|||
ZeroLength -> "Zero repeat length"
|
||||
ZeroRepeats -> "Zero repeats"
|
||||
(RegexError re) -> T.append "could not make regex from pattern: " re
|
||||
(ConversionError x) -> T.append "Could not convert to rational number: " x
|
||||
(ConversionError x) ->
|
||||
T.append "Could not convert to rational number: " $ decodeUtf8Lenient x
|
||||
(InsertIOError msg) -> T.append "IO Error: " msg
|
||||
(ParseError msg) -> T.append "Parse Error: " msg
|
||||
(MatchValPrecisionError d p) ->
|
||||
|
@ -375,7 +390,7 @@ showTx TxRecord {trDate = d, trAmount = v, trDesc = e, trFile = f} =
|
|||
[ ("path", T.pack f)
|
||||
, ("date", T.pack $ iso8601Show d)
|
||||
, ("value", showT (fromRational v :: Float))
|
||||
, ("description", doubleQuote e)
|
||||
, ("description", doubleQuote $ decodeUtf8Lenient e)
|
||||
]
|
||||
|
||||
showMatch :: MatchRe -> T.Text
|
||||
|
@ -584,14 +599,14 @@ compileRegex groups pat = case res of
|
|||
compile
|
||||
(blankCompOpt {newSyntax = True})
|
||||
(blankExecOpt {captureGroups = groups})
|
||||
pat
|
||||
(encodeUtf8 pat)
|
||||
|
||||
matchMaybe :: T.Text -> Regex -> EitherErr Bool
|
||||
matchMaybe :: B.ByteString -> Regex -> EitherErr Bool
|
||||
matchMaybe q re = case execute re q of
|
||||
Right res -> Right $ isJust res
|
||||
Left _ -> Left $ RegexError "this should not happen"
|
||||
|
||||
matchGroupsMaybe :: T.Text -> Regex -> [T.Text]
|
||||
matchGroupsMaybe :: B.ByteString -> Regex -> [B.ByteString]
|
||||
matchGroupsMaybe q re = case regexec re q of
|
||||
Right Nothing -> []
|
||||
Right (Just (_, _, _, xs)) -> xs
|
||||
|
|
|
@ -41,6 +41,7 @@ dependencies:
|
|||
- recursion-schemes
|
||||
- data-fix
|
||||
- filepath
|
||||
- bytestring
|
||||
|
||||
library:
|
||||
source-dirs: lib/
|
||||
|
|
Loading…
Reference in New Issue