ENH use bytestring to speed up regex matching
This commit is contained in:
parent
bd72bec920
commit
26c0dff080
|
@ -39,6 +39,7 @@ library
|
||||||
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints -Wpartial-fields -Werror -O2
|
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints -Wpartial-fields -Werror -O2
|
||||||
build-depends:
|
build-depends:
|
||||||
base >=4.12 && <10
|
base >=4.12 && <10
|
||||||
|
, bytestring
|
||||||
, cassava
|
, cassava
|
||||||
, conduit >=1.3.4.2
|
, conduit >=1.3.4.2
|
||||||
, containers >=0.6.4.1
|
, containers >=0.6.4.1
|
||||||
|
@ -71,6 +72,7 @@ executable pwncash
|
||||||
build-depends:
|
build-depends:
|
||||||
base >=4.12 && <10
|
base >=4.12 && <10
|
||||||
, budget
|
, budget
|
||||||
|
, bytestring
|
||||||
, cassava
|
, cassava
|
||||||
, conduit >=1.3.4.2
|
, conduit >=1.3.4.2
|
||||||
, containers >=0.6.4.1
|
, containers >=0.6.4.1
|
||||||
|
|
|
@ -23,6 +23,7 @@ import Dhall hiding (embed, maybe)
|
||||||
import Dhall.TH
|
import Dhall.TH
|
||||||
import Language.Haskell.TH.Syntax (Lift)
|
import Language.Haskell.TH.Syntax (Lift)
|
||||||
import RIO
|
import RIO
|
||||||
|
import qualified RIO.ByteString as B
|
||||||
import qualified RIO.Map as M
|
import qualified RIO.Map as M
|
||||||
import qualified RIO.Text as T
|
import qualified RIO.Text as T
|
||||||
import RIO.Time
|
import RIO.Time
|
||||||
|
@ -517,8 +518,8 @@ data AcntPath = AcntPath
|
||||||
data TxRecord = TxRecord
|
data TxRecord = TxRecord
|
||||||
{ trDate :: !Day
|
{ trDate :: !Day
|
||||||
, trAmount :: !Rational
|
, trAmount :: !Rational
|
||||||
, trDesc :: !T.Text
|
, trDesc :: !B.ByteString
|
||||||
, trOther :: !(M.Map T.Text T.Text)
|
, trOther :: !(M.Map T.Text B.ByteString)
|
||||||
, trFile :: !FilePath
|
, trFile :: !FilePath
|
||||||
}
|
}
|
||||||
deriving (Show, Eq, Ord)
|
deriving (Show, Eq, Ord)
|
||||||
|
@ -586,7 +587,7 @@ data InsertError
|
||||||
| MatchValPrecisionError !Natural !Natural
|
| MatchValPrecisionError !Natural !Natural
|
||||||
| InsertIOError !T.Text
|
| InsertIOError !T.Text
|
||||||
| ParseError !T.Text
|
| ParseError !T.Text
|
||||||
| ConversionError !T.Text
|
| ConversionError !B.ByteString
|
||||||
| LookupError !LookupSuberr !T.Text
|
| LookupError !LookupSuberr !T.Text
|
||||||
| BalanceError !BalanceType !CurID ![RawSplit]
|
| BalanceError !BalanceType !CurID ![RawSplit]
|
||||||
| IncomeError !DatePat
|
| IncomeError !DatePat
|
||||||
|
|
|
@ -38,16 +38,18 @@ module Internal.Utils
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
|
|
||||||
|
import qualified Data.ByteString.Char8 as B8
|
||||||
import Data.Time.Format.ISO8601
|
import Data.Time.Format.ISO8601
|
||||||
import GHC.Real
|
import GHC.Real
|
||||||
import Internal.Types
|
import Internal.Types
|
||||||
import RIO
|
import RIO
|
||||||
|
import qualified RIO.ByteString as B
|
||||||
import qualified RIO.List as L
|
import qualified RIO.List as L
|
||||||
import qualified RIO.Map as M
|
import qualified RIO.Map as M
|
||||||
import qualified RIO.Text as T
|
import qualified RIO.Text as T
|
||||||
import RIO.Time
|
import RIO.Time
|
||||||
import Text.Regex.TDFA
|
import Text.Regex.TDFA
|
||||||
import Text.Regex.TDFA.Text
|
import Text.Regex.TDFA.ByteString
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
-- dates
|
-- dates
|
||||||
|
@ -172,7 +174,7 @@ toTx sc sa toSplits r@TxRecord {..} =
|
||||||
in Tx
|
in Tx
|
||||||
{ txTags = []
|
{ txTags = []
|
||||||
, txDate = trDate
|
, txDate = trDate
|
||||||
, txDescr = trDesc
|
, txDescr = decodeUtf8Lenient trDesc
|
||||||
, txSplits = fromSplit : ss_
|
, txSplits = fromSplit : ss_
|
||||||
}
|
}
|
||||||
where
|
where
|
||||||
|
@ -196,7 +198,7 @@ valMatches MatchVal {..} x
|
||||||
dateMatches :: MatchDate -> Day -> Bool
|
dateMatches :: MatchDate -> Day -> Bool
|
||||||
dateMatches md = (EQ ==) . compareDate md
|
dateMatches md = (EQ ==) . compareDate md
|
||||||
|
|
||||||
otherMatches :: M.Map T.Text T.Text -> MatchOtherRe -> EitherErr Bool
|
otherMatches :: M.Map T.Text B.ByteString -> MatchOtherRe -> EitherErr Bool
|
||||||
otherMatches dict m = case m of
|
otherMatches dict m = case m of
|
||||||
Val (Field n mv) -> valMatches mv =<< (readRational =<< lookup_ MatchNumeric n)
|
Val (Field n mv) -> valMatches mv =<< (readRational =<< lookup_ MatchNumeric n)
|
||||||
Desc (Field n (_, md)) -> (`matchMaybe` md) =<< lookup_ MatchText n
|
Desc (Field n (_, md)) -> (`matchMaybe` md) =<< lookup_ MatchText n
|
||||||
|
@ -226,13 +228,13 @@ resolveCurrency = resolveSplitField CurField
|
||||||
resolveSplitField :: SplitIDType -> TxRecord -> SplitAcnt -> EitherErrs T.Text
|
resolveSplitField :: SplitIDType -> TxRecord -> SplitAcnt -> EitherErrs T.Text
|
||||||
resolveSplitField t TxRecord {trOther = o} s = case s of
|
resolveSplitField t TxRecord {trOther = o} s = case s of
|
||||||
ConstT p -> Right p
|
ConstT p -> Right p
|
||||||
LookupT f -> plural $ lookup_ f o
|
LookupT f -> decodeUtf8Lenient <$> plural (lookup_ f o)
|
||||||
MapT (Field f m) -> plural $ do
|
MapT (Field f m) -> plural $ do
|
||||||
k <- lookup_ f o
|
k <- lookup_ f o
|
||||||
lookup_ k m
|
lookup_ (decodeUtf8Lenient k) m
|
||||||
Map2T (Field (f1, f2) m) -> do
|
Map2T (Field (f1, f2) m) -> do
|
||||||
(k1, k2) <- concatEither2 (lookup_ f1 o) (lookup_ f2 o) (,)
|
(k1, k2) <- concatEither2 (lookup_ f1 o) (lookup_ f2 o) (,)
|
||||||
plural $ lookup_ (k1, k2) m
|
plural $ lookup_ (decodeUtf8Lenient k1, decodeUtf8Lenient k2) m
|
||||||
where
|
where
|
||||||
lookup_ :: (Ord k, Show k) => k -> M.Map k v -> EitherErr v
|
lookup_ :: (Ord k, Show k) => k -> M.Map k v -> EitherErr v
|
||||||
lookup_ = lookupErr (SplitIDField t)
|
lookup_ = lookupErr (SplitIDField t)
|
||||||
|
@ -242,46 +244,58 @@ lookupErr what k m = case M.lookup k m of
|
||||||
Just x -> Right x
|
Just x -> Right x
|
||||||
_ -> Left $ LookupError what $ showT k
|
_ -> Left $ LookupError what $ showT k
|
||||||
|
|
||||||
parseRational :: MonadFail m => (T.Text, Regex) -> T.Text -> m Rational
|
parseRational :: MonadFail m => (T.Text, Regex) -> B.ByteString -> m Rational
|
||||||
parseRational (pat, re) s = case matchGroupsMaybe s re of
|
parseRational (pat, re) s = case matchGroupsMaybe s re of
|
||||||
[sign, x, ""] -> uncurry (*) <$> readWhole sign x
|
[sign, x, ""] -> uncurry (*) <$> readWhole sign x
|
||||||
[sign, x, y] -> do
|
[sign, x, y] -> do
|
||||||
d <- readT "decimal" y
|
d <- readT "decimal" y
|
||||||
let p = 10 ^ T.length y
|
let p = 10 ^ B.length y
|
||||||
(k, w) <- readWhole sign x
|
(k, w) <- readWhole sign x
|
||||||
return $ k * (w + d % p)
|
return $ k * (w + d % p)
|
||||||
_ -> msg "malformed decimal"
|
_ -> msg "malformed decimal"
|
||||||
where
|
where
|
||||||
readT what t = case readMaybe $ T.unpack t of
|
readT what t = case readWholeInteger t of
|
||||||
Just d -> return $ fromInteger d
|
Just d -> return $ fromInteger d
|
||||||
_ -> msg $ T.unwords ["could not parse", what, t]
|
_ -> msg $ T.unwords ["could not parse", what, decodeUtf8Lenient t]
|
||||||
msg m =
|
msg m =
|
||||||
fail $
|
fail $
|
||||||
T.unpack $
|
T.unpack $
|
||||||
T.unwords [m, "-", keyVals [("pattern", pat), ("query", s)]]
|
T.unwords [m, "-", keyVals [("pattern", pat), ("query", decodeUtf8Lenient s)]]
|
||||||
readSign x
|
readSign x
|
||||||
| x == "-" = return (-1)
|
| x == "-" = return (-1)
|
||||||
| x == "+" || x == "" = return 1
|
| x == "+" || x == "" = return 1
|
||||||
| otherwise = msg $ T.append "invalid sign: " x
|
| otherwise = msg $ T.append "invalid sign: " $ decodeUtf8Lenient x
|
||||||
readWhole sign x = do
|
readWhole sign x = do
|
||||||
w <- readT "whole number" x
|
w <- readT "whole number" x
|
||||||
k <- readSign sign
|
k <- readSign sign
|
||||||
return (k, w)
|
return (k, w)
|
||||||
|
|
||||||
readRational :: T.Text -> EitherErr Rational
|
readRational :: B.ByteString -> EitherErr Rational
|
||||||
readRational s = case T.split (== '.') s of
|
readRational s = case B.split 46 s of -- 46 is '.'
|
||||||
[x] -> maybe err (return . fromInteger) $ readT x
|
[x] -> maybe err (return . fromInteger) $ readT x
|
||||||
[x, y] -> case (readT x, readT y) of
|
[x, y] -> case (readT x, readT y) of
|
||||||
(Just x', Just y') ->
|
(Just x', Just y') ->
|
||||||
let p = 10 ^ T.length y
|
let p = 10 ^ B.length y
|
||||||
k = if x' >= 0 then 1 else -1
|
k = if x' >= 0 then 1 else -1
|
||||||
in return $ fromInteger x' + k * y' % p
|
in return $ fromInteger x' + k * y' % p
|
||||||
_ -> err
|
_ -> err
|
||||||
_ -> err
|
_ -> err
|
||||||
where
|
where
|
||||||
readT = readMaybe . T.unpack
|
readT i = case B8.readInteger i of
|
||||||
|
Just (x, "") -> Just x
|
||||||
|
_ -> Nothing
|
||||||
err = Left $ ConversionError s
|
err = Left $ ConversionError s
|
||||||
|
|
||||||
|
readWholeInteger :: B.ByteString -> Maybe Integer
|
||||||
|
readWholeInteger s = case B8.readInteger s of
|
||||||
|
Just (x, "") -> Just x
|
||||||
|
_ -> Nothing
|
||||||
|
|
||||||
|
-- readWholeInt :: B.ByteString -> Maybe Int
|
||||||
|
-- readWholeInt s = case B8.readInt s of
|
||||||
|
-- Just (x, "") -> Just x
|
||||||
|
-- _ -> Nothing
|
||||||
|
|
||||||
-- TODO smells like a lens
|
-- TODO smells like a lens
|
||||||
-- mapTxSplits :: (a -> b) -> Tx a -> Tx b
|
-- mapTxSplits :: (a -> b) -> Tx a -> Tx b
|
||||||
-- mapTxSplits f t@Tx {txSplits = ss} = t {txSplits = fmap f ss}
|
-- mapTxSplits f t@Tx {txSplits = ss} = t {txSplits = fmap f ss}
|
||||||
|
@ -332,7 +346,8 @@ showError other = (: []) $ case other of
|
||||||
ZeroLength -> "Zero repeat length"
|
ZeroLength -> "Zero repeat length"
|
||||||
ZeroRepeats -> "Zero repeats"
|
ZeroRepeats -> "Zero repeats"
|
||||||
(RegexError re) -> T.append "could not make regex from pattern: " re
|
(RegexError re) -> T.append "could not make regex from pattern: " re
|
||||||
(ConversionError x) -> T.append "Could not convert to rational number: " x
|
(ConversionError x) ->
|
||||||
|
T.append "Could not convert to rational number: " $ decodeUtf8Lenient x
|
||||||
(InsertIOError msg) -> T.append "IO Error: " msg
|
(InsertIOError msg) -> T.append "IO Error: " msg
|
||||||
(ParseError msg) -> T.append "Parse Error: " msg
|
(ParseError msg) -> T.append "Parse Error: " msg
|
||||||
(MatchValPrecisionError d p) ->
|
(MatchValPrecisionError d p) ->
|
||||||
|
@ -375,7 +390,7 @@ showTx TxRecord {trDate = d, trAmount = v, trDesc = e, trFile = f} =
|
||||||
[ ("path", T.pack f)
|
[ ("path", T.pack f)
|
||||||
, ("date", T.pack $ iso8601Show d)
|
, ("date", T.pack $ iso8601Show d)
|
||||||
, ("value", showT (fromRational v :: Float))
|
, ("value", showT (fromRational v :: Float))
|
||||||
, ("description", doubleQuote e)
|
, ("description", doubleQuote $ decodeUtf8Lenient e)
|
||||||
]
|
]
|
||||||
|
|
||||||
showMatch :: MatchRe -> T.Text
|
showMatch :: MatchRe -> T.Text
|
||||||
|
@ -584,14 +599,14 @@ compileRegex groups pat = case res of
|
||||||
compile
|
compile
|
||||||
(blankCompOpt {newSyntax = True})
|
(blankCompOpt {newSyntax = True})
|
||||||
(blankExecOpt {captureGroups = groups})
|
(blankExecOpt {captureGroups = groups})
|
||||||
pat
|
(encodeUtf8 pat)
|
||||||
|
|
||||||
matchMaybe :: T.Text -> Regex -> EitherErr Bool
|
matchMaybe :: B.ByteString -> Regex -> EitherErr Bool
|
||||||
matchMaybe q re = case execute re q of
|
matchMaybe q re = case execute re q of
|
||||||
Right res -> Right $ isJust res
|
Right res -> Right $ isJust res
|
||||||
Left _ -> Left $ RegexError "this should not happen"
|
Left _ -> Left $ RegexError "this should not happen"
|
||||||
|
|
||||||
matchGroupsMaybe :: T.Text -> Regex -> [T.Text]
|
matchGroupsMaybe :: B.ByteString -> Regex -> [B.ByteString]
|
||||||
matchGroupsMaybe q re = case regexec re q of
|
matchGroupsMaybe q re = case regexec re q of
|
||||||
Right Nothing -> []
|
Right Nothing -> []
|
||||||
Right (Just (_, _, _, xs)) -> xs
|
Right (Just (_, _, _, xs)) -> xs
|
||||||
|
|
|
@ -41,6 +41,7 @@ dependencies:
|
||||||
- recursion-schemes
|
- recursion-schemes
|
||||||
- data-fix
|
- data-fix
|
||||||
- filepath
|
- filepath
|
||||||
|
- bytestring
|
||||||
|
|
||||||
library:
|
library:
|
||||||
source-dirs: lib/
|
source-dirs: lib/
|
||||||
|
|
Loading…
Reference in New Issue