Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for padding and char replication #14

Merged
merged 3 commits into from
Sep 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 171 additions & 4 deletions bench/BenchChar.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@ import qualified Data.ByteString.Builder as B
import Data.Char
import qualified Data.Text as T
import Data.Text.Builder.Linear.Buffer
import qualified Data.Text.Lazy as TL
import Data.Text.Lazy (toStrict)
import qualified Data.Text.Lazy.Builder as TB
import Data.Text.Lazy.Builder (toLazyText, singleton)
import qualified Data.Text.Internal.Fusion.Common as Fusion
import qualified Data.Text.Internal.Fusion as Fusion
import Test.Tasty.Bench

#ifdef MIN_VERSION_text_builder
Expand All @@ -22,6 +26,10 @@ import qualified Text.Builder
import qualified ByteString.StrictBuilder
#endif

--------------------------------------------------------------------------------
-- Single char
--------------------------------------------------------------------------------

benchLazyBuilder ∷ Int → T.Text
benchLazyBuilder = toStrict . toLazyText . go mempty
where
Expand Down Expand Up @@ -57,11 +65,11 @@ benchLinearBuilder m = runBuffer (\b → go b m)
go !acc 0 = acc
go !acc n = let ch = chr n in go (ch .<| (acc |>. ch)) (n - 1)

benchChar ∷ Benchmark
benchChar = bgroup "Char" $ map mkGroup [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6]
benchSingleChar ∷ Benchmark
benchSingleChar = bgroup "Single" $ map mkGroupChar [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6]

mkGroup :: Int → Benchmark
mkGroup n = bgroup (show n)
mkGroupChar :: Int → Benchmark
mkGroupChar n = bgroup (show n)
[ bench "Data.Text.Lazy.Builder" $ nf benchLazyBuilder n
, bench "Data.ByteString.Builder" $ nf benchLazyBuilderBS n
#ifdef MIN_VERSION_text_builder
Expand All @@ -72,3 +80,162 @@ mkGroup n = bgroup (show n)
#endif
, bench "Data.Text.Builder.Linear" $ nf benchLinearBuilder n
]

--------------------------------------------------------------------------------
-- Multiple chars
--------------------------------------------------------------------------------

charCount :: Word
charCount = 3

benchCharsLazyBuilder ∷ Int → T.Text
benchCharsLazyBuilder = TL.toStrict . TB.toLazyText . go mempty
where
go !acc 0 = acc
go !acc n = let ch = chr n in go (replicateChar ch <> (acc <> replicateChar ch)) (n - 1)

replicateChar ch = TB.fromText (Fusion.unstream (Fusion.replicateCharI charCount ch))

{- [FIXME] bad performance
benchCharsLazyBuilderBS ∷ Int → B.ByteString
benchCharsLazyBuilderBS = B.toStrict . B.toLazyByteString . go mempty
where
go !acc 0 = acc
go !acc n =
let ch = chr n
in go (replicateChar ch <> (acc <> replicateChar ch)) (n - 1)

replicateChar ch = stimes charCount (B.charUtf8 ch)
-}

#ifdef MIN_VERSION_text_builder
benchCharsStrictBuilder ∷ Int → T.Text
benchCharsStrictBuilder = Text.Builder.run . go mempty
where
go !acc 0 = acc
go !acc n = let ch = chr n in go (replicateChar ch <> (acc <> replicateChar ch)) (n - 1)

-- [TODO] Is there a better way?
replicateChar ch = Text.Builder.padFromRight (fromIntegral charCount) ch mempty
#endif

{- [TODO]
#ifdef MIN_VERSION_bytestring_strict_builder
benchCharsStrictBuilderBS ∷ Int → B.ByteString
benchCharsStrictBuilderBS = ByteString.StrictBuilder.builderBytes . go mempty
where
go !acc 0 = acc
go !acc n = let ch = chr n in go _ (n - 1)
#endif
-}

benchCharsLinearBuilder ∷ Int → T.Text
benchCharsLinearBuilder m = runBuffer (\b → go b m)
where
go ∷ Buffer ⊸ Int → Buffer
go !acc 0 = acc
go !acc n = let ch = chr n in go (prependChars charCount ch (appendChars charCount ch acc)) (n - 1)

benchMultipleChars ∷ Benchmark
benchMultipleChars = bgroup "Multiple" $ map mkGroupChars [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6]

mkGroupChars :: Int → Benchmark
mkGroupChars n = bgroup (show n)
[ bench "Data.Text.Lazy.Builder" $ nf benchCharsLazyBuilder n
-- , bench "Data.ByteString.Builder" $ nf benchCharsLazyBuilderBS n
#ifdef MIN_VERSION_text_builder
, bench "Text.Builder" $ nf benchCharsStrictBuilder n
#endif
-- #ifdef MIN_VERSION_bytestring_strict_builder
-- , bench "ByteString.StrictBuilder" $ nf benchCharsStrictBuilderBS n
-- #endif
, bench "Data.Text.Builder.Linear" $ nf benchCharsLinearBuilder n
]

--------------------------------------------------------------------------------
-- Padding
--------------------------------------------------------------------------------

benchPaddingLazyBuilder ∷ Int → T.Text
benchPaddingLazyBuilder = toStrict . toLazyText . go mempty 0
where
go !acc !_ 0 = acc
go !acc l n =
let ch = chr n
!l' = l + 2 * fromIntegral charCount
in go (withText (T.justifyLeft l' ch)
(withText (T.justifyRight (l + fromIntegral charCount) ch) acc))
l'
(n - 1)

withText f = TB.fromText . f . TL.toStrict . TB.toLazyText

{- [TODO]
benchPaddingLazyBuilderBS ∷ Int → B.ByteString
benchPaddingLazyBuilderBS = B.toStrict . B.toLazyByteString . go mempty
where
go !acc 0 = acc
go !acc n = let ch = chr n in go _ (n - 1)
-}

#ifdef MIN_VERSION_text_builder
benchPaddingStrictBuilder ∷ Int → T.Text
benchPaddingStrictBuilder = Text.Builder.run . go mempty 0
where
go !acc !_ 0 = acc
go !acc l n =
let ch = chr n
!l' = l + 2 * fromIntegral charCount
in go (Text.Builder.padFromRight l' ch (Text.Builder.padFromLeft (l + fromIntegral charCount) ch acc))
l'
(n - 1)
#endif

{- [TODO]
#ifdef MIN_VERSION_bytestring_strict_builder
benchPaddingStrictBuilderBS ∷ Int → B.ByteString
benchPaddingStrictBuilderBS = ByteString.StrictBuilder.builderBytes . go mempty
where
go !acc 0 = acc
go !acc n = let ch = chr n in go _ (n - 1)
#endif
-}

benchPaddingLinearBuilder ∷ Int → T.Text
benchPaddingLinearBuilder m = runBuffer (\b → go b 0 m)
where
go ∷ Buffer ⊸ Word → Int → Buffer
go !acc !_ 0 = acc
go !acc l n =
let ch = chr n
!l' = l + 2 * charCount
in go (justifyLeft l' ch (justifyRight (l + charCount) ch acc))
l'
(n - 1)

benchPadding ∷ Benchmark
benchPadding = bgroup "Padding" $ map mkGroupPadding [1e0, 1e1, 1e2, 1e3, 1e4{-, 1e5, 1e6-}] -- NOTE: too long with 1e5

mkGroupPadding :: Int → Benchmark
mkGroupPadding n = bgroup (show n)
[ bench "Data.Text.Lazy.Builder" $ nf benchPaddingLazyBuilder n
-- , bench "Data.ByteString.Builder" $ nf benchPaddingLazyBuilderBS n
#ifdef MIN_VERSION_text_builder
, bench "Text.Builder" $ nf benchPaddingStrictBuilder n
#endif
-- #ifdef MIN_VERSION_bytestring_strict_builder
-- , bench "ByteString.StrictBuilder" $ nf benchPaddingStrictBuilderBS n
-- #endif
, bench "Data.Text.Builder.Linear" $ nf benchPaddingLinearBuilder n
]

--------------------------------------------------------------------------------
-- All benchmarks
--------------------------------------------------------------------------------

benchChar ∷ Benchmark
benchChar = bgroup "Char"
[ benchSingleChar
, benchMultipleChars
, benchPadding ]

73 changes: 73 additions & 0 deletions src/Data/Text/Builder/Linear/Array.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
-- |
-- Copyright: (c) 2022 Andrew Lelechenko
wismill marked this conversation as resolved.
Show resolved Hide resolved
-- (c) 2023 Pierre Le Marre
-- Licence: BSD3
-- Maintainer: Andrew Lelechenko <[email protected]>
--
-- Low-level routines for 'A.MArray' manipulations.
module Data.Text.Builder.Linear.Array (
unsafeThaw,
sizeofByteArray,
isPinned,
unsafeTile,
unsafeReplicate,
) where

import Data.Text.Array qualified as A
import GHC.Exts (Int (..), isByteArrayPinned#, isTrue#, setByteArray#, sizeofByteArray#, unsafeCoerce#)
import GHC.ST (ST (..))

unsafeThaw ∷ A.Array → ST s (A.MArray s)
unsafeThaw (A.ByteArray a) = ST $ \s# →
(# s#, A.MutableByteArray (unsafeCoerce# a) #)

sizeofByteArray ∷ A.Array → Int
sizeofByteArray (A.ByteArray a) = I# (sizeofByteArray# a)

isPinned ∷ A.Array → Bool
isPinned (A.ByteArray a) = isTrue# (isByteArrayPinned# a)

-- | Replicate an ASCII character
--
-- __Warning:__ it is the responsibility of the caller to ensure that the 'Int'
-- is a valid ASCII character.
unsafeReplicate
∷ A.MArray s
-- ^ Mutable array
→ Int
-- ^ Offset
→ Int
-- ^ Count
→ Int
-- ^ ASCII character
→ ST s ()
unsafeReplicate (A.MutableByteArray dst#) (I# dstOff#) (I# count#) (I# w#) =
ST (\s# → (# setByteArray# dst# dstOff# count# w# s#, () #))
{-# INLINE unsafeReplicate #-}

-- | Duplicate a portion of an array in-place.
--
-- Example of use:
--
-- @
-- -- Write @count@ times the char @c@
-- let cLen = utf8Length c; totalLen = cLen * count
-- in unsafeWrite dst dstOff ch *> 'unsafeTile' dst dstOff totalLen cLen
-- @
unsafeTile
∷ A.MArray s
-- ^ Mutable array
→ Int
-- ^ Start of the portion to duplicate
→ Int
-- ^ Total length of the duplicate
→ Int
-- ^ Length of the portion to duplicate
→ ST s ()
unsafeTile dest destOff totalLen = go
where
-- Adapted from Data.Text.Array.tile
go l
| 2 * l > totalLen = A.copyM dest (destOff + l) dest destOff (totalLen - l)
| otherwise = A.copyM dest (destOff + l) dest destOff l *> go (2 * l)
{-# INLINE unsafeTile #-}
11 changes: 11 additions & 0 deletions src/Data/Text/Builder/Linear/Buffer.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
-- |
-- Copyright: (c) 2022 Andrew Lelechenko
-- (c) 2023 Pierre Le Marre
-- Licence: BSD3
-- Maintainer: Andrew Lelechenko <[email protected]>
--
Expand All @@ -15,6 +16,7 @@ module Data.Text.Builder.Linear.Buffer (
consumeBuffer,
eraseBuffer,
foldlIntoBuffer,
newEmptyBuffer,
(><),

-- * Single character
Expand All @@ -23,6 +25,10 @@ module Data.Text.Builder.Linear.Buffer (

-- * Multiple characters

-- ** Character replication
prependChars,
appendChars,

-- ** Text
(|>),
(<|),
Expand All @@ -34,6 +40,11 @@ module Data.Text.Builder.Linear.Buffer (
( #<| ), -- NOTE: extra spaces required because of -XUnboxedTuples
(<|#),

-- * Padding
justifyLeft,
justifyRight,
center,

-- * Number formatting

-- ** Decimal
Expand Down
Loading