Skip to content

Commit

Permalink
Add support for padding and char replication
Browse files Browse the repository at this point in the history
- Add `Char` replication functions `prependChars` and `appendChars`
- Add padding functions `justifyLeft`, `justifyRight` and `center`.
- Move low-level `MArray` manipulation to their own module
  `Data.Text.Builder.Linear.Array`.
- Add corresponding tests and benchmarks.
  • Loading branch information
wismill committed Sep 10, 2023
1 parent b6d899a commit ea52747
Show file tree
Hide file tree
Showing 7 changed files with 410 additions and 20 deletions.
175 changes: 171 additions & 4 deletions bench/BenchChar.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@ import qualified Data.ByteString.Builder as B
import Data.Char
import qualified Data.Text as T
import Data.Text.Builder.Linear.Buffer
import qualified Data.Text.Lazy as TL
import Data.Text.Lazy (toStrict)
import qualified Data.Text.Lazy.Builder as TB
import Data.Text.Lazy.Builder (toLazyText, singleton)
import qualified Data.Text.Internal.Fusion.Common as Fusion
import qualified Data.Text.Internal.Fusion as Fusion
import Test.Tasty.Bench

#ifdef MIN_VERSION_text_builder
Expand All @@ -22,6 +26,10 @@ import qualified Text.Builder
import qualified ByteString.StrictBuilder
#endif

--------------------------------------------------------------------------------
-- Single char
--------------------------------------------------------------------------------

benchLazyBuilder Int T.Text
benchLazyBuilder = toStrict . toLazyText . go mempty
where
Expand Down Expand Up @@ -57,11 +65,11 @@ benchLinearBuilder m = runBuffer (\b → go b m)
go !acc 0 = acc
go !acc n = let ch = chr n in go (ch .<| (acc |>. ch)) (n - 1)

benchChar Benchmark
benchChar = bgroup "Char" $ map mkGroup [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6]
benchSingleChar Benchmark
benchSingleChar = bgroup "Single" $ map mkGroupChar [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6]

mkGroup :: Int Benchmark
mkGroup n = bgroup (show n)
mkGroupChar :: Int Benchmark
mkGroupChar n = bgroup (show n)
[ bench "Data.Text.Lazy.Builder" $ nf benchLazyBuilder n
, bench "Data.ByteString.Builder" $ nf benchLazyBuilderBS n
#ifdef MIN_VERSION_text_builder
Expand All @@ -72,3 +80,162 @@ mkGroup n = bgroup (show n)
#endif
, bench "Data.Text.Builder.Linear" $ nf benchLinearBuilder n
]

--------------------------------------------------------------------------------
-- Multiple chars
--------------------------------------------------------------------------------

charCount :: Word
charCount = 3

benchCharsLazyBuilder Int T.Text
benchCharsLazyBuilder = TL.toStrict . TB.toLazyText . go mempty
where
go !acc 0 = acc
go !acc n = let ch = chr n in go (replicateChar ch <> (acc <> replicateChar ch)) (n - 1)

replicateChar ch = TB.fromText (Fusion.unstream (Fusion.replicateCharI charCount ch))

{- [FIXME] bad performance
benchCharsLazyBuilderBS ∷ Int → B.ByteString
benchCharsLazyBuilderBS = B.toStrict . B.toLazyByteString . go mempty
where
go !acc 0 = acc
go !acc n =
let ch = chr n
in go (replicateChar ch <> (acc <> replicateChar ch)) (n - 1)
replicateChar ch = stimes charCount (B.charUtf8 ch)
-}

#ifdef MIN_VERSION_text_builder
benchCharsStrictBuilder Int T.Text
benchCharsStrictBuilder = Text.Builder.run . go mempty
where
go !acc 0 = acc
go !acc n = let ch = chr n in go (replicateChar ch <> (acc <> replicateChar ch)) (n - 1)

-- [TODO] Is there a better way?
replicateChar ch = Text.Builder.padFromRight (fromIntegral charCount) ch mempty
#endif

{- [TODO]
#ifdef MIN_VERSION_bytestring_strict_builder
benchCharsStrictBuilderBS ∷ Int → B.ByteString
benchCharsStrictBuilderBS = ByteString.StrictBuilder.builderBytes . go mempty
where
go !acc 0 = acc
go !acc n = let ch = chr n in go _ (n - 1)
#endif
-}

benchCharsLinearBuilder Int T.Text
benchCharsLinearBuilder m = runBuffer (\b go b m)
where
go Buffer Int Buffer
go !acc 0 = acc
go !acc n = let ch = chr n in go (prependChars charCount ch (appendChars charCount ch acc)) (n - 1)

benchMultipleChars Benchmark
benchMultipleChars = bgroup "Multiple" $ map mkGroupChars [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6]

mkGroupChars :: Int Benchmark
mkGroupChars n = bgroup (show n)
[ bench "Data.Text.Lazy.Builder" $ nf benchCharsLazyBuilder n
-- , bench "Data.ByteString.Builder" $ nf benchCharsLazyBuilderBS n
#ifdef MIN_VERSION_text_builder
, bench "Text.Builder" $ nf benchCharsStrictBuilder n
#endif
-- #ifdef MIN_VERSION_bytestring_strict_builder
-- , bench "ByteString.StrictBuilder" $ nf benchCharsStrictBuilderBS n
-- #endif
, bench "Data.Text.Builder.Linear" $ nf benchCharsLinearBuilder n
]

--------------------------------------------------------------------------------
-- Padding
--------------------------------------------------------------------------------

benchPaddingLazyBuilder Int T.Text
benchPaddingLazyBuilder = toStrict . toLazyText . go mempty 0
where
go !acc !_ 0 = acc
go !acc l n =
let ch = chr n
!l' = l + 2 * fromIntegral charCount
in go (withText (T.justifyLeft l' ch)
(withText (T.justifyRight (l + fromIntegral charCount) ch) acc))
l'
(n - 1)

withText f = TB.fromText . f . TL.toStrict . TB.toLazyText

{- [TODO]
benchPaddingLazyBuilderBS ∷ Int → B.ByteString
benchPaddingLazyBuilderBS = B.toStrict . B.toLazyByteString . go mempty
where
go !acc 0 = acc
go !acc n = let ch = chr n in go _ (n - 1)
-}

#ifdef MIN_VERSION_text_builder
benchPaddingStrictBuilder Int T.Text
benchPaddingStrictBuilder = Text.Builder.run . go mempty 0
where
go !acc !_ 0 = acc
go !acc l n =
let ch = chr n
!l' = l + 2 * fromIntegral charCount
in go (Text.Builder.padFromRight l' ch (Text.Builder.padFromLeft (l + fromIntegral charCount) ch acc))
l'
(n - 1)
#endif

{- [TODO]
#ifdef MIN_VERSION_bytestring_strict_builder
benchPaddingStrictBuilderBS ∷ Int → B.ByteString
benchPaddingStrictBuilderBS = ByteString.StrictBuilder.builderBytes . go mempty
where
go !acc 0 = acc
go !acc n = let ch = chr n in go _ (n - 1)
#endif
-}

benchPaddingLinearBuilder Int T.Text
benchPaddingLinearBuilder m = runBuffer (\b go b 0 m)
where
go Buffer Word Int Buffer
go !acc !_ 0 = acc
go !acc l n =
let ch = chr n
!l' = l + 2 * charCount
in go (justifyLeft l' ch (justifyRight (l + charCount) ch acc))
l'
(n - 1)

benchPadding Benchmark
benchPadding = bgroup "Padding" $ map mkGroupPadding [1e0, 1e1, 1e2, 1e3, 1e4{-, 1e5, 1e6-}] -- NOTE: too long with 1e5

mkGroupPadding :: Int Benchmark
mkGroupPadding n = bgroup (show n)
[ bench "Data.Text.Lazy.Builder" $ nf benchPaddingLazyBuilder n
-- , bench "Data.ByteString.Builder" $ nf benchPaddingLazyBuilderBS n
#ifdef MIN_VERSION_text_builder
, bench "Text.Builder" $ nf benchPaddingStrictBuilder n
#endif
-- #ifdef MIN_VERSION_bytestring_strict_builder
-- , bench "ByteString.StrictBuilder" $ nf benchPaddingStrictBuilderBS n
-- #endif
, bench "Data.Text.Builder.Linear" $ nf benchPaddingLinearBuilder n
]

--------------------------------------------------------------------------------
-- All benchmarks
--------------------------------------------------------------------------------

benchChar Benchmark
benchChar = bgroup "Char"
[ benchSingleChar
, benchMultipleChars
, benchPadding ]

72 changes: 72 additions & 0 deletions src/Data/Text/Builder/Linear/Array.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
-- |
-- Copyright: (c) 2022 Andrew Lelechenko
-- Licence: BSD3
-- Maintainer: Andrew Lelechenko <[email protected]>
--
-- Low-level routines for 'A.MArray' manipulations.
module Data.Text.Builder.Linear.Array (
unsafeThaw,
sizeofByteArray,
isPinned,
unsafeTile,
unsafeReplicate,
) where

import Data.Text.Array qualified as A
import GHC.Exts (Int (..), isByteArrayPinned#, isTrue#, setByteArray#, sizeofByteArray#, unsafeCoerce#)
import GHC.ST (ST (..))

unsafeThaw A.Array ST s (A.MArray s)
unsafeThaw (A.ByteArray a) = ST $ \s#
(# s#, A.MutableByteArray (unsafeCoerce# a) #)

sizeofByteArray A.Array Int
sizeofByteArray (A.ByteArray a) = I# (sizeofByteArray# a)

isPinned A.Array Bool
isPinned (A.ByteArray a) = isTrue# (isByteArrayPinned# a)

-- | Replicate an ASCII character
--
-- __Warning:__ it is the responsibility of the caller to ensure that the 'Int'
-- is a valid ASCII character.
unsafeReplicate
A.MArray s
-- ^ Mutable array
Int
-- ^ Offset
Int
-- ^ Count
Int
-- ^ ASCII character
ST s ()
unsafeReplicate (A.MutableByteArray dst#) (I# dstOff#) (I# count#) (I# w#) =
ST (\s# (# setByteArray# dst# dstOff# count# w# s#, () #))
{-# INLINE unsafeReplicate #-}

-- | Duplicate a portion of an array in-place.
--
-- Example of use:
--
-- @
-- -- Write @count@ times the char @c@
-- let cLen = utf8Length c; totalLen = cLen * count
-- in unsafeWrite dst dstOff ch *> 'unsafeTile' dst dstOff totalLen cLen
-- @
unsafeTile
A.MArray s
-- ^ Mutable array
Int
-- ^ Start of the portion to duplicate
Int
-- ^ Total length of the duplicate
Int
-- ^ Length of the portion to duplicate
ST s ()
unsafeTile dest destOff totalLen = go
where
-- Adapted from Data.Text.Array.tile
go l
| 2 * l > totalLen = A.copyM dest (destOff + l) dest destOff (totalLen - l)
| otherwise = A.copyM dest (destOff + l) dest destOff l *> go (2 * l)
{-# INLINE unsafeTile #-}
9 changes: 9 additions & 0 deletions src/Data/Text/Builder/Linear/Buffer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ module Data.Text.Builder.Linear.Buffer (

-- * Multiple characters

-- ** Character replication
prependChars,
appendChars,

-- ** Text
(|>),
(<|),
Expand All @@ -34,6 +38,11 @@ module Data.Text.Builder.Linear.Buffer (
( #<| ), -- NOTE: extra spaces required because of -XUnboxedTuples
(<|#),

-- * Padding
justifyLeft,
justifyRight,
center,

-- * Number formatting

-- ** Decimal
Expand Down
Loading

0 comments on commit ea52747

Please sign in to comment.