From ba785df67bc165c33e4180fef296f8a76789b25f Mon Sep 17 00:00:00 2001 From: Brian McKeon <135748266+brianjosephmckeon@users.noreply.github.com> Date: Thu, 1 Feb 2024 18:50:41 -0500 Subject: [PATCH] Prepare 0.1.1.1 release Reformatted. Added workflows. Updated package metadata. Removed redundant doctest suite. --- .github/CODEOWNERS | 1 + .github/workflows/build.yaml | 12 ++ .github/workflows/release.yaml | 12 ++ .gitignore | 1 + CHANGELOG.md | 4 + Setup.hs | 2 - base62.cabal | 82 ++++----- fourmolu.yaml | 51 ++++++ src/Data/Word/Base62.hs | 300 ++++++++++++++++----------------- test/Doctest.hs | 8 - test/Main.hs | 80 ++++----- 11 files changed, 300 insertions(+), 253 deletions(-) create mode 100644 .github/CODEOWNERS create mode 100644 .github/workflows/build.yaml create mode 100644 .github/workflows/release.yaml delete mode 100644 Setup.hs create mode 100644 fourmolu.yaml delete mode 100644 test/Doctest.hs diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..f6c0b22 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +@byteverse/l3c diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..085bbaf --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,12 @@ +name: build +on: + pull_request: + branches: + - "*" + +jobs: + call-workflow: + uses: byteverse/.github/.github/workflows/build.yaml@main + secrets: inherit + with: + release: false diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..bd0bbd5 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,12 @@ +name: release +on: + push: + tags: + - "*" + +jobs: + call-workflow: + uses: byteverse/.github/.github/workflows/build.yaml@main + secrets: inherit + with: + release: true diff --git a/.gitignore b/.gitignore index 28d589b..cde1485 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.vscode/ dist dist-* cabal-dev diff --git a/CHANGELOG.md b/CHANGELOG.md index 8fea4b4..9427e5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Revision history for base62 +## 0.1.1.1 -- 2024-02-01 + +* Update package metadata. + ## 0.1.1.0 -- 2023-08-08 * Add `shortText128`. diff --git a/Setup.hs b/Setup.hs deleted file mode 100644 index 9a994af..0000000 --- a/Setup.hs +++ /dev/null @@ -1,2 +0,0 @@ -import Distribution.Simple -main = defaultMain diff --git a/base62.cabal b/base62.cabal index 02934b7..dc22191 100644 --- a/base62.cabal +++ b/base62.cabal @@ -1,58 +1,50 @@ -cabal-version: 2.2 -name: base62 -version: 0.1.1.0 -synopsis: Base62 encoding and decoding -description: - Encode and decode using the base62 encoding scheme. -homepage: https://github.com/byteverse/base62 -bug-reports: https://github.com/byteverse/base62/issues -license: BSD-3-Clause -license-file: LICENSE -author: Andrew Martin -maintainer: andrew.thaddeus@gmail.com -copyright: 2019 Andrew Martin -category: Data -extra-source-files: CHANGELOG.md +cabal-version: 2.2 +name: base62 +version: 0.1.1.1 +synopsis: Base62 encoding and decoding +description: Encode and decode using the base62 encoding scheme. +homepage: https://github.com/byteverse/base62 +bug-reports: https://github.com/byteverse/base62/issues +license: BSD-3-Clause +license-file: LICENSE +author: Andrew Martin +maintainer: amartin@layer3com.com +copyright: 2019 Andrew Martin +category: Data +extra-doc-files: CHANGELOG.md library - exposed-modules: - Data.Word.Base62 + exposed-modules: Data.Word.Base62 build-depends: - , base >=4.17 && <5 - , bytebuild >=0.3.4 && <0.4 - , byteslice >=0.2 && <0.3 - , bytestring >=0.11.4 - , natural-arithmetic >=0.1 && <0.2 - , primitive >=0.7 && <0.10 - , text >=2.0.2 - , text-short >=0.1.5 - , wide-word >=0.1.0.8 && <0.2 - hs-source-dirs: src + , base >=4.17 && <5 + , bytebuild >=0.3.4 && <0.4 + , byteslice >=0.2 && <0.3 + , bytestring >=0.11.4 && <0.12 + , natural-arithmetic >=0.1 && <0.2 + , primitive >=0.7 && <0.10 + , text >=2.0.2 && <2.1 + , text-short >=0.1.5 && <0.2 + , wide-word >=0.1.0.8 && <0.2 + + hs-source-dirs: src default-language: Haskell2010 - ghc-options: -Wall -O2 + ghc-options: -Wall -O2 test-suite test default-language: Haskell2010 - type: exitcode-stdio-1.0 - hs-source-dirs: test - main-is: Main.hs + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: Main.hs build-depends: - , base >=4.12.0.0 && <5 + , base >=4.12.0.0 && <5 , base62 - , tasty - , tasty-quickcheck - , tasty-hunit , byteslice , primitive + , tasty + , tasty-hunit + , tasty-quickcheck , wide-word -test-suite doctest - type: exitcode-stdio-1.0 - hs-source-dirs: test - main-is: Doctest.hs - build-depends: - , base - , doctest >= 0.10 - , base62 - , wide-word - default-language: Haskell2010 +source-repository head + type: git + location: git://github.com/byteverse/base62.git diff --git a/fourmolu.yaml b/fourmolu.yaml new file mode 100644 index 0000000..40cd005 --- /dev/null +++ b/fourmolu.yaml @@ -0,0 +1,51 @@ +# Number of spaces per indentation step +indentation: 2 + +# Max line length for automatic line breaking +column-limit: 200 + +# Styling of arrows in type signatures (choices: trailing, leading, or leading-args) +function-arrows: trailing + +# How to place commas in multi-line lists, records, etc. (choices: leading or trailing) +comma-style: leading + +# Styling of import/export lists (choices: leading, trailing, or diff-friendly) +import-export-style: leading + +# Whether to full-indent or half-indent 'where' bindings past the preceding body +indent-wheres: false + +# Whether to leave a space before an opening record brace +record-brace-space: true + +# Number of spaces between top-level declarations +newlines-between-decls: 1 + +# How to print Haddock comments (choices: single-line, multi-line, or multi-line-compact) +haddock-style: multi-line + +# How to print module docstring +haddock-style-module: null + +# Styling of let blocks (choices: auto, inline, newline, or mixed) +let-style: auto + +# How to align the 'in' keyword with respect to the 'let' keyword (choices: left-align, right-align, or no-space) +in-style: right-align + +# Whether to put parentheses around a single constraint (choices: auto, always, or never) +single-constraint-parens: always + +# Output Unicode syntax (choices: detect, always, or never) +unicode: never + +# Give the programmer more choice on where to insert blank lines +respectful: true + +# Fixity information for operators +fixities: [] + +# Module reexports Fourmolu should know about +reexports: [] + diff --git a/src/Data/Word/Base62.hs b/src/Data/Word/Base62.hs index 04afb16..904596e 100644 --- a/src/Data/Word/Base62.hs +++ b/src/Data/Word/Base62.hs @@ -1,27 +1,26 @@ -{-# language BangPatterns #-} -{-# language DataKinds #-} -{-# language GADTSyntax #-} -{-# language LambdaCase #-} -{-# language MagicHash #-} -{-# language PatternSynonyms #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language TypeApplications #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTSyntax #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE UnboxedTuples #-} --- | This module provides functions for encoding fixed-width words --- using the base-62 encoding scheme. The encoding functions in this --- module produce byte sequences that are ASCII-compatible text --- encodings (e.g. ISO-8859-1 and UTF-8). Similarly, the decoding --- functions only decode byte sequences that are an ASCII-compatible --- text encoding of characters in the class @[A-Za-Z0-9]@. Other --- encodings (notably UTF-16) are not supported but would be --- accepted in a pull request. +{- | This module provides functions for encoding fixed-width words +using the base-62 encoding scheme. The encoding functions in this +module produce byte sequences that are ASCII-compatible text +encodings (e.g. ISO-8859-1 and UTF-8). Similarly, the decoding +functions only decode byte sequences that are an ASCII-compatible +text encoding of characters in the class @[A-Za-Z0-9]@. Other +encodings (notably UTF-16) are not supported but would be +accepted in a pull request. +-} module Data.Word.Base62 ( -- * 64-bit Word encode64 , builder64 , decode64 + -- * 128-bit Word , encode128 , shortText128 @@ -30,139 +29,130 @@ module Data.Word.Base62 , decode128 ) where -import Data.ByteString.Short.Internal (ShortByteString(SBS)) -import Data.Bytes.Builder.Bounded.Unsafe (Builder(..)) -import Data.Bytes.Types (Bytes(Bytes)) +import Data.ByteString.Short.Internal (ShortByteString (SBS)) +import Data.Bytes.Builder.Bounded.Unsafe (Builder (..)) +import Data.Bytes.Types (Bytes (Bytes)) import Data.Char (ord) -import Data.Primitive (ByteArray(..),readByteArray,writeByteArray) -import Data.Primitive (MutableByteArray(MutableByteArray)) +import Data.Primitive (ByteArray (..), MutableByteArray (MutableByteArray), readByteArray, writeByteArray) import Data.Text (Text) import Data.Text.Short (ShortText) -import Data.WideWord.Word128 (Word128(Word128)) -import GHC.Exts (ByteArray#,Int#,Int(I#),(+#),(-#)) -import GHC.Exts (Char(C#),Word64#,Word8#,quotRemWord#,indexCharArray#) -import GHC.Exts (isTrue#,(>#)) -import GHC.Exts (wordToWord64#,word64ToWord#) -import GHC.ST (ST(ST)) -import GHC.Word (Word64(W64#),Word8(W8#)) +import Data.WideWord.Word128 (Word128 (Word128)) +import GHC.Exts (ByteArray#, Char (C#), Int (I#), Int#, Word64#, Word8#, indexCharArray#, isTrue#, quotRemWord#, word64ToWord#, wordToWord64#, (+#), (-#), (>#)) +import GHC.ST (ST (ST)) +import GHC.Word (Word64 (W64#), Word8 (W8#)) import qualified Arithmetic.Nat as Nat import qualified Data.Bytes as Bytes import qualified Data.Bytes.Builder.Bounded as Builder -import qualified GHC.Exts as Exts import qualified Data.Text.Short as TS import qualified Data.Text.Short.Unsafe as TS +import qualified GHC.Exts as Exts --- $setup --- >>> :set -XNumericUnderscores --- >>> import qualified Data.Bytes as Bytes - --- | Base62 encode a 64-bit word. Leading zero bits are suppressed. --- --- >>> putStrLn (Bytes.toLatinString (Bytes.fromByteArray (encode64 213635))) --- tZj --- --- Note that this will encode the number 0 as the character 0 rather --- than as the empty byte array. +{- | Base62 encode a 64-bit word. Leading zero bits are suppressed. +Note that this will encode the number 0 as the character '0' rather +than as the empty byte array. +-} encode64 :: Word64 -> ByteArray encode64 = Builder.run Nat.constant . builder64 -- | Base62 encode a 64-bit word as a builder. builder64 :: Word64 -> Builder 11 -{-# inline builder64 #-} +{-# INLINE builder64 #-} builder64 (W64# w) = builder64# w -- | Base62 encode a 128-bit word. Leading zero bits are suppressed. --- --- >>> let octillion = 1_000_000_000_000_000_000_000_000_000 --- >>> putStrLn (Bytes.toLatinString (Bytes.fromByteArray (encode128 octillion))) --- 1IdHllabYuAOlNK4 encode128 :: Word128 -> ByteArray -{-# inline encode128 #-} +{-# INLINE encode128 #-} encode128 = Builder.run Nat.constant . builder128 -- | Base62 encode a 128-bit word as @ShortText@. shortText128 :: Word128 -> ShortText -{-# inline shortText128 #-} +{-# INLINE shortText128 #-} shortText128 !w = case encode128 w of ByteArray x -> TS.fromShortByteStringUnsafe (SBS x) -- | Base62 encode a 128-bit word as @Text@. text128 :: Word128 -> Text -{-# inline text128 #-} +{-# INLINE text128 #-} text128 = TS.toText . shortText128 -- | Base62 encode a 128-bit word as a builder. builder128 :: Word128 -> Builder 22 -{-# inline builder128 #-} +{-# INLINE builder128 #-} builder128 (Word128 (W64# a) (W64# b)) = builder128# a b builder64# :: Word64# -> Builder 11 -{-# noinline builder64# #-} -builder64# w0 = Builder - (\marr off0 s0 -> case word64ToWord# w0 of - 0## -> case Exts.writeWord8Array# marr off0 (Exts.wordToWord8# 48##) s0 of - s1 -> (# s1, off0 +# 1# #) - _ -> let go ix w s1 = case word64ToWord# w of - 0## -> case reverseBytes (MutableByteArray marr) (I# off0) (I# (ix -# 1# )) of - ST f -> case f s1 of - (# s2, (_ :: ()) #) -> (# s2, ix #) - _ -> - let !(# q0, r0 #) = quotRemWord# (word64ToWord# w) 62## - !q = wordToWord64# q0 - !r = wordToWord64# r0 - in case Exts.writeWord8Array# marr ix (unW8 (encodeByte (W64# r))) s1 of - s2 -> go (ix +# 1#) q s2 - in go off0 w0 s0 - ) +{-# NOINLINE builder64# #-} +builder64# w0 = + Builder + ( \marr off0 s0 -> case word64ToWord# w0 of + 0## -> case Exts.writeWord8Array# marr off0 (Exts.wordToWord8# 48##) s0 of + s1 -> (# s1, off0 +# 1# #) + _ -> + let go ix w s1 = case word64ToWord# w of + 0## -> case reverseBytes (MutableByteArray marr) (I# off0) (I# (ix -# 1#)) of + ST f -> case f s1 of + (# s2, (_ :: ()) #) -> (# s2, ix #) + _ -> + let !(# q0, r0 #) = quotRemWord# (word64ToWord# w) 62## + !q = wordToWord64# q0 + !r = wordToWord64# r0 + in case Exts.writeWord8Array# marr ix (unW8 (encodeByte (W64# r))) s1 of + s2 -> go (ix +# 1#) q s2 + in go off0 w0 s0 + ) -- Always outputs exactly ten digits. They do not need to be reversed. builder62pow10# :: Word64# -> Builder 10 -{-# noinline builder62pow10# #-} -builder62pow10# w0 = Builder - (\marr off0 s0 -> - let go ix d w s1 = case d of - 0# -> (# s1, ix +# 11# #) - _ -> - let !(# q0, r0 #) = quotRemWord# (word64ToWord# w) 62## - !q = wordToWord64# q0 - !r = wordToWord64# r0 - in case Exts.writeWord8Array# marr ix (unW8 (encodeByte (W64# r))) s1 of - s2 -> go (ix -# 1# ) (d -# 1# ) q s2 - in go (off0 +# 9# ) 10# w0 s0 - ) +{-# NOINLINE builder62pow10# #-} +builder62pow10# w0 = + Builder + ( \marr off0 s0 -> + let go ix d w s1 = case d of + 0# -> (# s1, ix +# 11# #) + _ -> + let !(# q0, r0 #) = quotRemWord# (word64ToWord# w) 62## + !q = wordToWord64# q0 + !r = wordToWord64# r0 + in case Exts.writeWord8Array# marr ix (unW8 (encodeByte (W64# r))) s1 of + s2 -> go (ix -# 1#) (d -# 1#) q s2 + in go (off0 +# 9#) 10# w0 s0 + ) builder128# :: Word64# -> Word64# -> Builder 22 -{-# noinline builder128# #-} -builder128# wa wb = Builder - (\marr off0 s0 -> case word64ToWord# wa of - 0## -> case builder64# wb of Builder f -> f marr off0 s0 - _ -> case quotRem (Word128 (W64# wa) (W64# wb)) (Word128 0 n62pow10) of - (upper@(Word128 upperHi (W64# upperLo)), (Word128 shouldBeZeroA (W64# lower))) -> case shouldBeZeroA of - 0 -> case upperHi of - 0 -> case builder64# upperLo `Builder.append` builder62pow10# lower of Builder f -> f marr off0 s0 - _ -> case quotRem upper (Word128 0 n62pow10) of - (Word128 shouldBeZeroB (W64# x),Word128 shouldBeZeroC (W64# y)) -> case shouldBeZeroB of - 0 -> case shouldBeZeroC of - 0 -> case builder64# x `Builder.append` (builder62pow10# y `Builder.append` builder62pow10# lower) of Builder f -> f marr off0 s0 - _ -> errorWithoutStackTrace "Data.Word.Base62: logical error c" - _ -> errorWithoutStackTrace "Data.Word.Base62: logical error b" - _ -> errorWithoutStackTrace "Data.Word.Base62: logical error a" - ) +{-# NOINLINE builder128# #-} +builder128# wa wb = + Builder + ( \marr off0 s0 -> case word64ToWord# wa of + 0## -> case builder64# wb of Builder f -> f marr off0 s0 + _ -> case quotRem (Word128 (W64# wa) (W64# wb)) (Word128 0 n62pow10) of + (upper@(Word128 upperHi (W64# upperLo)), (Word128 shouldBeZeroA (W64# lower))) -> case shouldBeZeroA of + 0 -> case upperHi of + 0 -> case builder64# upperLo `Builder.append` builder62pow10# lower of Builder f -> f marr off0 s0 + _ -> case quotRem upper (Word128 0 n62pow10) of + (Word128 shouldBeZeroB (W64# x), Word128 shouldBeZeroC (W64# y)) -> case shouldBeZeroB of + 0 -> case shouldBeZeroC of + 0 -> case builder64# x `Builder.append` (builder62pow10# y `Builder.append` builder62pow10# lower) of Builder f -> f marr off0 s0 + _ -> errorWithoutStackTrace "Data.Word.Base62: logical error c" + _ -> errorWithoutStackTrace "Data.Word.Base62: logical error b" + _ -> errorWithoutStackTrace "Data.Word.Base62: logical error a" + ) -- Reverse the bytes in the designated slice. This takes -- an inclusive start offset and an inclusive end offset. reverseBytes :: MutableByteArray s -> Int -> Int -> ST s () -{-# inline reverseBytes #-} -reverseBytes arr begin end = go begin end where - go ixA ixB = if ixA < ixB - then do - a :: Word8 <- readByteArray arr ixA - b :: Word8 <- readByteArray arr ixB - writeByteArray arr ixA b - writeByteArray arr ixB a - go (ixA + 1) (ixB - 1) - else pure () +{-# INLINE reverseBytes #-} +reverseBytes arr begin end = go begin end + where + go ixA ixB = + if ixA < ixB + then do + a :: Word8 <- readByteArray arr ixA + b :: Word8 <- readByteArray arr ixB + writeByteArray arr ixA b + writeByteArray arr ixB a + go (ixA + 1) (ixB - 1) + else pure () -- Precondition: argument is less than 62. encodeByte :: Word64 -> Word8 @@ -190,16 +180,12 @@ unsafeW8 (W64# w) = W8# (Exts.wordToWord8# (word64ToWord# w)) unW8 :: Word8 -> Word8# unW8 (W8# w) = w --- | Decode a base62-encoded 64-bit word. This rejects the empty --- string rather than decoding it as zero. This also rejects encoded --- numbers greater than or equal to @2^64@. --- --- >>> decode64 (Bytes.fromAsciiString "LygHa16AHYB") --- Just 18446744073709551611 --- >>> decode64 (Bytes.fromAsciiString "1IdHllabYuAOlNK4") --- Nothing +{- | Decode a base62-encoded 64-bit word. This rejects the empty +string rather than decoding it as zero. This also rejects encoded +numbers greater than or equal to @2^64@. +-} decode64 :: Bytes -> Maybe Word64 -{-# inline decode64 #-} +{-# INLINE decode64 #-} decode64 b@(Bytes _ _ len) = case len of 0 -> Nothing _ -> case decode64# 0 b of @@ -210,14 +196,14 @@ decode64 b@(Bytes _ _ len) = case len of -- we do not put a noinline pragma on it. It is recursive, -- so it cannot inline anywhere. decode64# :: Word64 -> Bytes -> (# (# #) | Word64# #) -decode64# !acc@(W64# acc# ) b@(Bytes arr off len) = case len of +decode64# !acc@(W64# acc#) b@(Bytes arr off len) = case len of 0 -> (# | acc# #) _ -> case decodeByte (indexAsciiArray arr off) of Nothing -> (# (# #) | #) Just w -> -- If we overflow, the accumulator will shrink. We -- return Nothing in this case. - let (overflow,acc') = unsignedPushBase62 acc w + let (overflow, acc') = unsignedPushBase62 acc w in if overflow then (# (# #) | #) else decode64# acc' (Bytes.unsafeDrop 1 b) @@ -235,54 +221,50 @@ n62pow20 = Word128 0 n62pow10 * Word128 0 n62pow10 -- consumed by this particular function. The caller should always -- set @d@ to 0. unsafeDecode62pow10# :: Word64 -> ByteArray -> Int -> Int -> (# (# #) | Word64# #) -unsafeDecode62pow10# !acc@(W64# acc# ) !arr !off !d = if d < 10 - then case decodeByte (indexAsciiArray arr off) of - Nothing -> (# (# #) | #) - Just w -> - let acc' = acc * 62 + w - in unsafeDecode62pow10# acc' arr (off + 1) (d + 1) - else (# | acc# #) +unsafeDecode62pow10# !acc@(W64# acc#) !arr !off !d = + if d < 10 + then case decodeByte (indexAsciiArray arr off) of + Nothing -> (# (# #) | #) + Just w -> + let acc' = acc * 62 + w + in unsafeDecode62pow10# acc' arr (off + 1) (d + 1) + else (# | acc# #) --- | Decode a base62-encoded 128-bit word. This rejects the empty --- string rather than decoding it as zero. This also rejects encoded --- numbers greater than or equal to @2^128@. --- --- >>> decode128 (Bytes.fromAsciiString "LygHa16AHYB") --- Just 18446744073709551611 --- >>> decode128 (Bytes.fromAsciiString "7n42DGM5Tflk9n8mt7Fhc6") --- Just 340282366920938463463374607431768211454 --- >>> decode128 (Bytes.fromAsciiString "7n42DGM5Tflk9n8mt7Fhc9") --- Nothing +{- | Decode a base62-encoded 128-bit word. This rejects the empty +string rather than decoding it as zero. This also rejects encoded +numbers greater than or equal to @2^128@. +-} decode128 :: Bytes -> Maybe Word128 -{-# inline decode128 #-} +{-# INLINE decode128 #-} decode128 (Bytes (ByteArray arr) (I# off) (I# len)) = case decode128# arr off len of (# (# #) | #) -> Nothing (# | (# a, b #) #) -> Just (Word128 (W64# a) (W64# b)) decode128# :: ByteArray# -> Int# -> Int# -> (# (# #) | (# Word64#, Word64# #) #) -{-# noinline decode128# #-} +{-# NOINLINE decode128# #-} decode128# arr off len - | isTrue# (len ># 22# ) = (# (# #) | #) -- always overflows - | isTrue# (len ># 20# ) = - case unsafeDecode62pow10# 0 (ByteArray arr) (I# (off +# len -# 10# )) 0 of + | isTrue# (len ># 22#) = (# (# #) | #) -- always overflows + | isTrue# (len ># 20#) = + case unsafeDecode62pow10# 0 (ByteArray arr) (I# (off +# len -# 10#)) 0 of (# (# #) | #) -> (# (# #) | #) - (# | c #) -> case unsafeDecode62pow10# 0 (ByteArray arr) (I# (off +# len -# 20# )) 0 of + (# | c #) -> case unsafeDecode62pow10# 0 (ByteArray arr) (I# (off +# len -# 20#)) 0 of (# (# #) | #) -> (# (# #) | #) - (# | b #) -> case decode64# 0 (Bytes (ByteArray arr) (I# off) (I# (len -# 20# ))) of + (# | b #) -> case decode64# 0 (Bytes (ByteArray arr) (I# off) (I# (len -# 20#))) of (# (# #) | #) -> (# (# #) | #) - (# | a #) -> if W64# a < 484 - then - let r0 = Word128 0 (W64# c) + (Word128 0 n62pow10 * (Word128 0 (W64# b))) - !r1@(Word128 (W64# r1x) (W64# r1y)) = n62pow20 * Word128 0 (W64# a) + r0 - in if r1 >= r0 - then (# | (# r1x, r1y #) #) - else (# (# #) | #) - else (# (# #) | #) - | isTrue# (len ># 10# ) = - case unsafeDecode62pow10# 0 (ByteArray arr) (I# (off +# len -# 10# )) 0 of + (# | a #) -> + if W64# a < 484 + then + let r0 = Word128 0 (W64# c) + (Word128 0 n62pow10 * (Word128 0 (W64# b))) + !r1@(Word128 (W64# r1x) (W64# r1y)) = n62pow20 * Word128 0 (W64# a) + r0 + in if r1 >= r0 + then (# | (# r1x, r1y #) #) + else (# (# #) | #) + else (# (# #) | #) + | isTrue# (len ># 10#) = + case unsafeDecode62pow10# 0 (ByteArray arr) (I# (off +# len -# 10#)) 0 of (# (# #) | #) -> (# (# #) | #) - (# | b #) -> case decode64# 0 (Bytes (ByteArray arr) (I# off) (I# (len -# 10# ))) of + (# | b #) -> case decode64# 0 (Bytes (ByteArray arr) (I# off) (I# (len -# 10#))) of (# (# #) | #) -> (# (# #) | #) (# | a #) -> case Word128 0 (W64# b) + (fromIntegral n62pow10 * Word128 0 (W64# a)) of Word128 (W64# x) (W64# y) -> (# | (# x, y #) #) @@ -293,15 +275,15 @@ decode128# arr off len indexAsciiArray :: ByteArray -> Int -> Char indexAsciiArray (ByteArray arr) (I# i) = C# (indexCharArray# arr i) -unsignedPushBase62 :: Word64 -> Word64 -> (Bool,Word64) -{-# inline unsignedPushBase62 #-} -unsignedPushBase62 (W64# a) (W64# b) = +unsignedPushBase62 :: Word64 -> Word64 -> (Bool, Word64) +{-# INLINE unsignedPushBase62 #-} +unsignedPushBase62 (W64# a) (W64# b) = let !(# ca, r0 #) = Exts.timesWord2# (word64ToWord# a) 62## in case ca of 0## -> let !r0' = wordToWord64# r0 !r1 = Exts.plusWord64# r0' b in case Exts.ltWord64# r1 r0' of - 1# -> (True,0) - _ -> (False,W64# r1) - _ -> (True,0) + 1# -> (True, 0) + _ -> (False, W64# r1) + _ -> (True, 0) diff --git a/test/Doctest.hs b/test/Doctest.hs deleted file mode 100644 index cada807..0000000 --- a/test/Doctest.hs +++ /dev/null @@ -1,8 +0,0 @@ -import Test.DocTest (doctest) - -main :: IO () -main = doctest - [ "-fobject-code" - , "src/Data/Word/Base62.hs" - ] - diff --git a/test/Main.hs b/test/Main.hs index 0c7517c..a8c1d55 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,63 +1,65 @@ {-# LANGUAGE LambdaCase #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE TypeApplications #-} - {-# OPTIONS_GHC -fno-warn-orphans #-} import Control.Applicative (liftA2) -import Data.Primitive (ByteArray) import Data.Bits ((.&.)) import Data.Char (chr) -import Test.Tasty (TestTree,defaultMain,testGroup) -import Test.Tasty.HUnit (testCase,(@=?)) -import Test.Tasty.QuickCheck (testProperty,(===),choose) -import Test.Tasty.QuickCheck (Arbitrary,arbitrary,counterexample) -import Data.WideWord.Word128 (Word128(Word128)) +import Data.Primitive (ByteArray) +import Data.WideWord.Word128 (Word128 (Word128)) +import Test.Tasty (TestTree, defaultMain, testGroup) +import Test.Tasty.HUnit (testCase, (@=?)) +import Test.Tasty.QuickCheck (Arbitrary, arbitrary, choose, counterexample, testProperty, (===)) -import qualified Test.Tasty.QuickCheck import qualified Data.Bytes as Bytes import qualified Data.Bytes.Text.Ascii as Ascii import qualified Data.Word.Base62 as W import qualified GHC.Exts as Exts +import qualified Test.Tasty.QuickCheck main :: IO () main = defaultMain tests tests :: TestTree -tests = testGroup "base62" - [ testGroup "Word64" - [ testProperty "isomorphic" $ \w -> - Just w === W.decode64 (Bytes.fromByteArray (W.encode64 w)) - , testCase "A" $ - Nothing - @=? - W.decode64 (Ascii.fromString "1IdHllabYuAOlNK4") - ] - , testGroup "Word128" - [ testProperty "isomorphic" $ \w -> - let enc = W.encode128 w in - counterexample ("Encoded as: " ++ show enc ++ "\nRendered as: " ++ str enc) - $ Just w === W.decode128 (Bytes.fromByteArray enc) - , testCase "A" $ - Nothing - @=? - W.decode128 (Ascii.fromString "7n42DGM5Tflk9n8mt7Fhc9") +tests = + testGroup + "base62" + [ testGroup + "Word64" + [ testProperty "isomorphic" $ \w -> + Just w === W.decode64 (Bytes.fromByteArray (W.encode64 w)) + , testCase "A" $ + Nothing + @=? W.decode64 (Ascii.fromString "1IdHllabYuAOlNK4") + ] + , testGroup + "Word128" + [ testProperty "isomorphic" $ \w -> + let enc = W.encode128 w + in counterexample ("Encoded as: " ++ show enc ++ "\nRendered as: " ++ str enc) $ + Just w === W.decode128 (Bytes.fromByteArray enc) + , testCase "A" $ + Nothing + @=? W.decode128 (Ascii.fromString "7n42DGM5Tflk9n8mt7Fhc9") + ] ] - ] instance Arbitrary Word128 where -- It is useful to explicitly generate some small values -- since they follow a different code path than large ones. - arbitrary = choose (0,2 :: Int) >>= \case - 0 -> fmap (Word128 0) arbitrary - 1 -> liftA2 Word128 - (fmap (0xFFFF .&.) arbitrary) - arbitrary - 2 -> liftA2 Word128 arbitrary arbitrary - _ -> error "Word128.arbitrary: not possible" - shrink x = if x > 5 - then [x - div x 9, x - div x 11, x - div x 13] - else [] + arbitrary = + choose (0, 2 :: Int) >>= \case + 0 -> fmap (Word128 0) arbitrary + 1 -> + liftA2 + Word128 + (fmap (0xFFFF .&.) arbitrary) + arbitrary + 2 -> liftA2 Word128 arbitrary arbitrary + _ -> error "Word128.arbitrary: not possible" + shrink x = + if x > 5 + then [x - div x 9, x - div x 11, x - div x 13] + else [] str :: ByteArray -> String str = map (chr . fromIntegral) . Exts.toList