Skip to content

Commit 83ed3d1

Browse files
committed
Change names of marks/input span to use "byte span" based names
Change: markInput --> openByteSpan unmarkInput --> closeByteSpan getInputSpan --> peekByteSpan
1 parent 9a0b746 commit 83ed3d1

File tree

7 files changed

+151
-114
lines changed

7 files changed

+151
-114
lines changed

cborg/ChangeLog.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
## 0.3.0.0
44

5-
* Add support for grabbing the input bytes of intervals, i.e. `getInputSpan`.
5+
* Add support for retrieving selected byte spans from the input data stream,
6+
using `openByteSpan`, `closeByteSpan` and `peekByteSpan`.
67

78
## 0.2.10.0
89

cborg/cborg.cabal

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ test-suite cborg-tests
129129
Tests.Boundary
130130
Tests.ATerm
131131
Tests.ByteOffset
132-
Tests.GetInputSpan
132+
Tests.ByteSpan
133133
Tests.Canonical
134134
Tests.PreEncoded
135135
Tests.Regress

cborg/src/Codec/CBOR/Decoding.hs

Lines changed: 50 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,13 @@ module Codec.CBOR.Decoding
7474
, peekAvailable -- :: Decoder s Int
7575
, ByteOffset
7676
, peekByteOffset -- :: Decoder s ByteOffset
77+
, decodeWithByteOffsets
78+
, ByteSpan
79+
, openByteSpan -- :: Decoder s ()
80+
, closeByteSpan -- :: Decoder s ()
81+
, peekByteSpan -- :: Decoder s LazyByteString
7782
, decodeWithByteSpan
7883

79-
, markInput -- :: Decoder s ()
80-
, unmarkInput -- :: Decoder s ()
81-
, getInputSpan -- :: Decoder s LazyByteString
82-
8384
-- ** Canonical CBOR
8485
-- $canonical
8586
, decodeWordCanonical -- :: Decoder s Word
@@ -198,9 +199,9 @@ data DecodeAction s a
198199
#else
199200
| PeekByteOffset (Int# -> ST s (DecodeAction s a))
200201
#endif
201-
| MarkInput (ST s (DecodeAction s a))
202-
| UnmarkInput (ST s (DecodeAction s a))
203-
| GetInputSpan (LBS.ByteString -> ST s (DecodeAction s a))
202+
| OpenByteSpan (ST s (DecodeAction s a))
203+
| CloseByteSpan (ST s (DecodeAction s a))
204+
| PeekByteSpan (LBS.ByteString -> ST s (DecodeAction s a))
204205

205206
-- All the canonical variants
206207
| ConsumeWordCanonical (Word# -> ST s (DecodeAction s a))
@@ -983,9 +984,9 @@ type ByteOffset = Int64
983984
-- input bytes that makes up the encoded form of a term.
984985
--
985986
-- By keeping track of the byte offsets before and after decoding a subterm
986-
-- (a pattern captured by 'decodeWithByteSpan') and if the overall input data
987-
-- is retained then this is enables later retrieving the span of bytes for the
988-
-- subterm.
987+
-- (a pattern captured by 'decodeWithByteOffsets') and if the overall input
988+
-- data is retained then this is enables later retrieving the span of bytes for
989+
-- the subterm.
989990
--
990991
-- @since 0.2.2.0
991992
peekByteOffset :: Decoder s ByteOffset
@@ -998,38 +999,65 @@ peekByteOffset = Decoder (\k -> return (PeekByteOffset (\off# -> k (I64#
998999
))))
9991000
{-# INLINE peekByteOffset #-}
10001001

1002+
-- | This captures the pattern of getting the byte offsets before and after
1003+
-- decoding a subterm.
1004+
--
1005+
-- > !before <- peekByteOffset
1006+
-- > x <- decode
1007+
-- > !after <- peekByteOffset
1008+
--
1009+
-- @since 0.3.0.0
1010+
decodeWithByteOffsets :: Decoder s a -> Decoder s (a, ByteOffset, ByteOffset)
1011+
decodeWithByteOffsets da = do
1012+
!before <- peekByteOffset
1013+
x <- da
1014+
!after <- peekByteOffset
1015+
return (x, before, after)
1016+
1017+
-- | A span of bytes within the overall byte sequence that makes up the
1018+
-- input to the 'Decoder'.
1019+
--
1020+
type ByteSpan = LBS.ByteString
1021+
10011022
-- |
10021023
--
10031024
-- @since 0.3.0.0
1004-
markInput :: Decoder s ()
1005-
markInput = Decoder (\k -> return (MarkInput (k ())))
1025+
openByteSpan :: Decoder s ()
1026+
openByteSpan = Decoder (\k -> return (OpenByteSpan (k ())))
10061027

10071028
-- |
10081029
--
10091030
-- @since 0.3.0.0
1010-
unmarkInput :: Decoder s ()
1011-
unmarkInput = Decoder (\k -> return (UnmarkInput (k ())))
1031+
closeByteSpan :: Decoder s ()
1032+
closeByteSpan = Decoder (\k -> return (CloseByteSpan (k ())))
10121033

10131034
-- |
10141035
--
1036+
-- Note: the 'ByteSpan' returned is a slice of the original decoder input
1037+
-- stream, and thus will retain the input buffers. If you need to retain the
1038+
-- byte span for long, it is advisable to copy the span.
1039+
--
10151040
-- @since 0.3.0.0
1016-
getInputSpan :: Decoder s LBS.ByteString
1017-
getInputSpan = Decoder (\k -> return (GetInputSpan k))
1041+
peekByteSpan :: Decoder s ByteSpan
1042+
peekByteSpan = Decoder (\k -> return (PeekByteSpan k))
10181043

10191044

10201045
-- | This captures the pattern of getting the byte offsets before and after
10211046
-- decoding a subterm.
10221047
--
1023-
-- > !before <- peekByteOffset
1048+
-- > openByteSpan
10241049
-- > x <- decode
1025-
-- > !after <- peekByteOffset
1050+
-- > !after <- peekByteSpan
1051+
-- > closeByteSpan
10261052
--
1027-
decodeWithByteSpan :: Decoder s a -> Decoder s (a, ByteOffset, ByteOffset)
1053+
-- @since 0.3.0.0
1054+
decodeWithByteSpan :: Decoder s a -> Decoder s (a, ByteSpan)
10281055
decodeWithByteSpan da = do
1029-
!before <- peekByteOffset
1056+
openByteSpan
10301057
x <- da
1031-
!after <- peekByteOffset
1032-
return (x, before, after)
1058+
!bs <- peekByteSpan
1059+
closeByteSpan
1060+
return (x, bs)
10331061

10341062
{-
10351063
expectExactly :: Word -> Decoder (Word :#: s) s

cborg/src/Codec/CBOR/FlatTerm.hs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -466,17 +466,18 @@ fromFlatTerm decoder ft =
466466
go ts@(tk:_) (PeekTokenType k) = k (tokenTypeOf tk) >>= go ts
467467
go ts (PeekTokenType _) = unexpected "peekTokenType" ts
468468

469-
-- We don't have real bytes so we have to give these two operations
470-
-- different interpretations: remaining tokens and just 0 for offsets.
469+
-- We don't have real bytes so we have to give these operations
470+
-- different interpretations: remaining tokens and just 0 for offsets, and
471+
-- empty for byte spans.
471472
go ts (PeekAvailable k) = k (unI# (length ts)) >>= go ts
472473
#if defined(ARCH_32bit)
473474
go ts (PeekByteOffset k)= k (unI64# 0) >>= go ts
474475
#else
475476
go ts (PeekByteOffset k)= k 0# >>= go ts
476477
#endif
477-
go ts (MarkInput k) = k >>= go ts
478-
go ts (UnmarkInput k) = k >>= go ts
479-
go ts (GetInputSpan k) = k LBS.empty >>= go ts
478+
go ts (OpenByteSpan k) = k >>= go ts
479+
go ts (CloseByteSpan k) = k >>= go ts
480+
go ts (PeekByteSpan k) = k LBS.empty >>= go ts
480481

481482
go _ (Fail msg) = return $ Left msg
482483
go [] (Done x) = return $ Right x

cborg/src/Codec/CBOR/Read.hs

Lines changed: 54 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -245,9 +245,9 @@ data SlowPath s a
245245
#else
246246
| SlowPeekByteOffset {-# UNPACK #-} !ByteString (Int# -> ST s (DecodeAction s a))
247247
#endif
248-
| SlowMarkInput {-# UNPACK #-} !ByteString (ST s (DecodeAction s a))
249-
| SlowUnmarkInput {-# UNPACK #-} !ByteString (ST s (DecodeAction s a))
250-
| SlowGetInputSpan {-# UNPACK #-} !ByteString (LBS.ByteString -> ST s (DecodeAction s a))
248+
| SlowOpenByteSpan {-# UNPACK #-} !ByteString (ST s (DecodeAction s a))
249+
| SlowCloseByteSpan {-# UNPACK #-} !ByteString (ST s (DecodeAction s a))
250+
| SlowPeekByteSpan {-# UNPACK #-} !ByteString (LBS.ByteString -> ST s (DecodeAction s a))
251251
| SlowDecodeAction {-# UNPACK #-} !ByteString (DecodeAction s a)
252252
| SlowFail {-# UNPACK #-} !ByteString String
253253

@@ -704,9 +704,9 @@ go_fast !bs (PeekTokenType k) =
704704
go_fast !bs (PeekAvailable k) = k (case BS.length bs of I# len# -> len#) >>= go_fast bs
705705

706706
go_fast !bs da@PeekByteOffset{} = go_fast_end bs da
707-
go_fast !bs da@MarkInput{} = go_fast_end bs da
708-
go_fast !bs da@UnmarkInput{} = go_fast_end bs da
709-
go_fast !bs da@GetInputSpan{} = go_fast_end bs da
707+
go_fast !bs da@OpenByteSpan{} = go_fast_end bs da
708+
go_fast !bs da@CloseByteSpan{} = go_fast_end bs da
709+
go_fast !bs da@PeekByteSpan{} = go_fast_end bs da
710710
go_fast !bs da@D.Fail{} = go_fast_end bs da
711711
go_fast !bs da@D.Done{} = go_fast_end bs da
712712

@@ -727,9 +727,9 @@ go_fast_end !bs (D.Done x) = return $! FastDone bs x
727727
go_fast_end !bs (PeekAvailable k) = k (case BS.length bs of I# len# -> len#) >>= go_fast_end bs
728728

729729
go_fast_end !bs (PeekByteOffset k) = return $! SlowPeekByteOffset bs k
730-
go_fast_end !bs (MarkInput k) = return $! SlowMarkInput bs k
731-
go_fast_end !bs (UnmarkInput k) = return $! SlowUnmarkInput bs k
732-
go_fast_end !bs (GetInputSpan k) = return $! SlowGetInputSpan bs k
730+
go_fast_end !bs (OpenByteSpan k) = return $! SlowOpenByteSpan bs k
731+
go_fast_end !bs (CloseByteSpan k) = return $! SlowCloseByteSpan bs k
732+
go_fast_end !bs (PeekByteSpan k) = return $! SlowPeekByteSpan bs k
733733

734734
-- the next two cases only need the 1 byte token header
735735
go_fast_end !bs da | BS.null bs = return $! SlowDecodeAction bs da
@@ -1207,8 +1207,12 @@ go_fast_end !bs (ConsumeMapLenOrIndef k) =
12071207
DecodedToken sz (I# n#) -> k n# >>= go_fast_end (BS.unsafeDrop sz bs)
12081208

12091209

1210-
-- Marks is a record of byteoffsets for input marks as well as input chunks seen since the first mark.
1211-
-- The structure is a stack to support nesting marks / getInputSpan
1210+
-- | 'Marks' is a record of 'ByteOffsets' for input byte span marks, as well as
1211+
-- input chunks seen since the first mark.
1212+
--
1213+
-- The structure is a stack to support nesting of 'OpenByteSpan' and
1214+
-- 'CloseByteSpan'.
1215+
--
12121216
data Marks
12131217
= NoMarks
12141218
| Marks !ByteOffsets !Chunks
@@ -1230,39 +1234,39 @@ data Chunks
12301234
| C_Cons !ByteOffset !ByteString !Chunks
12311235
deriving Show
12321236

1233-
slowMarkInput :: ByteString -> ByteOffset -> Marks -> Marks
1234-
slowMarkInput bs off NoMarks = Marks (BO_Last off) (C_Cons off bs C_Nil)
1235-
slowMarkInput _ off (Marks offs chunks) = Marks (BO_Cons off offs) chunks
1237+
markByteSpan :: ByteString -> ByteOffset -> Marks -> Marks
1238+
markByteSpan bs off NoMarks = Marks (BO_Last off) (C_Cons off bs C_Nil)
1239+
markByteSpan _ off (Marks offs chunks) = Marks (BO_Cons off offs) chunks
12361240

1237-
slowUnmarkInput :: Marks -> Marks
1238-
slowUnmarkInput NoMarks = NoMarks
1239-
slowUnmarkInput (Marks (BO_Last _) _) = NoMarks
1240-
slowUnmarkInput (Marks (BO_Cons _ offs) chunks) = Marks offs chunks
1241+
unmarkByteSpan :: Marks -> Marks
1242+
unmarkByteSpan NoMarks = NoMarks
1243+
unmarkByteSpan (Marks (BO_Last _) _) = NoMarks
1244+
unmarkByteSpan (Marks (BO_Cons _ offs) chunks) = Marks offs chunks
12411245

1242-
slowMarkChunk :: ByteString -> ByteOffset -> Marks -> Marks
1243-
slowMarkChunk _ _ NoMarks = NoMarks
1244-
slowMarkChunk bs off (Marks offs chunks) = Marks offs (C_Cons off bs chunks)
1246+
markChunk :: ByteString -> ByteOffset -> Marks -> Marks
1247+
markChunk _ _ NoMarks = NoMarks
1248+
markChunk bs off (Marks offs chunks) = Marks offs (C_Cons off bs chunks)
12451249

1246-
slowGetInputSpan :: ByteOffset -> Marks -> LBS.ByteString
1247-
slowGetInputSpan _ NoMarks = LBS.empty
1248-
slowGetInputSpan !offset (Marks offs chunks) =
1250+
peekMarkedByteSpan :: ByteOffset -> Marks -> LBS.ByteString
1251+
peekMarkedByteSpan _ NoMarks = LBS.empty
1252+
peekMarkedByteSpan !offset (Marks offs chunks) =
12491253
-- traceShow (off, offset, chunks) $ traceShowId $
1250-
slowGetInputSpan1 (headByteOffsets offs) offset chunks
1254+
peekMarkedByteSpan1 (headByteOffsets offs) offset chunks
12511255

1252-
slowGetInputSpan1 :: ByteOffset -> ByteOffset -> Chunks -> LBS.ByteString
1253-
slowGetInputSpan1 !_ !_ C_Nil = LBS.empty
1254-
slowGetInputSpan1 !a !b (C_Cons c bs chunks) =
1256+
peekMarkedByteSpan1 :: ByteOffset -> ByteOffset -> Chunks -> LBS.ByteString
1257+
peekMarkedByteSpan1 !_ !_ C_Nil = LBS.empty
1258+
peekMarkedByteSpan1 !a !b (C_Cons c bs chunks) =
12551259
assert (b >= c) $
12561260
if a <= c
12571261
-- a <= b, a <= c: take a prefix of the current chunk, recurse
1258-
then slowGetInputSpan2 [BS.take (int64ToInt (b - c)) bs] a chunks
1262+
then peekMarkedByteSpan2 [BS.take (int64ToInt (b - c)) bs] a chunks
12591263
-- c < a <= b: the input span is completely inside the current chunk.
12601264
else LBS.fromStrict $ BS.take (int64ToInt (b - a)) $ BS.drop (int64ToInt (a - c)) bs
12611265

1262-
slowGetInputSpan2 :: [ByteString] -> ByteOffset -> Chunks -> LBS.ByteString
1263-
slowGetInputSpan2 bss !_ C_Nil = LBS.fromChunks bss
1264-
slowGetInputSpan2 bss !a (C_Cons c bs chunks)
1265-
| a < c = slowGetInputSpan2 (bs : bss) a chunks
1266+
peekMarkedByteSpan2 :: [ByteString] -> ByteOffset -> Chunks -> LBS.ByteString
1267+
peekMarkedByteSpan2 bss !_ C_Nil = LBS.fromChunks bss
1268+
peekMarkedByteSpan2 bss !a (C_Cons c bs chunks)
1269+
| a < c = peekMarkedByteSpan2 (bs : bss) a chunks
12661270
| a == c = LBS.fromChunks (bs : bss)
12671271
| otherwise = LBS.fromChunks (BS.drop (int64ToInt (a - c)) bs : bss)
12681272

@@ -1318,7 +1322,7 @@ go_slow da bs !offset !marks = do
13181322
mbs <- needChunk
13191323
case mbs of
13201324
Nothing -> decodeFail bs' offset' "end of input"
1321-
Just bs'' -> go_slow da' bs'' offset' (slowMarkChunk bs'' offset' marks)
1325+
Just bs'' -> go_slow da' bs'' offset' (markChunk bs'' offset' marks)
13221326
where
13231327
!offset' = offset + intToInt64 (BS.length bs - BS.length bs')
13241328

@@ -1342,18 +1346,18 @@ go_slow da bs !offset !marks = do
13421346
where
13431347
!offset'@(I64# off#) = offset + intToInt64 (BS.length bs - BS.length bs')
13441348

1345-
SlowMarkInput bs' k ->
1346-
lift k >>= \daz -> go_slow daz bs' offset' (slowMarkInput bs' offset' marks)
1349+
SlowOpenByteSpan bs' k ->
1350+
lift k >>= \daz -> go_slow daz bs' offset' (markByteSpan bs' offset' marks)
13471351
where
13481352
!offset' = offset + intToInt64 (BS.length bs - BS.length bs')
13491353

1350-
SlowUnmarkInput bs' k ->
1351-
lift k >>= \daz -> go_slow daz bs' offset' (slowUnmarkInput marks)
1354+
SlowCloseByteSpan bs' k ->
1355+
lift k >>= \daz -> go_slow daz bs' offset' (unmarkByteSpan marks)
13521356
where
13531357
!offset' = offset + intToInt64 (BS.length bs - BS.length bs')
13541358

1355-
SlowGetInputSpan bs' k ->
1356-
lift (k (slowGetInputSpan offset' marks)) >>= \daz -> go_slow daz bs' offset' marks
1359+
SlowPeekByteSpan bs' k ->
1360+
lift (k (peekMarkedByteSpan offset' marks)) >>= \daz -> go_slow daz bs' offset' marks
13571361
where
13581362
!offset' = offset + intToInt64 (BS.length bs - BS.length bs')
13591363

@@ -1384,7 +1388,7 @@ go_slow_fixup da !bs !offset !marks = do
13841388
| otherwise
13851389
-> go_slow_fixup da (bs <> bs') offset marks'
13861390
where
1387-
marks' = slowMarkChunk bs' (offset + intToInt64 (BS.length bs)) marks
1391+
marks' = markChunk bs' (offset + intToInt64 (BS.length bs)) marks
13881392

13891393
-- We've now got more input, but we have one token that spanned the old and
13901394
-- new input buffers, so we have to decode that one before carrying on
@@ -1468,15 +1472,15 @@ go_slow_overlapped da sz bs_cur bs_next !offset !marks =
14681472
where
14691473
!(I64# off#) = offset'
14701474

1471-
SlowMarkInput bs_empty k ->
1475+
SlowOpenByteSpan bs_empty k ->
14721476
assert (BS.null bs_empty) $
1473-
lift k >>= \daz -> go_slow daz bs' offset' (slowMarkInput bs' offset' marks)
1474-
SlowUnmarkInput bs_empty k ->
1477+
lift k >>= \daz -> go_slow daz bs' offset' (markByteSpan bs' offset' marks)
1478+
SlowCloseByteSpan bs_empty k ->
14751479
assert (BS.null bs_empty) $
1476-
lift k >>= \daz -> go_slow daz bs' offset' (slowUnmarkInput marks)
1477-
SlowGetInputSpan bs_empty k ->
1480+
lift k >>= \daz -> go_slow daz bs' offset' (unmarkByteSpan marks)
1481+
SlowPeekByteSpan bs_empty k ->
14781482
assert (BS.null bs_empty) $
1479-
lift (k (slowGetInputSpan offset' marks)) >>= \daz -> go_slow daz bs' offset' marks
1483+
lift (k (peekMarkedByteSpan offset' marks)) >>= \daz -> go_slow daz bs' offset' marks
14801484

14811485
SlowFail bs_unconsumed msg ->
14821486
decodeFail (bs_unconsumed <> bs') offset'' msg
@@ -1506,13 +1510,13 @@ getTokenVarLen len bs offset marks =
15061510
| let n = len - BS.length bs
15071511
, BS.length bs' >= n ->
15081512
let !tok = bs <> BS.unsafeTake n bs'
1509-
in return (tok, BS.drop n bs', slowMarkChunk bs' offset marks)
1513+
in return (tok, BS.drop n bs', markChunk bs' offset marks)
15101514

15111515
| otherwise -> getTokenVarLenSlow
15121516
[bs',bs]
15131517
(len - (BS.length bs + BS.length bs'))
15141518
(offset + intToInt64 (BS.length bs'))
1515-
(slowMarkChunk bs' offset marks)
1519+
(markChunk bs' offset marks)
15161520

15171521
getTokenVarLenSlow
15181522
:: [ByteString] -- ^ prefix chunks, in reverse order
@@ -1527,7 +1531,7 @@ getTokenVarLenSlow bss n offset marks = do
15271531
Just bs
15281532
| BS.length bs >= n ->
15291533
let !tok = BS.concat (reverse (BS.unsafeTake n bs : bss))
1530-
in return (tok, BS.drop n bs, slowMarkChunk bs offset marks)
1534+
in return (tok, BS.drop n bs, markChunk bs offset marks)
15311535
| otherwise -> getTokenVarLenSlow (bs:bss) (n - BS.length bs) (offset + intToInt64 (BS.length bs)) marks
15321536

15331537

cborg/tests/Main.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ import qualified Tests.UnitTests as UnitTests
77
import qualified Tests.Properties as Properties
88
import qualified Tests.Boundary as Boundary
99
import qualified Tests.ByteOffset as ByteOffset
10+
import qualified Tests.ByteSpan as ByteSpan
1011
import qualified Tests.Canonical as Canonical
1112
import qualified Tests.Regress as Regress
1213
import qualified Tests.UTF8 as UTF8
1314
import qualified Tests.PreEncoded as PreEncoded
14-
import qualified Tests.GetInputSpan as GetInputSpan
1515

1616
main :: IO ()
1717
main = defaultMain tests
@@ -23,7 +23,7 @@ tests =
2323
, UnitTests.testTree
2424
, Properties.testTree
2525
, ByteOffset.testTree
26-
, GetInputSpan.testTree
26+
, ByteSpan.testTree
2727
, Boundary.testTree
2828
, Canonical.testTree
2929
, Regress.testTree

0 commit comments

Comments
 (0)