@@ -79,26 +79,43 @@ class CursorBasedCheckpointReader(CheckpointReader):
79
79
"""
80
80
CursorBasedCheckpointReader is used by streams that implement a Cursor in order to manage state. This allows the checkpoint
81
81
reader to delegate the complexity of fetching state to the cursor and focus on the iteration over a stream's partitions.
82
- Right now only low-code connectors provide cursor implementations, but the logic is extensible to any stream that adheres
83
- to the Cursor interface.
82
+
83
+ This reader supports the Cursor interface used by Python and low-code sources. Not to be confused with the Cursor interface
84
+ that belongs to the Concurrent CDK.
84
85
"""
85
86
86
87
def __init__(
    self,
    cursor: Cursor,
    stream_slices: Iterable[Optional[Mapping[str, Any]]],
    read_state_from_cursor: bool = False,
):
    """
    Set up the reader with the cursor it delegates state lookups to and the slices it will walk through.

    :param cursor: cursor object kept on the reader for later state lookups
    :param stream_slices: iterable of slices; it is wrapped in an iterator so it is consumed one slice at a time
    :param read_state_from_cursor: when True, partitions decide dynamically when to stop syncing based on the
        value of the state at runtime. This currently only applies to streams that use resumable full refresh.
    """
    # Iteration bookkeeping: no slice has been handed out yet and the sync has not finished.
    self._current_slice: Optional[StreamSlice] = None
    self._finished_sync = False

    # Collaborators and configuration supplied by the caller.
    self._cursor = cursor
    self._stream_slices = iter(stream_slices)
    self._read_state_from_cursor = read_state_from_cursor
94
95
95
96
def next (self ) -> Optional [Mapping [str , Any ]]:
97
+ """
98
+ The next() method returns the next slice of data that should be synced for the current stream according to its cursor.
99
+ This function supports iterating over a stream's slices across two dimensions. The outer dimension is the stream's
100
+ partitions like parent records for a substream. The inner dimension is iterating over the cursor value like a
101
+ date range for incremental streams or a pagination checkpoint for resumable full refresh.
102
+
103
+ The basic algorithm for iterating through a stream's slices is:
104
+ 1. The first time next() is invoked we get the first partition and return it
105
+ 2. For streams whose cursor value is determined dynamically using stream state
106
+ 1. Get the current state for the current partition
107
+ 2. If the current partition's state is complete, get the next partition
108
+ 3. If the current partition's state is still in progress, emit the next cursor value
109
+ 3. If a stream has processed all partitions, the iterator will raise a StopIteration exception signaling there are no more
110
+ slices left for extracting more records.
111
+ """
112
+
96
113
try :
97
114
if self ._current_slice is None :
98
115
self ._current_slice = self ._get_next_slice ()
99
116
return self ._current_slice
100
117
if self ._read_state_from_cursor :
101
- state_for_slice = self ._cursor .select_state (self ._current_slice . get ( "partition" ) )
118
+ state_for_slice = self ._cursor .select_state (self ._current_slice )
102
119
if state_for_slice == {"__ab_full_refresh_sync_complete" : True }:
103
120
self ._current_slice = self ._get_next_slice ()
104
121
else :
0 commit comments