Skip to content

Commit

Permalink
Merge pull request #129018 from itsbilal/backport24.1-127997
Browse files Browse the repository at this point in the history
release-24.1: kvserver: split snapshot SSTables for mvcc keys into multiple SSTs
  • Loading branch information
itsbilal authored Oct 3, 2024
2 parents 25fab96 + 8c76056 commit fb85fd6
Show file tree
Hide file tree
Showing 31 changed files with 536 additions and 177 deletions.
9 changes: 5 additions & 4 deletions pkg/cli/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,10 +515,10 @@ func runDebugRangeData(cmd *cobra.Command, args []string) error {
var results int
return rditer.IterateReplicaKeySpans(cmd.Context(), &desc, snapshot, debugCtx.replicated,
rditer.ReplicatedSpansAll,
func(iter storage.EngineIterator, _ roachpb.Span, keyType storage.IterKeyType) error {
func(iter storage.EngineIterator, _ roachpb.Span) error {
for ok := true; ok && err == nil; ok, err = iter.NextEngineKey() {
switch keyType {
case storage.IterKeyTypePointsOnly:
hasPoint, hasRange := iter.HasPointAndRange()
if hasPoint {
key, err := iter.UnsafeEngineKey()
if err != nil {
return err
Expand All @@ -532,8 +532,9 @@ func runDebugRangeData(cmd *cobra.Command, args []string) error {
if results == debugCtx.maxResults {
return iterutil.StopIteration()
}
}

case storage.IterKeyTypeRangesOnly:
if hasRange && iter.RangeKeyChanged() {
bounds, err := iter.EngineRangeBounds()
if err != nil {
return err
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,7 @@ go_test(
"@com_github_cockroachdb_errors//oserror",
"@com_github_cockroachdb_logtags//:logtags",
"@com_github_cockroachdb_pebble//:pebble",
"@com_github_cockroachdb_pebble//sstable",
"@com_github_cockroachdb_pebble//vfs",
"@com_github_cockroachdb_redact//:redact",
"@com_github_dustin_go_humanize//:go-humanize",
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/client_merge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3903,7 +3903,7 @@ func TestStoreRangeMergeRaftSnapshot(t *testing.T) {

err := rditer.IterateReplicaKeySpans(
context.Background(), inSnap.Desc, sendingEngSnapshot, true /* replicatedOnly */, rditer.ReplicatedSpansAll,
func(iter storage.EngineIterator, span roachpb.Span, keyType storage.IterKeyType) error {
func(iter storage.EngineIterator, span roachpb.Span) error {
fw, ok := sstFileWriters[string(span.Key)]
if !ok || !fw.span.Equal(span) {
return errors.Errorf("unexpected span %s", span)
Expand Down
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/kvserverpb/raft.proto
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,12 @@ message SnapshotRequest {
// file contents.
bool external_replicate = 13;

// If true, the snapshot is iterating over range keys and point
// keys in key order, as opposed to iterating over point keys first
// and range keys second. The receiver can take advantage of this
// to split points/range keys into multiple sstables for ingestion.
bool range_keys_in_order = 14;

reserved 1, 4, 6, 7, 8, 9;
}

Expand Down
37 changes: 17 additions & 20 deletions pkg/kv/kvserver/rditer/replica_data_iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ func IterateReplicaKeySpans(
reader storage.Reader,
replicatedOnly bool,
replicatedSpansFilter ReplicatedSpansFilter,
visitor func(storage.EngineIterator, roachpb.Span, storage.IterKeyType) error,
visitor func(storage.EngineIterator, roachpb.Span) error,
) error {
if !reader.ConsistentIterators() {
panic("reader must provide consistent iterators")
Expand All @@ -425,28 +425,25 @@ func IterateReplicaKeySpans(
UnreplicatedByRangeID: true,
})
}
keyTypes := []storage.IterKeyType{storage.IterKeyTypePointsOnly, storage.IterKeyTypeRangesOnly}
for _, span := range spans {
for _, keyType := range keyTypes {
err := func() error {
iter, err := reader.NewEngineIterator(ctx, storage.IterOptions{
KeyTypes: keyType,
LowerBound: span.Key,
UpperBound: span.EndKey,
})
if err != nil {
return err
}
defer iter.Close()
ok, err := iter.SeekEngineKeyGE(storage.EngineKey{Key: span.Key})
if err == nil && ok {
err = visitor(iter, span, keyType)
}
return err
}()
err := func() error {
iter, err := reader.NewEngineIterator(ctx, storage.IterOptions{
KeyTypes: storage.IterKeyTypePointsAndRanges,
LowerBound: span.Key,
UpperBound: span.EndKey,
})
if err != nil {
return iterutil.Map(err)
return err
}
defer iter.Close()
ok, err := iter.SeekEngineKeyGE(storage.EngineKey{Key: span.Key})
if err == nil && ok {
err = visitor(iter, span)
}
return err
}()
if err != nil {
return iterutil.Map(err)
}
}
return nil
Expand Down
21 changes: 10 additions & 11 deletions pkg/kv/kvserver/rditer/replica_data_iter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func verifyIterateReplicaKeySpans(

require.NoError(t, IterateReplicaKeySpans(context.Background(), desc, readWriter, replicatedOnly,
replicatedSpansFilter,
func(iter storage.EngineIterator, span roachpb.Span, keyType storage.IterKeyType) error {
func(iter storage.EngineIterator, span roachpb.Span) error {
var err error
for ok := true; ok && err == nil; ok, err = iter.NextEngineKey() {
// Span should not be empty.
Expand All @@ -186,8 +186,8 @@ func verifyIterateReplicaKeySpans(
require.True(t, span.ContainsKey(key.Key), "%s not in %s", key, span)
require.True(t, key.IsLockTableKey() || key.IsMVCCKey(), "%s neither lock nor MVCC", key)

switch keyType {
case storage.IterKeyTypePointsOnly:
hasPoint, hasRange := iter.HasPointAndRange()
if hasPoint {
var mvccKey storage.MVCCKey
if key.IsMVCCKey() {
var err error
Expand All @@ -213,8 +213,8 @@ func verifyIterateReplicaKeySpans(
fmt.Sprintf("%x", key.Version),
mvccKey.String(),
})

case storage.IterKeyTypeRangesOnly:
}
if hasRange && iter.RangeKeyChanged() {
bounds, err := iter.EngineRangeBounds()
require.NoError(t, err)
require.True(t, span.Contains(bounds), "%s not contained in %s", bounds, span)
Expand All @@ -234,9 +234,6 @@ func verifyIterateReplicaKeySpans(
mvccRangeKey.String(),
})
}

default:
t.Fatalf("unexpected key type %v", keyType)
}
}
return err
Expand Down Expand Up @@ -482,9 +479,11 @@ func TestReplicaDataIteratorGlobalRangeKey(t *testing.T) {
var actualSpans []roachpb.Span
require.NoError(t, IterateReplicaKeySpans(
context.Background(), &desc, snapshot, replicatedOnly, ReplicatedSpansAll,
func(iter storage.EngineIterator, span roachpb.Span, keyType storage.IterKeyType) error {
func(iter storage.EngineIterator, span roachpb.Span) error {
// We should never see any point keys.
require.Equal(t, storage.IterKeyTypeRangesOnly, keyType)
hasPoint, hasRange := iter.HasPointAndRange()
require.False(t, hasPoint)
require.True(t, hasRange)

// The iterator should already be positioned on the range key, which should
// span the entire key span and be the only range key.
Expand Down Expand Up @@ -590,7 +589,7 @@ func benchReplicaEngineDataIterator(b *testing.B, numRanges, numKeysPerRange, va
for _, desc := range descs {
err := IterateReplicaKeySpans(
context.Background(), &desc, snapshot, false /* replicatedOnly */, ReplicatedSpansAll,
func(iter storage.EngineIterator, _ roachpb.Span, _ storage.IterKeyType) error {
func(iter storage.EngineIterator, _ roachpb.Span) error {
var err error
for ok := true; ok && err == nil; ok, err = iter.NextEngineKey() {
_, _ = iter.UnsafeEngineKey()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,25 @@ echo
+---------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+
| SPAN | KEY HEX | ENDKEY HEX | VERSION HEX | PRETTY |
+---------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+
| /Local/RangeID/1/{r""-s""} | 016989723a61 | 016989723a78 | 000000000000000109 | /Local/RangeID/1/r":{a"-x"}/0.000000001,0 |
| /Local/RangeID/1/{r""-s""} | 016989726162632d120ce61c175eb445878c36dcf4062ada4c0001 | | | /Local/RangeID/1/r/AbortSpan/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" |
| /Local/RangeID/1/{r""-s""} | 016989726162632d129855a1ef8eb94c06a106cab1dda78a2b0001 | | | /Local/RangeID/1/r/AbortSpan/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" |
| /Local/RangeID/1/{r""-s""} | 016989726c67632d | | | /Local/RangeID/1/r/RangeGCThreshold |
| /Local/RangeID/1/{r""-s""} | 016989727261736b | | | /Local/RangeID/1/r/RangeAppliedState |
| /Local/RangeID/1/{r""-s""} | 01698972726c6c2d | | | /Local/RangeID/1/r/RangeLease |
| /Local/RangeID/1/{r""-s""} | 016989723a61 | 016989723a78 | 000000000000000109 | /Local/RangeID/1/r":{a"-x"}/0.000000001,0 |
| /Local/RangeID/1/{u""-v""} | 016989753a61 | 016989753a78 | 000000000000000109 | /Local/RangeID/1/u":{a"-x"}/0.000000001,0 |
| /Local/RangeID/1/{u""-v""} | 0169897572667462 | | | /Local/RangeID/1/u/RangeTombstone |
| /Local/RangeID/1/{u""-v""} | 0169897572667468 | | | /Local/RangeID/1/u/RaftHardState |
| /Local/RangeID/1/{u""-v""} | 016989757266746c0000000000000001 | | | /Local/RangeID/1/u/RaftLog/logIndex:1 |
| /Local/RangeID/1/{u""-v""} | 016989757266746c0000000000000002 | | | /Local/RangeID/1/u/RaftLog/logIndex:2 |
| /Local/RangeID/1/{u""-v""} | 01698975726c7274 | | | /Local/RangeID/1/u/RangeLastReplicaGCTimestamp |
| /Local/RangeID/1/{u""-v""} | 016989753a61 | 016989753a78 | 000000000000000109 | /Local/RangeID/1/u":{a"-x"}/0.000000001,0 |
| /Local/Range"{a"-b"} | 016b1261000172647363 | | 0000000000000001 | /Local/Range"a"/RangeDescriptor/0.000000001,0 |
| /Local/Range"{a"-b"} | 016b1261000174786e2d0ce61c175eb445878c36dcf4062ada4c | | | /Local/Range"a"/Transaction/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" |
| /Local/Range"{a"-b"} | 016b126100ff000174786e2d9855a1ef8eb94c06a106cab1dda78a2b | | | /Local/Range"a\x00"/Transaction/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" |
| /Local/Range"{a"-b"} | 016b1261ffffffff000174786e2d295e727c8ca9437cbb5e8e2ebbad996f | | | /Local/Range"a\xff\xff\xff\xff"/Transaction/"295e727c-8ca9-437c-bb5e-8e2ebbad996f" |
| /Local/Lock/Local/Range"{a"-b"} | 017a6b12016b126100ff01726473630001 | | 030ce61c175eb445878c36dcf4062ada4c | /Local/Range"a"/RangeDescriptor |
| /Local/Lock"{a"-b"} | 017a6b12610001 | | 030ce61c175eb445878c36dcf4062ada4c | "a" |
| {a-b} | 61 | 62 | 000000000000000109 | {a-b}/0.000000001,0 |
| {a-b} | 61 | | 0000000000000001 | "a"/0.000000001,0 |
| {a-b} | 61ffffffff | | 0000000000000001 | "a\xff\xff\xff\xff"/0.000000001,0 |
| {a-b} | 61 | 62 | 000000000000000109 | {a-b}/0.000000001,0 |
+---------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@ echo
+---------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+
| SPAN | KEY HEX | ENDKEY HEX | VERSION HEX | PRETTY |
+---------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+
| /Local/RangeID/1/{r""-s""} | 016989723a61 | 016989723a78 | 000000000000000109 | /Local/RangeID/1/r":{a"-x"}/0.000000001,0 |
| /Local/RangeID/1/{r""-s""} | 016989726162632d120ce61c175eb445878c36dcf4062ada4c0001 | | | /Local/RangeID/1/r/AbortSpan/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" |
| /Local/RangeID/1/{r""-s""} | 016989726162632d129855a1ef8eb94c06a106cab1dda78a2b0001 | | | /Local/RangeID/1/r/AbortSpan/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" |
| /Local/RangeID/1/{r""-s""} | 016989726c67632d | | | /Local/RangeID/1/r/RangeGCThreshold |
| /Local/RangeID/1/{r""-s""} | 016989727261736b | | | /Local/RangeID/1/r/RangeAppliedState |
| /Local/RangeID/1/{r""-s""} | 01698972726c6c2d | | | /Local/RangeID/1/r/RangeLease |
| /Local/RangeID/1/{r""-s""} | 016989723a61 | 016989723a78 | 000000000000000109 | /Local/RangeID/1/r":{a"-x"}/0.000000001,0 |
| /Local/RangeID/1/{u""-v""} | 016989753a61 | 016989753a78 | 000000000000000109 | /Local/RangeID/1/u":{a"-x"}/0.000000001,0 |
| /Local/RangeID/1/{u""-v""} | 0169897572667462 | | | /Local/RangeID/1/u/RangeTombstone |
| /Local/RangeID/1/{u""-v""} | 0169897572667468 | | | /Local/RangeID/1/u/RaftHardState |
| /Local/RangeID/1/{u""-v""} | 016989757266746c0000000000000001 | | | /Local/RangeID/1/u/RaftLog/logIndex:1 |
| /Local/RangeID/1/{u""-v""} | 016989757266746c0000000000000002 | | | /Local/RangeID/1/u/RaftLog/logIndex:2 |
| /Local/RangeID/1/{u""-v""} | 01698975726c7274 | | | /Local/RangeID/1/u/RangeLastReplicaGCTimestamp |
| /Local/RangeID/1/{u""-v""} | 016989753a61 | 016989753a78 | 000000000000000109 | /Local/RangeID/1/u":{a"-x"}/0.000000001,0 |
| /Local/Range"{a"-b"} | 016b1261000172647363 | | 0000000000000001 | /Local/Range"a"/RangeDescriptor/0.000000001,0 |
| /Local/Range"{a"-b"} | 016b1261000174786e2d0ce61c175eb445878c36dcf4062ada4c | | | /Local/Range"a"/Transaction/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" |
| /Local/Range"{a"-b"} | 016b126100ff000174786e2d9855a1ef8eb94c06a106cab1dda78a2b | | | /Local/Range"a\x00"/Transaction/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" |
Expand Down
Loading

0 comments on commit fb85fd6

Please sign in to comment.