kvserver: split snapshot SSTables for mvcc keys into multiple SSTs
Previously, we'd create only one sstable for all mvcc keys in a
range when ingesting a rebalance/recovery snapshot into Pebble. This
increased write amplification in Pebble, since other sstables would
have to be compacted into that single large sstable (or the sstable
would later have to be split into smaller ones inside Pebble), and it
had other consequences, such as massive filter blocks in the one
oversized sstable.

This change adds a new cluster setting,
kv.snapshot_rebalance.max_sst_size, that sets the max size of the
sstables containing user/mvcc keys in a range. If an sstable exceeds
this size in multiSSTWriter, we roll over that sstable and create a
new one.
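
As a rough illustration of the rollover described above, here is a
minimal, hypothetical Go sketch; it is not the actual multiSSTWriter
code, and the names (rollingWriter, sstWriter, maxSSTSize) are made up
for illustration. It only shows the mechanism: once the sstable being
built passes the configured cap, finish it and let the next write open
a fresh one.

package main

import "fmt"

// sstWriter stands in for a real sstable writer; size tracks bytes written.
type sstWriter struct {
	name string
	size int64
}

func (w *sstWriter) put(key, value []byte) {
	w.size += int64(len(key) + len(value))
}

func (w *sstWriter) finish() {
	fmt.Printf("finished %s at %d bytes\n", w.name, w.size)
}

// rollingWriter finishes the in-progress sstable and starts a new one
// whenever it grows past maxSSTSize, which plays the role of
// kv.snapshot_rebalance.max_sst_size in this sketch.
type rollingWriter struct {
	maxSSTSize int64
	curr       *sstWriter
	count      int
}

func (r *rollingWriter) put(key, value []byte) {
	if r.curr == nil {
		r.curr = &sstWriter{name: fmt.Sprintf("sst-%d", r.count)}
		r.count++
	}
	r.curr.put(key, value)
	// Roll over: once the current sstable exceeds the cap, finish it so
	// the next put opens a fresh one.
	if r.curr.size > r.maxSSTSize {
		r.curr.finish()
		r.curr = nil
	}
}

func main() {
	w := &rollingWriter{maxSSTSize: 100}
	for i := 0; i < 50; i++ {
		w.put([]byte(fmt.Sprintf("key-%03d", i)), []byte("value"))
	}
	if w.curr != nil {
		w.curr.finish()
	}
}

One detail the sketch ignores: the real writer keeps the rolled-over
sstables non-overlapping, so each new sstable picks up the mvcc key
span where the previous one left off.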

Epic: CRDB-8471
Fixes: cockroachdb#67284

Release note (performance improvement): Reduce the write-amplification
impact of rebalances by splitting snapshot sstable files into smaller ones
before ingesting them into Pebble.
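
In a running cluster the new knob is changed like any other cluster
setting (via SET CLUSTER SETTING kv.snapshot_rebalance.max_sst_size);
in Go tests it can be overridden directly, which is the pattern the new
TestMultiSSTWriterSize in the diff below relies on. A hedged sketch of
that pattern follows; the test name and package paths are assumptions,
not part of this commit.

package kvserver_test

import (
	"context"
	"testing"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
)

// TestLowerMaxSnapshotSSTSize is a hypothetical skeleton; it only
// demonstrates the override pattern used by TestMultiSSTWriterSize on
// the new setting.
func TestLowerMaxSnapshotSSTSize(t *testing.T) {
	ctx := context.Background()
	st := cluster.MakeTestingClusterSettings()
	// Cap snapshot mvcc-span sstables at 1 MiB; writers built with these
	// settings roll over to a new sstable once they pass that size.
	kvserver.MaxSnapshotSSTableSize.Override(ctx, &st.SV, 1<<20)
	// ... hand st to whatever builds the snapshot SST writer under test.
}
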
itsbilal committed Aug 7, 2024
1 parent 6ae5e64 commit fb1a9d5
Showing 4 changed files with 321 additions and 67 deletions.
59 changes: 54 additions & 5 deletions pkg/kv/kvserver/replica_sst_snapshot_storage_test.go
@@ -276,10 +276,12 @@ func TestMultiSSTWriterInitSST(t *testing.T) {
EndKey: roachpb.RKeyMax,
}
keySpans := rditer.MakeReplicatedKeySpans(&desc)
localSpans := keySpans[:len(keySpans)-1]
mvccSpan := keySpans[len(keySpans)-1]

msstw, err := newMultiSSTWriter(
ctx, cluster.MakeTestingClusterSettings(), scratch, keySpans, 0,
false, /* skipRangeDelForLastSpan */
ctx, cluster.MakeTestingClusterSettings(), scratch, localSpans, mvccSpan, 0,
false, /* skipRangeDelForMVCCSpan */
)
require.NoError(t, err)
_, err = msstw.Finish(ctx)
@@ -315,6 +317,51 @@ func TestMultiSSTWriterInitSST(t *testing.T) {
}
}

// TestMultiSSTWriterSize tests the effect of lowering the max size
// of sstables in a multiSSTWriter, and ensures that the produced
// sstables are still correct.
func TestMultiSSTWriterSize(t *testing.T) {
defer leaktest.AfterTest(t)()

ctx := context.Background()
testRangeID := roachpb.RangeID(1)
testSnapUUID := uuid.Must(uuid.FromBytes([]byte("foobar1234567890")))
testLimiter := rate.NewLimiter(rate.Inf, 0)

cleanup, eng := newOnDiskEngine(ctx, t)
defer cleanup()
defer eng.Close()

sstSnapshotStorage := NewSSTSnapshotStorage(eng, testLimiter)
scratch := sstSnapshotStorage.NewScratchSpace(testRangeID, testSnapUUID)
settings := cluster.MakeTestingClusterSettings()
MaxSnapshotSSTableSize.Override(ctx, &settings.SV, 100)

desc := roachpb.RangeDescriptor{
StartKey: roachpb.RKey("d"),
EndKey: roachpb.RKeyMax,
}
keySpans := rditer.MakeReplicatedKeySpans(&desc)
localSpans := keySpans[:len(keySpans)-1]
mvccSpan := keySpans[len(keySpans)-1]

multiSSTWriter, err := newMultiSSTWriter(ctx, settings, scratch, localSpans, mvccSpan, 0, false)
require.NoError(t, err)
require.Equal(t, int64(0), multiSSTWriter.dataSize)

for i := range localSpans {
require.NoError(t, multiSSTWriter.Put(ctx, storage.EngineKey{Key: localSpans[i].Key}, []byte("foo")))
}

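// With MaxSnapshotSSTableSize overridden to 100 bytes above, these puts
// into the mvcc span should push the writer past the cap repeatedly and
// force it to roll over to new sstables.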
for i := 0; i < 100; i++ {
require.NoError(t, multiSSTWriter.Put(ctx, storage.EngineKey{Key: roachpb.Key(append(desc.StartKey, byte(i)))}, []byte("foobarbaz")))
}

_, err = multiSSTWriter.Finish(ctx)
require.NoError(t, err)
require.Greater(t, len(scratch.SSTs()), len(keySpans))
}

// TestMultiSSTWriterAddLastSpan tests that multiSSTWriter initializes each of
// the SST files associated with the replicated key ranges by writing a range
// deletion tombstone that spans the entire range of each respectively, except
@@ -342,14 +389,16 @@ func TestMultiSSTWriterAddLastSpan(t *testing.T) {
EndKey: roachpb.RKeyMax,
}
keySpans := rditer.MakeReplicatedKeySpans(&desc)
localSpans := keySpans[:len(keySpans)-1]
mvccSpan := keySpans[len(keySpans)-1]

msstw, err := newMultiSSTWriter(
ctx, cluster.MakeTestingClusterSettings(), scratch, keySpans, 0,
true, /* skipRangeDelForLastSpan */
ctx, cluster.MakeTestingClusterSettings(), scratch, localSpans, mvccSpan, 0,
true, /* skipRangeDelForMVCCSpan */
)
require.NoError(t, err)
if addRangeDel {
require.NoError(t, msstw.addRangeDelForLastSpan())
require.NoError(t, msstw.addClearForMVCCSpan())
}
testKey := storage.MVCCKey{Key: roachpb.RKey("d1").AsRawKey(), Timestamp: hlc.Timestamp{WallTime: 1}}
testEngineKey, _ := storage.DecodeEngineKey(storage.EncodeMVCCKey(testKey))