Skip to content

Commit 4ea4352

Browse files
committed
feat: Key Prefix Encoding + Decoding
1 parent dc253aa commit 4ea4352

File tree

3 files changed

+49
-13
lines changed

3 files changed

+49
-13
lines changed

src/main/scala/io/github/leibnizhu/tinylsm/Block.scala

+38-7
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,14 @@ class Block(val data: Array[Byte], val offsets: Array[Int]) {
3434
buffer.appendAll(intLow2Bytes(offsets.length))
3535
buffer.toArray
3636
}
37+
38+
/**
 * Returns the key of the first entry stored in this block.
 *
 * Entries are laid out as `key_overlap_len (u16) | rest_key_len (u16) | key`. The first
 * entry is expected to carry an overlap length of 0, so its key is read in full.
 *
 * @return the bytes read for the first entry's key
 */
def getFirstKey(): MemTableKey = {
  val reader = ByteArrayReader(data)
  // The first entry has nothing to share a prefix with, so its overlap field must be 0.
  val sharedPrefixLen = reader.readUint16()
  assert(sharedPrefixLen == 0)
  // Next comes the key length, immediately followed by the key bytes themselves.
  reader.readBytes(reader.readUint16())
}
3745
}
3846

3947
object Block {
@@ -80,10 +88,16 @@ class BlockBuilder(val blockSize: Int) {
8088
// 显然,新数据的offset就是当前data长度
8189
offsets += data.length
8290

83-
// key的长度
84-
data.appendAll(intLow2Bytes(key.length))
91+
// overlap 格式
92+
// key_overlap_len (u16) | rest_key_len (u16) | key (rest_key_len)
93+
// 当前key与firstKey的共同前缀byte数量
94+
val overlap = commonPrefix(key)
95+
// key_overlap_len
96+
data.appendAll(intLow2Bytes(overlap))
97+
// rest_key_len (u16)
98+
data.appendAll(intLow2Bytes(key.length - overlap))
8599
// key内容
86-
data.appendAll(key)
100+
data.appendAll(key.slice(overlap, key.length))
87101
// value的长度
88102
data.appendAll(intLow2Bytes(value.length))
89103
// value内容
@@ -95,6 +109,20 @@ class BlockBuilder(val blockSize: Int) {
95109
true
96110
}
97111

112+
/**
 * Counts how many leading bytes `key` has in common with `firstKey`.
 *
 * @param key the key to compare against `firstKey`
 * @return the number of identical leading bytes; 0 when `firstKey` is empty
 */
private def commonPrefix(key: MemTableKey): Int = firstKey match {
  case None => 0
  case Some(first) =>
    // Never compare past the end of the shorter of the two keys.
    val limit = math.min(first.length, key.length)
    var shared = 0
    while (shared < limit && first(shared) == key(shared)) {
      shared += 1
    }
    shared
}
98126

99127
/**
100128
* @return 按data和offsets估算的体积
@@ -138,6 +166,7 @@ class BlockIterator(block: Block) extends MemTableStorageIterator {
138166
* 当前value在Block中data的下标
139167
*/
140168
private var curValuePos: (Int, Int) = (0, 0)
169+
private val firstKey = block.getFirstKey()
141170

142171
def seekToFirst(): Unit = {
143172
seekToIndex(0)
@@ -211,10 +240,12 @@ class BlockIterator(block: Block) extends MemTableStorageIterator {
211240

212241
// 根据 offset 段获取entry位置
213242
val entryOffset = block.offsets(index)
214-
// 先后读取key长度、key、value长度
215-
val keyLength = low2BytesToInt(block.data(entryOffset), block.data(entryOffset + 1))
216-
curKey = Some(block.data.slice(entryOffset + 2, entryOffset + 2 + keyLength))
217-
val valueOffset = entryOffset + 2 + keyLength
243+
// 先后读取overlap长度、剩余key长度、剩余key、value长度
244+
val overlapLength = low2BytesToInt(block.data(entryOffset), block.data(entryOffset + 1))
245+
val restKeyLength = low2BytesToInt(block.data(entryOffset + 2), block.data(entryOffset + 3))
246+
curKey = Some(firstKey.slice(0, overlapLength) ++
247+
block.data.slice(entryOffset + 4, entryOffset + 4 + restKeyLength))
248+
val valueOffset = entryOffset + 4 + restKeyLength
218249
val valueLength = low2BytesToInt(block.data(valueOffset), block.data(valueOffset + 1))
219250
curValuePos = (valueOffset + 2, valueOffset + 2 + valueLength)
220251
this.index = index

src/test/scala/io/github/leibnizhu/tinylsm/LsmStorageTest.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ class LsmStorageTest extends AnyFunSuite {
291291
for (i <- 0 until 6000) {
292292
storage.put(i.toString, value)
293293
}
294-
Thread.sleep(500)
294+
Thread.sleep(1000)
295295
storage.inner.state.read(st => assert(st.l0SsTables.nonEmpty))
296296
storage.close()
297297
}

src/test/scala/io/github/leibnizhu/tinylsm/SsTableTest.scala

+10-5
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ class SsTableTest extends AnyFunSuite {
7878
test("week1_day4_task2_sst_iterator") {
7979
val sst = generateSst()
8080
val iterator = SsTableIterator.createAndSeekToFirst(sst)
81-
for(_ <- 0 until 5) {
82-
for(i <- 0 until keyNum) {
81+
for (_ <- 0 until 5) {
82+
for (i <- 0 until keyNum) {
8383
val key = iterator.key()
8484
val value = iterator.value()
8585
assertResult(keyOf(i))(new String(key))
@@ -93,8 +93,8 @@ class SsTableTest extends AnyFunSuite {
9393
test("week1_day4_task1_sst_seek_key") {
9494
val sst = generateSst()
9595
val iterator = SsTableIterator.createAndSeekToKey(sst, keyOf(0).getBytes)
96-
for(offset <- 1 to 5) {
97-
for(i <- 0 until keyNum) {
96+
for (offset <- 1 to 5) {
97+
for (i <- 0 until keyNum) {
9898
val key = iterator.key()
9999
val value = iterator.value()
100100
assertResult(keyOf(i))(new String(key))
@@ -104,7 +104,7 @@ class SsTableTest extends AnyFunSuite {
104104
iterator.seekToKey("k".getBytes)
105105
}
106106
}
107-
107+
108108
test("week1_day7_task2_sst_decode") {
109109
val sst1 = generateSst()
110110
val sst2 = SsTable.open(0, None, FileObject.open(sst1.file.file.get))
@@ -113,4 +113,9 @@ class SsTableTest extends AnyFunSuite {
113113
assertResult(bloom1.hashFuncNum)(bloom2.hashFuncNum)
114114
assertResult(bloom1.filter)(bloom2.filter)
115115
}
116+
117+
// Checks that the generated SST is split into at most 25 blocks.
// NOTE(review): the bound of 25 presumably reflects the size saved by key prefix
// compression for the fixture produced by generateSst() — confirm against that helper.
test("week1_day7_task3_block_key_compression") {
  val blockCount = generateSst().blockMeta.length
  assert(blockCount <= 25)
}
116121
}

0 commit comments

Comments
 (0)