From 58ed2752b3d28fa11f10ef5c6137c4924077591f Mon Sep 17 00:00:00 2001 From: "Leibniz.Hu" Date: Mon, 26 Feb 2024 12:00:40 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20Docker=E3=80=81cli=20and=20web=20server?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 14 + README.md | 26 +- bin/build.sh | 6 + bin/tinylsm | 4 + bin/tinylsm-cli | 4 + build.sbt | 2 + .../github/leibnizhu/tinylsm/LsmStorage.scala | 31 ++- .../leibnizhu/tinylsm/TinyLsmWebServer.scala | 252 +++++++++++++++++- .../github/leibnizhu/tinylsm/BloomTest.scala | 2 +- .../leibnizhu/tinylsm/LsmStorageTest.scala | 16 +- .../leibnizhu/tinylsm/SsTableTest.scala | 4 +- 11 files changed, 336 insertions(+), 25 deletions(-) create mode 100644 Dockerfile create mode 100755 bin/build.sh create mode 100755 bin/tinylsm create mode 100755 bin/tinylsm-cli diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..f7eb1c0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM openjdk:17-jdk-slim +ENV LANG=C.UTF-8 LANGUAGE=C.UTF-8 LC_ALL=C.UTF-8 +RUN sed -i -E 's/(security|deb)\.debian\.org/mirrors.aliyun.com/g' /etc/apt/sources.list \ + && apt-get clean && apt-get update \ + && apt-get -y install tini + +COPY bin/* /etc/tinylsm/ +COPY target/scala-3.3.1/TinyLsmAssembly.jar /etc/tinylsm/TinyLsmAssembly.jar + +RUN ln -s /etc/tinylsm/tinylsm-cli /usr/bin/tinylsm-cli + +WORKDIR /etc/tinylsm +ENTRYPOINT ["tini", "--"] +CMD ["bash", "/etc/tinylsm/tinylsm"] \ No newline at end of file diff --git a/README.md b/README.md index 3da6104..d5e5ac7 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,31 @@ # TinyLSM + Tiny LSM in scala -Requirement: +## Requirement + - JDK 11 or later +## Configuration + +Configuration lookup order: + +1. JVM system properties, e.g., `-Dkey.subkey=value` +2. Operating system environment variables, e.g., `export TINY_LSM_KEY_SUBKEY=value` +3. `.env` file in classpath, using the operating system environment keys +4. 
config file specified by JVM system properties `config.file` or operating system environment `TINY_LSM_CONFIG_FILE`, + using JVM system properties key + +| environment key | system properties name | meaning | default value | +|--------------------------|------------------------|------------------------------------------------------------------------------|---------------------------| +| TINY_LSM_PORT | port | | 9527 | +| TINY_LSM_LISTEN | listen | | 0.0.0.0 | +| TINY_LSM_BLOCK_SIZE | block.size | Block size in bytes | 4096 | +| TINY_LSM_TARGET_SST_SIZE | target.sst.size | SST size in bytes, also the approximate memtable capacity limit | 2 << 20 (2MB) | +| TINY_LSM_MEMTABLE_NUM | memtable.num | Maximum number of memtables in memory, flush to L0 when exceeding this limit | 50 | +| TINY_LSM_ENABLE_WAL | enable.wal | | true | +| TINY_LSM_SERIALIZABLE | serializable | | false | +| TINY_LSM_DATA_DIR | data.dir | | /etc/tinylsm/data | +| TINY_LSM_CONFIG_FILE | config.file | | /etc/tinylsm/tinylsm.conf | + [Reference](https://skyzh.github.io/mini-lsm/00-preface.html) \ No newline at end of file diff --git a/bin/build.sh b/bin/build.sh new file mode 100755 index 0000000..55e83e3 --- /dev/null +++ b/bin/build.sh @@ -0,0 +1,6 @@ +#!/bin/bash +SCRIPT_DIR=$(cd $(dirname $0);pwd) +cd $SCRIPT_DIR/.. + +sbt assembly +docker build . 
-f Dockerfile -t ${DOCKER_IMAGE_TAG:-tiny-lsm:0.0.1} --network=host \ No newline at end of file diff --git a/bin/tinylsm b/bin/tinylsm new file mode 100755 index 0000000..14b82ed --- /dev/null +++ b/bin/tinylsm @@ -0,0 +1,4 @@ +#!/bin/bash +SCRIPT_DIR=$(cd $(dirname $0);pwd) + +java -cp .:TinyLsmAssembly.jar io.github.leibnizhu.tinylsm.TinyLsmWebServer \ No newline at end of file diff --git a/bin/tinylsm-cli b/bin/tinylsm-cli new file mode 100755 index 0000000..9623f62 --- /dev/null +++ b/bin/tinylsm-cli @@ -0,0 +1,4 @@ +#!/bin/bash +SCRIPT_DIR=$(cd $(dirname $0);pwd) + +java -cp .:TinyLsmAssembly.jar io.github.leibnizhu.tinylsm.TinyLsmCli $@ \ No newline at end of file diff --git a/build.sbt b/build.sbt index f6b1dc8..c5a0aef 100644 --- a/build.sbt +++ b/build.sbt @@ -4,8 +4,10 @@ version := "0.1" libraryDependencies ++= Seq( "com.lihaoyi" %% "cask" % "0.9.2" % "compile", + "com.lihaoyi" %% "requests" % "0.8.0"% "compile", "com.github.blemale" %% "scaffeine" % "5.2.1" % "compile", "org.jboss.slf4j" % "slf4j-jboss-logging" % "1.2.1.Final" % "compile", + "org.jline" % "jline" % "3.25.1" % "compile", "org.scalatest" %% "scalatest" % "3.2.9" % Test, "org.mockito" % "mockito-core" % "4.11.0" % Test ) diff --git a/src/main/scala/io/github/leibnizhu/tinylsm/LsmStorage.scala b/src/main/scala/io/github/leibnizhu/tinylsm/LsmStorage.scala index a648e44..1f66165 100644 --- a/src/main/scala/io/github/leibnizhu/tinylsm/LsmStorage.scala +++ b/src/main/scala/io/github/leibnizhu/tinylsm/LsmStorage.scala @@ -261,9 +261,9 @@ private[tinylsm] class LsmStorageInner( * sst的范围是否包含用户指定的scan范围 * * @param userBegin scan指定的左边界 - * @param userEnd scan指定的右边界 - * @param sstBegin sst的左边,第一个key - * @param sstEnd sst的右边,最后一个key + * @param userEnd scan指定的右边界 + * @param sstBegin sst的左边,第一个key + * @param sstEnd sst的右边,最后一个key * @return sst是否满足scan范围 */ private def rangeOverlap(userBegin: Bound, userEnd: Bound, @@ -358,7 +358,7 @@ case class LsmStorageOptions blockSize: Int, // SST大小,单位是 bytes, 
同时也是MemTable容量限制的近似值 targetSstSize: Int, - // MemTable在内存中的最大占用空间, 到达这个大小后会 flush 到 L0 + // MemTable在内存中的最多个数, 超过这么多MemTable后会 flush 到 L0 numMemTableLimit: Int, // Compaction配置 compactionOptions: CompactionOptions, @@ -366,4 +366,25 @@ case class LsmStorageOptions enableWal: Boolean, // 是否可序列化 serializable: Boolean -) \ No newline at end of file +) + +object LsmStorageOptions { + def defaultOption(): LsmStorageOptions = LsmStorageOptions( + 4096, + 2 << 20, + 50, + NoCompaction(), + false, + false) + + def fromConfig(): LsmStorageOptions = + LsmStorageOptions( + Config.BlockSize.getInt, + Config.TargetSstSize.getInt, + Config.MemTableLimitNum.getInt, + // TODO + NoCompaction(), + Config.EnableWal.getBoolean, + Config.Serializable.getBoolean + ) +} \ No newline at end of file diff --git a/src/main/scala/io/github/leibnizhu/tinylsm/TinyLsmWebServer.scala b/src/main/scala/io/github/leibnizhu/tinylsm/TinyLsmWebServer.scala index 8b65469..1da656a 100644 --- a/src/main/scala/io/github/leibnizhu/tinylsm/TinyLsmWebServer.scala +++ b/src/main/scala/io/github/leibnizhu/tinylsm/TinyLsmWebServer.scala @@ -1,18 +1,29 @@ package io.github.leibnizhu.tinylsm import cask.model.Response +import org.jline.reader.impl.completer.{AggregateCompleter, ArgumentCompleter, NullCompleter, StringsCompleter} +import org.jline.reader.{Completer, EndOfFileException, LineReaderBuilder, UserInterruptException} +import org.jline.terminal.TerminalBuilder +import requests.{RequestFailedException, get} -import java.io.File +import java.io.{File, FileInputStream} +import java.net.URLEncoder +import java.nio.charset.StandardCharsets +import java.util.Properties +import scala.collection.mutable +import scala.jdk.CollectionConverters.* object TinyLsmWebServer extends cask.MainRoutes { - override def port: Int = Config.Port.getInt() + override def port: Int = Config.Port.getInt override def host: String = Config.Host.get() - private val lsmOptions = LsmStorageOptions(4096, 2 << 20, 50, NoCompaction(), 
false, false) - private val tempDir = new File(System.getProperty("java.io.tmpdir") + File.separator + "TinyLsm") - private val storage = LsmStorageInner(tempDir, lsmOptions) + override def debugMode = false + + private val lsmOptions = LsmStorageOptions.fromConfig() + private val dataDir = new File(Config.DataDir.get()) + private val storage = LsmStorageInner(dataDir, lsmOptions) @cask.get("/key/:key") def getByKey(key: String): Response[String] = { @@ -30,18 +41,35 @@ object TinyLsmWebServer extends cask.MainRoutes { storage.put(key, value) } + @cask.get("/scan") + def scan(): Unit = { + //TODO + } + + Config.print() initialize() } -enum Config(private val envName: String, private val defaultVal: String) { +enum Config(private val envName: String, val defaultVal: String) { - private val sysPropName = toSysPropertyName(envName) + private val sysPropName = Config.toPropertyName(envName) case Port extends Config("TINY_LSM_PORT", "9527") case Host extends Config("TINY_LSM_LISTEN", "0.0.0.0") + case BlockSize extends Config("TINY_LSM_BLOCK_SIZE", "4096") + case TargetSstSize extends Config("TINY_LSM_TARGET_SST_SIZE", (2 << 20).toString) + case MemTableLimitNum extends Config("TINY_LSM_MEMTABLE_NUM", "50") + // TODO Compaction配置 + // compactionOptions: CompactionOptions, + case EnableWal extends Config("TINY_LSM_ENABLE_WAL", "true") + case Serializable extends Config("TINY_LSM_SERIALIZABLE", "true") + case DataDir extends Config("TINY_LSM_DATA_DIR", "/etc/tinylsm/data") - private def toSysPropertyName(str: String): String = str.replace("_", ".").toLowerCase + /** + * 优先级: + * SystemProperty > Environment > .env > ConfigFile + */ def get(): String = { val sysProp = System.getProperty(sysPropName) if (sysProp != null && sysProp.nonEmpty) { @@ -51,8 +79,214 @@ enum Config(private val envName: String, private val defaultVal: String) { if (envProp != null && envProp.nonEmpty) { return sysProp } + val envFileProp = Config.envFileProperties.getProperty(envName) + if 
(envFileProp != null && envFileProp.nonEmpty) { + return envFileProp + } + val configFileProp = Config.configFileProperties.getProperty(sysPropName) + if (configFileProp != null && configFileProp.nonEmpty) { + return configFileProp + } defaultVal } - def getInt(): Int = get().toInt + def getInt: Int = get().toInt + + def getBoolean: Boolean = get().toBoolean +} + +object Config { + private val envFileProperties = loadEnvFile() + private val configFileProperties = loadConfigFile() + + private def toPropertyName(str: String): String = str.replaceAll("^TINY_LSM_", "").replace("_", ".").toLowerCase + + private def loadEnvFile(): Properties = { + val prop = Properties() + val envFileStream = Thread.currentThread().getContextClassLoader.getResourceAsStream(".env") + if (envFileStream != null) { + prop.load(envFileStream) + } + prop + } + + private def loadConfigFile(): Properties = { + val configFileEnvName = "TINY_LSM_CONFIG_FILE" + val configFileSysPropName = toPropertyName(configFileEnvName) + val configFile = System.getProperty(configFileSysPropName, + System.getenv().getOrDefault(configFileEnvName, "/etc/tinylsm/tinylsm.conf")) + val prop = Properties() + if (new File(configFile).exists()) { + prop.load(new FileInputStream(configFile)) + } + prop + } + + def print(): Unit = { + println("TinyLsm configurations:") + Config.values.foreach(c => println(s"\t${c.sysPropName} => ${c.get()}")) + } +} + +object TinyLsmCli { + + def main(args: Array[String]): Unit = { + val argMap = parseArgs(args) + val cliContext = CliContext(argMap) + val terminal = TerminalBuilder.builder().name("TinyLsm cli").system(true).build + val lineReader = LineReaderBuilder.builder().terminal(terminal).appName("TinyLsm cli").completer(getCompleter).build + + // REPL 循环 + while (true) { + try { + // 输入命令提示信息, 获取输入的信息 + val line = lineReader.readLine("TinyLsm> ") + if (null != line && line.trim.nonEmpty) { + // 解析输入的命令 解析成list + val words = lineReader.getParsedLine.words(); + 
executeCommand(words.asScala, cliContext) + } + } catch + case e: UserInterruptException => gracefullyExit() + case e: EndOfFileException => gracefullyExit() + } + } + + private def gracefullyExit(): Unit = { + println("Please use :quit next time ^_^") + System.exit(1) + } + + private def executeCommand(words: mutable.Buffer[String], cliContext: CliContext): Unit = { + words.head match + case ":quit" => System.exit(0) + case ":help" => printHelp() + case "get" => if (words.length < 2) { + println("Invalid command, use: get ") + } else { + cliContext.get(words(1)) + } + case "delete" => if (words.length < 2) { + println("Invalid command, use: delete ") + } else { + cliContext.delete(words(1)) + } + case "put" => if (words.length < 3) { + println("Invalid command, use: put ") + } else { + cliContext.put(words(1), words(2)) + } + case _ => println("Unsupported command: " + words.head) + } + + private def printHelp(): Unit = println( + """ + |Help + | get : Get value by key. + | delete : Delete a key. + | put : Put value by key. + | :help : Show this help info. 
+ | :quit : Quit TinyLsm cli.""".stripMargin) + + private def getCompleter: Completer = + new AggregateCompleter( + new ArgumentCompleter(new StringsCompleter("get"), NullCompleter.INSTANCE), + new ArgumentCompleter(new StringsCompleter("put"), NullCompleter.INSTANCE), + new ArgumentCompleter(new StringsCompleter("delete"), NullCompleter.INSTANCE), + new ArgumentCompleter(new StringsCompleter("scan"), NullCompleter.INSTANCE), + new ArgumentCompleter(new StringsCompleter(":help"), NullCompleter.INSTANCE), + new ArgumentCompleter(new StringsCompleter(":quit"), NullCompleter.INSTANCE), + ) + + private def parseArgs(args: Array[String]): Map[String, Any] = { + if (args == null || args.isEmpty) { + Map() + } else { + var i = 0 + val result = mutable.HashMap[String, Any]() + while (i < args.length) { + val cur = args(i) + cur match + case "--playground" => + result.put("playground", true) + case "-h" => + result.put("host", args(i + 1)) + i += 1 + case "-p" => + result.put("port", args(i + 1).toInt) + i += 1 + case _ => + println("Unsupported argument: " + cur) + i += 1 + } + result.toMap + } + } +} + +class CliContext(playgroundMode: Boolean, + playgroundLsm: Option[TinyLsm], + host: String, + port: Int) { + + def get(key: String): Unit = { + if (playgroundMode) { + val value = playgroundLsm.get.get(key) + if (value.isDefined) { + println(value.get) + } else { + println("> Key does not exists: " + key) + } + } else { + try { + val encodedKey = URLEncoder.encode(key, StandardCharsets.UTF_8) + val r = requests.get(s"http://$host:$port/key/$encodedKey") + println(r.text()) + } catch + case e: RequestFailedException => if (e.response.statusCode == 404) { + println(">>> Key does not exists: " + key) + } else { + println(">>> Server error: " + e.response.text()) + } + } + } + + def delete(key: String): Unit = { + if (playgroundMode) { + playgroundLsm.get.delete(key) + println("Done") + } else { + val encodedKey = URLEncoder.encode(key, StandardCharsets.UTF_8) + 
requests.delete(s"http://$host:$port/key/$encodedKey") + } + } + + def put(key: String, value: String): Unit = { + if (playgroundMode) { + playgroundLsm.get.put(key, value) + println("Done") + } else { + val encodedKey = URLEncoder.encode(key, StandardCharsets.UTF_8) + val encodedValue = URLEncoder.encode(value, StandardCharsets.UTF_8) + requests.post(s"http://$host:$port/key/$encodedKey?value=$encodedValue") + } + } +} + +object CliContext { + def apply(argMap: Map[String, Any]): CliContext = { + val playgroundMode = argMap.getOrElse("playground", false).asInstanceOf[Boolean] + val playgroundLsm = if (playgroundMode) { + val tempDir = System.getProperty("java.io.tmpdir") + File.separator + "TinyLsmPlayground" + Some(TinyLsm(new File(tempDir), LsmStorageOptions.defaultOption())) + } else { + None + } + new CliContext( + playgroundMode, + playgroundLsm, + argMap.getOrElse("host", "localhost").asInstanceOf[String], + argMap.getOrElse("port", Config.Port.defaultVal.toInt).asInstanceOf[Int] + ) + } } \ No newline at end of file diff --git a/src/test/scala/io/github/leibnizhu/tinylsm/BloomTest.scala b/src/test/scala/io/github/leibnizhu/tinylsm/BloomTest.scala index 0e5cfd8..e69ebaf 100644 --- a/src/test/scala/io/github/leibnizhu/tinylsm/BloomTest.scala +++ b/src/test/scala/io/github/leibnizhu/tinylsm/BloomTest.scala @@ -37,7 +37,7 @@ class BloomTest extends AnyFunSuite { } cnt += 1 } - println(s"total key: $cnt, may match key: $x, falsePositiveRate: ${"%1.4f".format(x.toDouble/cnt)}") + println(s"total key: $cnt, may match key: $x, falsePositiveRate: ${"%1.4f".format(x.toDouble / cnt)}") assert(x != cnt) assert(x != 0) } diff --git a/src/test/scala/io/github/leibnizhu/tinylsm/LsmStorageTest.scala b/src/test/scala/io/github/leibnizhu/tinylsm/LsmStorageTest.scala index 66195c7..179966b 100644 --- a/src/test/scala/io/github/leibnizhu/tinylsm/LsmStorageTest.scala +++ b/src/test/scala/io/github/leibnizhu/tinylsm/LsmStorageTest.scala @@ -8,7 +8,7 @@ import java.io.File class 
LsmStorageTest extends AnyFunSuite { test("week1_day1_task2_storage_integration") { - val options = LsmStorageOptions(4096, 2 << 20, 50, NoCompaction(), false, false) + val options = LsmStorageOptions.defaultOption() val storage = LsmStorageInner(tempDir(), options) assert(storage.get("0").isEmpty) @@ -27,7 +27,7 @@ class LsmStorageTest extends AnyFunSuite { } test("week1_day1_task3_storage_integration") { - val options = LsmStorageOptions(4096, 2 << 20, 50, NoCompaction(), false, false) + val options = LsmStorageOptions.defaultOption() val storage = LsmStorageInner(tempDir(), options) storage.put("1", "233") @@ -88,7 +88,7 @@ class LsmStorageTest extends AnyFunSuite { } test("week1_day2_task4_integration") { - val options = LsmStorageOptions(4096, 2 << 20, 50, NoCompaction(), false, false) + val options = LsmStorageOptions.defaultOption() val storage = LsmStorageInner(tempDir(), options) storage.put("1", "233") @@ -128,7 +128,7 @@ class LsmStorageTest extends AnyFunSuite { } test("week1_day5_task2_storage_scan") { - val options = LsmStorageOptions(4096, 2 << 20, 50, NoCompaction(), false, false) + val options = LsmStorageOptions.defaultOption() val storage = LsmStorageInner(tempDir(), options) storage.put("1", "233") storage.put("2", "2333") @@ -178,7 +178,7 @@ class LsmStorageTest extends AnyFunSuite { } test("week1_day5_task3_storage_get") { - val options = LsmStorageOptions(4096, 2 << 20, 50, NoCompaction(), false, false) + val options = LsmStorageOptions.defaultOption() val storage = LsmStorageInner(tempDir(), options) storage.put("1", "233") storage.put("2", "2333") @@ -212,7 +212,7 @@ class LsmStorageTest extends AnyFunSuite { } test("week1_day6_task1_storage_scan") { - val options = LsmStorageOptions(4096, 2 << 20, 50, NoCompaction(), false, false) + val options = LsmStorageOptions.defaultOption() val storage = LsmStorageInner(tempDir(), options) storage.put("0", "2333333") storage.put("00", "2333333") @@ -252,7 +252,7 @@ class LsmStorageTest extends 
AnyFunSuite { } test("week1_day6_task1_storage_get") { - val options = LsmStorageOptions(4096, 2 << 20, 50, NoCompaction(), false, false) + val options = LsmStorageOptions.defaultOption() val storage = LsmStorageInner(tempDir(), options) storage.put("0", "2333333") storage.put("00", "2333333") @@ -297,7 +297,7 @@ class LsmStorageTest extends AnyFunSuite { } test("week1_day6_task3_sst_filter") { - val options = LsmStorageOptions(4096, 2 << 20, 50, NoCompaction(), false, false) + val options = LsmStorageOptions.defaultOption() val storage = LsmStorageInner(tempDir(), options) val keyFormat = "%05d" for (i <- 1 to 10000) { diff --git a/src/test/scala/io/github/leibnizhu/tinylsm/SsTableTest.scala b/src/test/scala/io/github/leibnizhu/tinylsm/SsTableTest.scala index 6097f27..dce6079 100644 --- a/src/test/scala/io/github/leibnizhu/tinylsm/SsTableTest.scala +++ b/src/test/scala/io/github/leibnizhu/tinylsm/SsTableTest.scala @@ -116,6 +116,8 @@ class SsTableTest extends AnyFunSuite { test("week1_day7_task3_block_key_compression") { val sst = generateSst() - assert(sst.blockMeta.length <= 25) + // 据观察,没开启的时候是20个block + // key都是 key_xxx,压缩后,每个key多了2byte记录前缀长度,少了4-6个前缀byte,估算 ((4+6)/2 -2)/7 + assert(sst.blockMeta.length < 20) } }