Skip to content

Commit eadd1e9

Browse files
committed
Implement simple durable Raft storage based on RocksDB
This commit adds RocksDbStorage, which implements raft::Storage. RocksDbStorage is a durable storage implementation that the RaftMetadataStore uses to persist the Raft state. This fixes #1791.
1 parent 6b19d9d commit eadd1e9

File tree

7 files changed

+501
-94
lines changed

7 files changed

+501
-94
lines changed

crates/metadata-store/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ mod grpc;
1212
mod grpc_svc;
1313
pub mod local;
1414
pub mod raft;
15+
mod util;
1516

1617
use bytestring::ByteString;
1718
use restate_core::metadata_store::VersionedValue;

crates/metadata-store/src/local/store.rs

+13-49
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
// by the Apache License, Version 2.0.
1010

1111
use crate::{
12-
MetadataStoreRequest, PreconditionViolation, RequestError, RequestReceiver, RequestSender,
12+
util, MetadataStoreRequest, PreconditionViolation, RequestError, RequestReceiver, RequestSender,
1313
};
1414
use bytes::{BufMut, BytesMut};
1515
use bytestring::ByteString;
@@ -23,7 +23,7 @@ use restate_types::config::{MetadataStoreOptions, RocksDbOptions};
2323
use restate_types::live::BoxedLiveLoad;
2424
use restate_types::storage::{StorageCodec, StorageDecode, StorageEncode};
2525
use restate_types::Version;
26-
use rocksdb::{BoundColumnFamily, DBCompressionType, WriteBatch, WriteOptions, DB};
26+
use rocksdb::{BoundColumnFamily, WriteBatch, WriteOptions, DB};
2727
use std::sync::Arc;
2828
use tokio::sync::mpsc;
2929
use tracing::{debug, trace};
@@ -56,13 +56,17 @@ impl LocalMetadataStore {
5656
let db_name = DbName::new(DB_NAME);
5757
let db_manager = RocksDbManager::get();
5858
let cfs = vec![CfName::new(KV_PAIRS)];
59-
let db_spec = DbSpecBuilder::new(db_name.clone(), options.data_dir(), db_options(options))
60-
.add_cf_pattern(
61-
CfPrefixPattern::ANY,
62-
cf_options(options.rocksdb_memory_budget()),
63-
)
64-
.ensure_column_families(cfs)
65-
.build_as_db();
59+
let db_spec = DbSpecBuilder::new(
60+
db_name.clone(),
61+
options.data_dir(),
62+
util::db_options(options),
63+
)
64+
.add_cf_pattern(
65+
CfPrefixPattern::ANY,
66+
util::cf_options(options.rocksdb_memory_budget()),
67+
)
68+
.ensure_column_families(cfs)
69+
.build_as_db();
6670

6771
let db = db_manager
6872
.open_db(updateable_rocksdb_options.clone(), db_spec)
@@ -295,43 +299,3 @@ impl LocalMetadataStore {
295299
}
296300
}
297301
}
298-
299-
fn db_options(_options: &MetadataStoreOptions) -> rocksdb::Options {
300-
rocksdb::Options::default()
301-
}
302-
303-
fn cf_options(
304-
memory_budget: usize,
305-
) -> impl Fn(rocksdb::Options) -> rocksdb::Options + Send + Sync + 'static {
306-
move |mut opts| {
307-
set_memory_related_opts(&mut opts, memory_budget);
308-
opts.set_compaction_style(rocksdb::DBCompactionStyle::Level);
309-
opts.set_num_levels(3);
310-
311-
opts.set_compression_per_level(&[
312-
DBCompressionType::None,
313-
DBCompressionType::None,
314-
DBCompressionType::Zstd,
315-
]);
316-
317-
//
318-
opts
319-
}
320-
}
321-
322-
fn set_memory_related_opts(opts: &mut rocksdb::Options, memtables_budget: usize) {
323-
// We set the budget to allow 1 mutable + 3 immutable.
324-
opts.set_write_buffer_size(memtables_budget / 4);
325-
326-
// merge 2 memtables when flushing to L0
327-
opts.set_min_write_buffer_number_to_merge(2);
328-
opts.set_max_write_buffer_number(4);
329-
// start flushing L0->L1 as soon as possible. each file on level0 is
330-
// (memtable_memory_budget / 2). This will flush level 0 when it's bigger than
331-
// memtable_memory_budget.
332-
opts.set_level_zero_file_num_compaction_trigger(2);
333-
// doesn't really matter much, but we don't want to create too many files
334-
opts.set_target_file_size_base(memtables_budget as u64 / 8);
335-
// make Level1 size equal to Level0 size, so that L0->L1 compactions are fast
336-
opts.set_max_bytes_for_level_base(memtables_budget as u64);
337-
}

crates/metadata-store/src/raft/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@
99
// by the Apache License, Version 2.0.
1010

1111
pub mod service;
12-
mod store;
1312
mod storage;
13+
mod store;

crates/metadata-store/src/raft/service.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ impl RaftMetadataStoreService {
3333
impl MetadataStoreService for RaftMetadataStoreService {
3434
async fn run(mut self) -> Result<(), Error> {
3535
let store_options = self.options.live_load();
36-
let store = RaftMetadataStore::new().map_err(Error::generic)?;
36+
let store = RaftMetadataStore::create().await.map_err(Error::generic)?;
3737

3838
let mut builder = GrpcServiceBuilder::default();
3939

0 commit comments

Comments
 (0)