Skip to content

Commit

Permalink
[fix](restore) Lock tablet before modify segment files (#45711)
Browse files Browse the repository at this point in the history
There is a race condition between the tablet checkpoint and the snapshot
move task since the checkpoint will depend on the segment files to check
data size correctness, and the move task will delete the tablet
directory and move the downloaded files into it.

This PR makes the move task to take tablet locks, before deleting the
directory.
  • Loading branch information
w41ter authored Dec 20, 2024
1 parent af11693 commit 034085a
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 43 deletions.
4 changes: 2 additions & 2 deletions be/src/olap/tablet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2766,7 +2766,7 @@ void Tablet::check_table_size_correctness() {
const std::vector<RowsetMetaSharedPtr>& all_rs_metas = _tablet_meta->all_rs_metas();
for (const auto& rs_meta : all_rs_metas) {
int64_t total_segment_size = get_segment_file_size(rs_meta);
int64_t total_inverted_index_size = get_inverted_index_file_szie(rs_meta);
int64_t total_inverted_index_size = get_inverted_index_file_size(rs_meta);
if (rs_meta->data_disk_size() != total_segment_size ||
rs_meta->index_disk_size() != total_inverted_index_size ||
rs_meta->data_disk_size() + rs_meta->index_disk_size() != rs_meta->total_disk_size()) {
Expand Down Expand Up @@ -2817,7 +2817,7 @@ int64_t Tablet::get_segment_file_size(const RowsetMetaSharedPtr& rs_meta) {
return total_segment_size;
}

int64_t Tablet::get_inverted_index_file_szie(const RowsetMetaSharedPtr& rs_meta) {
int64_t Tablet::get_inverted_index_file_size(const RowsetMetaSharedPtr& rs_meta) {
const auto& fs = rs_meta->fs();
if (!fs) {
LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta->resource_id();
Expand Down
5 changes: 3 additions & 2 deletions be/src/olap/tablet.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ class Tablet final : public BaseTablet {
std::mutex& get_push_lock() { return _ingest_lock; }
std::mutex& get_base_compaction_lock() { return _base_compaction_lock; }
std::mutex& get_cumulative_compaction_lock() { return _cumulative_compaction_lock; }
std::shared_mutex& get_meta_store_lock() { return _meta_store_lock; }

std::shared_timed_mutex& get_migration_lock() { return _migration_lock; }

Expand Down Expand Up @@ -531,7 +532,7 @@ class Tablet final : public BaseTablet {
void check_table_size_correctness();
std::string get_segment_path(const RowsetMetaSharedPtr& rs_meta, int64_t seg_id);
int64_t get_segment_file_size(const RowsetMetaSharedPtr& rs_meta);
int64_t get_inverted_index_file_szie(const RowsetMetaSharedPtr& rs_meta);
int64_t get_inverted_index_file_size(const RowsetMetaSharedPtr& rs_meta);

public:
static const int64_t K_INVALID_CUMULATIVE_POINT = -1;
Expand Down Expand Up @@ -588,7 +589,7 @@ class Tablet final : public BaseTablet {
std::shared_ptr<CumulativeCompactionPolicy> _cumulative_compaction_policy;
std::string_view _cumulative_compaction_type;

// use a seperate thread to check all tablets paths existance
// use a separate thread to check all tablets paths existence
std::atomic<bool> _is_tablet_path_exists;

int64_t _last_missed_version;
Expand Down
97 changes: 58 additions & 39 deletions be/src/runtime/snapshot_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -765,49 +765,68 @@ Status SnapshotLoader::move(const std::string& snapshot_path, TabletSharedPtr ta
return Status::InternalError(err_msg);
}

if (overwrite) {
std::vector<std::string> snapshot_files;
RETURN_IF_ERROR(_get_existing_files_from_local(snapshot_path, &snapshot_files));

// 1. simply delete the old dir and replace it with the snapshot dir
try {
// This remove seems soft enough, because we already get
// tablet id and schema hash from this path, which
// means this path is a valid path.
std::filesystem::remove_all(tablet_path);
VLOG_CRITICAL << "remove dir: " << tablet_path;
std::filesystem::create_directory(tablet_path);
VLOG_CRITICAL << "re-create dir: " << tablet_path;
} catch (const std::filesystem::filesystem_error& e) {
std::stringstream ss;
ss << "failed to move tablet path: " << tablet_path << ". err: " << e.what();
LOG(WARNING) << ss.str();
return Status::InternalError(ss.str());
}
if (!overwrite) {
throw Exception(Status::FatalError("only support overwrite now"));
}

// link files one by one
// files in snapshot dir will be moved in snapshot clean process
std::vector<std::string> linked_files;
for (auto& file : snapshot_files) {
auto full_src_path = fmt::format("{}/{}", snapshot_path, file);
auto full_dest_path = fmt::format("{}/{}", tablet_path, file);
if (link(full_src_path.c_str(), full_dest_path.c_str()) != 0) {
LOG(WARNING) << "failed to link file from " << full_src_path << " to "
<< full_dest_path << ", err: " << std::strerror(errno);

// clean the already linked files
for (auto& linked_file : linked_files) {
remove(linked_file.c_str());
}
// Medium migration/clone/checkpoint/compaction may change or check the
// files and tablet meta, so we need to take these locks.
std::unique_lock migration_lock(tablet->get_migration_lock(), std::try_to_lock);
std::unique_lock base_compact_lock(tablet->get_base_compaction_lock(), std::try_to_lock);
std::unique_lock cumu_compact_lock(tablet->get_cumulative_compaction_lock(), std::try_to_lock);
std::unique_lock cold_compact_lock(tablet->get_cold_compaction_lock(), std::try_to_lock);
std::unique_lock build_idx_lock(tablet->get_build_inverted_index_lock(), std::try_to_lock);
std::unique_lock meta_store_lock(tablet->get_meta_store_lock(), std::try_to_lock);
if (!migration_lock.owns_lock() || !base_compact_lock.owns_lock() ||
!cumu_compact_lock.owns_lock() || !cold_compact_lock.owns_lock() ||
!build_idx_lock.owns_lock() || !meta_store_lock.owns_lock()) {
// This error should be retryable
auto status = Status::ObtainLockFailed("failed to get tablet locks, tablet: {}", tablet_id);
LOG(WARNING) << status << ", snapshot path: " << snapshot_path
<< ", tablet path: " << tablet_path;
return status;
}

return Status::InternalError("move tablet failed");
std::vector<std::string> snapshot_files;
RETURN_IF_ERROR(_get_existing_files_from_local(snapshot_path, &snapshot_files));

// FIXME: the below logic will demage the tablet files if failed in the middle.

// 1. simply delete the old dir and replace it with the snapshot dir
try {
// This remove seems soft enough, because we already get
// tablet id and schema hash from this path, which
// means this path is a valid path.
std::filesystem::remove_all(tablet_path);
VLOG_CRITICAL << "remove dir: " << tablet_path;
std::filesystem::create_directory(tablet_path);
VLOG_CRITICAL << "re-create dir: " << tablet_path;
} catch (const std::filesystem::filesystem_error& e) {
std::stringstream ss;
ss << "failed to move tablet path: " << tablet_path << ". err: " << e.what();
LOG(WARNING) << ss.str();
return Status::InternalError(ss.str());
}

// link files one by one
// files in snapshot dir will be moved in snapshot clean process
std::vector<std::string> linked_files;
for (auto& file : snapshot_files) {
auto full_src_path = fmt::format("{}/{}", snapshot_path, file);
auto full_dest_path = fmt::format("{}/{}", tablet_path, file);
if (link(full_src_path.c_str(), full_dest_path.c_str()) != 0) {
LOG(WARNING) << "failed to link file from " << full_src_path << " to " << full_dest_path
<< ", err: " << std::strerror(errno);

// clean the already linked files
for (auto& linked_file : linked_files) {
remove(linked_file.c_str());
}
linked_files.push_back(full_dest_path);
VLOG_CRITICAL << "link file from " << full_src_path << " to " << full_dest_path;
}

} else {
throw Exception(Status::FatalError("only support overwrite now"));
return Status::InternalError("move tablet failed");
}
linked_files.push_back(full_dest_path);
VLOG_CRITICAL << "link file from " << full_src_path << " to " << full_dest_path;
}

// snapshot loader not need to change tablet uid
Expand Down

0 comments on commit 034085a

Please sign in to comment.