Skip to content

Commit

Permalink
test: Add fault injection in cache fuzzer (facebookincubator#11969)
Browse files Browse the repository at this point in the history
Summary: Pull Request resolved: facebookincubator#11969

Differential Revision: D67662693
  • Loading branch information
zacw7 authored and facebook-github-bot committed Dec 27, 2024
1 parent 883b989 commit b23a86d
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 11 deletions.
6 changes: 0 additions & 6 deletions velox/common/caching/SsdCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,6 @@ SsdCache::SsdCache(const Config& config)
numShards_(config.numShards),
groupStats_(std::make_unique<FileGroupStats>()),
executor_(config.executor) {
// Make sure the given path of Ssd files has the prefix for local file system.
// Local file system would be derived based on the prefix.
VELOX_CHECK(
filePrefix_.find('/') == 0,
"Ssd path '{}' does not start with '/' that points to local file system.",
filePrefix_);
VELOX_CHECK_NOT_NULL(executor_);

VELOX_SSD_CACHE_LOG(INFO) << "SSD cache config: " << config.toString();
Expand Down
4 changes: 4 additions & 0 deletions velox/docs/develop/testing/cache-fuzzer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,8 @@ Here is a full list of supported command line arguments.

* ``–-num_restarts``: Number of cache restarts in one iteration.

* ``–-enable_file_faulty_injection``: Enable fault injection on read and write
operations for cache-related files. When enabled, the file read and write
operations will fail 5 out of 100 times.

If running from CLion IDE, add ``--logtostderr=1`` to see the full output.
45 changes: 40 additions & 5 deletions velox/exec/fuzzer/CacheFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "velox/common/caching/FileIds.h"
#include "velox/common/caching/SsdCache.h"
#include "velox/common/file/FileSystems.h"
#include "velox/common/file/tests/FaultyFileSystem.h"
#include "velox/common/memory/Memory.h"
#include "velox/common/memory/MmapAllocator.h"
#include "velox/dwio/common/CachedBufferedInput.h"
Expand Down Expand Up @@ -76,8 +77,15 @@ DEFINE_int64(

DEFINE_int32(num_restarts, 3, "Number of cache restarts in one iteration.");

DEFINE_bool(
enable_file_faulty_injection,
true,
"Enable fault injection on read and write operations for cache-related files. When enabled, "
"the file read and write operations will fail 5 out of 100 times.");

using namespace facebook::velox::cache;
using namespace facebook::velox::dwio::common;
using namespace facebook::velox::tests::utils;

namespace facebook::velox::exec::test {
namespace {
Expand All @@ -90,6 +98,7 @@ class CacheFuzzer {

private:
static constexpr int32_t kRandomized = -1;
static constexpr int32_t kFileFaultInjectionRate = 5;

void seed(size_t seed) {
currentSeed_ = seed;
Expand Down Expand Up @@ -149,6 +158,7 @@ class CacheFuzzer {
std::vector<std::vector<std::pair<int32_t, int32_t>>> fileFragments_;
std::vector<std::unique_ptr<CachedBufferedInput>> inputs_;
std::shared_ptr<exec::test::TempDirectoryPath> sourceDataDir_;
std::shared_ptr<exec::test::TempDirectoryPath> cacheDataDir_;
std::unique_ptr<memory::MemoryManager> memoryManager_;
std::unique_ptr<folly::IOThreadPoolExecutor> executor_;
std::shared_ptr<AsyncDataCache> cache_;
Expand All @@ -175,10 +185,14 @@ bool isDone(size_t i, T startTime) {
CacheFuzzer::CacheFuzzer(size_t initialSeed) {
seed(initialSeed);
filesystems::registerLocalFileSystem();
registerFaultyFileSystem();
}

void CacheFuzzer::initSourceDataFiles() {
// Skip errors on source data files.
sourceDataDir_ = exec::test::TempDirectoryPath::create();
cacheDataDir_ =
exec::test::TempDirectoryPath::create(FLAGS_enable_file_faulty_injection);
fs_ = filesystems::getFileSystem(sourceDataDir_->getPath(), nullptr);

// Create files with random sizes.
Expand Down Expand Up @@ -211,6 +225,23 @@ void CacheFuzzer::initSourceDataFiles() {
fileSizes_.emplace_back(fileSize);
}
}

if (FLAGS_enable_file_faulty_injection) {
faultyFileSystem()->setFileInjectionHook([&](FaultFileOperation* op) {
std::random_device rd;
boost::random::uniform_int_distribution<int> dist(1, 100);
if ((op->type == FaultFileOperation::Type::kWrite ||
op->type == FaultFileOperation::Type::kAppend) &&
dist(rd) <= kFileFaultInjectionRate) {
VELOX_FAIL("Inject hook write failure");
}
if ((op->type == FaultFileOperation::Type::kReadv ||
op->type == FaultFileOperation::Type::kRead) &&
dist(rd) <= kFileFaultInjectionRate) {
VELOX_FAIL("Inject hook read failure");
}
});
}
}

int64_t CacheFuzzer::getMemoryCacheBytes(bool restartCache) {
Expand Down Expand Up @@ -252,7 +283,6 @@ int32_t CacheFuzzer::getSsdCacheShards(bool restartCache) {
lastNumSsdCacheShards_ = FLAGS_num_ssd_cache_shards;
}
}

return lastNumSsdCacheShards_;
}

Expand Down Expand Up @@ -302,7 +332,7 @@ void CacheFuzzer::initializeCache(bool restartCache) {
enableChecksumReadVerification(restartCache);

SsdCache::Config config(
fmt::format("{}/cache", sourceDataDir_->getPath()),
fmt::format("{}/cache", cacheDataDir_->getPath()),
ssdCacheBytes,
numSsdCacheShards,
executor_.get(),
Expand Down Expand Up @@ -334,9 +364,10 @@ void CacheFuzzer::initializeCache(bool restartCache) {
{});

LOG(INFO) << fmt::format(
"Initialized cache with {} memory space, {} SSD cache",
"Initialized cache with {} memory space, {} SSD cache, {} file faulty injection",
succinctBytes(memoryCacheBytes),
ssdCacheBytes == 0 ? "with" : "without");
ssdCacheBytes == 0 ? "with" : "without",
FLAGS_enable_file_faulty_injection ? "with" : "without");
}

void CacheFuzzer::initializeInputs() {
Expand Down Expand Up @@ -415,19 +446,23 @@ void CacheFuzzer::resetCache() {

void CacheFuzzer::resetSourceDataFiles() {
const auto& sourceDataDirPath = sourceDataDir_->getPath();
const auto& cacheDataDirPath = cacheDataDir_->getPath();
if (fs_->exists(sourceDataDirPath)) {
fs_->rmdir(sourceDataDirPath);
}
if (fs_->exists(cacheDataDirPath)) {
fs_->rmdir(cacheDataDirPath);
}
fs_.reset();
sourceDataDir_.reset();
cacheDataDir_.reset();
fileNames_.clear();
fileIds_.clear();
fileSizes_.clear();
fileIds().testingReset();
}

void CacheFuzzer::read(uint32_t fileIdx, int32_t fragmentIdx) {
// TODO: Faulty injection.
const auto [offset, length] = fileFragments_[fileIdx][fragmentIdx];
auto stream = inputs_[fileIdx]->read(offset, length, LogType::TEST);
const void* buffer;
Expand Down

0 comments on commit b23a86d

Please sign in to comment.