From e0d27834f0afa2a9f26628e2157c16d01ed5ed3e Mon Sep 17 00:00:00 2001 From: Zac Wen Date: Thu, 26 Dec 2024 18:02:45 -0800 Subject: [PATCH] test: Add fault injection in cache fuzzer Differential Revision: D67662693 --- velox/common/caching/SsdCache.cpp | 6 --- velox/docs/develop/testing/cache-fuzzer.rst | 4 ++ velox/exec/fuzzer/CacheFuzzer.cpp | 43 ++++++++++++++++++--- 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/velox/common/caching/SsdCache.cpp b/velox/common/caching/SsdCache.cpp index a6f7453acf786..d8b542e357116 100644 --- a/velox/common/caching/SsdCache.cpp +++ b/velox/common/caching/SsdCache.cpp @@ -33,12 +33,6 @@ SsdCache::SsdCache(const Config& config) numShards_(config.numShards), groupStats_(std::make_unique()), executor_(config.executor) { - // Make sure the given path of Ssd files has the prefix for local file system. - // Local file system would be derived based on the prefix. - VELOX_CHECK( - filePrefix_.find('/') == 0, - "Ssd path '{}' does not start with '/' that points to local file system.", - filePrefix_); VELOX_CHECK_NOT_NULL(executor_); VELOX_SSD_CACHE_LOG(INFO) << "SSD cache config: " << config.toString(); diff --git a/velox/docs/develop/testing/cache-fuzzer.rst b/velox/docs/develop/testing/cache-fuzzer.rst index cc0733e31d76d..ecdef05d7e3c1 100644 --- a/velox/docs/develop/testing/cache-fuzzer.rst +++ b/velox/docs/develop/testing/cache-fuzzer.rst @@ -63,4 +63,8 @@ Here is a full list of supported command line arguments. * ``–-num_restarts``: Number of cache restarts in one iteration. +* ``--enable_file_faulty_injection``: Enable fault injection on read and write + operations for cache-related files. When enabled, write operations will fail + 1 out of 100 times, and read operations will fail 5 out of 100 times. + If running from CLion IDE, add ``--logtostderr=1`` to see the full output. 
diff --git a/velox/exec/fuzzer/CacheFuzzer.cpp b/velox/exec/fuzzer/CacheFuzzer.cpp index e2151ec05dc13..f764f9fa26da5 100644 --- a/velox/exec/fuzzer/CacheFuzzer.cpp +++ b/velox/exec/fuzzer/CacheFuzzer.cpp @@ -20,9 +20,11 @@ #include #include +#include #include "velox/common/caching/FileIds.h" #include "velox/common/caching/SsdCache.h" #include "velox/common/file/FileSystems.h" +#include "velox/common/file/tests/FaultyFileSystem.h" #include "velox/common/memory/Memory.h" #include "velox/common/memory/MmapAllocator.h" #include "velox/dwio/common/CachedBufferedInput.h" @@ -76,8 +78,15 @@ DEFINE_int64( DEFINE_int32(num_restarts, 3, "Number of cache restarts in one iteration."); +DEFINE_bool( + enable_file_faulty_injection, + true, + "Enable fault injection on read and write operations for cache-related files. When enabled, " + "write operation will fail 1 out of 100 times, and read operation will fail 5 out of 100 times."); + using namespace facebook::velox::cache; using namespace facebook::velox::dwio::common; +using namespace facebook::velox::tests::utils; namespace facebook::velox::exec::test { namespace { @@ -90,6 +99,9 @@ class CacheFuzzer { private: static constexpr int32_t kRandomized = -1; + static constexpr int32_t kFileWriteErrorRate = 1; + static constexpr int32_t kFileReadErrorRate = 5; + inline static const std::regex kDataFileNamePattern{".*/file_\\d+"}; void seed(size_t seed) { currentSeed_ = seed; @@ -175,10 +187,12 @@ bool isDone(size_t i, T startTime) { CacheFuzzer::CacheFuzzer(size_t initialSeed) { seed(initialSeed); filesystems::registerLocalFileSystem(); + registerFaultyFileSystem(); } void CacheFuzzer::initSourceDataFiles() { - sourceDataDir_ = exec::test::TempDirectoryPath::create(); + sourceDataDir_ = + exec::test::TempDirectoryPath::create(FLAGS_enable_file_faulty_injection); fs_ = filesystems::getFileSystem(sourceDataDir_->getPath(), nullptr); // Create files with random sizes. 
@@ -211,6 +225,25 @@ void CacheFuzzer::initSourceDataFiles() { fileSizes_.emplace_back(fileSize); } } + + if (FLAGS_enable_file_faulty_injection) { + faultyFileSystem()->setFileInjectionHook([](FaultFileOperation* op) { + if (std::regex_match(op->path, kDataFileNamePattern)) { + // Skip errors on input data files. + return; + } + std::random_device rd; + boost::random::uniform_int_distribution dist(1, 100); + if (op->type == FaultFileOperation::Type::kWrite && + dist(rd) <= kFileWriteErrorRate) { + VELOX_FAIL("Inject hook write failure"); + } + if (op->type == FaultFileOperation::Type::kReadv && + dist(rd) <= kFileReadErrorRate) { + VELOX_FAIL("Inject hook read failure"); + } + }); + } } int64_t CacheFuzzer::getMemoryCacheBytes(bool restartCache) { @@ -252,7 +285,6 @@ int32_t CacheFuzzer::getSsdCacheShards(bool restartCache) { lastNumSsdCacheShards_ = FLAGS_num_ssd_cache_shards; } } - return lastNumSsdCacheShards_; } @@ -334,9 +366,10 @@ void CacheFuzzer::initializeCache(bool restartCache) { {}); LOG(INFO) << fmt::format( - "Initialized cache with {} memory space, {} SSD cache", + "Initialized cache with {} memory space, {} SSD cache, {} file faulty injection", succinctBytes(memoryCacheBytes), - ssdCacheBytes == 0 ? "with" : "without"); + ssdCacheBytes == 0 ? "without" : "with", + FLAGS_enable_file_faulty_injection ? "with" : "without"); } void CacheFuzzer::initializeInputs() { @@ -418,6 +451,7 @@ void CacheFuzzer::resetSourceDataFiles() { if (fs_->exists(sourceDataDirPath)) { fs_->rmdir(sourceDataDirPath); } + faultyFileSystem()->clearFileFaultInjections(); fs_.reset(); sourceDataDir_.reset(); fileNames_.clear(); @@ -427,7 +461,6 @@ void CacheFuzzer::resetSourceDataFiles() { } void CacheFuzzer::read(uint32_t fileIdx, int32_t fragmentIdx) { - // TODO: Faulty injection. const auto [offset, length] = fileFragments_[fileIdx][fragmentIdx]; auto stream = inputs_[fileIdx]->read(offset, length, LogType::TEST); const void* buffer;