diff --git a/velox/common/caching/SsdCache.cpp b/velox/common/caching/SsdCache.cpp index a6f7453acf786..2347b45982fd6 100644 --- a/velox/common/caching/SsdCache.cpp +++ b/velox/common/caching/SsdCache.cpp @@ -36,7 +36,7 @@ SsdCache::SsdCache(const Config& config) // Make sure the given path of Ssd files has the prefix for local file system. // Local file system would be derived based on the prefix. VELOX_CHECK( - filePrefix_.find('/') == 0, + filePrefix_.find('/') == 0 || filePrefix_.find("faulty:/") == 0, "Ssd path '{}' does not start with '/' that points to local file system.", filePrefix_); VELOX_CHECK_NOT_NULL(executor_); diff --git a/velox/docs/develop/testing/cache-fuzzer.rst b/velox/docs/develop/testing/cache-fuzzer.rst index cc0733e31d76d..b5e2548d6fb12 100644 --- a/velox/docs/develop/testing/cache-fuzzer.rst +++ b/velox/docs/develop/testing/cache-fuzzer.rst @@ -63,4 +63,8 @@ Here is a full list of supported command line arguments. * ``–-num_restarts``: Number of cache restarts in one iteration. +* ``--enable_file_faulty_injection``: Enable fault injection on read and write + operations for cache-related files. When enabled, the file read and write + operations will fail 5 out of 100 times. + If running from CLion IDE, add ``--logtostderr=1`` to see the full output. 
diff --git a/velox/exec/fuzzer/CacheFuzzer.cpp b/velox/exec/fuzzer/CacheFuzzer.cpp index e2151ec05dc13..e944d03b01a48 100644 --- a/velox/exec/fuzzer/CacheFuzzer.cpp +++ b/velox/exec/fuzzer/CacheFuzzer.cpp @@ -23,6 +23,7 @@ #include "velox/common/caching/FileIds.h" #include "velox/common/caching/SsdCache.h" #include "velox/common/file/FileSystems.h" +#include "velox/common/file/tests/FaultyFileSystem.h" #include "velox/common/memory/Memory.h" #include "velox/common/memory/MmapAllocator.h" #include "velox/dwio/common/CachedBufferedInput.h" @@ -76,8 +77,15 @@ DEFINE_int64( DEFINE_int32(num_restarts, 3, "Number of cache restarts in one iteration."); +DEFINE_bool( + enable_file_faulty_injection, + true, + "Enable fault injection on read and write operations for cache-related files. When enabled, " + "the file read and write operations will fail 5 out of 100 times."); + using namespace facebook::velox::cache; using namespace facebook::velox::dwio::common; +using namespace facebook::velox::tests::utils; namespace facebook::velox::exec::test { namespace { @@ -90,6 +98,7 @@ class CacheFuzzer { private: static constexpr int32_t kRandomized = -1; + static constexpr int32_t kFileFaultInjectionPct = 5; void seed(size_t seed) { currentSeed_ = seed; @@ -149,6 +158,7 @@ class CacheFuzzer { std::vector>> fileFragments_; std::vector> inputs_; std::shared_ptr sourceDataDir_; + std::shared_ptr cacheDataDir_; std::unique_ptr memoryManager_; std::unique_ptr executor_; std::shared_ptr cache_; @@ -175,10 +185,14 @@ bool isDone(size_t i, T startTime) { CacheFuzzer::CacheFuzzer(size_t initialSeed) { seed(initialSeed); filesystems::registerLocalFileSystem(); + registerFaultyFileSystem(); } void CacheFuzzer::initSourceDataFiles() { + // Skip errors on source data files. 
sourceDataDir_ = exec::test::TempDirectoryPath::create(); + cacheDataDir_ = + exec::test::TempDirectoryPath::create(FLAGS_enable_file_faulty_injection); fs_ = filesystems::getFileSystem(sourceDataDir_->getPath(), nullptr); // Create files with random sizes. @@ -211,6 +225,23 @@ void CacheFuzzer::initSourceDataFiles() { fileSizes_.emplace_back(fileSize); } } + + if (FLAGS_enable_file_faulty_injection) { + faultyFileSystem()->setFileInjectionHook([](FaultFileOperation* op) { + std::random_device rd; + boost::random::uniform_int_distribution dist(1, 100); + if ((op->type == FaultFileOperation::Type::kWrite || + op->type == FaultFileOperation::Type::kAppend) && + dist(rd) <= kFileFaultInjectionPct) { + VELOX_FAIL("Inject hook write failure"); + } + if ((op->type == FaultFileOperation::Type::kReadv || + op->type == FaultFileOperation::Type::kRead) && + dist(rd) <= kFileFaultInjectionPct) { + VELOX_FAIL("Inject hook read failure"); + } + }); + } } int64_t CacheFuzzer::getMemoryCacheBytes(bool restartCache) { @@ -252,7 +283,6 @@ int32_t CacheFuzzer::getSsdCacheShards(bool restartCache) { lastNumSsdCacheShards_ = FLAGS_num_ssd_cache_shards; } } - return lastNumSsdCacheShards_; } @@ -302,7 +332,7 @@ void CacheFuzzer::initializeCache(bool restartCache) { enableChecksumReadVerification(restartCache); SsdCache::Config config( - fmt::format("{}/cache", sourceDataDir_->getPath()), + fmt::format("{}/cache", cacheDataDir_->getPath()), ssdCacheBytes, numSsdCacheShards, executor_.get(), @@ -334,9 +364,10 @@ void CacheFuzzer::initializeCache(bool restartCache) { {}); LOG(INFO) << fmt::format( - "Initialized cache with {} memory space, {} SSD cache", + "Initialized cache with {} memory space, {} SSD cache, {} file faulty injection", succinctBytes(memoryCacheBytes), - ssdCacheBytes == 0 ? "with" : "without"); + ssdCacheBytes == 0 ? "without" : "with", + FLAGS_enable_file_faulty_injection ?
"with" : "without"); } void CacheFuzzer::initializeInputs() { @@ -415,11 +446,16 @@ void CacheFuzzer::resetCache() { void CacheFuzzer::resetSourceDataFiles() { const auto& sourceDataDirPath = sourceDataDir_->getPath(); + const auto& cacheDataDirPath = cacheDataDir_->getPath(); if (fs_->exists(sourceDataDirPath)) { fs_->rmdir(sourceDataDirPath); } + if (fs_->exists(cacheDataDirPath)) { + fs_->rmdir(cacheDataDirPath); + } fs_.reset(); sourceDataDir_.reset(); + cacheDataDir_.reset(); fileNames_.clear(); fileIds_.clear(); fileSizes_.clear(); @@ -427,7 +463,6 @@ void CacheFuzzer::resetSourceDataFiles() { } void CacheFuzzer::read(uint32_t fileIdx, int32_t fragmentIdx) { - // TODO: Faulty injection. const auto [offset, length] = fileFragments_[fileIdx][fragmentIdx]; auto stream = inputs_[fileIdx]->read(offset, length, LogType::TEST); const void* buffer;