Skip to content

Commit 6bee697

Browse files
committed
Add file system error injection in table writer fuzzer
1 parent e67f11b commit 6bee697

10 files changed

+178
-21
lines changed

velox/common/file/tests/FaultyFileSystem.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,11 @@ class FaultyFileSystem : public FileSystem {
5555

5656
std::unique_ptr<ReadFile> openFileForRead(
5757
std::string_view path,
58-
const FileOptions& options) override;
58+
const FileOptions& options = {}) override;
5959

6060
std::unique_ptr<WriteFile> openFileForWrite(
6161
std::string_view path,
62-
const FileOptions& options) override;
62+
const FileOptions& options = {}) override;
6363

6464
void remove(std::string_view path) override;
6565

velox/dwio/common/FileSink.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ class LocalFileSink : public FileSink {
168168
protected:
169169
void doClose() override;
170170

171-
private:
171+
protected:
172172
std::unique_ptr<WriteFile> writeFile_;
173173
};
174174

velox/dwio/common/tests/CMakeLists.txt

+3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
# limitations under the License.
1414

1515
add_subdirectory(utils)
16+
velox_add_library(velox_dwio_faulty_file_sink FaultyFileSink.cpp)
17+
velox_link_libraries(velox_dwio_faulty_file_sink velox_file_test_utils
18+
velox_dwio_common)
1619

1720
add_executable(
1821
velox_dwio_common_test
+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "velox/dwio/common/tests/FaultyFileSink.h"
18+
#include "velox/common/base/Fs.h"
19+
#include "velox/common/file/FileSystems.h"
20+
#include "velox/common/file/tests/FaultyFileSystem.h"
21+
#include "velox/dwio/common/FileSink.h"
22+
#include "velox/dwio/common/exception/Exception.h"
23+
24+
namespace facebook::velox::dwio::common {
25+
namespace {
26+
constexpr std::string_view kFileScheme("faulty:");
27+
28+
std::unique_ptr<FileSink> createFaultyFileSink(
29+
const std::string& filePath,
30+
const FileSink::Options& options) {
31+
if (filePath.find(kFileScheme) == 0) {
32+
return std::make_unique<FaultyFileSink>(filePath, options);
33+
}
34+
return nullptr;
35+
}
36+
} // namespace
37+
38+
FaultyFileSink::FaultyFileSink(const std::string& name, const Options& options)
39+
: LocalFileSink{name, options} {
40+
const auto dir = fs::path(name.substr(7)).parent_path();
41+
if (!fs::exists(dir)) {
42+
VELOX_CHECK(velox::common::generateFileDirectory(dir.c_str()));
43+
}
44+
auto fs = std::dynamic_pointer_cast<tests::utils::FaultyFileSystem>(
45+
filesystems::getFileSystem(name_, nullptr));
46+
writeFile_ = fs->openFileForWrite(name_);
47+
}
48+
49+
void registerFaultyFileSinks() {
50+
facebook::velox::dwio::common::FileSink::registerFactory(
51+
(createFaultyFileSink));
52+
}
53+
} // namespace facebook::velox::dwio::common
+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <chrono>
20+
21+
#include "velox/common/config/Config.h"
22+
#include "velox/common/file/File.h"
23+
#include "velox/common/file/tests/FaultyFile.h"
24+
#include "velox/common/io/IoStatistics.h"
25+
#include "velox/dwio/common/Closeable.h"
26+
#include "velox/dwio/common/DataBuffer.h"
27+
#include "velox/dwio/common/FileSink.h"
28+
#include "velox/dwio/common/MetricsLog.h"
29+
30+
namespace facebook::velox::dwio::common {
31+
using namespace facebook::velox::io;
32+
33+
class FaultyFileSink : public LocalFileSink {
34+
public:
35+
FaultyFileSink(const std::string& name, const Options& options);
36+
};
37+
38+
void registerFaultyFileSinks();
39+
40+
} // namespace facebook::velox::dwio::common

velox/exec/fuzzer/CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ target_link_libraries(
2929
velox_hive_connector
3030
velox_dwio_dwrf_reader
3131
velox_dwio_dwrf_writer
32-
velox_dwio_catalog_fbhive)
32+
velox_dwio_catalog_fbhive
33+
velox_dwio_faulty_file_sink)
3334

3435
add_library(velox_aggregation_fuzzer_base AggregationFuzzerBase.cpp)
3536

velox/exec/fuzzer/WriterFuzzer.cpp

+65-12
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "velox/common/base/Fs.h"
2323
#include "velox/common/encode/Base64.h"
2424
#include "velox/common/file/FileSystems.h"
25+
#include "velox/common/file/tests/FaultyFileSystem.h"
2526
#include "velox/connectors/hive/HiveConnector.h"
2627
#include "velox/connectors/hive/HiveConnectorSplit.h"
2728
#include "velox/connectors/hive/TableHandle.h"
@@ -36,6 +37,11 @@
3637
#include "velox/vector/VectorSaver.h"
3738
#include "velox/vector/fuzzer/VectorFuzzer.h"
3839

40+
DEFINE_bool(
41+
file_system_error_injection,
42+
true,
43+
"When enabled, inject file system write error with certain possibility");
44+
3945
DEFINE_int32(steps, 10, "Number of plans to generate and test.");
4046

4147
DEFINE_int32(
@@ -63,6 +69,8 @@ using namespace facebook::velox::test;
6369
namespace facebook::velox::exec::test {
6470

6571
namespace {
72+
using facebook::velox::filesystems::FileSystem;
73+
using tests::utils::FaultyFileSystem;
6674

6775
class WriterFuzzer {
6876
public:
@@ -113,6 +121,11 @@ class WriterFuzzer {
113121
std::vector<TypePtr> types,
114122
size_t partitionOffset);
115123

124+
// Generates file system write error with certain possibility
125+
bool injectWriterError(
126+
const std::shared_ptr<TempDirectoryPath>& tempDirPath,
127+
const std::vector<std::string>& partitionKeys);
128+
116129
void verifyWriter(
117130
const std::vector<RowVectorPtr>& input,
118131
const std::vector<std::string>& names,
@@ -123,7 +136,8 @@ class WriterFuzzer {
123136
const std::vector<std::string>& bucketColumns,
124137
int32_t sortColumnOffset,
125138
const std::vector<std::shared_ptr<const HiveSortingColumn>>& sortBy,
126-
const std::string& outputDirectoryPath);
139+
const std::string& outputDirectoryPath,
140+
const bool writeErrorInjected);
127141

128142
// Generates table column handles based on table column properties
129143
std::unordered_map<std::string, std::shared_ptr<connector::ColumnHandle>>
@@ -244,6 +258,11 @@ class WriterFuzzer {
244258
std::shared_ptr<memory::MemoryPool> writerPool_{
245259
rootPool_->addAggregateChild("writerFuzzerWriter")};
246260
VectorFuzzer vectorFuzzer_;
261+
262+
const std::shared_ptr<FaultyFileSystem> faultyFs_ =
263+
std::dynamic_pointer_cast<FaultyFileSystem>(
264+
filesystems::getFileSystem("/", nullptr));
265+
const std::string injectedErrorMsg_ = "Injected Faulty File Error";
247266
};
248267
} // namespace
249268

@@ -254,9 +273,10 @@ void writerFuzzer(
254273
writerFuzzer.go();
255274
}
256275

257-
std::vector<std::string> listFolders(std::string_view path) {
276+
std::vector<std::string> listFolders(
277+
const std::shared_ptr<FileSystem>& fileSystem,
278+
std::string_view path) {
258279
std::vector<std::string> folders;
259-
auto fileSystem = filesystems::getFileSystem("/", nullptr);
260280
for (auto& p : std::filesystem::recursive_directory_iterator(
261281
fileSystem->extractPath(path))) {
262282
if (p.is_directory())
@@ -340,7 +360,9 @@ void WriterFuzzer::go() {
340360
}
341361
auto input = generateInputData(names, types, partitionOffset);
342362

343-
auto tempDirPath = exec::test::TempDirectoryPath::create();
363+
auto tempDirPath = exec::test::TempDirectoryPath::create(true);
364+
bool writeErrorInjected = injectWriterError(tempDirPath, partitionKeys);
365+
344366
verifyWriter(
345367
input,
346368
names,
@@ -351,7 +373,10 @@ void WriterFuzzer::go() {
351373
bucketColumns,
352374
sortColumnOffset,
353375
sortBy,
354-
tempDirPath->getPath());
376+
tempDirPath->getPath(),
377+
writeErrorInjected);
378+
379+
faultyFs_->clearFileFaultInjections();
355380

356381
LOG(INFO) << "==============================> Done with iteration "
357382
<< iteration++;
@@ -413,6 +438,24 @@ std::vector<RowVectorPtr> WriterFuzzer::generateInputData(
413438
return input;
414439
}
415440

441+
bool WriterFuzzer::injectWriterError(
442+
const std::shared_ptr<TempDirectoryPath>& tempDirPath,
443+
const std::vector<std::string>& partitionKeys) {
444+
if (FLAGS_file_system_error_injection && partitionKeys.empty() &&
445+
vectorFuzzer_.coinToss(0.01)) {
446+
std::exception_ptr fileError;
447+
try {
448+
VELOX_FAIL(injectedErrorMsg_);
449+
} catch (VeloxRuntimeError&) {
450+
fileError = std::current_exception();
451+
}
452+
faultyFs_->setFileInjectionError(
453+
fileError, {tests::utils::FaultFileOperation::Type::kWrite});
454+
return true;
455+
}
456+
return false;
457+
}
458+
416459
void WriterFuzzer::verifyWriter(
417460
const std::vector<RowVectorPtr>& input,
418461
const std::vector<std::string>& names,
@@ -423,7 +466,8 @@ void WriterFuzzer::verifyWriter(
423466
const std::vector<std::string>& bucketColumns,
424467
const int32_t sortColumnOffset,
425468
const std::vector<std::shared_ptr<const HiveSortingColumn>>& sortBy,
426-
const std::string& outputDirectoryPath) {
469+
const std::string& outputDirectoryPath,
470+
const bool writeErrorInjected) {
427471
const auto plan = PlanBuilder()
428472
.values(input)
429473
.tableWrite(
@@ -436,7 +480,17 @@ void WriterFuzzer::verifyWriter(
436480

437481
const auto maxDrivers =
438482
boost::random::uniform_int_distribution<int32_t>(1, 16)(rng_);
439-
const auto result = veloxToPrestoResult(execute(plan, maxDrivers));
483+
RowVectorPtr result;
484+
try {
485+
result = veloxToPrestoResult(execute(plan, maxDrivers));
486+
} catch (VeloxRuntimeError& error) {
487+
if (writeErrorInjected) {
488+
VELOX_CHECK(
489+
error.message() == injectedErrorMsg_,
490+
"write plan failed with different error code");
491+
return;
492+
}
493+
}
440494

441495
const auto dropSql = "DROP TABLE IF EXISTS tmp_write";
442496
const auto sql = referenceQueryRunner_->toSql(plan).value();
@@ -661,8 +715,7 @@ void WriterFuzzer::comparePartitionAndBucket(
661715
// static
662716
std::map<std::string, int32_t> WriterFuzzer::getPartitionNameAndFilecount(
663717
const std::string& tableDirectoryPath) {
664-
auto fileSystem = filesystems::getFileSystem("/", nullptr);
665-
auto directories = listFolders(tableDirectoryPath);
718+
auto directories = listFolders(faultyFs_, tableDirectoryPath);
666719
std::map<std::string, int32_t> partitionNameAndFileCount;
667720

668721
for (std::string directory : directories) {
@@ -672,20 +725,20 @@ std::map<std::string, int32_t> WriterFuzzer::getPartitionNameAndFilecount(
672725
}
673726

674727
// Count non-empty non-hidden files
675-
const auto files = fileSystem->list(directory);
728+
const auto files = faultyFs_->list(directory);
676729
int32_t fileCount = 0;
677730
for (const auto& file : files) {
678731
// Presto query runner sometime creates empty files, ignore those.
679732
if (file.find("/.") == std::string::npos &&
680-
fileSystem->openFileForRead(file)->size() > 0) {
733+
faultyFs_->openFileForRead(file)->size() > 0) {
681734
fileCount++;
682735
}
683736
}
684737

685738
// Remove the path prefix to get the partition name
686739
// For example: /test/tmp_write/p0=1/p1=2020
687740
// partition name is /p0=1/p1=2020
688-
directory.erase(0, fileSystem->extractPath(tableDirectoryPath).length());
741+
directory.erase(0, faultyFs_->extractPath(tableDirectoryPath).length());
689742

690743
partitionNameAndFileCount.emplace(directory, fileCount);
691744
}

velox/exec/fuzzer/WriterFuzzer.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616
#pragma once
1717

18+
#include "velox/common/file/FileSystems.h"
1819
#include "velox/exec/fuzzer/ReferenceQueryRunner.h"
1920
#include "velox/vector/fuzzer/VectorFuzzer.h"
2021

@@ -30,6 +31,8 @@ void writerFuzzer(
3031
std::unique_ptr<ReferenceQueryRunner> referenceQueryRunner);
3132

3233
/// Returns all the folders in a local fs path recursively.
33-
std::vector<std::string> listFolders(std::string_view path);
34+
std::vector<std::string> listFolders(
35+
const std::shared_ptr<facebook::velox::filesystems::FileSystem>& fileSystem,
36+
std::string_view path);
3437

3538
} // namespace facebook::velox::exec::test

velox/exec/fuzzer/WriterFuzzerRunner.h

+4
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@
2323
#include <vector>
2424

2525
#include "velox/common/file/FileSystems.h"
26+
#include "velox/common/file/tests/FaultyFileSystem.h"
2627
#include "velox/connectors/hive/HiveConnector.h"
2728
#include "velox/dwio/common/FileSink.h"
29+
#include "velox/dwio/common/tests/FaultyFileSink.h"
2830
#include "velox/dwio/dwrf/RegisterDwrfReader.h"
2931
#include "velox/dwio/dwrf/RegisterDwrfWriter.h"
3032
#include "velox/exec/fuzzer/FuzzerUtil.h"
@@ -74,6 +76,7 @@ class WriterFuzzerRunner {
7476
size_t seed,
7577
std::unique_ptr<ReferenceQueryRunner> referenceQueryRunner) {
7678
filesystems::registerLocalFileSystem();
79+
tests::utils::registerFaultyFileSystem();
7780
connector::registerConnectorFactory(
7881
std::make_shared<connector::hive::HiveConnectorFactory>());
7982
auto hiveConnector =
@@ -87,6 +90,7 @@ class WriterFuzzerRunner {
8790
dwrf::registerDwrfReaderFactory();
8891
dwrf::registerDwrfWriterFactory();
8992
dwio::common::registerFileSinks();
93+
dwio::common::registerFaultyFileSinks();
9094
facebook::velox::exec::test::writerFuzzer(
9195
seed, std::move(referenceQueryRunner));
9296
// Calling gtest here so that it can be recognized as tests in CI systems.

0 commit comments

Comments
 (0)