Skip to content

Commit fb9222f

Browse files
kewang1024facebook-github-bot
authored andcommitted
Add faulty injection in writer fuzzer (facebookincubator#11375)
Summary: Pull Request resolved: facebookincubator#11375 Reviewed By: xiaoxmeng Differential Revision: D65380360 Pulled By: kewang1024
1 parent 789ce65 commit fb9222f

10 files changed

+184
-15
lines changed

velox/common/file/tests/FaultyFileSystem.h

+2-3
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
namespace facebook::velox::tests::utils {
2727

2828
using namespace filesystems;
29-
3029
/// Implements faulty filesystem for io fault injection in unit test. It is a
3130
/// wrapper on top of a real file system, and by default it delegates the
3231
/// file operation to the real file system underneath.
@@ -55,11 +54,11 @@ class FaultyFileSystem : public FileSystem {
5554

5655
std::unique_ptr<ReadFile> openFileForRead(
5756
std::string_view path,
58-
const FileOptions& options) override;
57+
const FileOptions& options = {}) override;
5958

6059
std::unique_ptr<WriteFile> openFileForWrite(
6160
std::string_view path,
62-
const FileOptions& options) override;
61+
const FileOptions& options = {}) override;
6362

6463
void remove(std::string_view path) override;
6564

velox/dwio/common/FileSink.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,17 @@ LocalFileSink::LocalFileSink(const std::string& name, const Options& options)
145145
writeFile_ = fs->openFileForWrite(name_);
146146
}
147147

148+
// Protected constructor for subclasses that may install their own write file.
//
// Creates the parent directory of 'name' if it does not exist yet. The write
// file is opened through the file system registered for 'name' only when
// 'initializeWriter' is true; when it is false 'writeFile_' is left empty and
// the caller (e.g. FaultyFileSink) is responsible for assigning it.
//
// @param name Path of the file to write.
// @param options Sink options forwarded to FileSink.
// @param initializeWriter Whether this constructor opens 'writeFile_'.
LocalFileSink::LocalFileSink(
    const std::string& name,
    const Options& options,
    bool initializeWriter)
    : FileSink{name, options}, writeFile_() {
  const auto dir = fs::path(name_).parent_path();
  if (!fs::exists(dir)) {
    VELOX_CHECK(velox::common::generateFileDirectory(dir.c_str()));
  }
  if (!initializeWriter) {
    // The subclass sets up 'writeFile_' itself.
    return;
  }
  // Honor the flag instead of silently ignoring it: open the file the same
  // way the public two-argument constructor does.
  auto fileSystem = filesystems::getFileSystem(name_, nullptr);
  writeFile_ = fileSystem->openFileForWrite(name_);
}
158+
148159
void LocalFileSink::doClose() {
149160
LOG(INFO) << "closing file: " << name()
150161
<< ", total size: " << succinctBytes(size_);

velox/dwio/common/FileSink.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -166,9 +166,14 @@ class LocalFileSink : public FileSink {
166166
static void registerFactory();
167167

168168
protected:
169+
// 'initializeWriter' is false if it is used by FaultyFileSink, which sets up
170+
// the write file through the faulty filesystem.
171+
LocalFileSink(
172+
const std::string& name,
173+
const Options& options,
174+
bool initializeWriter);
169175
void doClose() override;
170176

171-
private:
172177
std::unique_ptr<WriteFile> writeFile_;
173178
};
174179

velox/dwio/common/tests/CMakeLists.txt

+3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
# limitations under the License.
1414

1515
add_subdirectory(utils)
16+
velox_add_library(velox_dwio_faulty_file_sink FaultyFileSink.cpp)
17+
velox_link_libraries(velox_dwio_faulty_file_sink velox_file_test_utils
18+
velox_dwio_common)
1619

1720
add_executable(
1821
velox_dwio_common_test
+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "velox/dwio/common/tests/FaultyFileSink.h"
18+
#include "velox/common/base/Fs.h"
19+
#include "velox/common/file/FileSystems.h"
20+
#include "velox/common/file/tests/FaultyFileSystem.h"
21+
#include "velox/dwio/common/FileSink.h"
22+
#include "velox/dwio/common/exception/Exception.h"
23+
24+
namespace facebook::velox::dwio::common {
25+
namespace {
26+
using tests::utils::FaultyFileSystem;
27+
28+
std::unique_ptr<FileSink> createFaultyFileSink(
29+
const std::string& filePath,
30+
const FileSink::Options& options) {
31+
if (filePath.find("faulty:") == 0) {
32+
return std::make_unique<FaultyFileSink>(filePath, options);
33+
}
34+
return nullptr;
35+
}
36+
} // namespace
37+
38+
FaultyFileSink::FaultyFileSink(
39+
const std::string& faultyFilePath,
40+
const Options& options)
41+
: LocalFileSink{faultyFilePath.substr(7), options, false},
42+
faultyFilePath_(faultyFilePath) {
43+
auto fs = filesystems::getFileSystem(faultyFilePath_, nullptr);
44+
writeFile_ = fs->openFileForWrite(faultyFilePath_);
45+
}
46+
47+
void registerFaultyFileSinks() {
48+
facebook::velox::dwio::common::FileSink::registerFactory(
49+
(createFaultyFileSink));
50+
}
51+
} // namespace facebook::velox::dwio::common
+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <chrono>
20+
21+
#include "velox/common/config/Config.h"
22+
#include "velox/common/file/File.h"
23+
#include "velox/common/file/tests/FaultyFile.h"
24+
#include "velox/common/io/IoStatistics.h"
25+
#include "velox/dwio/common/Closeable.h"
26+
#include "velox/dwio/common/DataBuffer.h"
27+
#include "velox/dwio/common/FileSink.h"
28+
#include "velox/dwio/common/MetricsLog.h"
29+
30+
namespace facebook::velox::dwio::common {
31+
using namespace facebook::velox::io;
32+
33+
class FaultyFileSink : public LocalFileSink {
34+
public:
35+
FaultyFileSink(const std::string& faultyFilePath, const Options& options);
36+
37+
private:
38+
const std::string faultyFilePath_;
39+
};
40+
41+
void registerFaultyFileSinks();
42+
43+
} // namespace facebook::velox::dwio::common

velox/exec/fuzzer/CMakeLists.txt

+5-2
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ target_link_libraries(
2929
velox_hive_connector
3030
velox_dwio_dwrf_reader
3131
velox_dwio_dwrf_writer
32-
velox_dwio_catalog_fbhive)
32+
velox_dwio_catalog_fbhive
33+
velox_dwio_faulty_file_sink)
3334

3435
add_library(velox_aggregation_fuzzer_base AggregationFuzzerBase.cpp)
3536

@@ -102,7 +103,9 @@ target_link_libraries(
102103
velox_exec_test_lib
103104
velox_expression_test_utility
104105
velox_temp_path
105-
velox_vector_test_lib)
106+
velox_vector_test_lib
107+
velox_dwio_faulty_file_sink
108+
velox_file_test_utils)
106109

107110
add_library(velox_memory_arbitration_fuzzer MemoryArbitrationFuzzer.cpp)
108111

velox/exec/fuzzer/WriterFuzzer.cpp

+54-9
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "velox/common/base/Fs.h"
2323
#include "velox/common/encode/Base64.h"
2424
#include "velox/common/file/FileSystems.h"
25+
#include "velox/common/file/tests/FaultyFileSystem.h"
2526
#include "velox/connectors/hive/HiveConnector.h"
2627
#include "velox/connectors/hive/HiveConnectorSplit.h"
2728
#include "velox/connectors/hive/TableHandle.h"
@@ -36,6 +37,11 @@
3637
#include "velox/vector/VectorSaver.h"
3738
#include "velox/vector/fuzzer/VectorFuzzer.h"
3839

40+
DEFINE_bool(
41+
file_system_error_injection,
42+
true,
43+
"When enabled, inject file system write error with certain possibility");
44+
3945
DEFINE_int32(steps, 10, "Number of plans to generate and test.");
4046

4147
DEFINE_int32(
@@ -63,6 +69,9 @@ using namespace facebook::velox::test;
6369
namespace facebook::velox::exec::test {
6470

6571
namespace {
72+
using facebook::velox::filesystems::FileSystem;
73+
using tests::utils::FaultFileOperation;
74+
using tests::utils::FaultyFileSystem;
6675

6776
class WriterFuzzer {
6877
public:
@@ -123,7 +132,7 @@ class WriterFuzzer {
123132
const std::vector<std::string>& bucketColumns,
124133
int32_t sortColumnOffset,
125134
const std::vector<std::shared_ptr<const HiveSortingColumn>>& sortBy,
126-
const std::string& outputDirectoryPath);
135+
const std::shared_ptr<TempDirectoryPath>& outputDirectoryPath);
127136

128137
// Generates table column handles based on table column properties
129138
std::unordered_map<std::string, std::shared_ptr<connector::ColumnHandle>>
@@ -235,6 +244,12 @@ class WriterFuzzer {
235244
BIGINT(),
236245
VARCHAR()};
237246

247+
const std::shared_ptr<FaultyFileSystem> faultyFs_ =
248+
std::dynamic_pointer_cast<FaultyFileSystem>(
249+
filesystems::getFileSystem("faulty:/tmp", {}));
250+
const std::string injectedErrorMsg_{"Injected Faulty File Error"};
251+
std::atomic<uint64_t> injectedErrorCount_{0};
252+
238253
FuzzerGenerator rng_;
239254
size_t currentSeed_{0};
240255
std::unique_ptr<ReferenceQueryRunner> referenceQueryRunner_;
@@ -292,6 +307,16 @@ void WriterFuzzer::go() {
292307
auto startTime = std::chrono::system_clock::now();
293308
size_t iteration = 0;
294309

310+
// The faulty fs will inject file system write errors with a certain probability.
311+
if (FLAGS_file_system_error_injection) {
312+
faultyFs_->setFileInjectionHook([&](FaultFileOperation* op) {
313+
if (vectorFuzzer_.coinToss(0.01)) {
314+
++injectedErrorCount_;
315+
VELOX_FAIL(injectedErrorMsg_);
316+
}
317+
});
318+
}
319+
295320
while (!isDone(iteration, startTime)) {
296321
LOG(INFO) << "==============================> Started iteration "
297322
<< iteration << " (seed: " << currentSeed_ << ")";
@@ -340,7 +365,9 @@ void WriterFuzzer::go() {
340365
}
341366
auto input = generateInputData(names, types, partitionOffset);
342367

343-
auto tempDirPath = exec::test::TempDirectoryPath::create();
368+
const auto outputDirPath = exec::test::TempDirectoryPath::create(
369+
FLAGS_file_system_error_injection);
370+
344371
verifyWriter(
345372
input,
346373
names,
@@ -351,7 +378,7 @@ void WriterFuzzer::go() {
351378
bucketColumns,
352379
sortColumnOffset,
353380
sortBy,
354-
tempDirPath->getPath());
381+
outputDirPath);
355382

356383
LOG(INFO) << "==============================> Done with iteration "
357384
<< iteration++;
@@ -423,11 +450,11 @@ void WriterFuzzer::verifyWriter(
423450
const std::vector<std::string>& bucketColumns,
424451
const int32_t sortColumnOffset,
425452
const std::vector<std::shared_ptr<const HiveSortingColumn>>& sortBy,
426-
const std::string& outputDirectoryPath) {
453+
const std::shared_ptr<TempDirectoryPath>& outputDirectoryPath) {
427454
const auto plan = PlanBuilder()
428455
.values(input)
429456
.tableWrite(
430-
outputDirectoryPath,
457+
outputDirectoryPath->getPath(),
431458
partitionKeys,
432459
bucketCount,
433460
bucketColumns,
@@ -436,7 +463,22 @@ void WriterFuzzer::verifyWriter(
436463

437464
const auto maxDrivers =
438465
boost::random::uniform_int_distribution<int32_t>(1, 16)(rng_);
439-
const auto result = veloxToPrestoResult(execute(plan, maxDrivers));
466+
RowVectorPtr result;
467+
const uint64_t prevInjectedErrorCount = injectedErrorCount_;
468+
try {
469+
result = veloxToPrestoResult(execute(plan, maxDrivers));
470+
} catch (VeloxRuntimeError& error) {
471+
if (injectedErrorCount_ == prevInjectedErrorCount) {
472+
throw error;
473+
}
474+
VELOX_CHECK_GT(
475+
injectedErrorCount_,
476+
prevInjectedErrorCount,
477+
"Unexpected writer fuzzer failure: {}",
478+
error.message());
479+
VELOX_CHECK_EQ(
480+
error.message(), injectedErrorMsg_, "Unexpected writer fuzzer failure");
481+
}
440482

441483
const auto dropSql = "DROP TABLE IF EXISTS tmp_write";
442484
const auto sql = referenceQueryRunner_->toSql(plan).value();
@@ -465,11 +507,13 @@ void WriterFuzzer::verifyWriter(
465507
const auto referencedOutputDirectoryPath =
466508
getReferenceOutputDirectoryPath(partitionKeys.size());
467509
comparePartitionAndBucket(
468-
outputDirectoryPath, referencedOutputDirectoryPath, bucketCount);
510+
outputDirectoryPath->getDelegatePath(),
511+
referencedOutputDirectoryPath,
512+
bucketCount);
469513
}
470514

471515
// 3. Verifies data itself.
472-
auto splits = makeSplits(outputDirectoryPath);
516+
auto splits = makeSplits(outputDirectoryPath->getDelegatePath());
473517
auto columnHandles =
474518
getTableColumnHandles(names, types, partitionOffset, bucketCount);
475519
const auto rowType = generateOutputType(names, types, bucketCount);
@@ -502,7 +546,8 @@ void WriterFuzzer::verifyWriter(
502546
types.begin() + sortColumnOffset,
503547
types.begin() + sortColumnOffset + sortBy.size()};
504548

505-
// Read from each file and check if data is sorted as presto sorted result.
549+
// Read from each file and check if data is sorted as presto sorted
550+
// result.
506551
for (const auto& split : splits) {
507552
auto splitReadPlan = PlanBuilder()
508553
.tableScan(generateOutputType(

velox/exec/fuzzer/WriterFuzzerRunner.h

+4
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@
2323
#include <vector>
2424

2525
#include "velox/common/file/FileSystems.h"
26+
#include "velox/common/file/tests/FaultyFileSystem.h"
2627
#include "velox/connectors/hive/HiveConnector.h"
2728
#include "velox/dwio/common/FileSink.h"
29+
#include "velox/dwio/common/tests/FaultyFileSink.h"
2830
#include "velox/dwio/dwrf/RegisterDwrfReader.h"
2931
#include "velox/dwio/dwrf/RegisterDwrfWriter.h"
3032
#include "velox/exec/fuzzer/FuzzerUtil.h"
@@ -74,6 +76,7 @@ class WriterFuzzerRunner {
7476
size_t seed,
7577
std::unique_ptr<ReferenceQueryRunner> referenceQueryRunner) {
7678
filesystems::registerLocalFileSystem();
79+
tests::utils::registerFaultyFileSystem();
7780
connector::registerConnectorFactory(
7881
std::make_shared<connector::hive::HiveConnectorFactory>());
7982
auto hiveConnector =
@@ -87,6 +90,7 @@ class WriterFuzzerRunner {
8790
dwrf::registerDwrfReaderFactory();
8891
dwrf::registerDwrfWriterFactory();
8992
dwio::common::registerFileSinks();
93+
dwio::common::registerFaultyFileSinks();
9094
facebook::velox::exec::test::writerFuzzer(
9195
seed, std::move(referenceQueryRunner));
9296
// Calling gtest here so that it can be recognized as tests in CI systems.

velox/exec/tests/utils/TempDirectoryPath.h

+5
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ class TempDirectoryPath {
4545
return path_;
4646
}
4747

48+
/// Returns the actual (delegate) directory path behind getPath() when fault
/// injection is enabled.
49+
const std::string& getDelegatePath() const {
50+
return tempPath_;
51+
}
52+
4853
private:
4954
static std::string createTempDirectory();
5055

0 commit comments

Comments
 (0)