Skip to content

Commit ce62381

Browse files
committed
Add initial table writer fuzzer
1 parent d726893 commit ce62381

10 files changed

+729
-11
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
=============
2+
Writer Fuzzer
3+
=============
4+
5+
The writer fuzzer tests table write plans with up to 5 regular columns and
6+
up to 3 partition keys.
7+
8+
At each iteration, the fuzzer randomly generates a table write plan with different
9+
table properties. As of now, only partitioned and unpartitioned tables are supported.
10+
11+
The fuzzer then generates inputs and runs the query plan and compares the
12+
results with PrestoDB.
13+
As of now, we compare:
14+
1. How many rows were written.
15+
2. Output directories have the same directory layout and hierarchy.
16+
17+
How to run
18+
----------
19+
20+
Use the velox_writer_fuzzer_test binary to run the writer fuzzer:
21+
22+
::
23+
24+
velox/exec/tests/velox_writer_fuzzer_test
25+
26+
By default, the fuzzer will go through 10 iterations. Use --steps
27+
or --duration_sec flag to run the fuzzer for longer. Use --seed to
28+
reproduce fuzzer failures.
29+
30+
Here is a full list of supported command line arguments.
31+
32+
* ``--steps``: How many iterations to run. Each iteration generates and
33+
evaluates one table writer plan. Default is 10.
34+
35+
* ``--duration_sec``: For how long to run in seconds. If both ``--steps``
36+
and ``--duration_sec`` are specified, ``--duration_sec`` takes precedence.
37+
38+
* ``--seed``: The seed to generate random table write plans and input vectors with.
39+
40+
* ``--batch_size``: The size of input vectors to generate. Default is 100.
41+
42+
* ``--num_batches``: The number of input vectors of size ``--batch_size`` to
43+
generate. Default is 5.
44+
45+
If running from CLion IDE, add ``--logtostderr=1`` to see the full output.

velox/exec/fuzzer/CMakeLists.txt

+11
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,14 @@ target_link_libraries(
5757
velox_expression_test_utility
5858
velox_aggregation_fuzzer_base
5959
velox_temp_path)
60+
61+
add_library(velox_writer_fuzzer WriterFuzzer.cpp)
62+
63+
target_link_libraries(
64+
velox_writer_fuzzer
65+
velox_fuzzer_util
66+
velox_type
67+
velox_vector_fuzzer
68+
velox_exec_test_lib
69+
velox_temp_path
70+
velox_vector_test_lib)

velox/exec/fuzzer/PrestoQueryRunner.cpp

+38-10
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#include "velox/exec/tests/utils/QueryAssertions.h"
2727
#include "velox/serializers/PrestoSerializer.h"
2828
#include "velox/type/parser/TypeParser.h"
29+
#include "velox/velox/connectors/hive/HiveDataSink.h"
30+
#include "velox/velox/connectors/hive/TableHandle.h"
2931

3032
#include <utility>
3133

@@ -35,16 +37,6 @@ namespace facebook::velox::exec::test {
3537

3638
namespace {
3739

38-
template <typename T>
39-
T extractSingleValue(const std::vector<RowVectorPtr>& data) {
40-
VELOX_CHECK_EQ(1, data.size());
41-
VELOX_CHECK_EQ(1, data[0]->childrenSize());
42-
43-
auto simpleVector = data[0]->childAt(0)->as<SimpleVector<T>>();
44-
VELOX_CHECK(!simpleVector->isNullAt(0));
45-
return simpleVector->valueAt(0);
46-
}
47-
4840
void writeToFile(
4941
const std::string& path,
5042
const std::vector<RowVectorPtr>& data,
@@ -174,6 +166,11 @@ std::optional<std::string> PrestoQueryRunner::toSql(
174166
return toSql(aggregationNode);
175167
}
176168

169+
if (auto tableWriteNode =
170+
std::dynamic_pointer_cast<const core::TableWriteNode>(plan)) {
171+
return toSql(tableWriteNode);
172+
}
173+
177174
VELOX_NYI();
178175
}
179176

@@ -500,6 +497,37 @@ std::optional<std::string> PrestoQueryRunner::toSql(
500497
return sql.str();
501498
}
502499

500+
std::optional<std::string> PrestoQueryRunner::toSql(
501+
const std::shared_ptr<const core::TableWriteNode>& tableWriteNode) {
502+
std::vector<std::string> partitionKeys;
503+
auto insertTableHandle =
504+
std::dynamic_pointer_cast<connector::hive::HiveInsertTableHandle>(
505+
tableWriteNode->insertTableHandle()->connectorInsertTableHandle());
506+
507+
// Returns a CREATE sql with specified table properties from TableWriteNode, example sql:
508+
// CREATE TABLE tmp_write (c0 integer, c1 varchar, p0 varchar) WITH (PARTITIONED_BY = ARRAY['p0']);
509+
std::stringstream sql;
510+
sql << "CREATE TABLE tmp_write ( ";
511+
for (auto i = 0; i < tableWriteNode->columnNames().size(); ++i) {
512+
appendComma(i, sql);
513+
sql << tableWriteNode->columnNames()[i] << " " << toTypeSql(tableWriteNode->columns()->childAt(i));
514+
if (insertTableHandle->inputColumns()[i]->isPartitionKey()) {
515+
partitionKeys.push_back(insertTableHandle->inputColumns()[i]->name());
516+
}
517+
}
518+
sql << ")";
519+
520+
if (insertTableHandle->isPartitioned()) {
521+
sql << " WITH (PARTITIONED_BY = ARRAY[";
522+
for (int i = 0; i < partitionKeys.size(); ++i) {
523+
appendComma(i, sql);
524+
sql << "'" << partitionKeys[i] << "'";
525+
}
526+
sql << "])";
527+
}
528+
return sql.str();
529+
}
530+
503531
std::multiset<std::vector<variant>> PrestoQueryRunner::execute(
504532
const std::string& sql,
505533
const std::vector<RowVectorPtr>& input,

velox/exec/fuzzer/PrestoQueryRunner.h

+11-1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@
2323
#include "velox/vector/ComplexVector.h"
2424

2525
namespace facebook::velox::exec::test {
26+
template <typename T>
27+
T extractSingleValue(const std::vector<RowVectorPtr>& data) {
28+
auto simpleVector = data[0]->childAt(0)->as<SimpleVector<T>>();
29+
VELOX_CHECK(!simpleVector->isNullAt(0));
30+
return simpleVector->valueAt(0);
31+
}
32+
2633
class PrestoQueryRunner : public velox::exec::test::ReferenceQueryRunner {
2734
public:
2835
/// @param coordinatorUri Presto REST API endpoint, e.g. http://127.0.0.1:8080
@@ -58,7 +65,7 @@ class PrestoQueryRunner : public velox::exec::test::ReferenceQueryRunner {
5865

5966
/// Executes Presto SQL query and returns the results. Tables referenced by
6067
/// the query must already exist.
61-
std::vector<velox::RowVectorPtr> execute(const std::string& sql);
68+
std::vector<velox::RowVectorPtr> execute(const std::string& sql) override;
6269

6370
bool supportsVeloxVectorResults() const override;
6471

@@ -86,6 +93,9 @@ class PrestoQueryRunner : public velox::exec::test::ReferenceQueryRunner {
8693
std::optional<std::string> toSql(
8794
const std::shared_ptr<const velox::core::ProjectNode>& projectNode);
8895

96+
std::optional<std::string> toSql(
97+
const std::shared_ptr<const core::TableWriteNode>& tableWriteNode);
98+
8999
std::string startQuery(const std::string& sql);
90100

91101
std::string fetchNext(const std::string& nextUri);

velox/exec/fuzzer/ReferenceQueryRunner.h

+4
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ class ReferenceQueryRunner {
5050
const RowTypePtr& resultType) {
5151
VELOX_UNSUPPORTED();
5252
}
53+
54+
/// Executes the given SQL text and returns the results as Velox vectors.
/// This default implementation raises VELOX_UNSUPPORTED; runners that can
/// execute raw SQL (e.g. PrestoQueryRunner) override it.
virtual std::vector<velox::RowVectorPtr> execute(
    const std::string& /*sql*/) {
  VELOX_UNSUPPORTED();
}
5357
};
5458

5559
} // namespace facebook::velox::exec::test

0 commit comments

Comments
 (0)