Skip to content

Commit

Permalink
refactor(sparksql): Speed up sparksql compilation by splitting functi…
Browse files Browse the repository at this point in the history
…on registrations (facebookincubator#11565)

Summary:
This PR speeds up sparksql compilation by splitting function
registrations into multiple source files, organized by function type.
It adds a new 'velox_functions_spark' library for registrations and renames
the previous 'velox_functions_spark' library to 'velox_functions_spark_impl'.

Compilation time was measured with the `velox_functions_spark_test` target,
mimicking the usual development cycle: build -> modify a cpp file -> rebuild.
Compilation is about 1.5x faster (165s to 104s) in release mode, and the gain
is larger in debug mode.

Fixes facebookincubator#11564.

Pull Request resolved: facebookincubator#11565

Reviewed By: miaoever, kagamiori

Differential Revision: D66688101

Pulled By: xiaoxmeng

fbshipit-source-id: 54ba372f08c4ec91062b3d07e8e2b81aabbdef59
  • Loading branch information
Yohahaha authored and facebook-github-bot committed Dec 3, 2024
1 parent 46fd360 commit 0bb7e64
Show file tree
Hide file tree
Showing 35 changed files with 947 additions and 778 deletions.
2 changes: 1 addition & 1 deletion pyvelox/signatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
#include "velox/functions/FunctionRegistry.h"
#include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h"
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
#include "velox/functions/sparksql/Register.h"
#include "velox/functions/sparksql/aggregates/Register.h"
#include "velox/functions/sparksql/registration/Register.h"

namespace facebook::velox::py {

Expand Down
2 changes: 1 addition & 1 deletion velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@

#include "velox/exec/fuzzer/ReferenceQueryRunner.h"
#include "velox/expression/fuzzer/FuzzerRunner.h"
#include "velox/functions/sparksql/Register.h"
#include "velox/functions/sparksql/fuzzer/AddSubtractArgGenerator.h"
#include "velox/functions/sparksql/fuzzer/DivideArgGenerator.h"
#include "velox/functions/sparksql/fuzzer/MakeTimestampArgGenerator.h"
#include "velox/functions/sparksql/fuzzer/MultiplyArgGenerator.h"
#include "velox/functions/sparksql/fuzzer/UnscaledValueArgGenerator.h"
#include "velox/functions/sparksql/registration/Register.h"

using namespace facebook::velox::functions::sparksql::fuzzer;
using facebook::velox::fuzzer::ArgGenerator;
Expand Down
2 changes: 1 addition & 1 deletion velox/expression/tests/ExpressionRunnerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#include "velox/exec/fuzzer/ReferenceQueryRunner.h"
#include "velox/expression/tests/ExpressionVerifier.h"
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
#include "velox/functions/sparksql/Register.h"
#include "velox/functions/sparksql/registration/Register.h"
#include "velox/vector/VectorSaver.h"

using namespace facebook::velox;
Expand Down
160 changes: 0 additions & 160 deletions velox/functions/sparksql/Bitwise.cpp

This file was deleted.

109 changes: 106 additions & 3 deletions velox/functions/sparksql/Bitwise.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,115 @@
*/
#pragma once

#include <string>
#include "velox/functions/Macros.h"
#include "velox/functions/lib/RegistrationHelpers.h"

namespace facebook::velox::functions::sparksql {

void registerBitwiseFunctions(const std::string& prefix);
/// Bitwise AND of two operands of the same type.
///
/// `call` writes `a & b` into `result`. The outer template parameter `T` is
/// not used by the body.
template <typename T>
struct BitwiseAndFunction {
  template <typename TValue>
  FOLLY_ALWAYS_INLINE void call(TValue& result, TValue a, TValue b) {
    // For operand types narrower than int the operator yields an int; the
    // cast spells out the narrowing that the assignment performs anyway.
    result = static_cast<TValue>(a & b);
  }
};

/// Bitwise OR of two operands of the same type.
///
/// `call` writes `a | b` into `result`. The outer template parameter `T` is
/// not used by the body.
template <typename T>
struct BitwiseOrFunction {
  template <typename TValue>
  FOLLY_ALWAYS_INLINE void call(TValue& result, TValue a, TValue b) {
    // For operand types narrower than int the operator yields an int; the
    // cast spells out the narrowing that the assignment performs anyway.
    result = static_cast<TValue>(a | b);
  }
};

/// Bitwise exclusive OR of two operands of the same type.
///
/// `call` writes `a ^ b` into `result`. The outer template parameter `T` is
/// not used by the body.
template <typename T>
struct BitwiseXorFunction {
  template <typename TValue>
  FOLLY_ALWAYS_INLINE void call(TValue& result, TValue a, TValue b) {
    // For operand types narrower than int the operator yields an int; the
    // cast spells out the narrowing that the assignment performs anyway.
    result = static_cast<TValue>(a ^ b);
  }
};

/// Bitwise complement of a single operand.
///
/// `call` writes `~a` into `result`. The outer template parameter `T` is not
/// used by the body.
template <typename T>
struct BitwiseNotFunction {
  template <typename TValue>
  FOLLY_ALWAYS_INLINE void call(TValue& result, TValue a) {
    // For operand types narrower than int the operator yields an int; the
    // cast spells out the narrowing that the assignment performs anyway.
    result = static_cast<TValue>(~a);
  }
};

/// Left shift of `a` by `b` bit positions.
///
/// When the shifted operand is int32_t or int64_t, the distance `b` is first
/// reduced into [0, 32) or [0, 64) respectively, so negative and over-wide
/// distances wrap around instead of being passed to the shift operator.
/// For any other operand type `b` is used as given.
///
/// The outer template parameter `T` is not used by the body.
template <typename T>
struct ShiftLeftFunction {
  template <typename TInput1, typename TInput2>
  FOLLY_ALWAYS_INLINE void call(TInput1& result, TInput1 a, TInput2 b) {
    if constexpr (std::is_same_v<TInput1, int32_t>) {
      if (b < 0) {
        // b % 32 lies in (-32, 0]; adding 32 yields (0, 32].
        b = b % 32 + 32;
      }
      if (b >= 32) {
        // Also folds the value 32 produced above back to 0.
        b = b % 32;
      }
    }
    if constexpr (std::is_same_v<TInput1, int64_t>) {
      if (b < 0) {
        // b % 64 lies in (-64, 0]; adding 64 yields (0, 64].
        b = b % 64 + 64;
      }
      if (b >= 64) {
        // Also folds the value 64 produced above back to 0.
        b = b % 64;
      }
    }
    // Shift in the unsigned domain. Before C++20, left-shifting a negative
    // signed value, or shifting a bit into or past the sign bit, is undefined
    // behaviour. The unsigned shift is well defined and yields the same bit
    // pattern, which is then converted back to the signed result type.
    using TUnsigned = std::make_unsigned_t<TInput1>;
    result = static_cast<TInput1>(static_cast<TUnsigned>(a) << b);
  }
};

/// Right shift of `a` by `b` bit positions using the built-in `>>` operator.
///
/// When the shifted operand is int32_t or int64_t, the distance `b` is first
/// reduced into [0, 32) or [0, 64) respectively, so negative and over-wide
/// distances wrap around. For any other operand type `b` is used as given.
///
/// The outer template parameter `T` is not used by the body.
template <typename T>
struct ShiftRightFunction {
  template <typename TInput1, typename TInput2>
  FOLLY_ALWAYS_INLINE void call(TInput1& result, TInput1 a, TInput2 b) {
    constexpr bool kIsInt32 = std::is_same_v<TInput1, int32_t>;
    constexpr bool kIsInt64 = std::is_same_v<TInput1, int64_t>;
    if constexpr (kIsInt32 || kIsInt64) {
      // Bit width of the shifted operand, selected at compile time.
      constexpr int kWidth = kIsInt32 ? 32 : 64;
      if (b < 0) {
        // b % kWidth lies in (-kWidth, 0]; adding kWidth yields (0, kWidth].
        b = b % kWidth + kWidth;
      }
      if (b >= kWidth) {
        // Also folds the value kWidth produced above back to 0.
        b = b % kWidth;
      }
    }
    result = a >> b;
  }
};

/// Writes to `result` the value returned by `bits::countBits` over the low
/// `sizeof(TInput) * CHAR_BIT` bit positions of `num`.
///
/// NOTE(review): `bits::countBits` is defined outside this block; presumably
/// it returns the number of set bits in the half-open range [0, width).
/// Confirm against its declaration.
///
/// The outer template parameter `T` is not used by the body.
template <typename T>
struct BitCountFunction {
  template <typename TInput>
  FOLLY_ALWAYS_INLINE void call(int32_t& result, TInput num) {
    // Widen to one 64-bit word so its address can be handed to the helper.
    // Any bits above the operand's own width are excluded by the end bound.
    uint64_t word = static_cast<uint64_t>(num);
    constexpr int kBitWidth = CHAR_BIT * sizeof(TInput);
    result = bits::countBits(&word, 0, kBitWidth);
  }
};

/// Extracts a single bit of `num`: `result` receives `(num >> pos) & 1`, so
/// position 0 addresses the least significant bit.
///
/// `pos` is validated with VELOX_USER_CHECK_GE / VELOX_USER_CHECK_LT against
/// the range [0, sizeof(TInput) * CHAR_BIT) before the bit is read.
/// NOTE(review): these macros are defined outside this block; presumably they
/// raise a user error when the check fails. Confirm.
///
/// The outer template parameter `T` is not used by the body.
template <typename T>
struct BitGetFunction {
  template <typename TInput>
  FOLLY_ALWAYS_INLINE void call(int8_t& result, TInput num, int32_t pos) {
    // Number of bits in the operand type; valid positions are 0..width-1.
    constexpr int kBitWidth = CHAR_BIT * sizeof(TInput);
    VELOX_USER_CHECK_GE(
        pos,
        0,
        "The value of 'pos' argument must be greater than or equal to zero.");
    VELOX_USER_CHECK_LT(
        pos,
        kBitWidth,
        "The value of 'pos' argument must not exceed the number of bits in 'x' - 1.");
    // Move the requested bit down to position 0, then mask everything else.
    const auto shifted = num >> pos;
    result = shifted & 1;
  }
};

} // namespace facebook::velox::functions::sparksql
23 changes: 6 additions & 17 deletions velox/functions/sparksql/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@

add_subdirectory(specialforms)
velox_add_library(
velox_functions_spark
velox_functions_spark_impl
ArrayGetFunction.cpp
ArraySort.cpp
Bitwise.cpp
Comparisons.cpp
DecimalArithmetic.cpp
DecimalCompare.cpp
Expand All @@ -27,34 +26,22 @@ velox_add_library(
MakeTimestamp.cpp
Map.cpp
RegexFunctions.cpp
Register.cpp
RegisterArithmetic.cpp
RegisterCompare.cpp
Size.cpp
String.cpp
UnscaledValueFunction.cpp)

# GCC 12 has a bug where it does not respect "pragma ignore" directives and ends
# up failing compilation in an openssl header included by a hash-related
# function.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT VELOX_MONO_LIBRARY)
target_compile_options(velox_functions_spark
PRIVATE -Wno-deprecated-declarations)
endif()

velox_link_libraries(
velox_functions_spark
velox_functions_spark_impl
velox_functions_lib
velox_functions_prestosql_impl
velox_functions_spark_specialforms
velox_is_null_functions
velox_functions_util
Folly::folly
simdjson::simdjson)

if(NOT VELOX_MONO_LIBRARY)
set_property(TARGET velox_functions_spark PROPERTY JOB_POOL_COMPILE
high_memory_pool)
set_property(TARGET velox_functions_spark_impl PROPERTY JOB_POOL_COMPILE
high_memory_pool)
endif()

add_subdirectory(window)
Expand All @@ -72,3 +59,5 @@ endif()
if(${VELOX_ENABLE_BENCHMARKS})
add_subdirectory(benchmarks)
endif()

add_subdirectory(registration)
1 change: 1 addition & 0 deletions velox/functions/sparksql/JsonObjectKeys.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
#pragma once

#include "velox/functions/Macros.h"
#include "velox/functions/prestosql/json/SIMDJsonUtil.h"

namespace facebook::velox::functions::sparksql {
Expand Down
Loading

0 comments on commit 0bb7e64

Please sign in to comment.