Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ORC-1841: [C++][CI] Add UBSAN support to Cmake #2117

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .github/workflows/asan_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

name: Address Sanitizer Tests
name: Address/Undefined Sanitizer Tests

on:
pull_request:
Expand Down Expand Up @@ -47,18 +47,19 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Configure and Build with ASAN
- name: Configure and Build with ASAN and UBSAN
env:
CC: ${{ matrix.cc }}
CXX: ${{ matrix.cxx }}
run: |
mkdir build && cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug -DENABLE_ASAN=ON -DBUILD_JAVA=OFF
cmake .. -DCMAKE_BUILD_TYPE=Debug -DENABLE_ASAN=ON -DENABLE_UBSAN=ON -DBUILD_ENABLE_AVX512=ON -DBUILD_CPP_ENABLE_METRICS=ON -DBUILD_JAVA=OFF
make
- name: Run Tests
working-directory: build
env:
ASAN_OPTIONS: detect_leaks=1:symbolize=1:strict_string_checks=1:halt_on_error=0:detect_container_overflow=0
LSAN_OPTIONS: suppressions=${{ github.workspace }}/.github/lsan-suppressions.txt
UBSAN_OPTIONS: print_stacktrace=1
run: |
ctest --output-on-failure
15 changes: 15 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ option(ORC_ENABLE_CLANG_TOOLS
"Enable Clang tools"
ON)

option(ENABLE_UBSAN
"Enable Undefined Behavior Sanitizer"
OFF)

# Make sure that a build type is selected
if (NOT CMAKE_BUILD_TYPE)
message(STATUS "No build type selected, default to ReleaseWithDebugInfo")
Expand Down Expand Up @@ -171,6 +175,17 @@ if (ENABLE_ASAN)
endif()
endif()

# Configure Undefined Behavior Sanitizer if enabled
if (ENABLE_UBSAN)
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function -fno-sanitize-recover=all")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function -fno-sanitize-recover=all")
message(STATUS "Undefined Behavior Sanitizer enabled")
else()
message(WARNING "Undefined Behavior Sanitizer is only supported for GCC and Clang compilers")
endif()
endif()

enable_testing()

INCLUDE(GNUInstallDirs) # Put it before ThirdpartyToolchain to make CMAKE_INSTALL_LIBDIR available.
Expand Down
2 changes: 2 additions & 0 deletions c++/include/orc/Int128.hh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**

Check notice on line 1 in c++/include/orc/Int128.hh

View workflow job for this annotation

GitHub Actions / cpp-linter

Run clang-format on c++/include/orc/Int128.hh

File c++/include/orc/Int128.hh does not conform to Custom style guidelines. (lines 196, 218)
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
Expand Down Expand Up @@ -193,6 +193,7 @@
* Shift left by the given number of bits.
* Values larger than 2**127 will shift into the sign bit.
*/
__attribute__((no_sanitize("shift")))
Int128& operator<<=(uint32_t bits) {
if (bits != 0) {
if (bits < 64) {
Expand All @@ -214,6 +215,7 @@
* Shift right by the given number of bits. Negative values will
* sign extend and fill with one bits.
*/
__attribute__((no_sanitize("shift")))
Int128& operator>>=(uint32_t bits) {
if (bits != 0) {
if (bits < 64) {
Expand Down
2 changes: 2 additions & 0 deletions c++/src/BloomFilter.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**

Check notice on line 1 in c++/src/BloomFilter.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

Run clang-format on c++/src/BloomFilter.cc

File c++/src/BloomFilter.cc does not conform to Custom style guidelines. (lines 212, 230)
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
Expand Down Expand Up @@ -209,6 +209,7 @@

DIAGNOSTIC_POP

__attribute__((no_sanitize("signed-integer-overflow","shift")))
void BloomFilterImpl::addHash(int64_t hash64) {
int32_t hash1 = static_cast<int32_t>(hash64 & 0xffffffff);
// In Java codes, we use "hash64 >>> 32" which is an unsigned shift op.
Expand All @@ -226,6 +227,7 @@
}
}

__attribute__((no_sanitize("signed-integer-overflow","shift")))
bool BloomFilterImpl::testHash(int64_t hash64) const {
int32_t hash1 = static_cast<int32_t>(hash64 & 0xffffffff);
// In Java codes, we use "hash64 >>> 32" which is an unsigned shift op.
Expand Down
1 change: 1 addition & 0 deletions c++/src/BloomFilter.hh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**

Check notice on line 1 in c++/src/BloomFilter.hh

View workflow job for this annotation

GitHub Actions / cpp-linter

Run clang-format on c++/src/BloomFilter.hh

File c++/src/BloomFilter.hh does not conform to Custom style guidelines. (lines 197)
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
Expand Down Expand Up @@ -194,6 +194,7 @@
// Thomas Wang's integer hash function
// http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm
// Put this in header file so tests can use it as well.
__attribute__((no_sanitize("signed-integer-overflow","shift")))
inline int64_t getLongHash(int64_t key) {
key = (~key) + (key << 21); // key = (key << 21) - key - 1;
key = key ^ (key >> 24);
Expand Down
7 changes: 6 additions & 1 deletion c++/src/ColumnReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,9 @@ namespace orc {
if (totalBytes <= lastBufferLength_) {
// subtract the needed bytes from the ones left over
lastBufferLength_ -= totalBytes;
if (lastBuffer_ == nullptr) {
throw ParseError("StringDirectColumnReader::skip: lastBuffer_ is null");
}
lastBuffer_ += totalBytes;
} else {
// move the stream forward after accounting for the buffered bytes
Expand Down Expand Up @@ -780,7 +783,9 @@ namespace orc {
byteBatch.blob.resize(totalLength);
char* ptr = byteBatch.blob.data();
while (bytesBuffered + lastBufferLength_ < totalLength) {
memcpy(ptr + bytesBuffered, lastBuffer_, lastBufferLength_);
if (lastBuffer_ != nullptr) {
memcpy(ptr + bytesBuffered, lastBuffer_, lastBufferLength_);
}
bytesBuffered += lastBufferLength_;
const void* readBuffer;
int readLength;
Expand Down
4 changes: 2 additions & 2 deletions c++/src/ConvertColumnReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,13 @@ namespace orc {
bool shouldThrow) {
constexpr bool isFileTypeFloatingPoint(std::is_floating_point<FileType>::value);
constexpr bool isReadTypeFloatingPoint(std::is_floating_point<ReadType>::value);
int64_t longValue = static_cast<int64_t>(srcValue);

if (isFileTypeFloatingPoint) {
if (isReadTypeFloatingPoint) {
destValue = static_cast<ReadType>(srcValue);
} else {
if (!canFitInLong(static_cast<double>(srcValue)) ||
!downCastToInteger(destValue, longValue)) {
!downCastToInteger(destValue, static_cast<int64_t>(srcValue))) {
handleOverflow<FileType, ReadType>(destBatch, idx, shouldThrow);
}
}
Expand Down
1 change: 1 addition & 0 deletions c++/src/RLE.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**

Check notice on line 1 in c++/src/RLE.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

Run clang-format on c++/src/RLE.cc

File c++/src/RLE.cc does not conform to Custom style guidelines. (lines 80)
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
Expand Down Expand Up @@ -77,6 +77,7 @@
add<int16_t>(data, numValues, notNull);
}

__attribute__((no_sanitize("shift")))
void RleEncoder::writeVslong(int64_t val) {
writeVulong((val << 1) ^ (val >> 63));
}
Expand Down
1 change: 1 addition & 0 deletions c++/src/RLE.hh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**

Check notice on line 1 in c++/src/RLE.hh

View workflow job for this annotation

GitHub Actions / cpp-linter

Run clang-format on c++/src/RLE.hh

File c++/src/RLE.hh does not conform to Custom style guidelines. (lines 29)
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
Expand Down Expand Up @@ -26,6 +26,7 @@

namespace orc {

__attribute__((no_sanitize("shift")))
inline int64_t zigZag(int64_t value) {
return (value << 1) ^ (value >> 63);
}
Expand Down
Loading