Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions src/paimon/format/orc/complex_predicate_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,6 @@ class ComplexPredicateTest : public ::testing::Test {
void SetUp() override {
pool_ = GetDefaultPool();
batch_size_ = 10;

arrow::FieldVector fields = {
arrow::field("f1", arrow::int32()),
arrow::field("f2", arrow::int32()),
arrow::field("f3", arrow::date32()),
arrow::field("f4", arrow::timestamp(arrow::TimeUnit::NANO)),
arrow::field("f5", arrow::decimal128(23, 5)),
};
}
void TearDown() override {}

Expand Down Expand Up @@ -131,16 +123,33 @@ TEST_F(ComplexPredicateTest, TestSimple) {
arrow::field("f5", arrow::decimal128(23, 5)),
};
auto read_schema = arrow::schema(fields);
auto expected_array = std::dynamic_pointer_cast<arrow::StructArray>(
arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({fields}), R"([
std::shared_ptr<arrow::StructArray> expected_array;
if (::paimon::test::OsReleaseDetector::IsDebian()) {
// refer: https://github.com/eggert/tz/blob/main/asia#L653
// When using the Asia/Shanghai timezone under Debian, timestamps prior to 1901 have an
// additional offset of 5 minutes and 43 seconds
expected_array = std::dynamic_pointer_cast<arrow::StructArray>(
arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({fields}), R"([
[10, 1, 1234, "2033-05-18 03:33:20.0", "123456789987654321.45678"],
[10, 1, 19909, "2033-05-18 03:33:20.000001001", "12.30000"],
[10, 1, 0, "2008-12-28 00:00:00.000123456", null],
[10, 1, 100, "2008-12-28 00:00:00.00012345", "-123.45000"],
[10, 1, null, "1899-01-01 01:05:03.001001001", "0.00000"],
[10, 1, 20006, "2024-10-10 10:10:10.100100100", "1728551410100.10010"]
])")
.ValueOrDie());
} else {
expected_array = std::dynamic_pointer_cast<arrow::StructArray>(
arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({fields}), R"([
[10, 1, 1234, "2033-05-18 03:33:20.0", "123456789987654321.45678"],
[10, 1, 19909, "2033-05-18 03:33:20.000001001", "12.30000"],
[10, 1, 0, "2008-12-28 00:00:00.000123456", null],
[10, 1, 100, "2008-12-28 00:00:00.00012345", "-123.45000"],
[10, 1, null, "1899-01-01 00:59:20.001001001", "0.00000"],
[10, 1, 20006, "2024-10-10 10:10:10.100100100", "1728551410100.10010"]
])")
.ValueOrDie());
.ValueOrDie());
}

// date
{
Expand Down
2 changes: 2 additions & 0 deletions src/paimon/format/orc/orc_file_batch_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ Result<::orc::RowReaderOptions> OrcFileBatchReader::CreateRowReaderOptions(
}
row_reader_options.include(include_fields);
row_reader_options.searchArgument(std::move(search_arg));
// refer: https://github.com/apache/arrow/pull/34591
row_reader_options.setTimezoneName("GMT");

PAIMON_ASSIGN_OR_RAISE(
bool enable_lazy_decoding,
Expand Down
20 changes: 18 additions & 2 deletions src/paimon/format/orc/orc_file_batch_reader_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -499,15 +499,31 @@ TEST_P(OrcFileBatchReaderTest, TestComplexType) {
ASSERT_OK_AND_ASSIGN(auto result_array,
paimon::test::ReadResultCollector::CollectResult(orc_batch_reader.get()));
std::shared_ptr<arrow::ChunkedArray> expected_array;
auto array_status = arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([
arrow::Status array_status;
if (::paimon::test::OsReleaseDetector::IsDebian()) {
// refer: https://github.com/eggert/tz/blob/main/asia#L653
// When using the Asia/Shanghai timezone under Debian, timestamps prior to 1901 have an
// additional offset of 5 minutes and 43 seconds
array_status = arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([
[10, 1, 1234, "2033-05-18 03:33:20.0", "123456789987654321.45678", "add"],
[10, 1, 19909, "2033-05-18 03:33:20.000001001", "12.30000", "cat"],
[10, 1, 0, "2008-12-28 00:00:00.000123456", null, "dad"],
[10, 1, 100, "2008-12-28 00:00:00.00012345", "-123.45000", "eat"],
[10, 1, null, "1899-01-01 01:05:03.001001001", "0.00000", "fat"],
[10, 1, 20006, "2024-10-10 10:10:10.1001001", "1728551410100.10010", null]
])"},
&expected_array);
} else {
array_status = arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([
[10, 1, 1234, "2033-05-18 03:33:20.0", "123456789987654321.45678", "add"],
[10, 1, 19909, "2033-05-18 03:33:20.000001001", "12.30000", "cat"],
[10, 1, 0, "2008-12-28 00:00:00.000123456", null, "dad"],
[10, 1, 100, "2008-12-28 00:00:00.00012345", "-123.45000", "eat"],
[10, 1, null, "1899-01-01 00:59:20.001001001", "0.00000", "fat"],
[10, 1, 20006, "2024-10-10 10:10:10.1001001", "1728551410100.10010", null]
])"},
&expected_array);
&expected_array);
}
ASSERT_TRUE(array_status.ok());
ASSERT_TRUE(result_array->Equals(*expected_array));
}
Expand Down
21 changes: 21 additions & 0 deletions src/paimon/testing/utils/testharness.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#pragma once

#include <filesystem>
#include <fstream>
#include <map>
#include <memory>
#include <string>
Expand All @@ -69,6 +70,26 @@ std::string GetJindoTestDir();

int64_t RandomNumber(int64_t min, int64_t max);

/// Test helper that inspects the host's os-release file to identify the
/// Linux distribution the tests are running on.
class OsReleaseDetector {
 public:
    /// Scans /etc/os-release line by line.
    ///
    /// @return true as soon as a line that begins with "ID=" and contains the
    ///         text "debian" is found; false if no such line exists or the
    ///         file cannot be opened.
    static bool IsDebian() {
        std::ifstream os_release("/etc/os-release");
        if (!os_release.is_open()) {
            return false;
        }

        const std::string id_prefix = "ID=";
        for (std::string entry; std::getline(os_release, entry);) {
            // Only entries whose first characters are "ID=" are inspected;
            // keys such as "ID_LIKE=" or "VERSION_ID=" do not begin with it.
            const bool is_id_entry = entry.compare(0, id_prefix.size(), id_prefix) == 0;
            if (is_id_entry && entry.find("debian") != std::string::npos) {
                return true;
            }
        }
        return false;
    }
};

::testing::AssertionResult AssertStatus(const char* s_expr, const Status& s);

#define ASSERT_OK(expr) \
Expand Down
17 changes: 15 additions & 2 deletions test/inte/read_inte_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -659,13 +659,26 @@ TEST_P(ReadInteTest, TestAppendReadWithComplexTypePredicate) {
DataField::ConvertDataFieldsToArrowStructType(fields_with_row_kind);

std::shared_ptr<arrow::ChunkedArray> expected_array;
auto array_status = arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([
arrow::Status array_status;
if (::paimon::test::OsReleaseDetector::IsDebian() && param.file_format == "orc") {
// refer: https://github.com/eggert/tz/blob/main/asia#L653
// When using the Asia/Shanghai timezone under Debian, timestamps prior to 1901 have an
// additional offset of 5 minutes and 43 seconds
array_status = arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([
[0, "add", 1, "2033-05-18 03:33:20.0", 1234, "123456789987654321.45678"],
[0, "cat", 1, "2033-05-18 03:33:20.000001001", 19909, "12.30000"],
[0, "fat", 1, "1899-01-01 01:05:03.001001001", null, "0.00000"]
])"},
&expected_array);
} else {
array_status = arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([
[0, "add", 1, "2033-05-18 03:33:20.0", 1234, "123456789987654321.45678"],
[0, "cat", 1, "2033-05-18 03:33:20.000001001", 19909, "12.30000"],
[0, "fat", 1, "1899-01-01 00:59:20.001001001", null, "0.00000"],
[0, "bad", 1, "1899-01-01 00:59:20.001001001", -1234, "-123456789987654321.45678"]
])"},
&expected_array);
&expected_array);
}
ASSERT_TRUE(array_status.ok());
ASSERT_TRUE(result_array);
ASSERT_TRUE(result_array->Equals(*expected_array));
Expand Down
Loading