Skip to content

Commit

Permalink
Merge branch 'duckdb:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
meztez authored Jan 7, 2025
2 parents 6a4c099 + bafb8c7 commit cd08fa7
Show file tree
Hide file tree
Showing 50 changed files with 955 additions and 472 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: duckdb
Title: DBI Package for the DuckDB Database Management System
Version: 1.1.3.9032
Version: 1.1.3.9033
Authors@R: c(
person("Hannes", "Mühleisen", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0001-8552-0029")),
Expand Down
17 changes: 17 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
<!-- NEWS.md is maintained by https://fledge.cynkra.com, contributors should not edit this file -->

# duckdb 1.1.3.9033

## vendor

- Update vendored sources to duckdb/duckdb@adc6f607a71b87da2d0a7550e90db623e9bea637 (#959).

- Update vendored sources to duckdb/duckdb@13ba13c121acfb3f4c48c16337297ac705779c19 (#958).

- Update vendored sources to duckdb/duckdb@45462bcffd761b7d797cc1ab660930be62c110cb (#957).

- Update vendored sources to duckdb/duckdb@a0a828b712f538a64263dad251d20a5f91f83a80 (#956).

- Update vendored sources to duckdb/duckdb@2082b55f89fe6e810f982c57dceecbee5ecd40fa (#955).

- Update vendored sources to duckdb/duckdb@7ee114cea8a43d9cdd0f0442cbde05c63a65a9c3 (#954).


# duckdb 1.1.3.9032

## vendor
Expand Down
1 change: 0 additions & 1 deletion src/duckdb/src/common/exception.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ bool Exception::InvalidatesTransaction(ExceptionType exception_type) {

bool Exception::InvalidatesDatabase(ExceptionType exception_type) {
switch (exception_type) {
case ExceptionType::INTERNAL:
case ExceptionType::FATAL:
return true;
default:
Expand Down
43 changes: 41 additions & 2 deletions src/duckdb/src/common/stacktrace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ static string UnmangleSymbol(string symbol) {
}
}
for (idx_t i = mangle_start; i < symbol.size(); i++) {
if (StringUtil::CharacterIsSpace(symbol[i])) {
if (StringUtil::CharacterIsSpace(symbol[i]) || symbol[i] == ')' || symbol[i] == '+') {
mangle_end = i;
break;
}
Expand All @@ -44,6 +44,45 @@ static string UnmangleSymbol(string symbol) {
return result;
}

// Reformats a single resolved stack frame line for display.
// On Apple platforms a frame looks like: [depth] [library] [pointer] [symbol].
// Only [depth] and [symbol] are kept; the depth is padded with spaces up to a fixed
// column width so the text following the pointer lines up across frames.
// On all other platforms, or when the expected layout is not recognised, the line
// is returned unchanged.
static string CleanupStackTrace(string symbol) {
#ifdef __APPLE__
	static constexpr idx_t STACK_TRACE_INDENTATION = 8;
	const idx_t length = symbol.size();

	// the depth is the run of digits at the very start of the line
	idx_t depth_width = 0;
	while (depth_width < length && StringUtil::CharacterIsDigit(symbol[depth_width])) {
		depth_width++;
	}

	// locate the first "0x" after the depth and skip over the hex digits that follow it
	idx_t pointer_end = length;
	for (idx_t pos = depth_width; pos + 1 < length; pos++) {
		if (symbol[pos] != '0' || symbol[pos + 1] != 'x') {
			continue;
		}
		pointer_end = pos + 2;
		while (pointer_end < length && StringUtil::CharacterIsHex(symbol[pointer_end])) {
			pointer_end++;
		}
		break;
	}

	// no pointer found (or nothing follows it), or the depth is too wide to pad:
	// keep the frame exactly as it was
	if (pointer_end == length || depth_width >= STACK_TRACE_INDENTATION) {
		return symbol;
	}

	// [depth] + padding + everything after the pointer
	string cleaned = symbol.substr(0, depth_width);
	cleaned += string(STACK_TRACE_INDENTATION - depth_width, ' ');
	cleaned += symbol.substr(pointer_end);
	return cleaned;
#else
	return symbol;
#endif
}

string StackTrace::GetStacktracePointers(idx_t max_depth) {
string result;
auto callstack = unique_ptr<void *[]>(new void *[max_depth]);
Expand All @@ -68,7 +107,7 @@ string StackTrace::ResolveStacktraceSymbols(const string &pointers) {
string result;
char **strs = backtrace_symbols(callstack.get(), NumericCast<int>(frame_count));
for (idx_t i = 0; i < frame_count; i++) {
result += UnmangleSymbol(strs[i]);
result += CleanupStackTrace(UnmangleSymbol(strs[i]));
result += "\n";
}
free(reinterpret_cast<void *>(strs));
Expand Down
92 changes: 67 additions & 25 deletions src/duckdb/src/common/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -988,6 +988,72 @@ static bool CombineUnequalTypes(const LogicalType &left, const LogicalType &righ
return false;
}

// Combines two STRUCT types into a single STRUCT type, written to `result`.
//
// - If either side is unnamed, the children are combined positionally: both sides must
//   have the same number of children, and the field names are taken from whichever side
//   is named (the right side if the left is unnamed, otherwise the left side).
// - Otherwise the result is the super-set of the fields of both sides: all left fields in
//   left order (combined with the same-named right field where one exists, matched
//   case-insensitively), followed by the right-only fields in right declaration order.
//
// Child types are combined recursively through OP::Operation. Returns false (leaving
// `result` untouched) if the child counts differ in the positional case, or if
// OP::Operation fails for any pair of children.
template <class OP>
static bool CombineStructTypes(const LogicalType &left, const LogicalType &right, LogicalType &result) {
	auto &left_children = StructType::GetChildTypes(left);
	auto &right_children = StructType::GetChildTypes(right);

	auto left_unnamed = StructType::IsUnnamed(left);
	auto is_unnamed = left_unnamed || StructType::IsUnnamed(right);
	child_list_t<LogicalType> child_types;

	// At least one side is unnamed, so we attempt positional casting.
	if (is_unnamed) {
		if (left_children.size() != right_children.size()) {
			// We can't cast, or create the super-set.
			return false;
		}

		for (idx_t i = 0; i < left_children.size(); i++) {
			LogicalType child_type;
			if (!OP::Operation(left_children[i].second, right_children[i].second, child_type)) {
				return false;
			}
			// Prefer the names of the named side.
			auto &child_name = left_unnamed ? right_children[i].first : left_children[i].first;
			child_types.emplace_back(child_name, std::move(child_type));
		}
		result = LogicalType::STRUCT(child_types);
		return true;
	}

	// Create a super-set of the STRUCT fields.
	// First, create a name->index map of the right children.
	case_insensitive_map_t<idx_t> right_children_map;
	for (idx_t i = 0; i < right_children.size(); i++) {
		auto &name = right_children[i].first;
		right_children_map[name] = i;
	}

	for (idx_t i = 0; i < left_children.size(); i++) {
		auto &left_child = left_children[i];
		auto right_child_it = right_children_map.find(left_child.first);

		if (right_child_it == right_children_map.end()) {
			// We can directly put the left child.
			child_types.emplace_back(left_child.first, left_child.second);
			continue;
		}

		// We need to recurse to ensure the children have a maximum logical type.
		LogicalType child_type;
		auto &right_child = right_children[right_child_it->second];
		if (!OP::Operation(left_child.second, right_child.second, child_type)) {
			return false;
		}
		child_types.emplace_back(left_child.first, std::move(child_type));
		// Mark the right child as consumed so it is not appended again below.
		right_children_map.erase(right_child_it);
	}

	// Add all remaining right children.
	// Walk the right children by index instead of iterating the map, so the resulting field
	// order follows the declaration order of the right STRUCT rather than the map's iteration
	// order (NOTE(review): case_insensitive_map_t is presumably hash-based - confirm).
	// The index comparison keeps exactly the entries the map still refers to.
	for (idx_t i = 0; i < right_children.size(); i++) {
		auto &right_child = right_children[i];
		auto remaining_it = right_children_map.find(right_child.first);
		if (remaining_it == right_children_map.end() || remaining_it->second != i) {
			continue;
		}
		child_types.emplace_back(right_child.first, right_child.second);
	}

	result = LogicalType::STRUCT(child_types);
	return true;
}

template <class OP>
static bool CombineEqualTypes(const LogicalType &left, const LogicalType &right, LogicalType &result) {
// Since both left and right are equal we get the left type as our type_id for checks
Expand Down Expand Up @@ -1059,31 +1125,7 @@ static bool CombineEqualTypes(const LogicalType &left, const LogicalType &right,
return true;
}
case LogicalTypeId::STRUCT: {
// struct: perform recursively on each child
auto &left_child_types = StructType::GetChildTypes(left);
auto &right_child_types = StructType::GetChildTypes(right);
bool left_unnamed = StructType::IsUnnamed(left);
auto any_unnamed = left_unnamed || StructType::IsUnnamed(right);
if (left_child_types.size() != right_child_types.size()) {
// child types are not of equal size, we can't cast
// return false
return false;
}
child_list_t<LogicalType> child_types;
for (idx_t i = 0; i < left_child_types.size(); i++) {
LogicalType child_type;
// Child names must be in the same order OR either one of the structs must be unnamed
if (!any_unnamed && !StringUtil::CIEquals(left_child_types[i].first, right_child_types[i].first)) {
return false;
}
if (!OP::Operation(left_child_types[i].second, right_child_types[i].second, child_type)) {
return false;
}
auto &child_name = left_unnamed ? right_child_types[i].first : left_child_types[i].first;
child_types.emplace_back(child_name, std::move(child_type));
}
result = LogicalType::STRUCT(child_types);
return true;
return CombineStructTypes<OP>(left, right, result);
}
case LogicalTypeId::UNION: {
auto left_member_count = UnionType::GetMemberCount(left);
Expand Down
54 changes: 30 additions & 24 deletions src/duckdb/src/common/types/vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ UnifiedVectorFormat &UnifiedVectorFormat::operator=(UnifiedVectorFormat &&other)
return *this;
}

Vector::Vector(LogicalType type_p, bool create_data, bool zero_data, idx_t capacity)
Vector::Vector(LogicalType type_p, bool create_data, bool initialize_to_zero, idx_t capacity)
: vector_type(VectorType::FLAT_VECTOR), type(std::move(type_p)), data(nullptr), validity(capacity) {
if (create_data) {
Initialize(zero_data, capacity);
Initialize(initialize_to_zero, capacity);
}
}

Expand Down Expand Up @@ -306,7 +306,7 @@ void Vector::Slice(const SelectionVector &sel, idx_t count, SelCache &cache) {
}
}

void Vector::Initialize(bool zero_data, idx_t capacity) {
void Vector::Initialize(bool initialize_to_zero, idx_t capacity) {
auxiliary.reset();
validity.Reset();
auto &type = GetType();
Expand All @@ -325,7 +325,7 @@ void Vector::Initialize(bool zero_data, idx_t capacity) {
if (type_size > 0) {
buffer = VectorBuffer::CreateStandardVector(type, capacity);
data = buffer->GetData();
if (zero_data) {
if (initialize_to_zero) {
memset(data, 0, capacity * type_size);
}
}
Expand Down Expand Up @@ -1374,10 +1374,10 @@ void Vector::Deserialize(Deserializer &deserializer, idx_t count) {
}

void Vector::SetVectorType(VectorType vector_type_p) {
this->vector_type = vector_type_p;
vector_type = vector_type_p;
auto physical_type = GetType().InternalType();
if (TypeIsConstantSize(physical_type) &&
(GetVectorType() == VectorType::CONSTANT_VECTOR || GetVectorType() == VectorType::FLAT_VECTOR)) {
auto flat_or_const = GetVectorType() == VectorType::CONSTANT_VECTOR || GetVectorType() == VectorType::FLAT_VECTOR;
if (TypeIsConstantSize(physical_type) && flat_or_const) {
auxiliary.reset();
}
if (vector_type == VectorType::CONSTANT_VECTOR && physical_type == PhysicalType::STRUCT) {
Expand Down Expand Up @@ -1782,23 +1782,29 @@ void Vector::DebugShuffleNestedVector(Vector &vector, idx_t count) {
void FlatVector::SetNull(Vector &vector, idx_t idx, bool is_null) {
D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR);
vector.validity.Set(idx, !is_null);
if (is_null) {
auto &type = vector.GetType();
auto internal_type = type.InternalType();
if (internal_type == PhysicalType::STRUCT) {
// set all child entries to null as well
auto &entries = StructVector::GetEntries(vector);
for (auto &entry : entries) {
FlatVector::SetNull(*entry, idx, is_null);
}
} else if (internal_type == PhysicalType::ARRAY) {
// set the child element in the array to null as well
auto &child = ArrayVector::GetEntry(vector);
auto array_size = ArrayType::GetSize(type);
auto child_offset = idx * array_size;
for (idx_t i = 0; i < array_size; i++) {
FlatVector::SetNull(child, child_offset + i, is_null);
}
if (!is_null) {
return;
}

auto &type = vector.GetType();
auto internal_type = type.InternalType();

// Set all child entries to NULL.
if (internal_type == PhysicalType::STRUCT) {
auto &entries = StructVector::GetEntries(vector);
for (auto &entry : entries) {
FlatVector::SetNull(*entry, idx, is_null);
}
return;
}

// Set all child entries to NULL.
if (internal_type == PhysicalType::ARRAY) {
auto &child = ArrayVector::GetEntry(vector);
auto array_size = ArrayType::GetSize(type);
auto child_offset = idx * array_size;
for (idx_t i = 0; i < array_size; i++) {
FlatVector::SetNull(child, child_offset + i, is_null);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,19 @@ bool LineError::HandleErrors(StringValueResult &result) {
result.state_machine.options, cur_error.current_line_size, lines_per_batch, borked_line,
result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl), result.path);
break;
case INVALID_STATE:
if (result.current_line_position.begin == line_pos) {
csv_error = CSVError::InvalidState(
result.state_machine.options, col_idx, lines_per_batch, borked_line,
result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
line_pos.GetGlobalPosition(result.requested_size, first_nl), result.path);
} else {
csv_error = CSVError::InvalidState(
result.state_machine.options, col_idx, lines_per_batch, borked_line,
result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
line_pos.GetGlobalPosition(result.requested_size), result.path);
}
break;
default:
throw InvalidInputException("CSV Error not allowed when inserting row");
}
Expand Down Expand Up @@ -878,7 +891,11 @@ bool StringValueResult::AddRow(StringValueResult &result, const idx_t buffer_pos
}

void StringValueResult::InvalidState(StringValueResult &result) {
result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id, result.last_position);
if (result.quoted) {
result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id, result.last_position);
} else {
result.current_errors.Insert(INVALID_STATE, result.cur_col_id, result.chunk_col_id, result.last_position);
}
}

bool StringValueResult::EmptyLine(StringValueResult &result, const idx_t buffer_pos) {
Expand Down Expand Up @@ -1724,11 +1741,18 @@ void StringValueScanner::FinalizeChunkProcess() {
// If we are not done we have two options.
// 1) If a boundary is set.
if (iterator.IsBoundarySet()) {
bool has_unterminated_quotes = false;
if (!result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
bool found_error = false;
CSVErrorType type;
if (!result.current_errors.HasErrorType(UNTERMINATED_QUOTES) &&
!result.current_errors.HasErrorType(INVALID_STATE)) {
iterator.done = true;
} else {
has_unterminated_quotes = true;
found_error = true;
if (result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
type = UNTERMINATED_QUOTES;
} else {
type = INVALID_STATE;
}
}
// We read until the next line or until we have nothing else to read.
// Move to next buffer
Expand All @@ -1747,18 +1771,21 @@ void StringValueScanner::FinalizeChunkProcess() {
}
} else {
if (result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
has_unterminated_quotes = true;
found_error = true;
type = UNTERMINATED_QUOTES;
} else if (result.current_errors.HasErrorType(INVALID_STATE)) {
found_error = true;
type = INVALID_STATE;
}
if (result.current_errors.HandleErrors(result)) {
result.number_of_rows++;
}
}
if (states.IsQuotedCurrent() && !has_unterminated_quotes &&
if (states.IsQuotedCurrent() && !found_error &&
state_machine->dialect_options.state_machine_options.rfc_4180.GetValue()) {
// If we finish the execution of a buffer, and we end in a quoted state, it means we have unterminated
// quotes
result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id,
result.last_position);
result.current_errors.Insert(type, result.cur_col_id, result.chunk_col_id, result.last_position);
if (result.current_errors.HandleErrors(result)) {
result.number_of_rows++;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
transition_array[static_cast<uint8_t>('\r')][state] = CSVState::CARRIAGE_RETURN;
if (state == static_cast<uint8_t>(CSVState::STANDARD_NEWLINE)) {
transition_array[static_cast<uint8_t>('\n')][state] = CSVState::STANDARD;
} else if (!state_machine_options.rfc_4180.GetValue()) {
transition_array[static_cast<uint8_t>('\n')][state] = CSVState::RECORD_SEPARATOR;
} else {
if (!state_machine_options.rfc_4180.GetValue()) {
transition_array[static_cast<uint8_t>('\n')][state] = CSVState::RECORD_SEPARATOR;
}
transition_array[static_cast<uint8_t>('\n')][state] = CSVState::INVALID;
}
} else {
transition_array[static_cast<uint8_t>('\r')][state] = CSVState::RECORD_SEPARATOR;
Expand Down
Loading

0 comments on commit cd08fa7

Please sign in to comment.