Skip to content

Commit f9a910d

Browse files
authored
Merge pull request #68 from carlopi/bump_duckdb
Vendor DuckDB sources, bumping to duckdb's version v0.10.1
2 parents 00bb900 + 3d1ea37 commit f9a910d

File tree

699 files changed

+38049
-29164
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

699 files changed

+38049
-29164
lines changed

binding.gyp

+4-1
Original file line number | Diff line number | Diff line change
@@ -389,6 +389,9 @@
389389
{
390390
"defines": [
391391
"DUCKDB_BUILD_LIBRARY"
392+
],
393+
"libraries": [
394+
"rstrtmgr.lib", "bcrypt.lib"
392395
]
393396
}
394397
]
@@ -411,4 +414,4 @@
411414
]
412415
}
413416
]
414-
}
417+
}

src/duckdb/extension/icu/icu-strptime.cpp

+46-4
Original file line number | Diff line number | Diff line change
@@ -246,7 +246,7 @@ struct ICUStrptime : public ICUDateFunc {
246246
TailPatch(name, db, types);
247247
}
248248

249-
static bool CastFromVarchar(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
249+
static bool VarcharToTimestampTZ(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
250250
auto &cast_data = parameters.cast_data->Cast<CastData>();
251251
auto &info = cast_data.info->Cast<BindData>();
252252
CalendarPtr cal(info.calendar->clone());
@@ -260,7 +260,7 @@ struct ICUStrptime : public ICUDateFunc {
260260
bool has_offset = false;
261261
if (!Timestamp::TryConvertTimestampTZ(str, len, result, has_offset, tz)) {
262262
auto msg = Timestamp::ConversionError(string(str, len));
263-
HandleCastError::AssignError(msg, parameters.error_message);
263+
HandleCastError::AssignError(msg, parameters);
264264
mask.SetInvalid(idx);
265265
} else if (!has_offset) {
266266
// Convert parts to a TZ (default or parsed) if no offset was provided
@@ -280,22 +280,64 @@ struct ICUStrptime : public ICUDateFunc {
280280
return true;
281281
}
282282

283+
static bool VarcharToTimeTZ(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
284+
auto &cast_data = parameters.cast_data->Cast<CastData>();
285+
auto &info = cast_data.info->Cast<BindData>();
286+
CalendarPtr cal(info.calendar->clone());
287+
288+
UnaryExecutor::ExecuteWithNulls<string_t, dtime_tz_t>(
289+
source, result, count, [&](string_t input, ValidityMask &mask, idx_t idx) {
290+
dtime_tz_t result;
291+
const auto str = input.GetData();
292+
const auto len = input.GetSize();
293+
bool has_offset = false;
294+
idx_t pos = 0;
295+
if (!Time::TryConvertTimeTZ(str, len, pos, result, has_offset, false)) {
296+
auto msg = Time::ConversionError(string(str, len));
297+
HandleCastError::AssignError(msg, parameters);
298+
mask.SetInvalid(idx);
299+
} else if (!has_offset) {
300+
// Convert parts to a TZ (default or parsed) if no offset was provided
301+
auto calendar = cal.get();
302+
303+
// Extract the offset from the calendar
304+
auto offset = ExtractField(calendar, UCAL_ZONE_OFFSET);
305+
offset += ExtractField(calendar, UCAL_DST_OFFSET);
306+
offset /= Interval::MSECS_PER_SEC;
307+
308+
// Apply it to the offset +00 time we parsed.
309+
result = dtime_tz_t(result.time(), offset);
310+
}
311+
312+
return result;
313+
});
314+
return true;
315+
}
316+
283317
static BoundCastInfo BindCastFromVarchar(BindCastInput &input, const LogicalType &source,
284318
const LogicalType &target) {
285319
if (!input.context) {
286-
throw InternalException("Missing context for VARCHAR to TIMESTAMPTZ cast.");
320+
throw InternalException("Missing context for VARCHAR to TIME/TIMESTAMPTZ cast.");
287321
}
288322

289323
auto cast_data = make_uniq<CastData>(make_uniq<BindData>(*input.context));
290324

291-
return BoundCastInfo(CastFromVarchar, std::move(cast_data));
325+
switch (target.id()) {
326+
case LogicalTypeId::TIMESTAMP_TZ:
327+
return BoundCastInfo(VarcharToTimestampTZ, std::move(cast_data));
328+
case LogicalTypeId::TIME_TZ:
329+
return BoundCastInfo(VarcharToTimeTZ, std::move(cast_data));
330+
default:
331+
throw InternalException("Unsupported type for VARCHAR to TIME/TIMESTAMPTZ cast.");
332+
}
292333
}
293334

294335
static void AddCasts(DatabaseInstance &db) {
295336
auto &config = DBConfig::GetConfig(db);
296337
auto &casts = config.GetCastFunctions();
297338

298339
casts.RegisterCastFunction(LogicalType::VARCHAR, LogicalType::TIMESTAMP_TZ, BindCastFromVarchar);
340+
casts.RegisterCastFunction(LogicalType::VARCHAR, LogicalType::TIME_TZ, BindCastFromVarchar);
299341
}
300342
};
301343

src/duckdb/extension/icu/icu-table-range.cpp

+5
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,11 @@ struct ICUTableRange {
9090

9191
auto &inputs = input.inputs;
9292
D_ASSERT(inputs.size() == 3);
93+
for (const auto &value : inputs) {
94+
if (value.IsNull()) {
95+
throw BinderException("RANGE with NULL bounds is not supported");
96+
}
97+
}
9398
result->start = inputs[0].GetValue<timestamp_t>();
9499
result->end = inputs[1].GetValue<timestamp_t>();
95100
result->increment = inputs[2].GetValue<interval_t>();

src/duckdb/extension/icu/icu-timezone.cpp

+28-4
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
99
#include "include/icu-datefunc.hpp"
1010
#include "duckdb/transaction/meta_transaction.hpp"
11+
#include "duckdb/common/operator/cast_operators.hpp"
1112

1213
namespace duckdb {
1314

@@ -138,13 +139,23 @@ struct ICUFromNaiveTimestamp : public ICUDateFunc {
138139
return GetTime(calendar, micros);
139140
}
140141

142+
struct CastTimestampUsToUs {
143+
template <class SRC, class DST>
144+
static inline DST Operation(SRC input) {
145+
// no-op
146+
return input;
147+
}
148+
};
149+
150+
template <class OP>
141151
static bool CastFromNaive(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
142152
auto &cast_data = parameters.cast_data->Cast<CastData>();
143153
auto &info = cast_data.info->Cast<BindData>();
144154
CalendarPtr calendar(info.calendar->clone());
145155

146-
UnaryExecutor::Execute<timestamp_t, timestamp_t>(
147-
source, result, count, [&](timestamp_t input) { return Operation(calendar.get(), input); });
156+
UnaryExecutor::Execute<timestamp_t, timestamp_t>(source, result, count, [&](timestamp_t input) {
157+
return Operation(calendar.get(), OP::template Operation<timestamp_t, timestamp_t>(input));
158+
});
148159
return true;
149160
}
150161

@@ -154,15 +165,28 @@ struct ICUFromNaiveTimestamp : public ICUDateFunc {
154165
}
155166

156167
auto cast_data = make_uniq<CastData>(make_uniq<BindData>(*input.context));
157-
158-
return BoundCastInfo(CastFromNaive, std::move(cast_data));
168+
switch (source.id()) {
169+
case LogicalTypeId::TIMESTAMP:
170+
return BoundCastInfo(CastFromNaive<CastTimestampUsToUs>, std::move(cast_data));
171+
case LogicalTypeId::TIMESTAMP_MS:
172+
return BoundCastInfo(CastFromNaive<CastTimestampMsToUs>, std::move(cast_data));
173+
case LogicalTypeId::TIMESTAMP_NS:
174+
return BoundCastInfo(CastFromNaive<CastTimestampNsToUs>, std::move(cast_data));
175+
case LogicalTypeId::TIMESTAMP_SEC:
176+
return BoundCastInfo(CastFromNaive<CastTimestampSecToUs>, std::move(cast_data));
177+
default:
178+
throw InternalException("Type %s not handled in BindCastFromNaive", LogicalTypeIdToString(source.id()));
179+
}
159180
}
160181

161182
static void AddCasts(DatabaseInstance &db) {
162183
auto &config = DBConfig::GetConfig(db);
163184
auto &casts = config.GetCastFunctions();
164185

165186
casts.RegisterCastFunction(LogicalType::TIMESTAMP, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
187+
casts.RegisterCastFunction(LogicalType::TIMESTAMP_MS, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
188+
casts.RegisterCastFunction(LogicalType::TIMESTAMP_NS, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
189+
casts.RegisterCastFunction(LogicalType::TIMESTAMP_S, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
166190
}
167191
};
168192

src/duckdb/extension/json/buffered_json_reader.cpp

+6-5
Original file line number | Diff line number | Diff line change
@@ -304,11 +304,11 @@ idx_t BufferedJSONReader::GetLineNumber(idx_t buf_index, idx_t line_or_object_in
304304
line += buffer_line_or_object_counts[b_idx];
305305
}
306306
}
307-
}
308-
if (can_throw) {
309-
thrown = true;
310-
// SQL uses 1-based indexing so I guess we will do that in our exception here as well
311-
return line + 1;
307+
if (can_throw) {
308+
thrown = true;
309+
// SQL uses 1-based indexing so I guess we will do that in our exception here as well
310+
return line + 1;
311+
}
312312
}
313313
TaskScheduler::YieldThread();
314314
}
@@ -331,6 +331,7 @@ void BufferedJSONReader::ThrowTransformError(idx_t buf_index, idx_t line_or_obje
331331
}
332332

333333
double BufferedJSONReader::GetProgress() const {
334+
lock_guard<mutex> guard(lock);
334335
if (HasFileHandle()) {
335336
return 100.0 - 100.0 * double(file_handle->Remaining()) / double(file_handle->FileSize());
336337
} else {

src/duckdb/extension/json/include/buffered_json_reader.hpp

+3-3
Original file line number | Diff line number | Diff line change
@@ -90,9 +90,9 @@ struct JSONFileHandle {
9090

9191
//! Read properties
9292
idx_t read_position;
93-
idx_t requested_reads;
93+
atomic<idx_t> requested_reads;
9494
atomic<idx_t> actual_reads;
95-
bool last_read_requested;
95+
atomic<bool> last_read_requested;
9696

9797
//! Cached buffers for resetting when reading stream
9898
vector<AllocatedData> cached_buffers;
@@ -161,7 +161,7 @@ class BufferedJSONReader {
161161
bool thrown;
162162

163163
public:
164-
mutex lock;
164+
mutable mutex lock;
165165
MultiFileReaderData reader_data;
166166
};
167167

src/duckdb/extension/json/include/json_transform.hpp

+2
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,8 @@ struct JSONTransformOptions {
4040
string error_message;
4141
//! Index of the object where the error occurred
4242
idx_t object_index = DConstants::INVALID_INDEX;
43+
//! Cast parameters
44+
CastParameters parameters;
4345

4446
public:
4547
void Serialize(Serializer &serializer) const;

src/duckdb/extension/json/json_functions.cpp

+1-2
Original file line number | Diff line number | Diff line change
@@ -224,8 +224,7 @@ static bool CastVarcharToJSON(Vector &source, Vector &result, idx_t count, CastP
224224
if (!doc) {
225225
mask.SetInvalid(idx);
226226
if (success) {
227-
HandleCastError::AssignError(JSONCommon::FormatParseError(data, length, error),
228-
parameters.error_message);
227+
HandleCastError::AssignError(JSONCommon::FormatParseError(data, length, error), parameters);
229228
success = false;
230229
}
231230
}

src/duckdb/extension/json/json_functions/json_create.cpp

+6
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,9 @@ static LogicalType GetJSONType(StructNames &const_struct_names, const LogicalTyp
6161
// The nested types need to conform as well
6262
case LogicalTypeId::LIST:
6363
return LogicalType::LIST(GetJSONType(const_struct_names, ListType::GetChildType(type)));
64+
case LogicalTypeId::ARRAY:
65+
return LogicalType::ARRAY(GetJSONType(const_struct_names, ArrayType::GetChildType(type)),
66+
ArrayType::GetSize(type));
6467
// Struct and MAP are treated as JSON values
6568
case LogicalTypeId::STRUCT: {
6669
child_list_t<LogicalType> child_types;
@@ -435,6 +438,9 @@ static void CreateValuesList(const StructNames &names, yyjson_mut_doc *doc, yyjs
435438

436439
static void CreateValuesArray(const StructNames &names, yyjson_mut_doc *doc, yyjson_mut_val *vals[], Vector &value_v,
437440
idx_t count) {
441+
442+
value_v.Flatten(count);
443+
438444
// Initialize array for the nested values
439445
auto &child_v = ArrayVector::GetEntry(value_v);
440446
auto array_size = ArrayType::GetSize(value_v.GetType());

src/duckdb/extension/json/json_functions/json_transform.cpp

+8-9
Original file line number | Diff line number | Diff line change
@@ -13,13 +13,13 @@
1313

1414
namespace duckdb {
1515

16-
JSONTransformOptions::JSONTransformOptions() {
16+
JSONTransformOptions::JSONTransformOptions() : parameters(false, &error_message) {
1717
}
1818

1919
JSONTransformOptions::JSONTransformOptions(bool strict_cast_p, bool error_duplicate_key_p, bool error_missing_key_p,
2020
bool error_unkown_key_p)
2121
: strict_cast(strict_cast_p), error_duplicate_key(error_duplicate_key_p), error_missing_key(error_missing_key_p),
22-
error_unknown_key(error_unkown_key_p) {
22+
error_unknown_key(error_unkown_key_p), parameters(false, &error_message) {
2323
}
2424

2525
//! Forward declaration for recursion
@@ -135,25 +135,24 @@ static inline bool GetValueDecimal(yyjson_val *val, T &result, uint8_t w, uint8_
135135
bool success;
136136
switch (unsafe_yyjson_get_tag(val)) {
137137
case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
138-
success = OP::template Operation<string_t, T>(GetString(val), result, &options.error_message, w, s);
138+
success = OP::template Operation<string_t, T>(GetString(val), result, options.parameters, w, s);
139139
break;
140140
case YYJSON_TYPE_ARR | YYJSON_SUBTYPE_NONE:
141141
case YYJSON_TYPE_OBJ | YYJSON_SUBTYPE_NONE:
142142
success = false;
143143
break;
144144
case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE:
145145
case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE:
146-
success = OP::template Operation<bool, T>(unsafe_yyjson_get_bool(val), result, &options.error_message, w, s);
146+
success = OP::template Operation<bool, T>(unsafe_yyjson_get_bool(val), result, options.parameters, w, s);
147147
break;
148148
case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT:
149-
success =
150-
OP::template Operation<uint64_t, T>(unsafe_yyjson_get_uint(val), result, &options.error_message, w, s);
149+
success = OP::template Operation<uint64_t, T>(unsafe_yyjson_get_uint(val), result, options.parameters, w, s);
151150
break;
152151
case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT:
153-
success = OP::template Operation<int64_t, T>(unsafe_yyjson_get_sint(val), result, &options.error_message, w, s);
152+
success = OP::template Operation<int64_t, T>(unsafe_yyjson_get_sint(val), result, options.parameters, w, s);
154153
break;
155154
case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL:
156-
success = OP::template Operation<double, T>(unsafe_yyjson_get_real(val), result, &options.error_message, w, s);
155+
success = OP::template Operation<double, T>(unsafe_yyjson_get_real(val), result, options.parameters, w, s);
157156
break;
158157
default:
159158
throw InternalException("Unknown yyjson tag in GetValueString");
@@ -982,7 +981,7 @@ static bool JSONToAnyCast(Vector &source, Vector &result, idx_t count, CastParam
982981

983982
auto success = TransformFunctionInternal(source, count, result, alc, options);
984983
if (!success) {
985-
HandleCastError::AssignError(options.error_message, parameters.error_message);
984+
HandleCastError::AssignError(options.error_message, parameters);
986985
}
987986
return success;
988987
}

src/duckdb/extension/json/json_scan.cpp

+1
Original file line number | Diff line number | Diff line change
@@ -569,6 +569,7 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
569569
if (file_done) {
570570
lock_guard<mutex> guard(gstate.lock);
571571
TryIncrementFileIndex(gstate);
572+
lock_guard<mutex> reader_guard(current_reader->lock);
572573
current_reader->GetFileHandle().Close();
573574
}
574575

src/duckdb/extension/parquet/column_reader.cpp

+17-1
Original file line number | Diff line number | Diff line change
@@ -1004,7 +1004,23 @@ idx_t CastColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data
10041004
}
10051005
}
10061006
}
1007-
VectorOperations::DefaultCast(intermediate_vector, result, amount);
1007+
string error_message;
1008+
bool all_succeeded = VectorOperations::DefaultTryCast(intermediate_vector, result, amount, &error_message);
1009+
if (!all_succeeded) {
1010+
string extended_error;
1011+
extended_error =
1012+
StringUtil::Format("In file \"%s\" the column \"%s\" has type %s, but we are trying to read it as type %s.",
1013+
reader.file_name, schema.name, intermediate_vector.GetType(), result.GetType());
1014+
extended_error += "\nThis can happen when reading multiple Parquet files. The schema information is taken from "
1015+
"the first Parquet file by default. Possible solutions:\n";
1016+
extended_error += "* Enable the union_by_name=True option to combine the schema of all Parquet files "
1017+
"(duckdb.org/docs/data/multiple_files/combining_schemas)\n";
1018+
extended_error += "* Use a COPY statement to automatically derive types from an existing table.";
1019+
throw ConversionException(
1020+
"In Parquet reader of file \"%s\": failed to cast column \"%s\" from type %s to %s: %s\n\n%s",
1021+
reader.file_name, schema.name, intermediate_vector.GetType(), result.GetType(), error_message,
1022+
extended_error);
1023+
}
10081024
return amount;
10091025
}
10101026

0 commit comments

Comments
 (0)