From 1420aef3342012e0b25472fc569de2e34b74556f Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 7 Jan 2025 21:52:55 +0000
Subject: [PATCH 01/21] vendor: Update vendored sources to
 duckdb/duckdb@acdbf60889033d2701a5fef360a19963cafea471 (#974)

Annotate errors in table macros with the call position of the table macro
(duckdb/duckdb#15590)

Co-authored-by: krlmlr
---
 src/duckdb/src/function/table/version/pragma_version.cpp | 6 +++---
 .../src/planner/binder/tableref/bind_table_function.cpp  | 9 ++++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp
index 77e744b6b..6bb466621 100644
--- a/src/duckdb/src/function/table/version/pragma_version.cpp
+++ b/src/duckdb/src/function/table/version/pragma_version.cpp
@@ -1,5 +1,5 @@
 #ifndef DUCKDB_PATCH_VERSION
-#define DUCKDB_PATCH_VERSION "4-dev4106"
+#define DUCKDB_PATCH_VERSION "4-dev4108"
 #endif
 #ifndef DUCKDB_MINOR_VERSION
 #define DUCKDB_MINOR_VERSION 1
@@ -8,10 +8,10 @@
 #define DUCKDB_MAJOR_VERSION 1
 #endif
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "v1.1.4-dev4106"
+#define DUCKDB_VERSION "v1.1.4-dev4108"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "2edfde3071"
+#define DUCKDB_SOURCE_ID "acdbf60889"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
diff --git a/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp b/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp
index 2d222b4fc..3f4c249bd 100644
--- a/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp
+++ b/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp
@@ -283,7 +283,14 @@ unique_ptr<BoundTableRef> Binder::Bind(TableFunctionRef &ref) {
 	binder->can_contain_nulls = true;
 	binder->alias = ref.alias.empty() ? "unnamed_query" : ref.alias;
-	auto query = binder->BindNode(*query_node);
+	unique_ptr<BoundQueryNode> query;
+	try {
+		query = binder->BindNode(*query_node);
+	} catch (std::exception &ex) {
+		ErrorData error(ex);
+		error.AddQueryLocation(ref);
+		error.Throw();
+	}
 	idx_t bind_index = query->GetRootIndex();
 	// string alias;

From 924d64963e5f7f722e60f7511dc3dcd83af381cb Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 8 Jan 2025 01:17:58 +0000
Subject: [PATCH 02/21] fledge: Bump version to 1.1.3.9034 (#975)

Co-authored-by: krlmlr
---
 DESCRIPTION |  2 +-
 NEWS.md     | 31 +++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index f0e516db7..87528928b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: duckdb
 Title: DBI Package for the DuckDB Database Management System
-Version: 1.1.3.9033
+Version: 1.1.3.9034
 Authors@R: c(
     person("Hannes", "Mühleisen", , "hannes@cwi.nl", role = "aut",
            comment = c(ORCID = "0000-0001-8552-0029")),
diff --git a/NEWS.md b/NEWS.md
index 452fe023a..2f6f554ee 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,36 @@
+# duckdb 1.1.3.9034
+
+## vendor
+
+- Update vendored sources to duckdb/duckdb@acdbf60889033d2701a5fef360a19963cafea471 (#974).
+
+- Update vendored sources to duckdb/duckdb@2edfde3071cdc3ccc6047773e8229fb80238443d (#973).
+
+- Update vendored sources to duckdb/duckdb@3c0db29b2bf182105c1537ddf2fa8b12d186941d (#972).
+
+- Update vendored sources to duckdb/duckdb@5705d13dbf1d4f3db3e4f36d3194f669aaf99bc0 (#971).
+
+- Update vendored sources to duckdb/duckdb@41d13ad1612b3e91ff7a9a5c17edd6fdf51073f1 (#970).
+
+- Update vendored sources to duckdb/duckdb@52032a5b00d4f11376f4ba106753629aea87c0a8 (#969).
+
+- Update vendored sources to duckdb/duckdb@add512094d022fec50354208f908c3d60f3755a5 (#968).
+
+- Update vendored sources to duckdb/duckdb@d42c34cbd62061e5088fc1b81ced0c1008fa58ea (#967).
+
+- Update vendored sources to duckdb/duckdb@35bf611cc4832ceb2f20ec70a2597d0765618610 (duckdb/duckdb#15526, #966).
+
+- Update vendored sources to duckdb/duckdb@22de1ef5310136803f2caab0c9cc063b4fad52e5 (#965).
+
+- Update vendored sources to duckdb/duckdb@a3636bdd6203f4e4c47c5191264b9209e4f2a516 (#964).
+
+- Update vendored sources to duckdb/duckdb@50de06cec8de9c8da1b6fd9fcec3f514694a9b6a (#963).
+
+- Update vendored sources to duckdb/duckdb@4b163ff7c8cb9693fafe4d822dff54b31cfd2adf (#962).
+
+
 # duckdb 1.1.3.9033
 
 ## vendor

From 693ded16b4f2dbd4961b8afba31b3639e30c0228 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 8 Jan 2025 12:20:36 +0000
Subject: [PATCH 03/21] vendor: Update vendored sources to
 duckdb/duckdb@c8fa9aee7858909c625b5c3abcc3a257c5d9d934 (#977)

Line dependent buffer (duckdb/duckdb#14512)

Co-authored-by: krlmlr
---
 .../csv_scanner/buffer_manager/csv_buffer.cpp |   4 +-
 .../buffer_manager/csv_buffer_manager.cpp     |   9 +-
 .../buffer_manager/csv_file_handle.cpp        |   2 +-
 .../csv_scanner/scanner/scanner_boundary.cpp  |  46 +++--
 .../scanner/string_value_scanner.cpp          |  56 +++---
 .../csv_scanner/sniffer/csv_sniffer.cpp       |  11 +-
 .../csv_scanner/sniffer/type_detection.cpp    |  26 +--
 .../table_function/global_csv_state.cpp       | 115 +----------
 .../operator/csv_scanner/util/csv_error.cpp   | 163 ++++++++++++++--
 .../csv_scanner/util/csv_reader_options.cpp   |  21 ++-
 .../function/table/version/pragma_version.cpp |   6 +-
 .../operator/csv_scanner/csv_buffer.hpp       |  14 +-
 .../operator/csv_scanner/csv_error.hpp        |  37 ++--
 .../csv_scanner/csv_reader_options.hpp        |   5 +-
 .../operator/csv_scanner/scanner_boundary.hpp |  18 +-
 .../csv_scanner/string_value_scanner.hpp      |  13 +-
 .../storage/serialization/serialize_nodes.cpp |  11 +-
 17 files changed, 314 insertions(+), 243 deletions(-)

diff --git a/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp b/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp
index 3677e0a75..c18c5f61e 100644
--- a/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp
+++ b/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp
@@ -58,7 +58,7 @@ void CSVBuffer::AllocateBuffer(idx_t buffer_size) {
 	block = handle.GetBlockHandle();
 }
 
-idx_t CSVBuffer::GetBufferSize() {
+idx_t CSVBuffer::GetBufferSize() const {
 	return actual_buffer_size;
 }
 
@@ -87,7 +87,7 @@ void CSVBuffer::Unpin() {
 	}
 }
 
-bool CSVBuffer::IsCSVFileLastBuffer() {
+bool CSVBuffer::IsCSVFileLastBuffer() const {
 	return last_buffer;
 }
 
diff --git a/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp b/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp
index 5f2823292..811689917 100644
--- a/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp
+++ b/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp
@@ -7,7 +7,7 @@ CSVBufferManager::CSVBufferManager(ClientContext &context_p, const CSVReaderOpti
                                    const idx_t file_idx_p, bool per_file_single_threaded_p,
                                    unique_ptr<CSVFileHandle> file_handle_p)
     : context(context_p), per_file_single_threaded(per_file_single_threaded_p), file_idx(file_idx_p),
-      file_path(file_path_p), buffer_size(CSVBuffer::CSV_BUFFER_SIZE) {
+      file_path(file_path_p), buffer_size(options.buffer_size_option.GetValue()) {
 	D_ASSERT(!file_path.empty());
 	if (file_handle_p) {
 		file_handle = std::move(file_handle_p);
@@ -16,13 +16,6 @@
 	is_pipe = file_handle->IsPipe();
 	skip_rows = options.dialect_options.skip_rows.GetValue();
-	auto file_size = file_handle->FileSize();
-	if (file_size > 0 && file_size < buffer_size) {
-		buffer_size = CSVBuffer::CSV_MINIMUM_BUFFER_SIZE;
-	}
-	if (options.buffer_size < buffer_size) {
-		buffer_size = options.buffer_size;
-	}
 	Initialize();
 }
 
diff --git a/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp b/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp
index 0a8c1e20c..73fbb4ed0 100644
--- a/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp
+++ b/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp
@@ -10,7 +10,7 @@ namespace duckdb {
 CSVFileHandle::CSVFileHandle(DBConfig &config, unique_ptr<FileHandle> file_handle_p, const string &path_p,
                              const CSVReaderOptions &options)
     : compression_type(options.compression), file_handle(std::move(file_handle_p)),
-      encoder(config, options.encoding, options.buffer_size), path(path_p) {
+      encoder(config, options.encoding, options.buffer_size_option.GetValue()), path(path_p) {
 	can_seek = file_handle->CanSeek();
 	on_disk_file = file_handle->OnDiskFile();
 	file_size = file_handle->GetFileSize();
diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp
index 0a577d779..b0511bec6 100644
--- a/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp
+++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp
@@ -16,7 +16,7 @@ CSVBoundary::CSVBoundary() : buffer_idx(0), buffer_pos(0), boundary_idx(0), end_
 CSVIterator::CSVIterator() : is_set(false) {
 }
 
-void CSVBoundary::Print() {
+void CSVBoundary::Print() const {
 #ifndef DUCKDB_DISABLE_PRINT
 	std::cout << "---Boundary: " << boundary_idx << " ---" << '\n';
 	std::cout << "Buffer Index: " << buffer_idx << '\n';
@@ -26,26 +26,39 @@
 #endif
 }
 
-void CSVIterator::Print() {
+void CSVIterator::Print() const {
 #ifndef DUCKDB_DISABLE_PRINT
 	boundary.Print();
 	std::cout << "Is set: " << is_set << '\n';
 #endif
 }
 
-bool CSVIterator::Next(CSVBufferManager &buffer_manager) {
+idx_t CSVIterator::BytesPerThread(const CSVReaderOptions &reader_options) {
+	const idx_t buffer_size = reader_options.buffer_size_option.GetValue();
+	const idx_t max_row_size = reader_options.maximum_line_size.GetValue();
+	const idx_t bytes_per_thread = buffer_size / CSVBuffer::ROWS_PER_BUFFER * ROWS_PER_THREAD;
+	if (bytes_per_thread < max_row_size) {
+		// If we are setting up the buffer size directly, we must make sure each thread will read the full buffer.
+		return max_row_size;
+	}
+	return bytes_per_thread;
+}
+
+bool CSVIterator::Next(CSVBufferManager &buffer_manager, const CSVReaderOptions &reader_options) {
 	if (!is_set) {
 		return false;
 	}
+	const auto bytes_per_thread = BytesPerThread(reader_options);
+
 	// If we are calling next this is not the first one anymore
 	first_one = false;
 	boundary.boundary_idx++;
 	// This is our start buffer
 	auto buffer = buffer_manager.GetBuffer(boundary.buffer_idx);
-	if (buffer->is_last_buffer && boundary.buffer_pos + CSVIterator::BYTES_PER_THREAD > buffer->actual_size) {
+	if (buffer->is_last_buffer && boundary.buffer_pos + bytes_per_thread > buffer->actual_size) {
 		// 1) We are done with the current file
 		return false;
-	} else if (boundary.buffer_pos + BYTES_PER_THREAD >= buffer->actual_size) {
+	} else if (boundary.buffer_pos + bytes_per_thread >= buffer->actual_size) {
 		// 2) We still have data to scan in this file, we set the iterator accordingly.
 		// We must move the buffer
 		boundary.buffer_idx++;
@@ -58,9 +71,9 @@
 	} else {
 		// 3) We are not done with the current buffer, hence we just move where we start within the buffer
-		boundary.buffer_pos += BYTES_PER_THREAD;
+		boundary.buffer_pos += bytes_per_thread;
 	}
-	boundary.end_pos = boundary.buffer_pos + BYTES_PER_THREAD;
+	boundary.end_pos = boundary.buffer_pos + bytes_per_thread;
 	SetCurrentPositionToBoundary();
 	return true;
 }
@@ -85,20 +98,21 @@ void CSVIterator::SetCurrentPositionToBoundary() {
 	pos.buffer_pos = boundary.buffer_pos;
 }
 
-void CSVIterator::SetCurrentBoundaryToPosition(bool single_threaded) {
+void CSVIterator::SetCurrentBoundaryToPosition(bool single_threaded, const CSVReaderOptions &reader_options) {
 	if (single_threaded) {
 		is_set = false;
 		return;
 	}
+	const auto bytes_per_thread = BytesPerThread(reader_options);
+
 	boundary.buffer_idx = pos.buffer_idx;
 	if (pos.buffer_pos == 0) {
-		boundary.end_pos = CSVIterator::BYTES_PER_THREAD;
+		boundary.end_pos = bytes_per_thread;
 	} else {
-		boundary.end_pos = ((pos.buffer_pos + CSVIterator::BYTES_PER_THREAD - 1) / CSVIterator::BYTES_PER_THREAD) *
-		                   CSVIterator::BYTES_PER_THREAD;
+		boundary.end_pos = ((pos.buffer_pos + bytes_per_thread - 1) / bytes_per_thread) * bytes_per_thread;
 	}
-	boundary.buffer_pos = boundary.end_pos - CSVIterator::BYTES_PER_THREAD;
+	boundary.buffer_pos = boundary.end_pos - bytes_per_thread;
 	is_set = true;
 }
 
@@ -110,7 +124,13 @@ void CSVIterator::SetEnd(idx_t pos) {
 	boundary.end_pos = pos;
 }
 
-idx_t CSVIterator::GetGlobalCurrentPos() {
+void CSVIterator::CheckIfDone() {
+	if (IsBoundarySet() && (pos.buffer_idx > boundary.buffer_idx || pos.buffer_pos > boundary.buffer_pos)) {
+		done = true;
+	}
+}
+
+idx_t CSVIterator::GetGlobalCurrentPos() const {
 	return pos.buffer_pos + buffer_size * pos.buffer_idx;
 }
 
diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index 967660a8e..aeb305a55 100644
--- a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -14,19 +14,22 @@
 #include <algorithm>
 
 namespace duckdb {
+
+constexpr idx_t StringValueScanner::LINE_FINDER_ID;
+
 StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_machine,
                                      const shared_ptr<CSVBufferHandle> &buffer_handle, Allocator &buffer_allocator,
                                      idx_t result_size_p, idx_t buffer_position, CSVErrorHandler &error_hander_p,
                                     CSVIterator &iterator_p, bool store_line_size_p,
                                     shared_ptr<CSVFileScan> csv_file_scan_p, idx_t &lines_read_p, bool sniffing_p,
-                                     string path_p)
+                                     string path_p, idx_t scan_id)
     : ScannerResult(states, state_machine, result_size_p),
       number_of_columns(NumericCast(state_machine.dialect_options.num_cols)),
       null_padding(state_machine.options.null_padding),
       ignore_errors(state_machine.options.ignore_errors.GetValue()),
       extra_delimiter_bytes(state_machine.dialect_options.state_machine_options.delimiter.GetValue().size() - 1),
       error_handler(error_hander_p), iterator(iterator_p), store_line_size(store_line_size_p),
       csv_file_scan(std::move(csv_file_scan_p)), lines_read(lines_read_p),
-      current_errors(state_machine.options.IgnoreErrors()), sniffing(sniffing_p), path(std::move(path_p)) {
+      current_errors(scan_id, state_machine.options.IgnoreErrors()), sniffing(sniffing_p), path(std::move(path_p)) {
 	// Vector information
 	D_ASSERT(number_of_columns > 0);
 	if (!buffer_handle) {
@@ -681,7 +684,7 @@ bool LineError::HandleErrors(StringValueResult &result) {
 			break;
 		case MAXIMUM_LINE_SIZE:
 			csv_error = CSVError::LineSizeError(
-			    result.state_machine.options, cur_error.current_line_size, lines_per_batch, borked_line,
+			    result.state_machine.options, lines_per_batch, borked_line,
 			    result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl), result.path);
 			break;
 		case INVALID_STATE:
@@ -702,7 +705,7 @@
 		}
 		result.error_handler.Error(csv_error);
 	}
-	if (is_error_in_line) {
+	if (is_error_in_line && scan_id != StringValueScanner::LINE_FINDER_ID) {
 		if (result.sniffing) {
 			// If we are sniffing we just remove the line
 			result.RemoveLastLine();
@@ -780,7 +783,7 @@ bool StringValueResult::AddRowInternal() {
 	}
 	current_line_position.begin = current_line_position.end;
 	current_line_position.end = current_line_start;
-	if (current_line_size > state_machine.options.maximum_line_size) {
+	if (current_line_size > state_machine.options.maximum_line_size.GetValue()) {
 		current_errors.Insert(MAXIMUM_LINE_SIZE, 1, chunk_col_id, last_position, current_line_size);
 	}
 	if (!state_machine.options.null_padding) {
@@ -827,7 +830,7 @@
 		} else {
 			// If we are not null-padding this is an error
 			if (!state_machine.options.IgnoreErrors()) {
-				bool first_nl;
+				bool first_nl = false;
 				auto borked_line =
 				    current_line_position.ReconstructCurrentLine(first_nl, buffer_handles, PrintErrorLine());
 				LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
@@ -938,8 +941,8 @@ StringValueScanner::StringValueScanner(idx_t scanner_idx_p, const shared_ptr
       result(states, *state_machine, cur_buffer_handle, BufferAllocator::Get(buffer_manager->context), result_size,
             iterator.pos.buffer_pos, *error_handler, iterator,
             buffer_manager->context.client_data->debug_set_max_line_length, csv_file_scan, lines_read, sniffing,
-            buffer_manager->GetFilePath()) {
-	iterator.buffer_size = state_machine->options.buffer_size;
+            buffer_manager->GetFilePath(), scanner_idx_p) {
+	iterator.buffer_size = state_machine->options.buffer_size_option.GetValue();
 }
 
@@ -950,8 +953,8 @@ StringValueScanner::StringValueScanner(const shared_ptr &buffe
       result(states, *state_machine, cur_buffer_handle, Allocator::DefaultAllocator(), result_size,
             iterator.pos.buffer_pos, *error_handler, iterator,
             buffer_manager->context.client_data->debug_set_max_line_length, csv_file_scan, lines_read, sniffing,
-            buffer_manager->GetFilePath()) {
-	iterator.buffer_size = state_machine->options.buffer_size;
+            buffer_manager->GetFilePath(), 0) {
+	iterator.buffer_size = state_machine->options.buffer_size_option.GetValue();
 }
 
 unique_ptr<StringValueScanner> StringValueScanner::GetCSVScanner(ClientContext &context, CSVReaderOptions &options) {
@@ -1064,7 +1067,7 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) {
 		}
 		result.borked_rows.insert(line_error++);
 		D_ASSERT(state_machine->options.ignore_errors.GetValue());
-		// We are ignoring errors. We must continue but ignoring borked rows
+		// We are ignoring errors. We must continue but ignoring borked-rows
 		for (; line_error < parse_chunk.size(); line_error++) {
 			if (!inserted_column_data.validity.RowIsValid(line_error) &&
 			    parse_column_data.validity.RowIsValid(line_error)) {
@@ -1622,14 +1625,19 @@ bool StringValueScanner::IsRowValid(CSVIterator &current_iterator) const {
 		return false;
 	}
 	constexpr idx_t result_size = 1;
-	auto scan_finder =
-	    make_uniq<StringValueScanner>(0U, buffer_manager, state_machine_strict, make_shared_ptr<CSVErrorHandler>(),
-	                                  csv_file_scan, false, current_iterator, result_size);
+	auto scan_finder = make_uniq<StringValueScanner>(StringValueScanner::LINE_FINDER_ID, buffer_manager,
+	                                                 state_machine_strict, make_shared_ptr<CSVErrorHandler>(),
+	                                                 csv_file_scan, false, current_iterator, result_size);
 	auto &tuples = scan_finder->ParseChunk();
 	current_iterator.pos = scan_finder->GetIteratorPosition();
-	return (tuples.number_of_rows == 1 || tuples.first_line_is_comment) && tuples.borked_rows.empty() &&
-	       !tuples.current_errors.HasError();
-	;
+	bool has_error = false;
+	if (tuples.current_errors.HasError()) {
+		if (tuples.current_errors.Size() != 1 || !tuples.current_errors.HasErrorType(MAXIMUM_LINE_SIZE)) {
+			// We ignore maximum line size errors
+			has_error = true;
+		}
+	}
+	return (tuples.number_of_rows == 1 || tuples.first_line_is_comment) && !has_error && tuples.borked_rows.empty();
 }
 
@@ -1641,9 +1649,6 @@ ValidRowInfo StringValueScanner::TryRow(CSVState state, idx_t start_pos, idx_t e
 	auto iterator_start = current_iterator;
 	idx_t current_pos = current_iterator.pos.buffer_pos;
 	current_iterator.SetEnd(iterator.GetEndPos());
-	if (iterator.GetEndPos() == current_pos) {
-		return {false, current_pos, current_iterator.pos.buffer_idx, current_iterator.pos.buffer_pos, quoted};
-	}
 	if (IsRowValid(current_iterator)) {
 		if (!quoted) {
 			quoted = FirstValueEndsOnQuote(iterator_start);
@@ -1715,12 +1720,19 @@ void StringValueScanner::SetStart() {
 			iterator.done = true;
 		} else {
 			bool mock;
-			SkipUntilState(CSVState::STANDARD_NEWLINE, CSVState::RECORD_SEPARATOR, iterator, mock);
+			if (!SkipUntilState(CSVState::STANDARD_NEWLINE, CSVState::RECORD_SEPARATOR, iterator, mock)) {
+				iterator.CheckIfDone();
+			}
 		}
 	} else {
 		iterator.pos.buffer_pos = best_row.start_pos;
-		iterator.done = iterator.pos.buffer_pos == cur_buffer_handle->actual_size;
+		bool is_this_the_end =
+		    best_row.start_pos >= cur_buffer_handle->actual_size && cur_buffer_handle->is_last_buffer;
+		if (is_this_the_end) {
+			iterator.done = true;
+		}
 	}
+
 	// 4. We have an error, if we have an error, we let life go on, the scanner will either ignore it
 	// or throw.
 	result.last_position = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, result.buffer_size};
 
diff --git a/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp b/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp
index 08311cfe3..c8740ebde 100644
--- a/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp
+++ b/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp
@@ -205,15 +205,8 @@ SnifferResult CSVSniffer::SniffCSV(const bool force_match) {
 		buffer_manager->ResetBufferManager();
 	}
 	buffer_manager->sniffing = false;
-	if (!best_candidate->error_handler->errors.empty() && !options.ignore_errors.GetValue()) {
-		for (auto &error_vector : best_candidate->error_handler->errors) {
-			for (auto &error : error_vector.second) {
-				if (error.type == MAXIMUM_LINE_SIZE) {
-					// If it's a maximum line size error, we can do it now.
-					error_handler->Error(error);
-				}
-			}
-		}
+	if (best_candidate->error_handler->AnyErrors() && !options.ignore_errors.GetValue()) {
+		best_candidate->error_handler->ErrorIfTypeExists(MAXIMUM_LINE_SIZE);
 	}
 	D_ASSERT(best_sql_types_candidates_per_column_idx.size() == names.size());
 	// We are done, Set the CSV Options in the reference. Construct and return the result.
diff --git a/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp b/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp
index e7a5237af..49bf105a9 100644
--- a/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp
+++ b/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp
@@ -425,19 +425,9 @@ void CSVSniffer::DetectTypes() {
 		SetUserDefinedDateTimeFormat(*candidate->state_machine);
 		// Parse chunk and read csv with info candidate
 		auto &data_chunk = candidate->ParseChunk().ToChunk();
-		if (!candidate->error_handler->errors.empty()) {
-			bool break_loop = false;
-			for (auto &errors : candidate->error_handler->errors) {
-				for (auto &error : errors.second) {
-					if (error.type != CSVErrorType::MAXIMUM_LINE_SIZE) {
-						break_loop = true;
-						break;
-					}
-				}
-			}
-			if (break_loop && !candidate->state_machine->options.ignore_errors.GetValue()) {
-				continue;
-			}
+		if (candidate->error_handler->AnyErrors() && !candidate->error_handler->HasError(MAXIMUM_LINE_SIZE) &&
+		    !candidate->state_machine->options.ignore_errors.GetValue()) {
+			continue;
 		}
 		idx_t start_idx_detection = 0;
 		idx_t chunk_size = data_chunk.size();
@@ -463,11 +453,11 @@
 			// it's good if the dialect creates more non-varchar columns, but only if we sacrifice < 30% of
 			// best_num_cols.
-			if (!best_candidate ||
-			    (varchar_cols(info_sql_types_candidates.size())>(
-			         static_cast(max_columns_found) * 0.7) &&
-			     (!options.ignore_errors.GetValue() || candidate->error_handler->errors.size() < min_errors))) {
-				min_errors = candidate->error_handler->errors.size();
+			const idx_t number_of_errors = candidate->error_handler->GetSize();
+			if (!best_candidate || (varchar_cols(info_sql_types_candidates.size())>(
+			                            static_cast(max_columns_found) * 0.7) &&
+			                        (!options.ignore_errors.GetValue() || number_of_errors < min_errors))) {
+				min_errors = number_of_errors;
 				best_header_row.clear();
 				// we have a new best_options candidate
 				best_candidate = std::move(candidate);
diff --git a/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
index f6d0d386e..25691076d 100644
--- a/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
+++ b/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
@@ -1,5 +1,4 @@
 #include "duckdb/execution/operator/csv_scanner/global_csv_state.hpp"
-
 #include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
 #include "duckdb/execution/operator/csv_scanner/scanner_boundary.hpp"
 #include "duckdb/execution/operator/csv_scanner/skip_scanner.hpp"
@@ -38,7 +37,7 @@ CSVGlobalState::CSVGlobalState(ClientContext &context_p, const shared_ptr
 	current_boundary = file_scans.back()->start_iterator;
-	current_boundary.SetCurrentBoundaryToPosition(single_threaded);
+	current_boundary.SetCurrentBoundaryToPosition(single_threaded, options);
 	if (current_boundary.done && context.client_data->debug_set_max_line_length) {
 		context.client_data->debug_max_line_length = current_boundary.pos.buffer_pos;
 	}
@@ -124,7 +123,7 @@ unique_ptr CSVGlobalState::Next(optional_ptr
 			current_boundary = file_scans.back()->start_iterator;
-			current_boundary.SetCurrentBoundaryToPosition(single_threaded);
+			current_boundary.SetCurrentBoundaryToPosition(single_threaded, bind_data.options);
 			current_buffer_in_use =
 			    make_shared_ptr<CSVBufferUsage>(*file_scans.back()->buffer_manager, current_boundary.GetBufferIdx());
 
@@ -158,7 +157,7 @@
 	previous_scanner->buffer_tracker = current_buffer_in_use;
 
 	// We then produce the next boundary
-	if (!current_boundary.Next(*current_file.buffer_manager)) {
+	if (!current_boundary.Next(*current_file.buffer_manager, bind_data.options)) {
 		// This means we are done scanning the current file
 		do {
 			auto current_file_idx = file_scans.back()->file_idx + 1;
@@ -169,7 +168,7 @@
 				current_boundary = file_scans.back()->start_iterator;
-				current_boundary.SetCurrentBoundaryToPosition(single_threaded);
+				current_boundary.SetCurrentBoundaryToPosition(single_threaded, bind_data.options);
 				current_buffer_in_use = make_shared_ptr<CSVBufferUsage>(*file_scans.back()->buffer_manager,
 				                                                        current_boundary.GetBufferIdx());
 			} else {
@@ -188,8 +187,8 @@ idx_t CSVGlobalState::MaxThreads() const {
 	if (single_threaded || !file_scans.front()->on_disk_file) {
 		return system_threads;
 	}
-	const idx_t total_threads = file_scans.front()->file_size / CSVIterator::BYTES_PER_THREAD + 1;
-
+	const idx_t bytes_per_thread = CSVIterator::BytesPerThread(file_scans.front()->options);
+	const idx_t total_threads = file_scans.front()->file_size / bytes_per_thread + 1;
 	if (total_threads < system_threads) {
 		return total_threads;
 	}
@@ -217,42 +216,6 @@ void CSVGlobalState::DecrementThread() {
 	}
 }
 
-bool IsCSVErrorAcceptedReject(CSVErrorType type) {
-	switch (type) {
-	case CSVErrorType::CAST_ERROR:
-	case CSVErrorType::TOO_MANY_COLUMNS:
-	case CSVErrorType::TOO_FEW_COLUMNS:
-	case CSVErrorType::MAXIMUM_LINE_SIZE:
-	case CSVErrorType::UNTERMINATED_QUOTES:
-	case CSVErrorType::INVALID_UNICODE:
-	case CSVErrorType::INVALID_STATE:
-		return true;
-	default:
-		return false;
-	}
-}
-
-string CSVErrorTypeToEnum(CSVErrorType type) {
-	switch (type) {
-	case CSVErrorType::CAST_ERROR:
-		return "CAST";
-	case CSVErrorType::TOO_FEW_COLUMNS:
-		return "MISSING COLUMNS";
-	case CSVErrorType::TOO_MANY_COLUMNS:
-		return "TOO MANY COLUMNS";
-	case CSVErrorType::MAXIMUM_LINE_SIZE:
-		return "LINE SIZE OVER MAXIMUM";
-	case CSVErrorType::UNTERMINATED_QUOTES:
-		return "UNQUOTED VALUE";
-	case CSVErrorType::INVALID_UNICODE:
-		return "INVALID UNICODE";
-	case CSVErrorType::INVALID_STATE:
-		return "INVALID STATE";
-	default:
-		throw InternalException("CSV Error is not valid to be stored in a Rejects Table");
-	}
-}
-
 void FillScanErrorTable(InternalAppender &scan_appender, idx_t scan_idx, idx_t file_idx, CSVFileScan &file) {
 	CSVReaderOptions &options = file.options;
 	// Add the row to the rejects table
@@ -326,70 +289,10 @@ void CSVGlobalState::FillRejectsTable() const {
 		InternalAppender scans_appender(context, scans_table);
 		idx_t scan_idx = context.transaction.GetActiveQuery();
 		for (auto &file : file_scans) {
-			idx_t file_idx = rejects->GetCurrentFileIndex(scan_idx);
+			const idx_t file_idx = rejects->GetCurrentFileIndex(scan_idx);
 			auto file_name = file->file_path;
-			auto &errors = file->error_handler->errors;
-			// We first insert the file into the file scans table
-			for (auto &error_vector : errors) {
-				for (auto &error : error_vector.second) {
-					if (!IsCSVErrorAcceptedReject(error.type)) {
-						continue;
-					}
-					// short circuit if we already have too many rejects
-					if (limit == 0 || rejects->count < limit) {
-						if (limit != 0 && rejects->count >= limit) {
-							break;
-						}
-						rejects->count++;
-						auto row_line = file->error_handler->GetLine(error.error_info);
-						auto col_idx = error.column_idx;
-						// Add the row to the rejects table
-						errors_appender.BeginRow();
-						// 1. Scan Id
-						errors_appender.Append(scan_idx);
-						// 2. File Id
-						errors_appender.Append(file_idx);
-						// 3. Row Line
-						errors_appender.Append(row_line);
-						// 4. Byte Position of the row error
-						errors_appender.Append(error.row_byte_position + 1);
-						// 5. Byte Position where error occurred
-						if (!error.byte_position.IsValid()) {
-							// This means this error comes from a flush, and we don't support this yet, so we give it
-							// a null
-							errors_appender.Append(Value());
-						} else {
-							errors_appender.Append(error.byte_position.GetIndex() + 1);
-						}
-						// 6. Column Index
-						if (error.type == CSVErrorType::MAXIMUM_LINE_SIZE) {
-							errors_appender.Append(Value());
-						} else {
-							errors_appender.Append(col_idx + 1);
-						}
-						// 7. Column Name (If Applicable)
-						switch (error.type) {
-						case CSVErrorType::TOO_MANY_COLUMNS:
-						case CSVErrorType::MAXIMUM_LINE_SIZE:
-							errors_appender.Append(Value());
-							break;
-						case CSVErrorType::TOO_FEW_COLUMNS:
-							D_ASSERT(bind_data.return_names.size() > col_idx + 1);
-							errors_appender.Append(string_t(bind_data.return_names[col_idx + 1]));
-							break;
-						default:
-							errors_appender.Append(string_t(bind_data.return_names[col_idx]));
-						}
-						// 8. Error Type
-						errors_appender.Append(string_t(CSVErrorTypeToEnum(error.type)));
-						// 9. Original CSV Line
-						errors_appender.Append(string_t(error.csv_row));
-						// 10. Full Error Message
-						errors_appender.Append(string_t(error.error_message));
-						errors_appender.EndRow();
-					}
-				}
-			}
+			file->error_handler->FillRejectsTable(errors_appender, file_idx, scan_idx, *file, *rejects, bind_data,
+			                                      limit);
 			if (rejects->count != 0) {
 				rejects->count = 0;
 				FillScanErrorTable(scans_appender, scan_idx, file_idx, *file);
diff --git a/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp
index 5c01d1fe9..f77a93ac0 100644
--- a/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp
+++ b/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp
@@ -1,7 +1,10 @@
 #include "duckdb/execution/operator/csv_scanner/csv_error.hpp"
 #include "duckdb/common/exception/conversion_exception.hpp"
 #include "duckdb/common/string_util.hpp"
-
+#include "duckdb/function/table/read_csv.hpp"
+#include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
+#include "duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp"
+#include "duckdb/main/appender.hpp"
 #include <sstream>
 
 namespace duckdb {
@@ -41,11 +44,11 @@ void CSVErrorHandler::ThrowError(const CSVError &csv_error) {
 	}
 }
 
-void CSVErrorHandler::Error(CSVError csv_error, bool force_error) {
+void CSVErrorHandler::Error(const CSVError &csv_error, bool force_error) {
 	lock_guard<mutex> parallel_lock(main_mutex);
 	if ((ignore_errors && !force_error) || (PrintLineNumber(csv_error) && !CanGetLine(csv_error.GetBoundaryIndex()))) {
 		// We store this error, we can't throw it now, or we are ignoring it
-		errors[csv_error.error_info].push_back(std::move(csv_error));
+		errors.push_back(csv_error);
 		return;
 	}
 	// Otherwise we can throw directly
@@ -58,10 +61,19 @@ void CSVErrorHandler::ErrorIfNeeded() {
 		// Nothing to error
 		return;
 	}
-	CSVError first_error = errors.begin()->second[0];
-	if (CanGetLine(first_error.error_info.boundary_idx)) {
-		ThrowError(first_error);
+	if (CanGetLine(errors[0].error_info.boundary_idx)) {
+		ThrowError(errors[0]);
+	}
+}
+
+void CSVErrorHandler::ErrorIfTypeExists(CSVErrorType error_type) {
+	lock_guard<mutex> parallel_lock(main_mutex);
+	for (auto &error : errors) {
+		if (error.type == error_type) {
+			// If it's a maximum line size error, we can do it now.
+			ThrowError(error);
+		}
+	}
 }
 
@@ -84,6 +96,131 @@ bool CSVErrorHandler::AnyErrors() {
 	return !errors.empty();
 }
 
+bool CSVErrorHandler::HasError(const CSVErrorType error_type) {
+	lock_guard<mutex> parallel_lock(main_mutex);
+	for (const auto &er : errors) {
+		if (er.type == error_type) {
+			return true;
+		}
+	}
+	return false;
+}
+
+idx_t CSVErrorHandler::GetSize() {
+	lock_guard<mutex> parallel_lock(main_mutex);
+	return errors.size();
+}
+
+bool IsCSVErrorAcceptedReject(CSVErrorType type) {
+	switch (type) {
+	case CSVErrorType::INVALID_STATE:
+	case CSVErrorType::CAST_ERROR:
+	case CSVErrorType::TOO_MANY_COLUMNS:
+	case CSVErrorType::TOO_FEW_COLUMNS:
+	case CSVErrorType::MAXIMUM_LINE_SIZE:
+	case CSVErrorType::UNTERMINATED_QUOTES:
+	case CSVErrorType::INVALID_UNICODE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+string CSVErrorTypeToEnum(CSVErrorType type) {
+	switch (type) {
+	case CSVErrorType::CAST_ERROR:
+		return "CAST";
+	case CSVErrorType::TOO_FEW_COLUMNS:
+		return "MISSING COLUMNS";
+	case CSVErrorType::TOO_MANY_COLUMNS:
+		return "TOO MANY COLUMNS";
+	case CSVErrorType::MAXIMUM_LINE_SIZE:
+		return "LINE SIZE OVER MAXIMUM";
+	case CSVErrorType::UNTERMINATED_QUOTES:
+		return "UNQUOTED VALUE";
+	case CSVErrorType::INVALID_UNICODE:
+		return "INVALID UNICODE";
+	case CSVErrorType::INVALID_STATE:
+		return "INVALID STATE";
+	default:
+		throw InternalException("CSV Error is not valid to be stored in a Rejects Table");
+	}
+}
+
+void CSVErrorHandler::FillRejectsTable(InternalAppender &errors_appender, const idx_t file_idx, const idx_t scan_idx,
+                                       const CSVFileScan &file, CSVRejectsTable &rejects, const ReadCSVData &bind_data,
+                                       const idx_t limit) {
+	lock_guard<mutex> parallel_lock(main_mutex);
+	// We first insert the file into the file scans table
+	for (auto &error : file.error_handler->errors) {
+		if (!IsCSVErrorAcceptedReject(error.type)) {
+			continue;
+		}
+		// short circuit if we already have too many rejects
+		if (limit == 0 || rejects.count < limit) {
+			if (limit != 0 && rejects.count >= limit) {
+				break;
+			}
+			rejects.count++;
+			const auto row_line = file.error_handler->GetLineInternal(error.error_info);
+			const auto col_idx = error.column_idx;
+			// Add the row to the rejects table
+			errors_appender.BeginRow();
+			// 1. Scan ID
+			errors_appender.Append(scan_idx);
+			// 2. File ID
+			errors_appender.Append(file_idx);
+			// 3. Row Line
+			errors_appender.Append(row_line);
+			// 4. Byte Position of the row error
+			errors_appender.Append(error.row_byte_position + 1);
+			// 5. Byte Position where error occurred
+			if (!error.byte_position.IsValid()) {
+				// This means this error comes from a flush, and we don't support this yet, so we give it
+				// a null
+				errors_appender.Append(Value());
+			} else {
+				errors_appender.Append(error.byte_position.GetIndex() + 1);
+			}
+			// 6. Column Index
+			if (error.type == CSVErrorType::MAXIMUM_LINE_SIZE) {
+				errors_appender.Append(Value());
+			} else {
+				errors_appender.Append(col_idx + 1);
+			}
+			// 7. Column Name (If Applicable)
+			switch (error.type) {
+			case CSVErrorType::TOO_MANY_COLUMNS:
+			case CSVErrorType::MAXIMUM_LINE_SIZE:
+				errors_appender.Append(Value());
+				break;
+			case CSVErrorType::TOO_FEW_COLUMNS:
+				D_ASSERT(bind_data.return_names.size() > col_idx + 1);
+				errors_appender.Append(string_t(bind_data.return_names[col_idx + 1]));
+				break;
+			default:
+				errors_appender.Append(string_t(bind_data.return_names[col_idx]));
+			}
+			// 8. Error Type
+			errors_appender.Append(string_t(CSVErrorTypeToEnum(error.type)));
+			// 9. Original CSV Line
+			errors_appender.Append(string_t(error.csv_row));
+			// 10. Full Error Message
+			errors_appender.Append(string_t(error.error_message));
+			errors_appender.EndRow();
+		}
+	}
+}
+
+idx_t CSVErrorHandler::GetMaxLineLength() {
+	lock_guard<mutex> parallel_lock(main_mutex);
+	return max_line_length;
+}
+
+void CSVErrorHandler::DontPrintErrorLine() {
+	lock_guard<mutex> parallel_lock(main_mutex);
+	print_line = false;
+}
+
 void CSVErrorHandler::SetIgnoreErrors(bool ignore_errors_p) {
 	lock_guard<mutex> parallel_lock(main_mutex);
 	ignore_errors = ignore_errors_p;
@@ -115,7 +252,6 @@ CSVError CSVError::ColumnTypesError(case_insensitive_map_t sql_types_per_
 		auto it = sql_types_per_column.find(names[i]);
 		if (it != sql_types_per_column.end()) {
 			sql_types_per_column.erase(names[i]);
-			continue;
 		}
 	}
 	if (sql_types_per_column.empty()) {
@@ -165,14 +301,14 @@ CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_nam
 	                how_to_fix_it.str(), current_path);
 }
 
-CSVError CSVError::LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info,
-                                 string &csv_row, idx_t byte_position, const string &current_path) {
+CSVError CSVError::LineSizeError(const CSVReaderOptions &options, LinesPerBoundary error_info, string &csv_row,
+                                 idx_t byte_position, const string &current_path) {
 	std::ostringstream error;
-	error << "Maximum line size of " << options.maximum_line_size << " bytes exceeded. ";
-	error << "Actual Size:" << actual_size << " bytes." << '\n';
+	error << "Maximum line size of " << options.maximum_line_size.GetValue() << " bytes exceeded. ";
+	error << "Actual Size:" << csv_row.size() << " bytes." << '\n';
 
 	std::ostringstream how_to_fix_it;
-	how_to_fix_it << "Possible Solution: Change the maximum length size, e.g., max_line_size=" << actual_size + 1
+	how_to_fix_it << "Possible Solution: Change the maximum length size, e.g., max_line_size=" << csv_row.size() + 2
 	              << "\n";
 
 	return CSVError(error.str(), MAXIMUM_LINE_SIZE, 0, csv_row, error_info, byte_position, byte_position, options,
@@ -301,6 +437,9 @@ CSVError CSVError::SniffingError(const CSVReaderOptions &options, const string &
 	}
 	error << "* Check you are using the correct file compression, otherwise set it (e.g., compression = \'zstd\')"
 	      << '\n';
+	error << "* Be sure that the maximum line size is set to an appropriate value, otherwise set it (e.g., "
+	         "max_line_size=10000000)"
+	      << "\n";
 	if (options.dialect_options.state_machine_options.rfc_4180.GetValue() != false ||
 	    !options.dialect_options.state_machine_options.rfc_4180.IsSetByUser()) {
diff --git a/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp b/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp
index 1a2ee65da..ac18d42eb 100644
--- a/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp
+++ b/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp
@@ -250,7 +250,7 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value,
 		if (line_size < 0) {
 			throw BinderException("Invalid value for MAX_LINE_SIZE parameter: it cannot be smaller than 0");
 		}
-		maximum_line_size = NumericCast<idx_t>(line_size);
+		maximum_line_size.Set(NumericCast<idx_t>(line_size));
 	} else if (loption == "date_format" || loption == "dateformat") {
 		string format = ParseString(value, loption);
 		SetDateFormat(LogicalTypeId::DATE, format, true);
@@ -260,8 +260,8 @@
 	} else if (loption == "ignore_errors") {
 		ignore_errors.Set(ParseBoolean(value, loption));
 	} else if (loption == "buffer_size") {
-		buffer_size = NumericCast<idx_t>(ParseInteger(value, loption));
-		if (buffer_size == 0) {
+		buffer_size_option.Set(NumericCast<idx_t>(ParseInteger(value, loption)));
+		if (buffer_size_option == 0) {
 			throw InvalidInputException("Buffer Size option must be higher than 0");
 		}
 	} else if (loption == "decimal_separator") {
@@ -546,6 +546,18 @@ void CSVReaderOptions::Verify() {
 	if (rejects_limit != 0 && !store_rejects.GetValue()) {
 		throw BinderException("REJECTS_LIMIT option is only supported when REJECTS_TABLE is set to a table name");
 	}
+	// Validate CSV Buffer and max_line_size do not conflict.
+	if (buffer_size_option.IsSetByUser() && maximum_line_size.IsSetByUser()) {
+		if (buffer_size_option.GetValue() < maximum_line_size.GetValue()) {
+			throw BinderException("BUFFER_SIZE option was set to %d, while MAX_LINE_SIZE was set to %d. BUFFER_SIZE "
+			                      "must always be set to a value bigger than MAX_LINE_SIZE",
+			                      buffer_size_option.GetValue(), maximum_line_size.GetValue());
+		}
+	} else if (maximum_line_size.IsSetByUser() && maximum_line_size.GetValue() > max_line_size_default) {
+		// If the max line size is set by the user and bigger than we have by default, we make it part of our buffer
+		// size decision.
+		buffer_size_option.Set(CSVBuffer::ROWS_PER_BUFFER * maximum_line_size.GetValue(), false);
+	}
 }
 
 bool GetBooleanValue(const pair &option) {
@@ -727,7 +739,7 @@
 	if (rfc_4180.IsSetByUser()) {
 		named_params["rfc_4180"] = Value(GetRFC4180());
 	}
-	named_params["max_line_size"] = Value::BIGINT(NumericCast<int64_t>(maximum_line_size));
+	named_params["max_line_size"] = Value::BIGINT(NumericCast<int64_t>(maximum_line_size.GetValue()));
 	if (dialect_options.skip_rows.IsSetByUser()) {
 		named_params["skip"] = Value::UBIGINT(GetSkipRows());
 	}
@@ -748,7 +760,6 @@
 		named_params["column_names"] = StringVectorToValue(name_list);
 	}
 	named_params["all_varchar"] = Value::BOOLEAN(all_varchar);
-	named_params["maximum_line_size"] = Value::BIGINT(NumericCast<int64_t>(maximum_line_size));
 }
 
 } // namespace duckdb
diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp
index 6bb466621..30ff6eca3 100644
--- a/src/duckdb/src/function/table/version/pragma_version.cpp
+++ b/src/duckdb/src/function/table/version/pragma_version.cpp
@@ -1,5 +1,5 @@
 #ifndef DUCKDB_PATCH_VERSION
-#define DUCKDB_PATCH_VERSION "4-dev4108"
+#define DUCKDB_PATCH_VERSION "4-dev4147"
 #endif
 #ifndef DUCKDB_MINOR_VERSION
 #define DUCKDB_MINOR_VERSION 1
@@ -8,10 +8,10 @@
 #define DUCKDB_MAJOR_VERSION 1
 #endif
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "v1.1.4-dev4108"
+#define DUCKDB_VERSION "v1.1.4-dev4147"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "acdbf60889"
+#define DUCKDB_SOURCE_ID "c8fa9aee78"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp
index 039c06abe..99ce3c5ba 100644
--- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp
+++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp
@@ -55,10 +55,10 @@ class CSVBuffer {
 	shared_ptr<CSVBuffer> Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number, bool &has_seaked);
 
 	//! Gets the buffer actual size
-	idx_t GetBufferSize();
+	idx_t GetBufferSize() const;
 
 	//! If this buffer is the last buffer of the CSV File
-	bool IsCSVFileLastBuffer();
+	bool IsCSVFileLastBuffer() const;
 
 	//! Allocates internal buffer, sets 'block' and 'handle' variables.
 	void AllocateBuffer(idx_t buffer_size);
@@ -77,13 +77,9 @@ class CSVBuffer {
 	}
 
 	//! By default, we use CSV_BUFFER_SIZE to allocate each buffer
-	//! TODO: Should benchmarks other values
-	static constexpr idx_t CSV_BUFFER_SIZE = 32000000; // 32MB
-	//! In case the file has a size < 32MB, we will use this size instead
-	//! This is to avoid mallocing a lot of memory for a small file
-	//! And if it's a compressed file we can't use the actual size of the file
-	static constexpr idx_t CSV_MINIMUM_BUFFER_SIZE = 8000000; // 8MB
-	//! If this is the last buffer of the CSV File
+	static constexpr idx_t ROWS_PER_BUFFER = 16;
+	static constexpr idx_t MIN_ROWS_PER_BUFFER = 4;
+
 	bool last_buffer = false;
 
 private:
diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
index 6dd8a22ec..a31fd6190 100644
--- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
+++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
@@ -17,9 +17,12 @@
 #include "duckdb/execution/operator/csv_scanner/header_value.hpp"
 
 namespace duckdb {
+class InternalAppender;
+class CSVFileScan;
+class CSVRejectsTable;
+struct ReadCSVData;
 
 //! Object that holds information on how many lines each csv batch read.
-
 class LinesPerBoundary {
 public:
 	LinesPerBoundary();
@@ -27,13 +30,6 @@ class LinesPerBoundary {
 
 	idx_t boundary_idx = 0;
 	idx_t lines_in_batch = 0;
-
-	bool operator<(const LinesPerBoundary &other) const {
-		if (boundary_idx < other.boundary_idx) {
-			return true;
-		}
-		return lines_in_batch < other.lines_in_batch;
-	}
 };
 
 enum CSVErrorType : uint8_t {
@@ -63,8 +59,8 @@ class CSVError {
 	                           idx_t column_idx, string &csv_row, LinesPerBoundary error_info, idx_t row_byte_position,
 	                           optional_idx byte_position, LogicalTypeId type, const string &current_path);
 	//! Produces error for when the line size exceeds the maximum line size option
-	static CSVError LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info,
-	                              string &csv_row, idx_t byte_position, const string &current_path);
+	static CSVError LineSizeError(const CSVReaderOptions &options, LinesPerBoundary error_info, string &csv_row,
+	                              idx_t byte_position, const string &current_path);
 	//! Produces error for when the state machine reaches an invalid state
 	static CSVError InvalidState(const CSVReaderOptions &options, idx_t current_column, LinesPerBoundary error_info,
 	                             string &csv_row, idx_t row_byte_position, optional_idx byte_position,
@@ -121,26 +117,27 @@ class CSVErrorHandler {
 public:
 	explicit CSVErrorHandler(bool ignore_errors = false);
 	//! Throws the error
-	void Error(CSVError csv_error, bool force_error = false);
+	void Error(const CSVError &csv_error, bool force_error = false);
 	//! If we have a cached error, and we can now error, we error.
 	void ErrorIfNeeded();
+	//! Throws an error if a given type exists
+	void ErrorIfTypeExists(CSVErrorType error_type);
 	//! Inserts a finished error info
 	void Insert(idx_t boundary_idx, idx_t rows);
 	idx_t GetLine(const LinesPerBoundary &error_info);
 	void NewMaxLineSize(idx_t scan_line_size);
 	//! Returns true if there are any errors
 	bool AnyErrors();
-	//! Set of errors
-	map<LinesPerBoundary, vector<CSVError>> errors;
+	bool HasError(CSVErrorType error_type);
+	idx_t GetMaxLineLength();
 
-	idx_t GetMaxLineLength() const {
-		return max_line_length;
-	}
-	void DontPrintErrorLine() {
-		print_line = false;
-	}
+	void DontPrintErrorLine();
 	void SetIgnoreErrors(bool ignore_errors);
+	idx_t GetSize();
+
+	void FillRejectsTable(InternalAppender &errors_appender, idx_t file_idx, idx_t scan_idx, const CSVFileScan &file,
+	                      CSVRejectsTable &rejects, const ReadCSVData &bind_data, idx_t limit);
 
 private:
 	//! Private methods should always be locked by parent method.
@@ -159,6 +156,8 @@ class CSVErrorHandler {
 	idx_t max_line_length = 0;
 	bool ignore_errors = false;
 	bool print_line = true;
+	//! Set of errors
+	vector<CSVError> errors;
 };
 
 } // namespace duckdb
diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp
index 0fc506659..4e0ee86fd 100644
--- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp
+++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp
@@ -87,7 +87,8 @@ struct CSVReaderOptions {
 	//===--------------------------------------------------------------------===//
 	//! Maximum CSV line size: specified because if we reach this amount, we likely have wrong delimiters (default: 2MB)
 	//! note that this is the guaranteed line length that will succeed, longer lines may be accepted if slightly above
-	idx_t maximum_line_size = 2097152;
+	static constexpr idx_t max_line_size_default = 2000000;
+	CSVOption<idx_t> maximum_line_size = max_line_size_default;
 	//! Whether header names shall be normalized
 	bool normalize_names = false;
 	//! True, if column with that index must skip null check
@@ -108,7 +109,7 @@
 	//! Multi-file reader options
 	MultiFileReaderOptions file_options;
 	//! Buffer Size (Parallel Scan)
-	idx_t buffer_size = CSVBuffer::CSV_BUFFER_SIZE;
+	CSVOption<idx_t> buffer_size_option = CSVBuffer::ROWS_PER_BUFFER * max_line_size_default;
 	//! Decimal separator when reading as numeric
 	string decimal_separator = ".";
 	//! Whether to pad rows that do not have enough columns with NULL values
diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp
index 9f0d97820..da796d79e 100644
--- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp
+++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp
@@ -27,7 +27,7 @@ namespace duckdb {
 struct CSVBoundary {
 	CSVBoundary(idx_t buffer_idx, idx_t buffer_pos, idx_t boundary_idx, idx_t end_pos);
 	CSVBoundary();
-	void Print();
+	void Print() const;
 	//! Start Buffer index of the file where we start scanning
 	idx_t buffer_idx = 0;
 	//! Start Buffer position of the buffer of the file where we start scanning
@@ -53,10 +53,10 @@ struct CSVIterator {
 public:
 	CSVIterator();
 
-	void Print();
+	void Print() const;
 	//! Moves the boundary to the next one to be scanned, if there are no next boundaries, it returns False
 	//! Otherwise, if there are boundaries, it returns True
-	bool Next(CSVBufferManager &buffer_manager);
+	bool Next(CSVBufferManager &buffer_manager, const CSVReaderOptions &reader_options);
 	//! If boundary is set
 	bool IsBoundarySet() const;
 
@@ -67,16 +67,20 @@ struct CSVIterator {
 	void SetCurrentPositionToBoundary();
 
-	void SetCurrentBoundaryToPosition(bool single_threaded);
+	void SetCurrentBoundaryToPosition(bool single_threaded, const CSVReaderOptions &reader_options);
 
 	void SetStart(idx_t pos);
 	void SetEnd(idx_t pos);
 
 	// Gets the current position for the file
-	idx_t GetGlobalCurrentPos();
+	idx_t GetGlobalCurrentPos() const;
 
-	//! 8 MB TODO: Should benchmarks other values
-	static constexpr idx_t BYTES_PER_THREAD = 8000000;
+	//! Checks if we are done with this iterator
+	void CheckIfDone();
+
+	static idx_t BytesPerThread(const CSVReaderOptions &reader_options);
+
+	static constexpr idx_t ROWS_PER_THREAD = 4;
 
 	CSVPosition pos;
 
diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
index ad2250660..a2a3d5372 100644
--- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
+++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
@@ -96,7 +96,8 @@ class CurrentError {
 
 class LineError {
 public:
-	explicit LineError(bool ignore_errors_p) : is_error_in_line(false), ignore_errors(ignore_errors_p) {};
+	explicit LineError(const idx_t scan_id_p, const bool ignore_errors_p)
+	    : is_error_in_line(false), ignore_errors(ignore_errors_p), scan_id(scan_id_p) {};
 	//! We clear up our CurrentError Vector
 	void Reset() {
 		current_errors.clear();
@@ -136,10 +137,15 @@
 		return !current_errors.empty();
 	}
 
+	idx_t Size() const {
+		return current_errors.size();
+	}
+
 private:
 	vector<CurrentError> current_errors;
 	bool is_error_in_line;
 	bool ignore_errors;
+	idx_t scan_id;
 };
 
 struct ParseTypeInfo {
@@ -159,13 +165,14 @@ struct ParseTypeInfo {
 	uint8_t scale;
 	uint8_t width;
 };
+
 class StringValueResult : public ScannerResult {
 public:
 	StringValueResult(CSVStates &states, CSVStateMachine &state_machine,
 	                  const shared_ptr<CSVBufferHandle> &buffer_handle, Allocator &buffer_allocator,
 	                  idx_t result_size_p, idx_t buffer_position, CSVErrorHandler &error_handler, CSVIterator &iterator,
 	                  bool store_line_size, shared_ptr<CSVFileScan> csv_file_scan, idx_t &lines_read, bool sniffing,
-	                  string path);
+	                  string path, idx_t scan_id);
 
 	~StringValueResult();
@@ -324,6 +331,8 @@ class StringValueScanner : public BaseScanner {
 	ValidatorLine GetValidationLine();
 
 	const idx_t scanner_idx;
+	//! We use the max of idx_t to signify this is a line finder scanner.
+	static constexpr idx_t LINE_FINDER_ID = NumericLimits<idx_t>::Maximum();
 	//! Variable that manages buffer tracking
 	shared_ptr<CSVBufferUsage> buffer_tracker;
 
diff --git a/src/duckdb/src/storage/serialization/serialize_nodes.cpp b/src/duckdb/src/storage/serialization/serialize_nodes.cpp
index 59fc2681d..6c0bc4ab1 100644
--- a/src/duckdb/src/storage/serialization/serialize_nodes.cpp
+++ b/src/duckdb/src/storage/serialization/serialize_nodes.cpp
@@ -174,7 +174,7 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const {
 	serializer.WritePropertyWithDefault<vector<string>>(102, "null_str", null_str);
 	serializer.WriteProperty(103, "compression", compression);
 	serializer.WritePropertyWithDefault(104, "allow_quoted_nulls", allow_quoted_nulls);
-	serializer.WritePropertyWithDefault(105, "maximum_line_size", maximum_line_size);
+	serializer.WriteProperty<CSVOption<idx_t>>(105, "maximum_line_size", maximum_line_size);
 	serializer.WritePropertyWithDefault(106, "normalize_names", normalize_names);
 	serializer.WritePropertyWithDefault<vector<bool>>(107, "force_not_null", force_not_null);
 	serializer.WritePropertyWithDefault(108, "all_varchar", all_varchar);
@@ -183,7 +183,7 @@
 	serializer.WritePropertyWithDefault(111, "file_path", file_path);
 	serializer.WritePropertyWithDefault(112, "decimal_separator", decimal_separator);
 	serializer.WritePropertyWithDefault(113, "null_padding", null_padding);
-	serializer.WritePropertyWithDefault(114, "buffer_size", buffer_size);
+	/* [Deleted] (idx_t) "buffer_size" */
 	serializer.WriteProperty(115, "file_options", file_options);
 	serializer.WritePropertyWithDefault<vector<bool>>(116, "force_quote", force_quote);
 	serializer.WritePropertyWithDefault(117, "rejects_table_name", rejects_table_name, "reject_errors");
@@ -212,6 +212,7 @@
 	serializer.WriteProperty<CSVOption<bool>>(140, "rfc_4180", dialect_options.state_machine_options.rfc_4180);
 	serializer.WriteProperty<CSVOption<string>>(141, "multi_byte_delimiter", GetMultiByteDelimiter());
 	serializer.WritePropertyWithDefault(142, "multi_file_reader", multi_file_reader);
+	serializer.WriteProperty<CSVOption<idx_t>>(143, "buffer_size_option", buffer_size_option);
 }
 
 CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) {
@@ -220,7 +221,7 @@
 	auto null_str = deserializer.ReadPropertyWithDefault<vector<string>>(102, "null_str");
 	auto compression = deserializer.ReadProperty(103, "compression");
 	auto allow_quoted_nulls = deserializer.ReadPropertyWithDefault(104, "allow_quoted_nulls");
-	auto maximum_line_size = deserializer.ReadPropertyWithDefault(105, "maximum_line_size");
+	auto maximum_line_size = deserializer.ReadProperty<CSVOption<idx_t>>(105, "maximum_line_size");
 	auto normalize_names = deserializer.ReadPropertyWithDefault(106, "normalize_names");
 	auto force_not_null = deserializer.ReadPropertyWithDefault<vector<bool>>(107, "force_not_null");
 	auto all_varchar = deserializer.ReadPropertyWithDefault(108, "all_varchar");
@@ -229,7 +230,7 @@
 	auto file_path = deserializer.ReadPropertyWithDefault(111, "file_path");
 	auto decimal_separator = deserializer.ReadPropertyWithDefault(112, "decimal_separator");
 	auto null_padding = deserializer.ReadPropertyWithDefault(113, "null_padding");
-	auto buffer_size = deserializer.ReadPropertyWithDefault(114, "buffer_size");
+	deserializer.ReadDeletedProperty(114, "buffer_size");
 	auto file_options = deserializer.ReadProperty(115, "file_options");
 	auto force_quote = deserializer.ReadPropertyWithDefault<vector<bool>>(116, "force_quote");
 	auto rejects_table_name = deserializer.ReadPropertyWithExplicitDefault(117, "rejects_table_name", "reject_errors");
@@ -272,7 +273,6 @@
 	result.file_path = std::move(file_path);
 	result.decimal_separator = std::move(decimal_separator);
 	result.null_padding = null_padding;
-	result.buffer_size = buffer_size;
 	result.file_options = file_options;
 	result.force_quote = std::move(force_quote);
 	result.rejects_table_name = std::move(rejects_table_name);
@@ -297,6 +297,7 @@
 	result.encoding = std::move(encoding);
 	result.dialect_options.state_machine_options.rfc_4180 = dialect_options_state_machine_options_rfc_4180;
 	deserializer.ReadPropertyWithDefault(142, "multi_file_reader", result.multi_file_reader);
+	deserializer.ReadProperty<CSVOption<idx_t>>(143, "buffer_size_option", result.buffer_size_option);
 	return result;
 }

From fb495c09af6e743744fd0f978be7411a62f2de64 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 8 Jan 2025 13:10:25 +0000
Subject: [PATCH 04/21] vendor: Update vendored sources to
 duckdb/duckdb@ce33de966a521d1a6e86ec9579e133ff2b2534f4 (#978)

More bugfixes (duckdb/duckdb#15605)

Co-authored-by: krlmlr
---
 src/duckdb/extension/parquet/column_reader.cpp         |  8 ++++++++
 src/duckdb/extension/parquet/include/column_reader.hpp |  4 ++++
 src/duckdb/extension/parquet/parquet_extension.cpp     |  8 +++++++-
 src/duckdb/extension/parquet/parquet_reader.cpp        |  5 +++--
 src/duckdb/src/execution/aggregate_hashtable.cpp       |  3 +++
 src/duckdb/src/function/scalar/sequence/nextval.cpp    |  3 +++
 src/duckdb/src/function/scalar/string/length.cpp       | 10 ++++++----
 .../src/function/table/version/pragma_version.cpp      |  6 +++---
 .../binder/expression/bind_operator_expression.cpp     |  3 +++
 .../src/planner/binder/statement/bind_create.cpp       |  2 +-
 src/duckdb/src/storage/arena_allocator.cpp             |  9 +++++----
 11 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/src/duckdb/extension/parquet/column_reader.cpp b/src/duckdb/extension/parquet/column_reader.cpp
index cf1f5ba50..8a2112c35 100644
--- a/src/duckdb/extension/parquet/column_reader.cpp
+++ b/src/duckdb/extension/parquet/column_reader.cpp
@@ -135,6 +135,14 @@ const SchemaElement &ColumnReader::Schema() const {
 	return schema;
 }
 
+optional_ptr<const SchemaElement> ColumnReader::GetParentSchema() const {
+	return parent_schema;
+}
+
+void ColumnReader::SetParentSchema(const SchemaElement &parent_schema_p) {
+	parent_schema = &parent_schema_p;
+}
+
 idx_t ColumnReader::FileIdx() const {
 	return file_idx;
 }
diff --git a/src/duckdb/extension/parquet/include/column_reader.hpp b/src/duckdb/extension/parquet/include/column_reader.hpp
index dd89cadd5..23d4fc3d4 100644
--- a/src/duckdb/extension/parquet/include/column_reader.hpp
+++ b/src/duckdb/extension/parquet/include/column_reader.hpp
@@ -57,6 +57,9 @@ class ColumnReader {
 	ParquetReader &Reader();
 	const LogicalType &Type() const;
 	const SchemaElement &Schema() const;
+	optional_ptr<const SchemaElement> GetParentSchema() const;
+	void SetParentSchema(const SchemaElement &parent_schema);
+
 	idx_t FileIdx() const;
 	idx_t MaxDefine() const;
 	idx_t MaxRepeat() const;
@@ -140,6 +143,7 @@ class ColumnReader {
 
 protected:
 	const SchemaElement &schema;
+	optional_ptr<const SchemaElement> parent_schema;
 
 	idx_t file_idx;
 	idx_t max_define;
diff --git a/src/duckdb/extension/parquet/parquet_extension.cpp b/src/duckdb/extension/parquet/parquet_extension.cpp
index 8cbea8090..500b89919 100644
a/src/duckdb/extension/parquet/parquet_extension.cpp +++ b/src/duckdb/extension/parquet/parquet_extension.cpp @@ -313,9 +313,15 @@ static void InitializeParquetReader(ParquetReader &reader, const ParquetReadBind unordered_map field_id_to_column_index; auto &column_readers = reader.root_reader->Cast().child_readers; for (idx_t column_index = 0; column_index < column_readers.size(); column_index++) { - auto &column_schema = column_readers[column_index]->Schema(); + auto &column_reader = *column_readers[column_index]; + auto &column_schema = column_reader.Schema(); if (column_schema.__isset.field_id) { field_id_to_column_index[column_schema.field_id] = column_index; + } else if (column_reader.GetParentSchema()) { + auto &parent_column_schema = *column_reader.GetParentSchema(); + if (parent_column_schema.__isset.field_id) { + field_id_to_column_index[parent_column_schema.field_id] = column_index; + } } } diff --git a/src/duckdb/extension/parquet/parquet_reader.cpp b/src/duckdb/extension/parquet/parquet_reader.cpp index e6ce7aa92..1d2565a2d 100644 --- a/src/duckdb/extension/parquet/parquet_reader.cpp +++ b/src/duckdb/extension/parquet/parquet_reader.cpp @@ -418,9 +418,10 @@ unique_ptr ParquetReader::CreateReaderRecursive(ClientContext &con } if (is_repeated) { result_type = LogicalType::LIST(result_type); - return make_uniq(*this, result_type, s_ele, this_idx, max_define, max_repeat, - std::move(result)); + result = make_uniq(*this, result_type, s_ele, this_idx, max_define, max_repeat, + std::move(result)); } + result->SetParentSchema(s_ele); return result; } else { // leaf node if (!s_ele.__isset.type) { diff --git a/src/duckdb/src/execution/aggregate_hashtable.cpp b/src/duckdb/src/execution/aggregate_hashtable.cpp index 8a2dd448c..198b77fac 100644 --- a/src/duckdb/src/execution/aggregate_hashtable.cpp +++ b/src/duckdb/src/execution/aggregate_hashtable.cpp @@ -131,6 +131,9 @@ void GroupedAggregateHashTable::Abandon() { // Start over ClearPointerTable(); count = 0; + + // Resetting the id ensures the dict state is reset properly when needed + state.dict_state.dictionary_id = string(); } void GroupedAggregateHashTable::Repartition() { diff --git a/src/duckdb/src/function/scalar/sequence/nextval.cpp b/src/duckdb/src/function/scalar/sequence/nextval.cpp index 7e19dae7d..6738383dc 100644 --- a/src/duckdb/src/function/scalar/sequence/nextval.cpp +++ b/src/duckdb/src/function/scalar/sequence/nextval.cpp @@ -91,6 +91,9 @@ static void NextValFunction(DataChunk &args, ExpressionState &state, Vector &res static unique_ptr NextValBind(ScalarFunctionBindInput &bind_input, ScalarFunction &, vector> &arguments) { + if (arguments[0]->HasParameter() || arguments[0]->return_type.id() == LogicalTypeId::UNKNOWN) { + throw ParameterNotResolvedException(); + } if (!arguments[0]->IsFoldable()) { throw NotImplementedException( "currval/nextval requires a constant sequence - non-constant sequences are no longer supported"); diff --git a/src/duckdb/src/function/scalar/string/length.cpp b/src/duckdb/src/function/scalar/string/length.cpp index f2ec8bae1..538646419 100644 --- a/src/duckdb/src/function/scalar/string/length.cpp +++ b/src/duckdb/src/function/scalar/string/length.cpp @@ -110,12 +110,14 @@ static void ArrayLengthFunction(DataChunk &args, ExpressionState &state, Vector static unique_ptr ArrayOrListLengthBind(ClientContext &context, ScalarFunction &bound_function, vector> &arguments) { - if (arguments[0]->HasParameter()) { + if (arguments[0]->HasParameter() || arguments[0]->return_type.id() == 
LogicalTypeId::UNKNOWN) { throw ParameterNotResolvedException(); } - if (arguments[0]->return_type.id() == LogicalTypeId::ARRAY) { + + const auto &arg_type = arguments[0]->return_type.id(); + if (arg_type == LogicalTypeId::ARRAY) { bound_function.function = ArrayLengthFunction; - } else if (arguments[0]->return_type.id() == LogicalTypeId::LIST) { + } else if (arg_type == LogicalTypeId::LIST) { bound_function.function = ListLengthFunction; } else { // Unreachable @@ -183,7 +185,7 @@ static void ArrayLengthBinaryFunction(DataChunk &args, ExpressionState &state, V static unique_ptr ArrayOrListLengthBinaryBind(ClientContext &context, ScalarFunction &bound_function, vector> &arguments) { - if (arguments[0]->HasParameter()) { + if (arguments[0]->HasParameter() || arguments[0]->return_type.id() == LogicalTypeId::UNKNOWN) { throw ParameterNotResolvedException(); } auto type = arguments[0]->return_type; diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 30ff6eca3..c1d5d02cd 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4147" +#define DUCKDB_PATCH_VERSION "4-dev4157" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4147" +#define DUCKDB_VERSION "v1.1.4-dev4157" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "c8fa9aee78" +#define DUCKDB_SOURCE_ID "ce33de966a" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp b/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp index 81addd406..9326c8849 100644 --- a/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +++ b/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp @@ -134,6 +134,9 @@ BindResult ExpressionBinder::BindExpression(OperatorExpression &op, idx_t depth) D_ASSERT(op.children[0]->GetExpressionClass() == ExpressionClass::BOUND_EXPRESSION); D_ASSERT(op.children[1]->GetExpressionClass() == ExpressionClass::BOUND_EXPRESSION); auto &extract_exp = BoundExpression::GetExpression(*op.children[0]); + if (extract_exp->HasParameter() || extract_exp->return_type.id() == LogicalTypeId::UNKNOWN) { + throw ParameterNotResolvedException(); + } auto &name_exp = BoundExpression::GetExpression(*op.children[1]); const auto &extract_expr_type = extract_exp->return_type; if (extract_expr_type.id() != LogicalTypeId::STRUCT && extract_expr_type.id() != LogicalTypeId::UNION && diff --git a/src/duckdb/src/planner/binder/statement/bind_create.cpp b/src/duckdb/src/planner/binder/statement/bind_create.cpp index 495a687e6..8d53446fc 100644 --- a/src/duckdb/src/planner/binder/statement/bind_create.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_create.cpp @@ -196,7 +196,7 @@ SchemaCatalogEntry &Binder::BindCreateFunctionInfo(CreateInfo &info) { if (param.IsQualified()) { throw BinderException("Invalid parameter name '%s': must be unqualified", param.ToString()); } - dummy_types.emplace_back(LogicalType::SQLNULL); + dummy_types.emplace_back(LogicalType::UNKNOWN); dummy_names.push_back(param.GetColumnName()); } // default parameters diff --git a/src/duckdb/src/storage/arena_allocator.cpp 
b/src/duckdb/src/storage/arena_allocator.cpp index a67eeb286..000444f75 100644 --- a/src/duckdb/src/storage/arena_allocator.cpp +++ b/src/duckdb/src/storage/arena_allocator.cpp @@ -107,12 +107,13 @@ data_ptr_t ArenaAllocator::Reallocate(data_ptr_t pointer, idx_t old_size, idx_t return pointer; } - auto head_ptr = head->data.get() + head->current_position; + const auto head_ptr = head->data.get() + head->current_position - old_size; + int64_t current_position = NumericCast(head->current_position); int64_t diff = NumericCast(size) - NumericCast(old_size); - if (pointer == head_ptr && (size < old_size || NumericCast(head->current_position) + diff <= - NumericCast(head->maximum_size))) { + if (pointer == head_ptr && + (size < old_size || current_position + diff <= NumericCast(head->maximum_size))) { // passed pointer is the head pointer, and the diff fits on the current chunk - head->current_position += NumericCast(diff); + head->current_position = NumericCast(current_position + diff); return pointer; } else { // allocate new memory
From 0dc66384eab9fd22bb29dacc6d1f7060d017f362 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 8 Jan 2025 15:33:33 +0000 Subject: [PATCH 05/21] vendor: Update vendored sources to duckdb/duckdb@69dff93d1a956c68eb9dc603d24d06719e57749d (#979) csv_scanner: fix order of evaluation of arguments to a function (duckdb/duckdb#15609) Co-authored-by: krlmlr --- .../csv_scanner/table_function/global_csv_state.cpp | 3 ++- src/duckdb/src/function/table/version/pragma_version.cpp | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 25691076d..b1a4a6165 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -27,7 +27,8 @@ CSVGlobalState::CSVGlobalState(ClientContext &context_p, const shared_ptrstart_iterator.done && file_scans.size() < files.size()) { - file_scans.emplace_back(make_uniq(context, files[++cur_file_idx], options, cur_file_idx, bind_data, + cur_file_idx++; + file_scans.emplace_back(make_uniq(context, files[cur_file_idx], options, cur_file_idx, bind_data, column_ids, file_schema, false)); } // There are situations where we only support single threaded scanning diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index c1d5d02cd..4641dd23e 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4157" +#define DUCKDB_PATCH_VERSION "4-dev4159" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 #endif @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4157" +#define DUCKDB_VERSION "v1.1.4-dev4159" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "ce33de966a" +#define DUCKDB_SOURCE_ID "69dff93d1a" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp"
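
The one-line change in global_csv_state.cpp above exists because C++ leaves the order in which function arguments are evaluated unspecified: in the original code, files[++cur_file_idx] and the bare cur_file_idx were two arguments of the same make_uniq call, so the constructed scan could see a file and an index that disagree (and before C++17 the unsequenced modification-and-read was outright undefined behaviour). A minimal standalone sketch of the pitfall; OpenScan and its arguments are illustrative stand-ins, not DuckDB API:

    #include <cstdio>

    static void OpenScan(int file_slot, int reported_idx) {
        std::printf("scanning slot %d, reported as index %d\n", file_slot, reported_idx);
    }

    int main() {
        int cur_file_idx = 0;
        // Buggy pattern: the compiler may read 'cur_file_idx' for the second
        // argument before or after '++cur_file_idx' runs, so the two arguments
        // can disagree depending on the compiler:
        //     OpenScan(++cur_file_idx, cur_file_idx);
        // The patched form sequences the increment before the call, so both
        // arguments always observe the same value:
        cur_file_idx++;
        OpenScan(cur_file_idx, cur_file_idx);
        return 0;
    }
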
From 9f5e2bb3697d06c8c692b90e9988dd8eab0b2e47 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 8 Jan 2025 16:01:12 +0000 Subject: [PATCH 06/21] vendor: Update vendored sources to duckdb/duckdb@dcb9627543af52e4322de464003259a6b0e7fdb4 (#980) [Dev] Fix an unnecessary copy in Dictionary compression (duckdb/duckdb#15594) Co-authored-by: krlmlr --- src/duckdb/src/function/table/version/pragma_version.cpp | 6 +++--- .../duckdb/storage/compression/dictionary/compression.hpp | 1 - .../src/storage/compression/dictionary/compression.cpp | 7 ++++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 4641dd23e..5a680f9c6 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4159" +#define DUCKDB_PATCH_VERSION "4-dev4161" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 #endif @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4159" +#define DUCKDB_VERSION "v1.1.4-dev4161" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "69dff93d1a" +#define DUCKDB_SOURCE_ID "dcb9627543" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/storage/compression/dictionary/compression.hpp b/src/duckdb/src/include/duckdb/storage/compression/dictionary/compression.hpp index b0f29dc59..09f1f44bd 100644 --- a/src/duckdb/src/include/duckdb/storage/compression/dictionary/compression.hpp +++ b/src/duckdb/src/include/duckdb/storage/compression/dictionary/compression.hpp @@ -47,7 +47,6 @@ struct DictionaryCompressionCompressState : public DictionaryCompressionState { data_ptr_t current_end_ptr; // Buffers and map for current segment - StringHeap heap; string_map_t current_string_map; vector index_buffer; vector selection_buffer; diff --git a/src/duckdb/src/storage/compression/dictionary/compression.cpp b/src/duckdb/src/storage/compression/dictionary/compression.cpp index 064697fc7..64a3db402 100644 --- a/src/duckdb/src/storage/compression/dictionary/compression.cpp +++ b/src/duckdb/src/storage/compression/dictionary/compression.cpp @@ -6,8 +6,7 @@ namespace duckdb { DictionaryCompressionCompressState::DictionaryCompressionCompressState(ColumnDataCheckpointData &checkpoint_data_p, const CompressionInfo &info) : DictionaryCompressionState(info), checkpoint_data(checkpoint_data_p), - function(checkpoint_data.GetCompressionFunction(CompressionType::COMPRESSION_DICTIONARY)), - heap(BufferAllocator::Get(checkpoint_data.GetDatabase())) { + function(checkpoint_data.GetCompressionFunction(CompressionType::COMPRESSION_DICTIONARY)) { CreateEmptySegment(checkpoint_data.GetRowGroup().start); } @@ -72,7 +71,9 @@ void DictionaryCompressionCompressState::AddNewString(string_t str) { if (str.IsInlined()) { current_string_map.insert({str, index_buffer.size() - 1}); } else { - current_string_map.insert({heap.AddBlob(str), index_buffer.size() - 1}); + string_t dictionary_string((const char *)dict_pos, UnsafeNumericCast(str.GetSize())); // NOLINT + D_ASSERT(!dictionary_string.IsInlined()); + current_string_map.insert({dictionary_string, index_buffer.size() - 1}); } DictionaryCompression::SetDictionary(*current_segment, current_handle, current_dictionary);
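
The removal of the StringHeap above works because, by the time AddNewString deduplicates a long string, its bytes have already been written into the segment's dictionary buffer at dict_pos, and that buffer stays pinned while the segment is being compressed; the map can therefore key on a string_t pointing straight into the dictionary instead of a second heap copy. The D_ASSERT(!dictionary_string.IsInlined()) guards the invariant that only non-inlined strings take this path. A standalone sketch of the idea, with std::vector and std::string_view standing in for the pinned buffer and string_t:

    #include <cstdint>
    #include <string_view>
    #include <unordered_map>
    #include <vector>

    int main() {
        std::vector<char> dictionary;
        dictionary.reserve(1024); // keeps the data pointer stable for this sketch
        std::unordered_map<std::string_view, uint32_t> current_string_map;

        auto add_new_string = [&](std::string_view str) {
            const char *dict_pos = dictionary.data() + dictionary.size();
            dictionary.insert(dictionary.end(), str.begin(), str.end());
            // Key the map on the bytes inside the dictionary: no extra heap copy.
            current_string_map.emplace(std::string_view(dict_pos, str.size()),
                                       static_cast<uint32_t>(current_string_map.size()));
        };
        add_new_string("duckdb");
        add_new_string("dictionary");
        return current_string_map.count("duckdb") == 1 ? 0 : 1;
    }
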
From f4313720f00957d4a7c1dff372a573c18a443a76 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 01:17:18 +0000 Subject: [PATCH 07/21] fledge: Bump version to 1.1.3.9035 (#982) Co-authored-by: krlmlr --- DESCRIPTION | 2 +- NEWS.md | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 87528928b..f5190378f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: duckdb Title: DBI Package for the DuckDB Database Management System -Version: 1.1.3.9034 +Version: 1.1.3.9035 Authors@R: c( person("Hannes", "Mühleisen", , "hannes@cwi.nl", role = "aut", comment = c(ORCID = "0000-0001-8552-0029")), diff --git a/NEWS.md b/NEWS.md index 2f6f554ee..0b901bc93 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,18 @@ +# duckdb 1.1.3.9035 + +## vendor + +- Update vendored sources to duckdb/duckdb@dcb9627543af52e4322de464003259a6b0e7fdb4 (#980). + +- Update vendored sources to duckdb/duckdb@69dff93d1a956c68eb9dc603d24d06719e57749d (#979). + +- Update vendored sources to duckdb/duckdb@ce33de966a521d1a6e86ec9579e133ff2b2534f4 (#978). + +- Update vendored sources to duckdb/duckdb@c8fa9aee7858909c625b5c3abcc3a257c5d9d934 (#977). + + # duckdb 1.1.3.9034 ## vendor
From d3de74adb3a1954b849f3f9f640cab395a62873a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 10:46:51 +0000 Subject: [PATCH 08/21] vendor: Update vendored sources to duckdb/duckdb@fbc4d92fe6239536948a49d093196534c3492656 (#983) [Compression] `Dictionary` compression data now also includes the validity data (duckdb/duckdb#15591) Update URLs (duckdb/duckdb#15617) [Julia] Fix incorrect types (duckdb/duckdb#15612) Co-authored-by: krlmlr --- src/duckdb/src/common/enum_util.cpp | 24 ++++- .../src/common/enums/compression_type.cpp | 4 + .../src/function/compression_config.cpp | 2 + .../function/table/version/pragma_version.cpp | 6 +- .../src/include/duckdb/common/enum_util.hpp | 8 ++ .../duckdb/common/enums/compression_type.hpp | 5 +- .../function/compression/compression.hpp | 5 + .../duckdb/function/compression_function.hpp | 6 ++ .../storage/compression/empty_validity.hpp | 100 ++++++++++++++++++ .../duckdb/storage/table/column_data.hpp | 1 + .../table/column_data_checkpointer.hpp | 1 + src/duckdb/src/main/database.cpp | 1 + .../compression/dictionary/decompression.cpp | 11 +- .../compression/dictionary_compression.cpp | 4 +- .../storage/compression/empty_validity.cpp | 15 +++ src/duckdb/src/storage/table/column_data.cpp | 10 ++ .../table/column_data_checkpointer.cpp | 35 ++++-- src/duckdb/ub_src_storage_compression.cpp | 2 + 18 files changed, 223 insertions(+), 17 deletions(-) create mode 100644 src/duckdb/src/include/duckdb/storage/compression/empty_validity.hpp create mode 100644 src/duckdb/src/storage/compression/empty_validity.cpp diff --git a/src/duckdb/src/common/enum_util.cpp b/src/duckdb/src/common/enum_util.cpp index 23d9c6e27..6baa6854f 100644 --- a/src/duckdb/src/common/enum_util.cpp +++ b/src/duckdb/src/common/enum_util.cpp @@ -86,6 +86,7 @@ #include "duckdb/execution/operator/csv_scanner/quote_rules.hpp" #include "duckdb/execution/reservoir_sample.hpp" #include "duckdb/function/aggregate_state.hpp" +#include "duckdb/function/compression_function.hpp" #include "duckdb/function/copy_function.hpp" #include "duckdb/function/function.hpp" #include "duckdb/function/macro_function.hpp" @@ -859,6 +860,7 @@ const StringUtil::EnumStringLiteral *GetCompressionTypeValues() { { static_cast(CompressionType::COMPRESSION_ALPRD), "COMPRESSION_ALPRD" }, {
static_cast(CompressionType::COMPRESSION_ZSTD), "COMPRESSION_ZSTD" }, { static_cast(CompressionType::COMPRESSION_ROARING), "COMPRESSION_ROARING" }, + { static_cast(CompressionType::COMPRESSION_EMPTY), "COMPRESSION_EMPTY" }, { static_cast(CompressionType::COMPRESSION_COUNT), "COMPRESSION_COUNT" } }; return values; @@ -866,12 +868,30 @@ const StringUtil::EnumStringLiteral *GetCompressionTypeValues() { template<> const char* EnumUtil::ToChars(CompressionType value) { - return StringUtil::EnumToString(GetCompressionTypeValues(), 15, "CompressionType", static_cast(value)); + return StringUtil::EnumToString(GetCompressionTypeValues(), 16, "CompressionType", static_cast(value)); } template<> CompressionType EnumUtil::FromString(const char *value) { - return static_cast(StringUtil::StringToEnum(GetCompressionTypeValues(), 15, "CompressionType", value)); + return static_cast(StringUtil::StringToEnum(GetCompressionTypeValues(), 16, "CompressionType", value)); +} + +const StringUtil::EnumStringLiteral *GetCompressionValidityValues() { + static constexpr StringUtil::EnumStringLiteral values[] { + { static_cast(CompressionValidity::REQUIRES_VALIDITY), "REQUIRES_VALIDITY" }, + { static_cast(CompressionValidity::NO_VALIDITY_REQUIRED), "NO_VALIDITY_REQUIRED" } + }; + return values; +} + +template<> +const char* EnumUtil::ToChars(CompressionValidity value) { + return StringUtil::EnumToString(GetCompressionValidityValues(), 2, "CompressionValidity", static_cast(value)); +} + +template<> +CompressionValidity EnumUtil::FromString(const char *value) { + return static_cast(StringUtil::StringToEnum(GetCompressionValidityValues(), 2, "CompressionValidity", value)); } const StringUtil::EnumStringLiteral *GetConflictManagerModeValues() { diff --git a/src/duckdb/src/common/enums/compression_type.cpp b/src/duckdb/src/common/enums/compression_type.cpp index b0528a7dd..3bc66fe08 100644 --- a/src/duckdb/src/common/enums/compression_type.cpp +++ b/src/duckdb/src/common/enums/compression_type.cpp @@ -24,6 +24,8 @@ bool CompressionTypeIsDeprecated(CompressionType compression_type) { CompressionType CompressionTypeFromString(const string &str) { auto compression = StringUtil::Lower(str); + //! NOTE: this explicitly does not include 'constant' and 'empty validity', these are internal compression functions + //! 
not general purpose if (compression == "uncompressed") { return CompressionType::COMPRESSION_UNCOMPRESSED; } else if (compression == "rle") { @@ -83,6 +85,8 @@ string CompressionTypeToString(CompressionType type) { return "ALPRD"; case CompressionType::COMPRESSION_ROARING: return "Roaring"; + case CompressionType::COMPRESSION_EMPTY: + return "Empty Validity"; default: throw InternalException("Unrecognized compression type!"); } diff --git a/src/duckdb/src/function/compression_config.cpp b/src/duckdb/src/function/compression_config.cpp index c0a7f1609..deece1a85 100644 --- a/src/duckdb/src/function/compression_config.cpp +++ b/src/duckdb/src/function/compression_config.cpp @@ -28,6 +28,8 @@ static const DefaultCompressionMethod internal_compression_methods[] = { {CompressionType::COMPRESSION_FSST, FSSTFun::GetFunction, FSSTFun::TypeIsSupported}, {CompressionType::COMPRESSION_ZSTD, ZSTDFun::GetFunction, ZSTDFun::TypeIsSupported}, {CompressionType::COMPRESSION_ROARING, RoaringCompressionFun::GetFunction, RoaringCompressionFun::TypeIsSupported}, + {CompressionType::COMPRESSION_EMPTY, EmptyValidityCompressionFun::GetFunction, + EmptyValidityCompressionFun::TypeIsSupported}, {CompressionType::COMPRESSION_AUTO, nullptr, nullptr}}; static optional_ptr FindCompressionFunction(CompressionFunctionSet &set, CompressionType type, diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 5a680f9c6..29ac2f576 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4161" +#define DUCKDB_PATCH_VERSION "4-dev4188" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4161" +#define DUCKDB_VERSION "v1.1.4-dev4188" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "dcb9627543" +#define DUCKDB_SOURCE_ID "fbc4d92fe6" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/enum_util.hpp b/src/duckdb/src/include/duckdb/common/enum_util.hpp index 6057a89b2..6215e10fd 100644 --- a/src/duckdb/src/include/duckdb/common/enum_util.hpp +++ b/src/duckdb/src/include/duckdb/common/enum_util.hpp @@ -102,6 +102,8 @@ enum class CompressedMaterializationDirection : uint8_t; enum class CompressionType : uint8_t; +enum class CompressionValidity : uint8_t; + enum class ConflictManagerMode : uint8_t; enum class ConstraintType : uint8_t; @@ -484,6 +486,9 @@ const char* EnumUtil::ToChars(CompressedMate template<> const char* EnumUtil::ToChars(CompressionType value); +template<> +const char* EnumUtil::ToChars(CompressionValidity value); + template<> const char* EnumUtil::ToChars(ConflictManagerMode value); @@ -1004,6 +1009,9 @@ CompressedMaterializationDirection EnumUtil::FromString CompressionType EnumUtil::FromString(const char *value); +template<> +CompressionValidity EnumUtil::FromString(const char *value); + template<> ConflictManagerMode EnumUtil::FromString(const char *value); diff --git a/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp b/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp index 6a1c30d42..a8753f12b 100644 --- a/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +++ 
b/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp @@ -16,7 +16,7 @@ namespace duckdb { enum class CompressionType : uint8_t { COMPRESSION_AUTO = 0, COMPRESSION_UNCOMPRESSED = 1, - COMPRESSION_CONSTANT = 2, + COMPRESSION_CONSTANT = 2, // internal only COMPRESSION_RLE = 3, COMPRESSION_DICTIONARY = 4, COMPRESSION_PFOR_DELTA = 5, @@ -28,7 +28,8 @@ enum class CompressionType : uint8_t { COMPRESSION_ALPRD = 11, COMPRESSION_ZSTD = 12, COMPRESSION_ROARING = 13, - COMPRESSION_COUNT // This has to stay the last entry of the type! + COMPRESSION_EMPTY = 14, // internal only + COMPRESSION_COUNT // This has to stay the last entry of the type! }; bool CompressionTypeIsDeprecated(CompressionType compression_type); diff --git a/src/duckdb/src/include/duckdb/function/compression/compression.hpp b/src/duckdb/src/include/duckdb/function/compression/compression.hpp index 28113c78e..adf5364b7 100644 --- a/src/duckdb/src/include/duckdb/function/compression/compression.hpp +++ b/src/duckdb/src/include/duckdb/function/compression/compression.hpp @@ -73,4 +73,9 @@ struct RoaringCompressionFun { static bool TypeIsSupported(const PhysicalType physical_type); }; +struct EmptyValidityCompressionFun { + static CompressionFunction GetFunction(PhysicalType type); + static bool TypeIsSupported(const PhysicalType physical_type); +}; + } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/function/compression_function.hpp b/src/duckdb/src/include/duckdb/function/compression_function.hpp index d11beaa72..b0546a5a2 100644 --- a/src/duckdb/src/include/duckdb/function/compression_function.hpp +++ b/src/duckdb/src/include/duckdb/function/compression_function.hpp @@ -203,6 +203,8 @@ typedef unique_ptr (*compression_deserialize_state_t)(Deseri //! Function prototype for cleaning up the segment state when the column data is dropped typedef void (*compression_cleanup_state_t)(ColumnSegment &segment); +enum class CompressionValidity : uint8_t { REQUIRES_VALIDITY, NO_VALIDITY_REQUIRED }; + class CompressionFunction { public: CompressionFunction(CompressionType type, PhysicalType data_type, compression_init_analyze_t init_analyze, @@ -297,6 +299,10 @@ class CompressionFunction { compression_deserialize_state_t deserialize_state; //! Cleanup the segment state (optional) compression_cleanup_state_t cleanup_state; + + //! Whether the validity mask should be separately compressed + //! or this compression function can also be used to decompress the validity + CompressionValidity validity = CompressionValidity::REQUIRES_VALIDITY; }; //! 
The set of compression functions diff --git a/src/duckdb/src/include/duckdb/storage/compression/empty_validity.hpp b/src/duckdb/src/include/duckdb/storage/compression/empty_validity.hpp new file mode 100644 index 000000000..89abe1ce7 --- /dev/null +++ b/src/duckdb/src/include/duckdb/storage/compression/empty_validity.hpp @@ -0,0 +1,100 @@ +#pragma once + +#include "duckdb/function/compression_function.hpp" +#include "duckdb/storage/table/column_data.hpp" +#include "duckdb/storage/table/scan_state.hpp" +#include "duckdb/storage/table/column_data_checkpointer.hpp" + +namespace duckdb { + +class EmptyValidityCompression { +public: + struct EmptyValidityCompressionState : public CompressionState { + public: + explicit EmptyValidityCompressionState(ColumnDataCheckpointData &checkpoint_data, const CompressionInfo &info) + : CompressionState(info), + function(checkpoint_data.GetCompressionFunction(CompressionType::COMPRESSION_EMPTY)), + checkpoint_data(checkpoint_data) { + } + ~EmptyValidityCompressionState() override { + } + + public: + optional_ptr function; + ColumnDataCheckpointData &checkpoint_data; + idx_t count = 0; + idx_t non_nulls = 0; + }; + struct EmptyValiditySegmentScanState : public SegmentScanState { + EmptyValiditySegmentScanState() { + } + }; + +public: + static CompressionFunction CreateFunction() { + return CompressionFunction(CompressionType::COMPRESSION_EMPTY, PhysicalType::BIT, nullptr, nullptr, nullptr, + InitCompression, Compress, FinalizeCompress, InitScan, Scan, ScanPartial, FetchRow, + Skip, InitSegment); + } + +public: + static unique_ptr InitCompression(ColumnDataCheckpointData &checkpoint_data, + unique_ptr state_p) { + return make_uniq(checkpoint_data, state_p->info); + } + static void Compress(CompressionState &state_p, Vector &scan_vector, idx_t count) { + auto &state = state_p.Cast(); + UnifiedVectorFormat format; + scan_vector.ToUnifiedFormat(count, format); + state.non_nulls += format.validity.CountValid(count); + state.count += count; + } + static void FinalizeCompress(CompressionState &state_p) { + auto &state = state_p.Cast(); + auto &checkpoint_data = state.checkpoint_data; + + auto &db = checkpoint_data.GetDatabase(); + auto &type = checkpoint_data.GetType(); + auto row_start = checkpoint_data.GetRowGroup().start; + + auto &info = state.info; + auto compressed_segment = ColumnSegment::CreateTransientSegment(db, *state.function, type, row_start, + info.GetBlockSize(), info.GetBlockSize()); + compressed_segment->count = state.count; + if (state.non_nulls != state.count) { + compressed_segment->stats.statistics.SetHasNullFast(); + } + if (state.non_nulls != 0) { + compressed_segment->stats.statistics.SetHasNoNullFast(); + } + + auto &buffer_manager = BufferManager::GetBufferManager(checkpoint_data.GetDatabase()); + auto handle = buffer_manager.Pin(compressed_segment->block); + + auto &checkpoint_state = checkpoint_data.GetCheckpointState(); + checkpoint_state.FlushSegment(std::move(compressed_segment), std::move(handle), 0); + } + static unique_ptr InitScan(ColumnSegment &segment) { + return make_uniq(); + } + static void ScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result, + idx_t result_offset) { + return; + } + static void Scan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result) { + return; + } + static void FetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result, + idx_t result_idx) { + return; + } + static void Skip(ColumnSegment &segment, 
ColumnScanState &state, idx_t skip_count) { + return; + } + static unique_ptr InitSegment(ColumnSegment &segment, block_id_t block_id, + optional_ptr segment_state) { + return nullptr; + } +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/storage/table/column_data.hpp b/src/duckdb/src/include/duckdb/storage/table/column_data.hpp index cbf354f37..adb3ed056 100644 --- a/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/column_data.hpp @@ -99,6 +99,7 @@ class ColumnData { const LogicalType &RootType() const; //! Whether or not the column has any updates bool HasUpdates() const; + bool HasChanges(idx_t start_row, idx_t end_row) const; //! Whether or not we can scan an entire vector virtual ScanVectorType GetVectorScanType(ColumnScanState &state, idx_t scan_count, Vector &result); diff --git a/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp b/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp index a5bd436bb..0db0e6fbc 100644 --- a/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp @@ -74,6 +74,7 @@ class ColumnDataCheckpointer { void WritePersistentSegments(ColumnCheckpointState &state); void InitAnalyze(); void DropSegments(); + bool ValidityCoveredByBasedata(vector &result); private: vector> &checkpoint_states; diff --git a/src/duckdb/src/main/database.cpp b/src/duckdb/src/main/database.cpp index de60b1cf2..3bd387f84 100644 --- a/src/duckdb/src/main/database.cpp +++ b/src/duckdb/src/main/database.cpp @@ -26,6 +26,7 @@ #include "duckdb/storage/storage_manager.hpp" #include "duckdb/transaction/transaction_manager.hpp" #include "duckdb/main/capi/extension_api.hpp" +#include "duckdb/storage/compression/empty_validity.hpp" #ifndef DUCKDB_NO_THREADS #include "duckdb/common/thread.hpp" diff --git a/src/duckdb/src/storage/compression/dictionary/decompression.cpp b/src/duckdb/src/storage/compression/dictionary/decompression.cpp index a81c0e122..4063e9224 100644 --- a/src/duckdb/src/storage/compression/dictionary/decompression.cpp +++ b/src/duckdb/src/storage/compression/dictionary/decompression.cpp @@ -43,15 +43,18 @@ void CompressedStringScanState::Initialize(ColumnSegment &segment, bool initiali block_size = segment.GetBlockManager().GetBlockSize(); dict = DictionaryCompression::GetDictionary(segment, *handle); - dictionary = make_buffer(segment.type, index_buffer_count); - dictionary_size = index_buffer_count; if (!initialize_dictionary) { // Used by fetch, as fetch will never produce a DictionaryVector return; } + dictionary = make_buffer(segment.type, index_buffer_count); + dictionary_size = index_buffer_count; auto dict_child_data = FlatVector::GetData(*(dictionary)); + auto &validity = FlatVector::Validity(*dictionary); + D_ASSERT(index_buffer_count >= 1); + validity.SetInvalid(0); for (uint32_t i = 0; i < index_buffer_count; i++) { // NOTE: the passing of dict_child_vector, will not be used, its for big strings uint16_t str_len = GetStringLength(i); @@ -61,6 +64,7 @@ void CompressedStringScanState::Initialize(ColumnSegment &segment, bool initiali void CompressedStringScanState::ScanToFlatVector(Vector &result, idx_t result_offset, idx_t start, idx_t scan_count) { auto result_data = FlatVector::GetData(result); + auto &validity = FlatVector::Validity(result); // Handling non-bitpacking-group-aligned start values; idx_t start_offset = start % 
BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE; @@ -82,6 +86,9 @@ void CompressedStringScanState::ScanToFlatVector(Vector &result, idx_t result_of for (idx_t i = 0; i < scan_count; i++) { // Lookup dict offset in index buffer auto string_number = sel_vec->get_index(i + start_offset); + if (string_number == 0) { + validity.SetInvalid(result_offset + i); + } auto dict_offset = index_buffer_ptr[string_number]; auto str_len = GetStringLength(UnsafeNumericCast(string_number)); result_data[result_offset + i] = FetchStringFromDict(UnsafeNumericCast(dict_offset), str_len); diff --git a/src/duckdb/src/storage/compression/dictionary_compression.cpp b/src/duckdb/src/storage/compression/dictionary_compression.cpp index 6f6b4beed..78915a374 100644 --- a/src/duckdb/src/storage/compression/dictionary_compression.cpp +++ b/src/duckdb/src/storage/compression/dictionary_compression.cpp @@ -157,7 +157,7 @@ void DictionaryCompressionStorage::StringFetchRow(ColumnSegment &segment, Column // Get Function //===--------------------------------------------------------------------===// CompressionFunction DictionaryCompressionFun::GetFunction(PhysicalType data_type) { - return CompressionFunction( + auto res = CompressionFunction( CompressionType::COMPRESSION_DICTIONARY, data_type, DictionaryCompressionStorage ::StringInitAnalyze, DictionaryCompressionStorage::StringAnalyze, DictionaryCompressionStorage::StringFinalAnalyze, DictionaryCompressionStorage::InitCompression, DictionaryCompressionStorage::Compress, @@ -165,6 +165,8 @@ CompressionFunction DictionaryCompressionFun::GetFunction(PhysicalType data_type DictionaryCompressionStorage::StringScan, DictionaryCompressionStorage::StringScanPartial, DictionaryCompressionStorage::StringFetchRow, UncompressedFunctions::EmptySkip, UncompressedStringStorage::StringInitSegment); + res.validity = CompressionValidity::NO_VALIDITY_REQUIRED; + return res; } bool DictionaryCompressionFun::TypeIsSupported(const PhysicalType physical_type) { diff --git a/src/duckdb/src/storage/compression/empty_validity.cpp b/src/duckdb/src/storage/compression/empty_validity.cpp new file mode 100644 index 000000000..0b6534153 --- /dev/null +++ b/src/duckdb/src/storage/compression/empty_validity.cpp @@ -0,0 +1,15 @@ +#include "duckdb/function/compression/compression.hpp" +#include "duckdb/storage/compression/empty_validity.hpp" + +namespace duckdb { + +CompressionFunction EmptyValidityCompressionFun::GetFunction(PhysicalType type) { + return EmptyValidityCompression::CreateFunction(); +} + +bool EmptyValidityCompressionFun::TypeIsSupported(const PhysicalType physical_type) { + D_ASSERT(physical_type == PhysicalType::BIT); + return true; +} + +} // namespace duckdb diff --git a/src/duckdb/src/storage/table/column_data.cpp b/src/duckdb/src/storage/table/column_data.cpp index 53e7c5459..985a59415 100644 --- a/src/duckdb/src/storage/table/column_data.cpp +++ b/src/duckdb/src/storage/table/column_data.cpp @@ -63,6 +63,16 @@ bool ColumnData::HasUpdates() const { return updates.get(); } +bool ColumnData::HasChanges(idx_t start_row, idx_t end_row) const { + if (!updates) { + return false; + } + if (updates->HasUpdates(start_row, end_row)) { + return true; + } + return false; +} + void ColumnData::ClearUpdates() { lock_guard update_guard(update_lock); updates.reset();
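
Taken together, the hunks above let dictionary compression advertise, through the new CompressionValidity::NO_VALIDITY_REQUIRED flag, that its scan can reproduce NULLs on its own (entry 0 of the rebuilt dictionary is reserved as the invalid entry). The checkpointer change that follows consumes that flag: when the base data is rewritten by such a function, the validity column is handed to COMPRESSION_EMPTY, which only records null statistics and flushes a single empty segment. Below is a hypothetical miniature of that decision; the enum mirrors the patch, while the surrounding types are simplified stand-ins for the DuckDB ones:

    #include <cassert>

    enum class CompressionValidity { REQUIRES_VALIDITY, NO_VALIDITY_REQUIRED };
    enum class ChosenFunction { REAL_VALIDITY_COMPRESSOR, COMPRESSION_EMPTY };

    struct BaseDataResult {
        bool rewritten;               // will the base data actually be written out?
        CompressionValidity validity; // can its compressor reproduce the nulls?
    };

    static ChosenFunction ChooseValidityFunction(const BaseDataResult &base) {
        if (base.rewritten && base.validity == CompressionValidity::NO_VALIDITY_REQUIRED) {
            // Compress/FinalizeCompress become no-ops: only null statistics and
            // one empty segment are recorded for the validity column.
            return ChosenFunction::COMPRESSION_EMPTY;
        }
        return ChosenFunction::REAL_VALIDITY_COMPRESSOR;
    }

    int main() {
        assert(ChooseValidityFunction({true, CompressionValidity::NO_VALIDITY_REQUIRED}) ==
               ChosenFunction::COMPRESSION_EMPTY);
        assert(ChooseValidityFunction({false, CompressionValidity::NO_VALIDITY_REQUIRED}) ==
               ChosenFunction::REAL_VALIDITY_COMPRESSOR);
        return 0;
    }
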
diff --git a/src/duckdb/src/storage/table/column_data_checkpointer.cpp b/src/duckdb/src/storage/table/column_data_checkpointer.cpp index 13658077d..ae92a6972 100644 --- a/src/duckdb/src/storage/table/column_data_checkpointer.cpp +++ b/src/duckdb/src/storage/table/column_data_checkpointer.cpp @@ -1,6 +1,7 @@ #include "duckdb/storage/table/column_data_checkpointer.hpp" #include "duckdb/main/config.hpp" #include "duckdb/storage/table/update_segment.hpp" +#include "duckdb/storage/compression/empty_validity.hpp" #include "duckdb/storage/data_table.hpp" #include "duckdb/parser/column_definition.hpp" #include "duckdb/storage/table/scan_state.hpp" @@ -273,11 +274,32 @@ void ColumnDataCheckpointer::DropSegments() { } } +bool ColumnDataCheckpointer::ValidityCoveredByBasedata(vector &result) { + if (result.size() != 2) { + return false; + } + if (!has_changes[0]) { + // The base data had no changes so it will not be rewritten + return false; + } + auto &base = result[0]; + D_ASSERT(base.function); + return base.function->validity == CompressionValidity::NO_VALIDITY_REQUIRED; +} + void ColumnDataCheckpointer::WriteToDisk() { DropSegments(); // Analyze the candidate functions to select one of them to use for compression auto analyze_result = DetectBestCompressionMethod(); + if (ValidityCoveredByBasedata(analyze_result)) { + D_ASSERT(analyze_result.size() == 2); + auto &validity = analyze_result[1]; + auto &config = DBConfig::GetConfig(db); + // Override the function to the COMPRESSION_EMPTY + // turning the compression+final compress steps into a no-op, saving a single empty segment + validity.function = config.GetCompressionFunction(CompressionType::COMPRESSION_EMPTY, PhysicalType::BIT); + } // Initialize the compression for the selected function D_ASSERT(analyze_result.size() == checkpoint_states.size()); @@ -328,13 +350,12 @@ bool ColumnDataCheckpointer::HasChanges(ColumnData &col_data) { if (segment->segment_type == ColumnSegmentType::TRANSIENT) { // transient segment: always need to write to disk return true; - } else { - // persistent segment; check if there were any updates or deletions in this segment - idx_t start_row_idx = segment->start - row_group.start; - idx_t end_row_idx = start_row_idx + segment->count; - if (col_data.updates && col_data.updates->HasUpdates(start_row_idx, end_row_idx)) { - return true; - } + } + // persistent segment; check if there were any updates or deletions in this segment + idx_t start_row_idx = segment->start - row_group.start; + idx_t end_row_idx = start_row_idx + segment->count; + if (col_data.HasChanges(start_row_idx, end_row_idx)) { + return true; } } return false; diff --git a/src/duckdb/ub_src_storage_compression.cpp b/src/duckdb/ub_src_storage_compression.cpp index 66e0303b5..491801cd7 100644 --- a/src/duckdb/ub_src_storage_compression.cpp +++ b/src/duckdb/ub_src_storage_compression.cpp @@ -24,3 +24,5 @@ #include "src/storage/compression/fsst.cpp" +#include "src/storage/compression/empty_validity.cpp" +
From 422f1ccba03cb982855f76bb7026e8a2cf18909e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 11:13:51 +0000 Subject: [PATCH 09/21] vendor: Update vendored sources to duckdb/duckdb@3b44a57608458b063303d3576b2e21b6f1908a76 (#984) Issue duckdb/duckdb#14996: Aggregate Secondary Orderings (duckdb/duckdb#15592) Co-authored-by: krlmlr --- .../function/table/version/pragma_version.cpp | 6 +- .../window/window_aggregate_function.cpp | 127 ++++------------ .../src/function/window/window_aggregator.cpp | 9 +- .../window/window_constant_aggregator.cpp | 137 ++++++++++++++---- .../window/window_custom_aggregator.cpp | 23 ++- .../window/window_distinct_aggregator.cpp | 16 +-
.../src/function/window/window_executor.cpp | 11 ++ .../window/window_naive_aggregator.cpp | 133 +++++++++++++++-- .../function/window/window_segment_tree.cpp | 17 ++- .../window/window_aggregate_function.hpp | 6 +- .../function/window/window_aggregator.hpp | 10 +- .../window/window_constant_aggregator.hpp | 5 +- .../window/window_custom_aggregator.hpp | 5 +- .../window/window_distinct_aggregator.hpp | 6 +- .../window/window_naive_aggregator.hpp | 10 +- .../function/window/window_segment_tree.hpp | 15 +- .../expression/bound_window_expression.hpp | 1 + .../expression/transform_function.cpp | 2 +- .../expression/bound_window_expression.cpp | 12 +- 19 files changed, 360 insertions(+), 191 deletions(-) diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 29ac2f576..1742d5569 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4188" +#define DUCKDB_PATCH_VERSION "4-dev4200" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4188" +#define DUCKDB_VERSION "v1.1.4-dev4200" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "fbc4d92fe6" +#define DUCKDB_SOURCE_ID "3b44a57608" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/function/window/window_aggregate_function.cpp b/src/duckdb/src/function/window/window_aggregate_function.cpp index 32dc0a276..308667601 100644 --- a/src/duckdb/src/function/window/window_aggregate_function.cpp +++ b/src/duckdb/src/function/window/window_aggregate_function.cpp @@ -26,117 +26,50 @@ class WindowAggregateExecutorGlobalState : public WindowExecutorGlobalState { const Expression *filter_ref; }; -bool WindowAggregateExecutor::IsConstantAggregate() { - if (!wexpr.aggregate) { - return false; - } - // window exclusion cannot be handled by constant aggregates - if (wexpr.exclude_clause != WindowExcludeMode::NO_OTHER) { - return false; - } - - // COUNT(*) is already handled efficiently by segment trees. - if (wexpr.children.empty()) { - return false; - } - - /* - The default framing option is RANGE UNBOUNDED PRECEDING, which - is the same as RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT - ROW; it sets the frame to be all rows from the partition start - up through the current row's last peer (a row that the window's - ORDER BY clause considers equivalent to the current row; all - rows are peers if there is no ORDER BY). In general, UNBOUNDED - PRECEDING means that the frame starts with the first row of the - partition, and similarly UNBOUNDED FOLLOWING means that the - frame ends with the last row of the partition, regardless of - RANGE, ROWS or GROUPS mode. In ROWS mode, CURRENT ROW means that - the frame starts or ends with the current row; but in RANGE or - GROUPS mode it means that the frame starts or ends with the - current row's first or last peer in the ORDER BY ordering. The - offset PRECEDING and offset FOLLOWING options vary in meaning - depending on the frame mode. 
- */ - switch (wexpr.start) { - case WindowBoundary::UNBOUNDED_PRECEDING: - break; - case WindowBoundary::CURRENT_ROW_RANGE: - if (!wexpr.orders.empty()) { - return false; +static BoundWindowExpression &SimplifyWindowedAggregate(BoundWindowExpression &wexpr, ClientContext &context) { + // Remove redundant/irrelevant modifiers (they can be serious performance cliffs) + if (wexpr.aggregate && ClientConfig::GetConfig(context).enable_optimizer) { + const auto &aggr = wexpr.aggregate; + auto &arg_orders = wexpr.arg_orders; + if (aggr->distinct_dependent != AggregateDistinctDependent::DISTINCT_DEPENDENT) { + wexpr.distinct = false; } - break; - default: - return false; - } - - switch (wexpr.end) { - case WindowBoundary::UNBOUNDED_FOLLOWING: - break; - case WindowBoundary::CURRENT_ROW_RANGE: - if (!wexpr.orders.empty()) { - return false; + if (aggr->order_dependent != AggregateOrderDependent::ORDER_DEPENDENT) { + arg_orders.clear(); + } else { + // If the argument order is prefix of the partition ordering, + // then we can just use the partition ordering. + if (BoundWindowExpression::GetSharedOrders(wexpr.orders, arg_orders) == arg_orders.size()) { + arg_orders.clear(); + } } - break; - default: - return false; - } - - return true; -} - -bool WindowAggregateExecutor::IsDistinctAggregate() { - if (!wexpr.aggregate) { - return false; } - return wexpr.distinct; -} - -bool WindowAggregateExecutor::IsCustomAggregate() { - if (!wexpr.aggregate) { - return false; - } - - if (!AggregateObject(wexpr).function.window) { - return false; - } - - return (mode < WindowAggregationMode::COMBINE); -} - -void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &eval_chunk, Vector &result, WindowExecutorLocalState &lstate, - WindowExecutorGlobalState &gstate) const { - auto &lbstate = lstate.Cast(); - lbstate.UpdateBounds(gstate, row_idx, eval_chunk, lstate.range_cursor); - - const auto count = eval_chunk.size(); - EvaluateInternal(gstate, lstate, eval_chunk, result, count, row_idx); - - result.Verify(count); + return wexpr; } WindowAggregateExecutor::WindowAggregateExecutor(BoundWindowExpression &wexpr, ClientContext &context, WindowSharedExpressions &shared, WindowAggregationMode mode) - : WindowExecutor(wexpr, context, shared), mode(mode) { - auto return_type = wexpr.return_type; + : WindowExecutor(SimplifyWindowedAggregate(wexpr, context), context, shared), mode(mode) { // Force naive for SEPARATE mode or for (currently!) 
unsupported functionality - const auto force_naive = - !ClientConfig::GetConfig(context).enable_optimizer || mode == WindowAggregationMode::SEPARATE; - if (force_naive || (wexpr.distinct && wexpr.exclude_clause != WindowExcludeMode::NO_OTHER)) { - aggregator = make_uniq(wexpr, wexpr.exclude_clause, shared); - } else if (IsDistinctAggregate()) { + if (!ClientConfig::GetConfig(context).enable_optimizer || mode == WindowAggregationMode::SEPARATE) { + aggregator = make_uniq(*this, shared); + } else if (WindowDistinctAggregator::CanAggregate(wexpr)) { // build a merge sort tree // see https://dl.acm.org/doi/pdf/10.1145/3514221.3526184 - aggregator = make_uniq(wexpr, wexpr.exclude_clause, shared, context); - } else if (IsConstantAggregate()) { - aggregator = make_uniq(wexpr, wexpr.exclude_clause, shared); - } else if (IsCustomAggregate()) { - aggregator = make_uniq(wexpr, wexpr.exclude_clause, shared); - } else { + aggregator = make_uniq(wexpr, shared, context); + } else if (WindowConstantAggregator::CanAggregate(wexpr)) { + aggregator = make_uniq(wexpr, shared); + } else if (WindowCustomAggregator::CanAggregate(wexpr, mode)) { + aggregator = make_uniq(wexpr, shared); + } else if (WindowSegmentTree::CanAggregate(wexpr)) { // build a segment tree for frame-adhering aggregates // see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf - aggregator = make_uniq(wexpr, mode, wexpr.exclude_clause, shared); + aggregator = make_uniq(wexpr, shared); + } else { + // No accelerator can handle this combination, so fall back to naïve. + aggregator = make_uniq(*this, shared); } // Compute the FILTER with the other eval columns. diff --git a/src/duckdb/src/function/window/window_aggregator.cpp b/src/duckdb/src/function/window/window_aggregator.cpp index 60499073f..197b89e38 100644 --- a/src/duckdb/src/function/window/window_aggregator.cpp +++ b/src/duckdb/src/function/window/window_aggregator.cpp @@ -12,18 +12,17 @@ namespace duckdb { WindowAggregatorState::WindowAggregatorState() : allocator(Allocator::DefaultAllocator()) { } -WindowAggregator::WindowAggregator(const BoundWindowExpression &wexpr, const WindowExcludeMode exclude_mode_p) +WindowAggregator::WindowAggregator(const BoundWindowExpression &wexpr) : wexpr(wexpr), aggr(wexpr), result_type(wexpr.return_type), state_size(aggr.function.state_size(aggr.function)), - exclude_mode(exclude_mode_p) { + exclude_mode(wexpr.exclude_clause) { for (auto &child : wexpr.children) { arg_types.emplace_back(child->return_type); } } -WindowAggregator::WindowAggregator(const BoundWindowExpression &wexpr, const WindowExcludeMode exclude_mode_p, - WindowSharedExpressions &shared) - : WindowAggregator(wexpr, exclude_mode_p) { +WindowAggregator::WindowAggregator(const BoundWindowExpression &wexpr, WindowSharedExpressions &shared) + : WindowAggregator(wexpr) { for (auto &child : wexpr.children) { child_idx.emplace_back(shared.RegisterCollection(child, false)); } diff --git a/src/duckdb/src/function/window/window_constant_aggregator.cpp b/src/duckdb/src/function/window/window_constant_aggregator.cpp index d94267f91..4be038229 100644 --- a/src/duckdb/src/function/window/window_constant_aggregator.cpp +++ b/src/duckdb/src/function/window/window_constant_aggregator.cpp @@ -6,7 +6,7 @@ namespace duckdb { //===--------------------------------------------------------------------===// -// WindowConstantAggregator +// WindowConstantAggregatorGlobalState //===--------------------------------------------------------------------===// class WindowConstantAggregatorGlobalState : public 
WindowAggregatorGlobalState { @@ -24,33 +24,6 @@ class WindowConstantAggregatorGlobalState : public WindowAggregatorGlobalState { unique_ptr results; }; -class WindowConstantAggregatorLocalState : public WindowAggregatorLocalState { -public: - explicit WindowConstantAggregatorLocalState(const WindowConstantAggregatorGlobalState &gstate); - ~WindowConstantAggregatorLocalState() override { - } - - void Sink(DataChunk &sink_chunk, DataChunk &coll_chunk, idx_t input_idx, optional_ptr filter_sel, - idx_t filtered); - void Combine(WindowConstantAggregatorGlobalState &gstate); - -public: - //! The global state we are sharing - const WindowConstantAggregatorGlobalState &gstate; - //! Reusable chunk for sinking - DataChunk inputs; - //! Chunk for referencing the input columns - DataChunk payload_chunk; - //! A vector of pointers to "state", used for intermediate window segment aggregation - Vector statep; - //! Reused result state container for the window functions - WindowAggregateStates statef; - //! The current result partition being read - idx_t partition; - //! Shared SV for evaluation - SelectionVector matches; -}; - WindowConstantAggregatorGlobalState::WindowConstantAggregatorGlobalState(ClientContext &context, const WindowConstantAggregator &aggregator, idx_t group_count, @@ -93,6 +66,36 @@ WindowConstantAggregatorGlobalState::WindowConstantAggregatorGlobalState(ClientC partition_offsets.emplace_back(group_count); } +//===--------------------------------------------------------------------===// +// WindowConstantAggregatorLocalState +//===--------------------------------------------------------------------===// +class WindowConstantAggregatorLocalState : public WindowAggregatorLocalState { +public: + explicit WindowConstantAggregatorLocalState(const WindowConstantAggregatorGlobalState &gstate); + ~WindowConstantAggregatorLocalState() override { + } + + void Sink(DataChunk &sink_chunk, DataChunk &coll_chunk, idx_t input_idx, optional_ptr filter_sel, + idx_t filtered); + void Combine(WindowConstantAggregatorGlobalState &gstate); + +public: + //! The global state we are sharing + const WindowConstantAggregatorGlobalState &gstate; + //! Reusable chunk for sinking + DataChunk inputs; + //! Chunk for referencing the input columns + DataChunk payload_chunk; + //! A vector of pointers to "state", used for intermediate window segment aggregation + Vector statep; + //! Reused result state container for the window functions + WindowAggregateStates statef; + //! The current result partition being read + idx_t partition; + //! 
Shared SV for evaluation + SelectionVector matches; +}; + WindowConstantAggregatorLocalState::WindowConstantAggregatorLocalState( const WindowConstantAggregatorGlobalState &gstate) : gstate(gstate), statep(Value::POINTER(0)), statef(gstate.statef.aggr), partition(0) { @@ -110,10 +113,80 @@ WindowConstantAggregatorLocalState::WindowConstantAggregatorLocalState( gstate.locals++; } -WindowConstantAggregator::WindowConstantAggregator(const BoundWindowExpression &wexpr, - const WindowExcludeMode exclude_mode_p, - WindowSharedExpressions &shared) - : WindowAggregator(wexpr, exclude_mode_p) { +//===--------------------------------------------------------------------===// +// WindowConstantAggregator +//===--------------------------------------------------------------------===// +bool WindowConstantAggregator::CanAggregate(const BoundWindowExpression &wexpr) { + if (!wexpr.aggregate) { + return false; + } + // window exclusion cannot be handled by constant aggregates + if (wexpr.exclude_clause != WindowExcludeMode::NO_OTHER) { + return false; + } + + // DISTINCT aggregation cannot be handled by constant aggregation + // TODO: Use a hash table + if (wexpr.distinct) { + return false; + } + + // ORDER BY arguments cannot be handled by constant aggregation + if (!wexpr.arg_orders.empty()) { + return false; + } + + // COUNT(*) is already handled efficiently by segment trees. + if (wexpr.children.empty()) { + return false; + } + + /* + The default framing option is RANGE UNBOUNDED PRECEDING, which + is the same as RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT + ROW; it sets the frame to be all rows from the partition start + up through the current row's last peer (a row that the window's + ORDER BY clause considers equivalent to the current row; all + rows are peers if there is no ORDER BY). In general, UNBOUNDED + PRECEDING means that the frame starts with the first row of the + partition, and similarly UNBOUNDED FOLLOWING means that the + frame ends with the last row of the partition, regardless of + RANGE, ROWS or GROUPS mode. In ROWS mode, CURRENT ROW means that + the frame starts or ends with the current row; but in RANGE or + GROUPS mode it means that the frame starts or ends with the + current row's first or last peer in the ORDER BY ordering. The + offset PRECEDING and offset FOLLOWING options vary in meaning + depending on the frame mode. 
+ */ + switch (wexpr.start) { + case WindowBoundary::UNBOUNDED_PRECEDING: + break; + case WindowBoundary::CURRENT_ROW_RANGE: + if (!wexpr.orders.empty()) { + return false; + } + break; + default: + return false; + } + + switch (wexpr.end) { + case WindowBoundary::UNBOUNDED_FOLLOWING: + break; + case WindowBoundary::CURRENT_ROW_RANGE: + if (!wexpr.orders.empty()) { + return false; + } + break; + default: + return false; + } + + return true; +} + +WindowConstantAggregator::WindowConstantAggregator(const BoundWindowExpression &wexpr, WindowSharedExpressions &shared) + : WindowAggregator(wexpr) { // We only need these values for Sink for (auto &child : wexpr.children) { diff --git a/src/duckdb/src/function/window/window_custom_aggregator.cpp b/src/duckdb/src/function/window/window_custom_aggregator.cpp index d75143524..8416e3031 100644 --- a/src/duckdb/src/function/window/window_custom_aggregator.cpp +++ b/src/duckdb/src/function/window/window_custom_aggregator.cpp @@ -1,13 +1,30 @@ #include "duckdb/function/window/window_custom_aggregator.hpp" +#include "duckdb/planner/expression/bound_window_expression.hpp" namespace duckdb { //===--------------------------------------------------------------------===// // WindowCustomAggregator //===--------------------------------------------------------------------===// -WindowCustomAggregator::WindowCustomAggregator(const BoundWindowExpression &wexpr, const WindowExcludeMode exclude_mode, - WindowSharedExpressions &shared) - : WindowAggregator(wexpr, exclude_mode, shared) { +bool WindowCustomAggregator::CanAggregate(const BoundWindowExpression &wexpr, WindowAggregationMode mode) { + if (!wexpr.aggregate) { + return false; + } + + if (!wexpr.aggregate->window) { + return false; + } + + // ORDER BY arguments are not currently supported + if (!wexpr.arg_orders.empty()) { + return false; + } + + return (mode < WindowAggregationMode::COMBINE); +} + +WindowCustomAggregator::WindowCustomAggregator(const BoundWindowExpression &wexpr, WindowSharedExpressions &shared) + : WindowAggregator(wexpr, shared) { } WindowCustomAggregator::~WindowCustomAggregator() { diff --git a/src/duckdb/src/function/window/window_distinct_aggregator.cpp b/src/duckdb/src/function/window/window_distinct_aggregator.cpp index ba466288f..1dc940f95 100644 --- a/src/duckdb/src/function/window/window_distinct_aggregator.cpp +++ b/src/duckdb/src/function/window/window_distinct_aggregator.cpp @@ -6,6 +6,7 @@ #include "duckdb/function/window/window_aggregate_states.hpp" #include "duckdb/planner/bound_result_modifier.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" +#include "duckdb/planner/expression/bound_window_expression.hpp" #include #include @@ -15,10 +16,17 @@ namespace duckdb { //===--------------------------------------------------------------------===// // WindowDistinctAggregator //===--------------------------------------------------------------------===// -WindowDistinctAggregator::WindowDistinctAggregator(const BoundWindowExpression &wexpr, - const WindowExcludeMode exclude_mode_p, - WindowSharedExpressions &shared, ClientContext &context) - : WindowAggregator(wexpr, exclude_mode_p, shared), context(context) { +bool WindowDistinctAggregator::CanAggregate(const BoundWindowExpression &wexpr) { + if (!wexpr.aggregate) { + return false; + } + + return wexpr.distinct && wexpr.exclude_clause == WindowExcludeMode::NO_OTHER && wexpr.arg_orders.empty(); +} + +WindowDistinctAggregator::WindowDistinctAggregator(const BoundWindowExpression &wexpr, WindowSharedExpressions 
&shared, + ClientContext &context) + : WindowAggregator(wexpr, shared), context(context) { } class WindowDistinctAggregatorLocalState; diff --git a/src/duckdb/src/function/window/window_executor.cpp b/src/duckdb/src/function/window/window_executor.cpp index 40381f0e6..f4dab47af 100644 --- a/src/duckdb/src/function/window/window_executor.cpp +++ b/src/duckdb/src/function/window/window_executor.cpp @@ -47,6 +47,17 @@ WindowExecutor::WindowExecutor(BoundWindowExpression &wexpr, ClientContext &cont } } +void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &eval_chunk, Vector &result, WindowExecutorLocalState &lstate, + WindowExecutorGlobalState &gstate) const { + auto &lbstate = lstate.Cast(); + lbstate.UpdateBounds(gstate, row_idx, eval_chunk, lstate.range_cursor); + + const auto count = eval_chunk.size(); + EvaluateInternal(gstate, lstate, eval_chunk, result, count, row_idx); + + result.Verify(count); +} + WindowExecutorGlobalState::WindowExecutorGlobalState(const WindowExecutor &executor, const idx_t payload_count, const ValidityMask &partition_mask, const ValidityMask &order_mask) : executor(executor), payload_count(payload_count), partition_mask(partition_mask), order_mask(order_mask) { diff --git a/src/duckdb/src/function/window/window_naive_aggregator.cpp b/src/duckdb/src/function/window/window_naive_aggregator.cpp index 8bf0d54d3..448639e77 100644 --- a/src/duckdb/src/function/window/window_naive_aggregator.cpp +++ b/src/duckdb/src/function/window/window_naive_aggregator.cpp @@ -1,14 +1,21 @@ #include "duckdb/function/window/window_naive_aggregator.hpp" +#include "duckdb/common/sort/sort.hpp" #include "duckdb/function/window/window_collection.hpp" +#include "duckdb/function/window/window_shared_expressions.hpp" +#include "duckdb/planner/expression/bound_window_expression.hpp" +#include "duckdb/function/window/window_aggregate_function.hpp" namespace duckdb { //===--------------------------------------------------------------------===// // WindowNaiveAggregator //===--------------------------------------------------------------------===// -WindowNaiveAggregator::WindowNaiveAggregator(const BoundWindowExpression &wexpr, const WindowExcludeMode exclude_mode, - WindowSharedExpressions &shared) - : WindowAggregator(wexpr, exclude_mode, shared) { +WindowNaiveAggregator::WindowNaiveAggregator(const WindowAggregateExecutor &executor, WindowSharedExpressions &shared) + : WindowAggregator(executor.wexpr, shared), executor(executor) { + + for (const auto &order : wexpr.arg_orders) { + arg_order_idx.emplace_back(shared.RegisterCollection(order.expression, false)); + } } WindowNaiveAggregator::~WindowNaiveAggregator() { @@ -76,6 +83,17 @@ class WindowNaiveState : public WindowAggregatorLocalState { Vector hashes; //! The state used for comparing the collection across chunk boundaries unique_ptr comparer; + + //! The state used for scanning ORDER BY values from the collection + unique_ptr arg_orderer; + //! Reusable sort key chunk + DataChunk orderby_sort; + //! Reusable sort payload chunk + DataChunk orderby_payload; + //! Reusable sort key slicer + SelectionVector orderby_sel; + //! Reusable payload layout. 
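+ //! (a single UBIGINT column of row ids, re-ordered together with the sort keys)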
+ RowLayout payload_layout; }; WindowNaiveState::WindowNaiveState(const WindowNaiveAggregator &aggregator_p) @@ -95,6 +113,13 @@ WindowNaiveState::WindowNaiveState(const WindowNaiveAggregator &aggregator_p) fdata[i] = state_ptr; state_ptr += aggregator.state_size; } + + // Initialise any ORDER BY data + if (!aggregator.arg_order_idx.empty() && !arg_orderer) { + orderby_payload.Initialize(Allocator::DefaultAllocator(), {LogicalType::UBIGINT}); + payload_layout.Initialize(orderby_payload.GetTypes()); + orderby_sel.Initialize(); + } } void WindowNaiveState::Finalize(WindowAggregatorGlobalState &gastate, CollectionPtr collection) { @@ -102,7 +127,19 @@ void WindowNaiveState::Finalize(WindowAggregatorGlobalState &gastate, Collection // Set up the comparison scanner just in case if (!comparer) { - comparer = make_uniq(*collection, gastate.aggregator.child_idx); + comparer = make_uniq(*collection, aggregator.child_idx); + } + + // Set up the argument ORDER BY scanner if needed + if (!aggregator.arg_order_idx.empty() && !arg_orderer) { + arg_orderer = make_uniq(*collection, aggregator.arg_order_idx); + orderby_sort.Initialize(BufferAllocator::Get(gastate.context), arg_orderer->chunk.GetTypes()); + } + + // Initialise the chunks + const auto types = cursor->chunk.GetTypes(); + if (leaves.ColumnCount() == 0 && !types.empty()) { + leaves.Initialize(BufferAllocator::Get(gastate.context), types); } } @@ -175,10 +212,6 @@ void WindowNaiveState::Evaluate(const WindowAggregatorGlobalState &gsink, const auto &filter_mask = gsink.filter_mask; const auto types = cursor->chunk.GetTypes(); - if (leaves.ColumnCount() == 0 && !types.empty()) { - leaves.Initialize(Allocator::DefaultAllocator(), types); - } - auto fdata = FlatVector::GetData(statef); auto pdata = FlatVector::GetData(statep); @@ -190,8 +223,90 @@ void WindowNaiveState::Evaluate(const WindowAggregatorGlobalState &gsink, const auto agg_state = fdata[rid]; aggr.function.initialize(aggr.function, agg_state); - // Just update the aggregate with the unfiltered input rows + // Reset the DISTINCT hash table row_set.clear(); + + // Sort the input rows by the argument + if (arg_orderer) { + auto &context = aggregator.executor.context; + auto &orders = aggregator.wexpr.arg_orders; + auto &buffer_manager = BufferManager::GetBufferManager(context); + GlobalSortState global_sort(buffer_manager, orders, payload_layout); + LocalSortState local_sort; + local_sort.Initialize(global_sort, global_sort.buffer_manager); + + idx_t orderby_count = 0; + auto orderby_row = FlatVector::GetData(orderby_payload.data[0]); + for (const auto &frame : frames) { + for (auto f = frame.start; f < frame.end; ++f) { + // FILTER before the ORDER BY + if (!filter_mask.RowIsValid(f)) { + continue; + } + + if (!arg_orderer->RowIsVisible(f) || orderby_count >= STANDARD_VECTOR_SIZE) { + if (orderby_count) { + orderby_sort.Reference(arg_orderer->chunk); + orderby_sort.Slice(orderby_sel, orderby_count); + orderby_payload.SetCardinality(orderby_count); + local_sort.SinkChunk(orderby_sort, orderby_payload); + } + orderby_count = 0; + arg_orderer->Seek(f); + } + orderby_row[orderby_count] = f; + orderby_sel.set_index(orderby_count++, arg_orderer->RowOffset(f)); + } + } + if (orderby_count) { + orderby_sort.Reference(arg_orderer->chunk); + orderby_sort.Slice(orderby_sel, orderby_count); + orderby_payload.SetCardinality(orderby_count); + local_sort.SinkChunk(orderby_sort, orderby_payload); + } + + global_sort.AddLocalState(local_sort); + if (global_sort.sorted_blocks.empty()) { + return; + } + 
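+ // At this point every qualifying row id has been sunk into local_sort;
+ // the merge rounds below combine the sorted runs until a single global
+ // run remains, and the payload scan that follows then replays the row
+ // ids in the argument ORDER BY order.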
global_sort.PrepareMergePhase(); + while (global_sort.sorted_blocks.size() > 1) { + global_sort.InitializeMergeRound(); + MergeSorter merge_sorter(global_sort, global_sort.buffer_manager); + merge_sorter.PerformInMergeRound(); + global_sort.CompleteMergeRound(false); + } + + PayloadScanner scanner(global_sort); + while (scanner.Remaining()) { + orderby_payload.Reset(); + scanner.Scan(orderby_payload); + orderby_row = FlatVector::GetData(orderby_payload.data[0]); + for (idx_t i = 0; i < orderby_payload.size(); ++i) { + const auto f = orderby_row[i]; + // Seek to the current position + if (!cursor->RowIsVisible(f)) { + // We need to flush when we cross a chunk boundary + FlushStates(gsink); + cursor->Seek(f); + } + + // Filter out duplicates + if (aggr.IsDistinct() && !row_set.insert(f).second) { + continue; + } + + pdata[flush_count] = agg_state; + update_sel[flush_count++] = cursor->RowOffset(f); + if (flush_count >= STANDARD_VECTOR_SIZE) { + FlushStates(gsink); + } + } + } + return; + } + + // Just update the aggregate with the unfiltered input rows for (const auto &frame : frames) { for (auto f = frame.start; f < frame.end; ++f) { if (!filter_mask.RowIsValid(f)) { diff --git a/src/duckdb/src/function/window/window_segment_tree.cpp b/src/duckdb/src/function/window/window_segment_tree.cpp index 0ccc8615b..6708ae90b 100644 --- a/src/duckdb/src/function/window/window_segment_tree.cpp +++ b/src/duckdb/src/function/window/window_segment_tree.cpp @@ -10,6 +10,18 @@ namespace duckdb { //===--------------------------------------------------------------------===// // WindowSegmentTree //===--------------------------------------------------------------------===// +bool WindowSegmentTree::CanAggregate(const BoundWindowExpression &wexpr) { + if (!wexpr.aggregate) { + return false; + } + + return !wexpr.distinct && wexpr.arg_orders.empty(); +} + +WindowSegmentTree::WindowSegmentTree(const BoundWindowExpression &wexpr, WindowSharedExpressions &shared) + : WindowAggregator(wexpr, shared) { +} + class WindowSegmentTreeGlobalState : public WindowAggregatorGlobalState { public: using AtomicCounters = vector>; @@ -43,11 +55,6 @@ class WindowSegmentTreeGlobalState : public WindowAggregatorGlobalState { static constexpr idx_t TREE_FANOUT = 16; }; -WindowSegmentTree::WindowSegmentTree(const BoundWindowExpression &wexpr, WindowAggregationMode mode_p, - const WindowExcludeMode exclude_mode_p, WindowSharedExpressions &shared) - : WindowAggregator(wexpr, exclude_mode_p, shared), mode(mode_p) { -} - class WindowSegmentTreePart { public: //! 
Right side nodes need to be cached and processed in reverse order diff --git a/src/duckdb/src/include/duckdb/function/window/window_aggregate_function.hpp b/src/duckdb/src/include/duckdb/function/window/window_aggregate_function.hpp index b110315cd..efc782b8c 100644 --- a/src/duckdb/src/include/duckdb/function/window/window_aggregate_function.hpp +++ b/src/duckdb/src/include/duckdb/function/window/window_aggregate_function.hpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#pragma once + #include "duckdb/function/window/window_executor.hpp" #include "duckdb/common/enums/window_aggregation_mode.hpp" #include "duckdb/function/window/window_aggregator.hpp" @@ -17,10 +19,6 @@ class WindowAggregateExecutor : public WindowExecutor { WindowAggregateExecutor(BoundWindowExpression &wexpr, ClientContext &context, WindowSharedExpressions &shared, WindowAggregationMode mode); - bool IsConstantAggregate(); - bool IsCustomAggregate(); - bool IsDistinctAggregate(); - void Sink(DataChunk &sink_chunk, DataChunk &coll_chunk, const idx_t input_idx, WindowExecutorGlobalState &gstate, WindowExecutorLocalState &lstate) const override; void Finalize(WindowExecutorGlobalState &gstate, WindowExecutorLocalState &lstate, diff --git a/src/duckdb/src/include/duckdb/function/window/window_aggregator.hpp b/src/duckdb/src/include/duckdb/function/window/window_aggregator.hpp index c7103ce41..1caa6326d 100644 --- a/src/duckdb/src/include/duckdb/function/window/window_aggregator.hpp +++ b/src/duckdb/src/include/duckdb/function/window/window_aggregator.hpp @@ -105,9 +105,8 @@ class WindowAggregator { } } - WindowAggregator(const BoundWindowExpression &wexpr, const WindowExcludeMode exclude_mode_p); - WindowAggregator(const BoundWindowExpression &wexpr, const WindowExcludeMode exclude_mode_p, - WindowSharedExpressions &shared); + explicit WindowAggregator(const BoundWindowExpression &wexpr); + WindowAggregator(const BoundWindowExpression &wexpr, WindowSharedExpressions &shared); virtual ~WindowAggregator(); // Threading states @@ -144,7 +143,7 @@ class WindowAggregator { class WindowAggregatorGlobalState : public WindowAggregatorState { public: WindowAggregatorGlobalState(ClientContext &context, const WindowAggregator &aggregator_p, idx_t group_count) - : aggregator(aggregator_p), aggr(aggregator.wexpr), locals(0), finalized(0) { + : context(context), aggregator(aggregator_p), aggr(aggregator.wexpr), locals(0), finalized(0) { if (aggr.filter) { // Start with all invalid and set the ones that pass @@ -154,6 +153,9 @@ class WindowAggregatorGlobalState : public WindowAggregatorState { } } + //! The context we are in + ClientContext &context; + //! 
The aggregator data const WindowAggregator &aggregator; diff --git a/src/duckdb/src/include/duckdb/function/window/window_constant_aggregator.hpp b/src/duckdb/src/include/duckdb/function/window/window_constant_aggregator.hpp index af3c7f88c..fdf07cc7a 100644 --- a/src/duckdb/src/include/duckdb/function/window/window_constant_aggregator.hpp +++ b/src/duckdb/src/include/duckdb/function/window/window_constant_aggregator.hpp @@ -14,8 +14,9 @@ namespace duckdb { class WindowConstantAggregator : public WindowAggregator { public: - WindowConstantAggregator(const BoundWindowExpression &wexpr, WindowExcludeMode exclude_mode_p, - WindowSharedExpressions &shared); + static bool CanAggregate(const BoundWindowExpression &wexpr); + + WindowConstantAggregator(const BoundWindowExpression &wexpr, WindowSharedExpressions &shared); ~WindowConstantAggregator() override { } diff --git a/src/duckdb/src/include/duckdb/function/window/window_custom_aggregator.hpp b/src/duckdb/src/include/duckdb/function/window/window_custom_aggregator.hpp index dffad8483..ab664ca59 100644 --- a/src/duckdb/src/include/duckdb/function/window/window_custom_aggregator.hpp +++ b/src/duckdb/src/include/duckdb/function/window/window_custom_aggregator.hpp @@ -14,8 +14,9 @@ namespace duckdb { class WindowCustomAggregator : public WindowAggregator { public: - WindowCustomAggregator(const BoundWindowExpression &wexpr, const WindowExcludeMode exclude_mode, - WindowSharedExpressions &shared); + static bool CanAggregate(const BoundWindowExpression &wexpr, WindowAggregationMode mode); + + WindowCustomAggregator(const BoundWindowExpression &wexpr, WindowSharedExpressions &shared); ~WindowCustomAggregator() override; unique_ptr GetGlobalState(ClientContext &context, idx_t group_count, diff --git a/src/duckdb/src/include/duckdb/function/window/window_distinct_aggregator.hpp b/src/duckdb/src/include/duckdb/function/window/window_distinct_aggregator.hpp index 1cafc9252..7fe9040c0 100644 --- a/src/duckdb/src/include/duckdb/function/window/window_distinct_aggregator.hpp +++ b/src/duckdb/src/include/duckdb/function/window/window_distinct_aggregator.hpp @@ -14,8 +14,10 @@ namespace duckdb { class WindowDistinctAggregator : public WindowAggregator { public: - WindowDistinctAggregator(const BoundWindowExpression &wexpr, const WindowExcludeMode exclude_mode_p, - WindowSharedExpressions &shared, ClientContext &context); + static bool CanAggregate(const BoundWindowExpression &wexpr); + + WindowDistinctAggregator(const BoundWindowExpression &wexpr, WindowSharedExpressions &shared, + ClientContext &context); // Build unique_ptr GetGlobalState(ClientContext &context, idx_t group_count, diff --git a/src/duckdb/src/include/duckdb/function/window/window_naive_aggregator.hpp b/src/duckdb/src/include/duckdb/function/window/window_naive_aggregator.hpp index 199301bb4..1fdf497ee 100644 --- a/src/duckdb/src/include/duckdb/function/window/window_naive_aggregator.hpp +++ b/src/duckdb/src/include/duckdb/function/window/window_naive_aggregator.hpp @@ -12,16 +12,22 @@ namespace duckdb { +class WindowAggregateExecutor; + // Used for validation class WindowNaiveAggregator : public WindowAggregator { public: - WindowNaiveAggregator(const BoundWindowExpression &wexpr, const WindowExcludeMode exclude_mode, - WindowSharedExpressions &shared); + WindowNaiveAggregator(const WindowAggregateExecutor &executor, WindowSharedExpressions &shared); ~WindowNaiveAggregator() override; unique_ptr GetLocalState(const WindowAggregatorState &gstate) const override; void Evaluate(const 
WindowAggregatorState &gsink, WindowAggregatorState &lstate, const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) const override; + + //! The parent executor + const WindowAggregateExecutor &executor; + //! The column indices of any ORDER BY argument expressions + vector arg_order_idx; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/function/window/window_segment_tree.hpp b/src/duckdb/src/include/duckdb/function/window/window_segment_tree.hpp index f3a7b6f96..0b574b959 100644 --- a/src/duckdb/src/include/duckdb/function/window/window_segment_tree.hpp +++ b/src/duckdb/src/include/duckdb/function/window/window_segment_tree.hpp @@ -13,10 +13,10 @@ namespace duckdb { class WindowSegmentTree : public WindowAggregator { - public: - WindowSegmentTree(const BoundWindowExpression &wexpr, WindowAggregationMode mode_p, - const WindowExcludeMode exclude_mode, WindowSharedExpressions &shared); + static bool CanAggregate(const BoundWindowExpression &wexpr); + + WindowSegmentTree(const BoundWindowExpression &wexpr, WindowSharedExpressions &shared); unique_ptr GetGlobalState(ClientContext &context, idx_t group_count, const ValidityMask &partition_mask) const override; @@ -26,15 +26,6 @@ class WindowSegmentTree : public WindowAggregator { void Evaluate(const WindowAggregatorState &gstate, WindowAggregatorState &lstate, const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) const override; - -public: - //! Use the combine API, if available - inline bool UseCombineAPI() const { - return mode < WindowAggregationMode::SEPARATE; - } - - //! Use the combine API, if available - WindowAggregationMode mode; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp b/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp index adf15ead3..acd76a42f 100644 --- a/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +++ b/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp @@ -73,6 +73,7 @@ class BoundWindowExpression : public Expression { string ToString() const override; //! 
The number of ordering clauses the functions share + static idx_t GetSharedOrders(const vector &lhs, const vector &rhs); idx_t GetSharedOrders(const BoundWindowExpression &other) const; bool PartitionsAreEquivalent(const BoundWindowExpression &other) const; diff --git a/src/duckdb/src/parser/transform/expression/transform_function.cpp b/src/duckdb/src/parser/transform/expression/transform_function.cpp index ec8dad678..e410b7049 100644 --- a/src/duckdb/src/parser/transform/expression/transform_function.cpp +++ b/src/duckdb/src/parser/transform/expression/transform_function.cpp @@ -122,8 +122,8 @@ static bool IsOrderableWindowFunction(ExpressionType type) { case ExpressionType::WINDOW_CUME_DIST: case ExpressionType::WINDOW_LEAD: case ExpressionType::WINDOW_LAG: - return true; case ExpressionType::WINDOW_AGGREGATE: + return true; case ExpressionType::WINDOW_RANK_DENSE: return false; default: diff --git a/src/duckdb/src/planner/expression/bound_window_expression.cpp b/src/duckdb/src/planner/expression/bound_window_expression.cpp index 5f778db14..b189b8e6e 100644 --- a/src/duckdb/src/planner/expression/bound_window_expression.cpp +++ b/src/duckdb/src/planner/expression/bound_window_expression.cpp @@ -101,19 +101,23 @@ bool BoundWindowExpression::PartitionsAreEquivalent(const BoundWindowExpression return true; } -idx_t BoundWindowExpression::GetSharedOrders(const BoundWindowExpression &other) const { - const auto overlap = MinValue(orders.size(), other.orders.size()); +idx_t BoundWindowExpression::GetSharedOrders(const vector &lhs, const vector &rhs) { + const auto overlap = MinValue(lhs.size(), rhs.size()); idx_t result = 0; for (; result < overlap; ++result) { - if (!orders[result].Equals(other.orders[result])) { - return false; + if (!lhs.at(result).Equals(rhs.at(result))) { + return 0; } } return result; } +idx_t BoundWindowExpression::GetSharedOrders(const BoundWindowExpression &other) const { + return GetSharedOrders(orders, other.orders); +} + bool BoundWindowExpression::KeysAreCompatible(const BoundWindowExpression &other) const { if (!PartitionsAreEquivalent(other)) { return false; From 16d78de8e253570d409b0d840d8ace36764fcf78 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 11:42:31 +0000 Subject: [PATCH 10/21] vendor: Update vendored sources to duckdb/duckdb@c93ab6e8171b6cba652439cadc91e79d488ffd01 (#985) Backport ENTRY_VISIBILITY from duckdb/extension-template-c (duckdb/duckdb#15611) Co-authored-by: krlmlr --- .../src/function/table/version/pragma_version.cpp | 6 +++--- src/duckdb/src/include/duckdb_extension.h | 10 ++++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 1742d5569..781b85416 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4200" +#define DUCKDB_PATCH_VERSION "4-dev4202" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4200" +#define DUCKDB_VERSION "v1.1.4-dev4202" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "3b44a57608" +#define DUCKDB_SOURCE_ID "c93ab6e817" #endif #include "duckdb/function/table/system_functions.hpp" #include 
"duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb_extension.h b/src/duckdb/src/include/duckdb_extension.h index 5eca823d5..6f01ef592 100644 --- a/src/duckdb/src/include/duckdb_extension.h +++ b/src/duckdb/src/include/duckdb_extension.h @@ -966,6 +966,12 @@ typedef struct { // Place in global scope of any C/C++ file that needs to access the extension API #define DUCKDB_EXTENSION_EXTERN extern duckdb_ext_api_v1 duckdb_ext_api; +#ifdef _WIN32 +#define DUCKDB_CAPI_ENTRY_VISIBILITY __declspec(dllexport) +#else +#define DUCKDB_CAPI_ENTRY_VISIBILITY __attribute__((visibility("default"))) +#endif + //===--------------------------------------------------------------------===// // Entrypoint Macros //===--------------------------------------------------------------------===// @@ -979,7 +985,7 @@ typedef struct { #define DUCKDB_EXTENSION_ENTRYPOINT \ DUCKDB_EXTENSION_GLOBAL static bool DUCKDB_EXTENSION_GLUE(DUCKDB_EXTENSION_NAME, _init_c_api_internal)( \ duckdb_connection connection, duckdb_extension_info info, struct duckdb_extension_access * access); \ - DUCKDB_EXTENSION_EXTERN_C_GUARD_OPEN DUCKDB_EXTENSION_API bool DUCKDB_EXTENSION_GLUE( \ + DUCKDB_EXTENSION_EXTERN_C_GUARD_OPEN DUCKDB_CAPI_ENTRY_VISIBILITY DUCKDB_EXTENSION_API bool DUCKDB_EXTENSION_GLUE( \ DUCKDB_EXTENSION_NAME, _init_c_api)(duckdb_extension_info info, struct duckdb_extension_access * access) { \ DUCKDB_EXTENSION_API_INIT(info, access, DUCKDB_EXTENSION_API_VERSION_STRING); \ duckdb_database *db = access->get_database(info); \ @@ -998,7 +1004,7 @@ typedef struct { #define DUCKDB_EXTENSION_ENTRYPOINT_CUSTOM \ DUCKDB_EXTENSION_GLOBAL static bool DUCKDB_EXTENSION_GLUE(DUCKDB_EXTENSION_NAME, _init_c_api_internal)( \ duckdb_extension_info info, struct duckdb_extension_access * access); \ - DUCKDB_EXTENSION_EXTERN_C_GUARD_OPEN DUCKDB_EXTENSION_API bool DUCKDB_EXTENSION_GLUE( \ + DUCKDB_EXTENSION_EXTERN_C_GUARD_OPEN DUCKDB_CAPI_ENTRY_VISIBILITY DUCKDB_EXTENSION_API bool DUCKDB_EXTENSION_GLUE( \ DUCKDB_EXTENSION_NAME, _init_c_api)(duckdb_extension_info info, struct duckdb_extension_access * access) { \ DUCKDB_EXTENSION_API_INIT(info, access, DUCKDB_EXTENSION_API_VERSION_STRING); \ return DUCKDB_EXTENSION_GLUE(DUCKDB_EXTENSION_NAME, _init_c_api_internal)(info, access); \ From 99a79b04312f06cd15605a02f6eb9b5f8b68fba3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:06:14 +0000 Subject: [PATCH 11/21] vendor: Update vendored sources to duckdb/duckdb@b48222edb439e42517392eaa2e918b625cafd09a (#986) Adjust list_reduce to use a 1-based indexing (duckdb/duckdb#15614) Co-authored-by: krlmlr --- .../extension/core_functions/scalar/list/list_reduce.cpp | 2 +- src/duckdb/src/function/table/version/pragma_version.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/duckdb/extension/core_functions/scalar/list/list_reduce.cpp b/src/duckdb/extension/core_functions/scalar/list/list_reduce.cpp index f8758515f..173b52699 100644 --- a/src/duckdb/extension/core_functions/scalar/list/list_reduce.cpp +++ b/src/duckdb/extension/core_functions/scalar/list/list_reduce.cpp @@ -101,7 +101,7 @@ static bool ExecuteReduce(idx_t loops, ReduceExecuteInfo &execute_info, LambdaFu } // create the index vector - Vector index_vector(Value::BIGINT(UnsafeNumericCast(loops + 1))); + Vector index_vector(Value::BIGINT(UnsafeNumericCast(loops + 2))); // slice the left and right slice execute_info.left_slice->Slice(*execute_info.left_slice, 
execute_info.left_sel, reduced_row_idx); diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 781b85416..3f58a34d9 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4202" +#define DUCKDB_PATCH_VERSION "4-dev4204" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4202" +#define DUCKDB_VERSION "v1.1.4-dev4204" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "c93ab6e817" +#define DUCKDB_SOURCE_ID "b48222edb4" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" From b10616d051e99d9754fee521a780e76246e5bded Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:33:51 +0000 Subject: [PATCH 12/21] vendor: Update vendored sources to duckdb/duckdb@b5393f7d0f8234028f9719889d33178ef793279f (#987) window: fix nullptr dereference (duckdb/duckdb#15610) Co-authored-by: krlmlr --- src/duckdb/src/function/table/version/pragma_version.cpp | 6 +++--- src/duckdb/src/function/window/window_index_tree.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 3f58a34d9..4f5e368f6 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4204" +#define DUCKDB_PATCH_VERSION "4-dev4206" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4204" +#define DUCKDB_VERSION "v1.1.4-dev4206" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "b48222edb4" +#define DUCKDB_SOURCE_ID "b5393f7d0f" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/function/window/window_index_tree.cpp b/src/duckdb/src/function/window/window_index_tree.cpp index c71984f50..5791b2af7 100644 --- a/src/duckdb/src/function/window/window_index_tree.cpp +++ b/src/duckdb/src/function/window/window_index_tree.cpp @@ -56,7 +56,7 @@ idx_t WindowIndexTree::SelectNth(const SubFrames &frames, idx_t n) const { if (mst32) { return mst32->NthElement(mst32->SelectNth(frames, n)); } else { - return mst64->NthElement(mst32->SelectNth(frames, n)); + return mst64->NthElement(mst64->SelectNth(frames, n)); } } From 8cf65d4b8dcf9e085affc6981353c3f1c269d69c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 13:10:16 +0000 Subject: [PATCH 13/21] vendor: Update vendored sources to duckdb/duckdb@b89f77c881fcdb8018bf3562c6de00b9e9849451 (#988) Implement `simple_update` for `first` aggregate function (duckdb/duckdb#15619) Build/test/distribute linux_amd64_musl core extensions (duckdb/duckdb#15607) Improve reading duplicate column names in JSON (duckdb/duckdb#15615) Co-authored-by: krlmlr --- .../aggregate/distributive/first_last_any.cpp | 16 +++++++++++++++- 
.../function/table/version/pragma_version.cpp | 6 +++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/duckdb/src/function/aggregate/distributive/first_last_any.cpp b/src/duckdb/src/function/aggregate/distributive/first_last_any.cpp index d8cb58e32..4e7455efa 100644 --- a/src/duckdb/src/function/aggregate/distributive/first_last_any.cpp +++ b/src/duckdb/src/function/aggregate/distributive/first_last_any.cpp @@ -216,9 +216,23 @@ struct FirstVectorFunction : FirstFunctionStringBase { } }; +template +static void FirstFunctionSimpleUpdate(Vector inputs[], AggregateInputData &aggregate_input_data, idx_t input_count, + data_ptr_t state, idx_t count) { + auto agg_state = reinterpret_cast *>(state); + if (LAST || !agg_state->is_set) { + // For FIRST, this skips looping over the input once the aggregate state has been set + // FIXME: for LAST we could loop from the back of the Vector instead + AggregateFunction::UnaryUpdate, T, FirstFunction>(inputs, aggregate_input_data, + input_count, state, count); + } +} + template static AggregateFunction GetFirstAggregateTemplated(LogicalType type) { - return AggregateFunction::UnaryAggregate, T, T, FirstFunction>(type, type); + auto result = AggregateFunction::UnaryAggregate, T, T, FirstFunction>(type, type); + result.simple_update = FirstFunctionSimpleUpdate; + return result; } template diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 4f5e368f6..a79fca7dd 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4206" +#define DUCKDB_PATCH_VERSION "4-dev4217" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4206" +#define DUCKDB_VERSION "v1.1.4-dev4217" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "b5393f7d0f" +#define DUCKDB_SOURCE_ID "b89f77c881" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" From afa1140fa101a35162a156533f1d38840af53537 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 13:44:51 +0000 Subject: [PATCH 14/21] vendor: Update vendored sources to duckdb/duckdb@5ec2adafa54af246814bd869bb6f48073a2f660b (#989) Issue duckdb/duckdb#15596: Infinite Value Checks (duckdb/duckdb#15620) Co-authored-by: krlmlr --- .../extension/core_functions/scalar/date/make_date.cpp | 4 ++++ src/duckdb/src/common/types/timestamp.cpp | 6 +++++- src/duckdb/src/function/table/version/pragma_version.cpp | 6 +++--- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/duckdb/extension/core_functions/scalar/date/make_date.cpp b/src/duckdb/extension/core_functions/scalar/date/make_date.cpp index 71591a227..0fe00a920 100644 --- a/src/duckdb/extension/core_functions/scalar/date/make_date.cpp +++ b/src/duckdb/extension/core_functions/scalar/date/make_date.cpp @@ -103,6 +103,10 @@ struct MakeTimestampOperator { template static RESULT_TYPE Operation(T value) { + const auto result = RESULT_TYPE(value); + if (!Timestamp::IsFinite(result)) { + throw ConversionException("Timestamp microseconds out of range: %ld", value); + } return RESULT_TYPE(value); } }; diff --git a/src/duckdb/src/common/types/timestamp.cpp 
b/src/duckdb/src/common/types/timestamp.cpp index dbc40a6e8..f90c0f77f 100644 --- a/src/duckdb/src/common/types/timestamp.cpp +++ b/src/duckdb/src/common/types/timestamp.cpp @@ -176,7 +176,11 @@ bool Timestamp::TryFromTimestampNanos(timestamp_t input, int32_t nanos, timestam return false; } - return TryAddOperator::Operation(result.value, int64_t(nanos), result.value); + if (!TryAddOperator::Operation(result.value, int64_t(nanos), result.value)) { + return false; + } + + return IsFinite(result); } TimestampCastResult Timestamp::TryConvertTimestamp(const char *str, idx_t len, timestamp_ns_t &result) { diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index a79fca7dd..ebc810e14 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4217" +#define DUCKDB_PATCH_VERSION "4-dev4220" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4217" +#define DUCKDB_VERSION "v1.1.4-dev4220" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "b89f77c881" +#define DUCKDB_SOURCE_ID "5ec2adafa5" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" From 3b818fc7f736662875b6b763eae7b88a391da328 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:07:22 +0000 Subject: [PATCH 15/21] vendor: Update vendored sources to duckdb/duckdb@bdf0f8c7231c44ed17bc2ede63498b3ef8d85d35 (#990) Issue duckdb/duckdb#15610: Wide Secondary Ordering (duckdb/duckdb#15625) Co-authored-by: krlmlr --- src/duckdb/src/function/table/version/pragma_version.cpp | 6 +++--- src/duckdb/src/function/window/window_merge_sort_tree.cpp | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index ebc810e14..d38747335 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4220" +#define DUCKDB_PATCH_VERSION "4-dev4222" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4220" +#define DUCKDB_VERSION "v1.1.4-dev4222" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "5ec2adafa5" +#define DUCKDB_SOURCE_ID "bdf0f8c723" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/function/window/window_merge_sort_tree.cpp b/src/duckdb/src/function/window/window_merge_sort_tree.cpp index b2d7db4d9..ef22694c4 100644 --- a/src/duckdb/src/function/window/window_merge_sort_tree.cpp +++ b/src/duckdb/src/function/window/window_merge_sort_tree.cpp @@ -11,8 +11,9 @@ WindowMergeSortTree::WindowMergeSortTree(ClientContext &context, const vector::max()) { + if (count < std::numeric_limits::max() && !force_external) { index_type = LogicalType::INTEGER; mst32 = make_uniq(); } else { @@ -40,7 +41,7 @@ WindowMergeSortTree::WindowMergeSortTree(ClientContext &context, const vector(buffer_manager, 
orders, payload_layout); } - global_sort->external = ClientConfig::GetConfig(context).force_external; + global_sort->external = force_external; } optional_ptr WindowMergeSortTree::AddLocalSort() { From 4b3abdc73e7046f7a8d692111e3937a215518203 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:37:11 +0000 Subject: [PATCH 16/21] vendor: Update vendored sources to duckdb/duckdb@00cbb18d634352d1ae0cdf4dd42d587d187f5a04 (#991) [C API] Expose the DB instance cache in the C API (duckdb/duckdb#15579) Issue template updates (duckdb/duckdb#15618) Co-authored-by: krlmlr --- .../function/table/version/pragma_version.cpp | 6 +- src/duckdb/src/include/duckdb.h | 53 ++++++++++++--- .../duckdb/main/capi/capi_internal.hpp | 9 ++- .../duckdb/main/capi/extension_api.hpp | 10 +++ src/duckdb/src/include/duckdb_extension.h | 14 ++++ src/duckdb/src/main/capi/duckdb-c.cpp | 67 +++++++++++++++---- .../src/main/capi/replacement_scan-c.cpp | 2 +- src/duckdb/src/main/capi/threading-c.cpp | 8 +-- .../src/main/extension/extension_load.cpp | 6 +- 9 files changed, 141 insertions(+), 34 deletions(-) diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index d38747335..5f0897e3d 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4222" +#define DUCKDB_PATCH_VERSION "4-dev4230" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4222" +#define DUCKDB_VERSION "v1.1.4-dev4230" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "bdf0f8c723" +#define DUCKDB_SOURCE_ID "00cbb18d63" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb.h b/src/duckdb/src/include/duckdb.h index 5efd2bd6b..ae52abd33 100644 --- a/src/duckdb/src/include/duckdb.h +++ b/src/duckdb/src/include/duckdb.h @@ -441,7 +441,12 @@ typedef struct { void *internal_data; } duckdb_result; -//! A database object. Should be closed with `duckdb_close`. +//! A database instance cache object. Must be destroyed with `duckdb_destroy_instance_cache`. +typedef struct _duckdb_instance_cache { + void *internal_ptr; +} * duckdb_instance_cache; + +//! A database object. Must be closed with `duckdb_close`. typedef struct _duckdb_database { void *internal_ptr; } * duckdb_database; @@ -680,12 +685,44 @@ struct duckdb_extension_access { // Open Connect //===--------------------------------------------------------------------===// +/*! +Creates a new database instance cache. +The instance cache is necessary if a client/program (re)opens multiple databases to the same file within the same +process. Must be destroyed with 'duckdb_destroy_instance_cache'. + +* @return The database instance cache. +*/ +DUCKDB_API duckdb_instance_cache duckdb_create_instance_cache(); + +/*! +Creates a new database instance in the instance cache, or retrieves an existing database instance. +Must be closed with 'duckdb_close'. + +* @param instance_cache The instance cache in which to create the database, or from which to take the database. +* @param path Path to the database file on disk. Both `nullptr` and `:memory:` open or retrieve an in-memory database. 
+* @param out_database The resulting cached database. +* @param config (Optional) configuration used to create the database. +* @param out_error If set and the function returns `DuckDBError`, this contains the error message. +Note that the error message must be freed using `duckdb_free`. +* @return `DuckDBSuccess` on success or `DuckDBError` on failure. +*/ +DUCKDB_API duckdb_state duckdb_get_or_create_from_cache(duckdb_instance_cache instance_cache, const char *path, + duckdb_database *out_database, duckdb_config config, + char **out_error); + +/*! +Destroys an existing database instance cache and de-allocates its memory. + +* @param instance_cache The instance cache to destroy. +*/ +DUCKDB_API void duckdb_destroy_instance_cache(duckdb_instance_cache *instance_cache); + /*! Creates a new database or opens an existing database file stored at the given path. If no path is given a new in-memory database is created instead. -The instantiated database should be closed with 'duckdb_close'. +The database must be closed with 'duckdb_close'. -* @param path Path to the database file on disk, or `nullptr` or `:memory:` to open an in-memory database. +* @param path Path to the database file on disk. Both `nullptr` and `:memory:` open an in-memory database. * @param out_database The result database object. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. */ @@ -693,13 +730,13 @@ DUCKDB_API duckdb_state duckdb_open(const char *path, duckdb_database *out_datab /*! Extended version of duckdb_open. Creates a new database or opens an existing database file stored at the given path. -The instantiated database should be closed with 'duckdb_close'. +The database must be closed with 'duckdb_close'. -* @param path Path to the database file on disk, or `nullptr` or `:memory:` to open an in-memory database. +* @param path Path to the database file on disk. Both `nullptr` and `:memory:` open an in-memory database. * @param out_database The result database object. -* @param config (Optional) configuration used to start up the database system. -* @param out_error If set and the function returns DuckDBError, this will contain the reason why the start-up failed. -Note that the error must be freed using `duckdb_free`. +* @param config (Optional) configuration used to start up the database. +* @param out_error If set and the function returns `DuckDBError`, this contains the error message. +Note that the error message must be freed using `duckdb_free`. * @return `DuckDBSuccess` on success or `DuckDBError` on failure. 
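As a purely illustrative sketch (an editorial example, not part of the upstream header; `my.db` is a placeholder path), a minimal open/close sequence might look like:

    duckdb_database db;
    char *error = NULL;
    if (duckdb_open_ext("my.db", &db, NULL, &error) == DuckDBSuccess) {
        /* ... connect and run queries ... */
        duckdb_close(&db);
    } else {
        duckdb_free(error); /* the message is allocated by the library */
    }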
*/ DUCKDB_API duckdb_state duckdb_open_ext(const char *path, duckdb_database *out_database, duckdb_config config, diff --git a/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp b/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp index f2d825f29..c00425420 100644 --- a/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +++ b/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp @@ -16,6 +16,7 @@ #include "duckdb/common/case_insensitive_map.hpp" #include "duckdb/main/client_context.hpp" #include "duckdb/planner/expression/bound_parameter_data.hpp" +#include "duckdb/main/db_instance_cache.hpp" #include #include @@ -28,8 +29,12 @@ namespace duckdb { -struct DatabaseData { - unique_ptr database; +struct DBInstanceCacheWrapper { + unique_ptr instance_cache; +}; + +struct DatabaseWrapper { + shared_ptr database; }; struct PreparedStatementWrapper { diff --git a/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp b/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp index 6a8bb4bca..71e6d18bc 100644 --- a/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +++ b/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp @@ -460,6 +460,13 @@ typedef struct { duckdb_arrow_schema arrow_schema, duckdb_arrow_array arrow_array, duckdb_arrow_stream *out_stream); duckdb_data_chunk (*duckdb_stream_fetch_chunk)(duckdb_result result); + // Exposing the instance cache + + duckdb_instance_cache (*duckdb_create_instance_cache)(); + duckdb_state (*duckdb_get_or_create_from_cache)(duckdb_instance_cache instance_cache, const char *path, + duckdb_database *out_database, duckdb_config config, + char **out_error); + void (*duckdb_destroy_instance_cache)(duckdb_instance_cache *instance_cache); // New append functions that are added duckdb_state (*duckdb_append_default_to_chunk)(duckdb_appender appender, duckdb_data_chunk chunk, idx_t col, @@ -875,6 +882,9 @@ inline duckdb_ext_api_v1 CreateAPIv1() { result.duckdb_arrow_scan = duckdb_arrow_scan; result.duckdb_arrow_array_scan = duckdb_arrow_array_scan; result.duckdb_stream_fetch_chunk = duckdb_stream_fetch_chunk; + result.duckdb_create_instance_cache = duckdb_create_instance_cache; + result.duckdb_get_or_create_from_cache = duckdb_get_or_create_from_cache; + result.duckdb_destroy_instance_cache = duckdb_destroy_instance_cache; result.duckdb_append_default_to_chunk = duckdb_append_default_to_chunk; return result; } diff --git a/src/duckdb/src/include/duckdb_extension.h b/src/duckdb/src/include/duckdb_extension.h index 6f01ef592..47c6a7f87 100644 --- a/src/duckdb/src/include/duckdb_extension.h +++ b/src/duckdb/src/include/duckdb_extension.h @@ -527,6 +527,15 @@ typedef struct { duckdb_data_chunk (*duckdb_stream_fetch_chunk)(duckdb_result result); #endif +// Exposing the instance cache +#ifdef DUCKDB_EXTENSION_API_VERSION_UNSTABLE + duckdb_instance_cache (*duckdb_create_instance_cache)(); + duckdb_state (*duckdb_get_or_create_from_cache)(duckdb_instance_cache instance_cache, const char *path, + duckdb_database *out_database, duckdb_config config, + char **out_error); + void (*duckdb_destroy_instance_cache)(duckdb_instance_cache *instance_cache); +#endif + // New append functions that are added #ifdef DUCKDB_EXTENSION_API_VERSION_UNSTABLE duckdb_state (*duckdb_append_default_to_chunk)(duckdb_appender appender, duckdb_data_chunk chunk, idx_t col, @@ -947,6 +956,11 @@ typedef struct { #define duckdb_arrow_array_scan duckdb_ext_api.duckdb_arrow_array_scan #define duckdb_stream_fetch_chunk duckdb_ext_api.duckdb_stream_fetch_chunk +// 
Version unstable_instance_cache +#define duckdb_create_instance_cache duckdb_ext_api.duckdb_create_instance_cache +#define duckdb_get_or_create_from_cache duckdb_ext_api.duckdb_get_or_create_from_cache +#define duckdb_destroy_instance_cache duckdb_ext_api.duckdb_destroy_instance_cache + // Version unstable_new_append_functions #define duckdb_append_default_to_chunk duckdb_ext_api.duckdb_append_default_to_chunk diff --git a/src/duckdb/src/main/capi/duckdb-c.cpp b/src/duckdb/src/main/capi/duckdb-c.cpp index ac944578c..ad47d7448 100644 --- a/src/duckdb/src/main/capi/duckdb-c.cpp +++ b/src/duckdb/src/main/capi/duckdb-c.cpp @@ -1,49 +1,88 @@ #include "duckdb/main/capi/capi_internal.hpp" using duckdb::Connection; -using duckdb::DatabaseData; +using duckdb::DatabaseWrapper; using duckdb::DBConfig; +using duckdb::DBInstanceCacheWrapper; using duckdb::DuckDB; using duckdb::ErrorData; -duckdb_state duckdb_open_ext(const char *path, duckdb_database *out, duckdb_config config, char **error) { - auto wrapper = new DatabaseData(); +duckdb_instance_cache duckdb_create_instance_cache() { + auto wrapper = new DBInstanceCacheWrapper(); + wrapper->instance_cache = duckdb::make_uniq(); + return reinterpret_cast(wrapper); +} + +void duckdb_destroy_instance_cache(duckdb_instance_cache *instance_cache) { + if (instance_cache && *instance_cache) { + auto wrapper = reinterpret_cast(*instance_cache); + delete wrapper; + *instance_cache = nullptr; + } +} + +duckdb_state duckdb_open_internal(DBInstanceCacheWrapper *cache, const char *path, duckdb_database *out, + duckdb_config config, char **out_error) { + auto wrapper = new DatabaseWrapper(); try { DBConfig default_config; default_config.SetOptionByName("duckdb_api", "capi"); DBConfig *db_config = &default_config; - DBConfig *user_config = (DBConfig *)config; + DBConfig *user_config = reinterpret_cast(config); if (user_config) { db_config = user_config; } - wrapper->database = duckdb::make_uniq(path, db_config); + if (cache) { + wrapper->database = cache->instance_cache->GetOrCreateInstance(path, *db_config, true); + } else { + wrapper->database = duckdb::make_shared_ptr(path, db_config); + } + } catch (std::exception &ex) { - if (error) { + if (out_error) { ErrorData parsed_error(ex); - *error = strdup(parsed_error.Message().c_str()); + *out_error = strdup(parsed_error.Message().c_str()); } delete wrapper; return DuckDBError; + } catch (...) 
{ // LCOV_EXCL_START - if (error) { - *error = strdup("Unknown error"); + if (out_error) { + *out_error = strdup("Unknown error"); } delete wrapper; return DuckDBError; } // LCOV_EXCL_STOP - *out = (duckdb_database)wrapper; + + *out = reinterpret_cast(wrapper); return DuckDBSuccess; } +duckdb_state duckdb_get_or_create_from_cache(duckdb_instance_cache instance_cache, const char *path, + duckdb_database *out_database, duckdb_config config, char **out_error) { + if (!instance_cache) { + if (out_error) { + *out_error = strdup("instance cache cannot be nullptr"); + } + return DuckDBError; + } + auto cache = reinterpret_cast(instance_cache); + return duckdb_open_internal(cache, path, out_database, config, out_error); +} + +duckdb_state duckdb_open_ext(const char *path, duckdb_database *out, duckdb_config config, char **error) { + return duckdb_open_internal(nullptr, path, out, config, error); +} + duckdb_state duckdb_open(const char *path, duckdb_database *out) { return duckdb_open_ext(path, out, nullptr, nullptr); } void duckdb_close(duckdb_database *database) { if (database && *database) { - auto wrapper = reinterpret_cast(*database); + auto wrapper = reinterpret_cast(*database); delete wrapper; *database = nullptr; } @@ -53,14 +92,16 @@ duckdb_state duckdb_connect(duckdb_database database, duckdb_connection *out) { if (!database || !out) { return DuckDBError; } - auto wrapper = reinterpret_cast(database); + + auto wrapper = reinterpret_cast(database); Connection *connection; try { connection = new Connection(*wrapper->database); } catch (...) { // LCOV_EXCL_START return DuckDBError; } // LCOV_EXCL_STOP - *out = (duckdb_connection)connection; + + *out = reinterpret_cast(connection); return DuckDBSuccess; } diff --git a/src/duckdb/src/main/capi/replacement_scan-c.cpp b/src/duckdb/src/main/capi/replacement_scan-c.cpp index ff3eae4ff..5f23a3169 100644 --- a/src/duckdb/src/main/capi/replacement_scan-c.cpp +++ b/src/duckdb/src/main/capi/replacement_scan-c.cpp @@ -58,7 +58,7 @@ void duckdb_add_replacement_scan(duckdb_database db, duckdb_replacement_callback if (!db || !replacement) { return; } - auto wrapper = reinterpret_cast(db); + auto wrapper = reinterpret_cast(db); auto scan_info = duckdb::make_uniq(); scan_info->callback = replacement; scan_info->extra_data = extra_data; diff --git a/src/duckdb/src/main/capi/threading-c.cpp b/src/duckdb/src/main/capi/threading-c.cpp index b0a002f18..575f22c3c 100644 --- a/src/duckdb/src/main/capi/threading-c.cpp +++ b/src/duckdb/src/main/capi/threading-c.cpp @@ -1,10 +1,10 @@ #include "duckdb/main/capi/capi_internal.hpp" #include "duckdb/parallel/task_scheduler.hpp" -using duckdb::DatabaseData; +using duckdb::DatabaseWrapper; struct CAPITaskState { - CAPITaskState(duckdb::DatabaseInstance &db) + explicit CAPITaskState(duckdb::DatabaseInstance &db) : db(db), marker(duckdb::make_uniq>(true)), execute_count(0) { } @@ -17,7 +17,7 @@ void duckdb_execute_tasks(duckdb_database database, idx_t max_tasks) { if (!database) { return; } - auto wrapper = (DatabaseData *)database; + auto wrapper = reinterpret_cast(database); auto &scheduler = duckdb::TaskScheduler::GetScheduler(*wrapper->database->instance); scheduler.ExecuteTasks(max_tasks); } @@ -26,7 +26,7 @@ duckdb_task_state duckdb_create_task_state(duckdb_database database) { if (!database) { return nullptr; } - auto wrapper = (DatabaseData *)database; + auto wrapper = reinterpret_cast(database); auto state = new CAPITaskState(*wrapper->database->instance); return state; } diff --git 
a/src/duckdb/src/main/extension/extension_load.cpp b/src/duckdb/src/main/extension/extension_load.cpp index a7a7c62fe..23101f722 100644 --- a/src/duckdb/src/main/extension/extension_load.cpp +++ b/src/duckdb/src/main/extension/extension_load.cpp @@ -48,7 +48,7 @@ struct DuckDBExtensionLoadState { //! This is the duckdb_database struct that will be passed to the extension during initialization. Note that the //! extension does not need to free it. - unique_ptr database_data; + unique_ptr database_data; //! The function pointer struct passed to the extension. The extension is expected to copy this struct during //! initialization @@ -88,8 +88,8 @@ struct ExtensionAccess { try { // Create the duckdb_database - load_state.database_data = make_uniq(); - load_state.database_data->database = make_uniq(load_state.db); + load_state.database_data = make_uniq(); + load_state.database_data->database = make_shared_ptr(load_state.db); return reinterpret_cast(load_state.database_data.get()); } catch (std::exception &ex) { load_state.error_data = ErrorData(ex); From 06dd61fc1a68cc98054011bd26af17f667032f18 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:58:41 +0000 Subject: [PATCH 17/21] vendor: Update vendored sources to duckdb/duckdb@f850786ddef96177354ca8183280bc0de62a66c0 (#992) File url scheme (duckdb/duckdb#15563) Co-authored-by: krlmlr --- src/duckdb/src/common/file_system.cpp | 8 +- src/duckdb/src/common/local_file_system.cpp | 154 ++++++++++++++---- .../function/table/version/pragma_version.cpp | 6 +- .../duckdb/common/local_file_system.hpp | 3 + 4 files changed, 131 insertions(+), 40 deletions(-) diff --git a/src/duckdb/src/common/file_system.cpp b/src/duckdb/src/common/file_system.cpp index 26ac7fee3..fbcf0cede 100644 --- a/src/duckdb/src/common/file_system.cpp +++ b/src/duckdb/src/common/file_system.cpp @@ -115,7 +115,7 @@ string FileSystem::GetEnvVariable(const string &name) { bool FileSystem::IsPathAbsolute(const string &path) { auto path_separator = PathSeparator(path); - return PathMatched(path, path_separator); + return PathMatched(path, path_separator) || StringUtil::StartsWith(path, "file:/"); } string FileSystem::PathSeparator(const string &path) { @@ -237,7 +237,11 @@ string FileSystem::NormalizeAbsolutePath(const string &path) { } string FileSystem::PathSeparator(const string &path) { - return "\\"; + if (StringUtil::StartsWith(path, "file:")) { + return "/"; + } else { + return "\\"; + } } void FileSystem::SetWorkingDirectory(const string &path) { diff --git a/src/duckdb/src/common/local_file_system.cpp b/src/duckdb/src/common/local_file_system.cpp index 2827b33e3..83e242526 100644 --- a/src/duckdb/src/common/local_file_system.cpp +++ b/src/duckdb/src/common/local_file_system.cpp @@ -59,13 +59,13 @@ extern "C" WINBASEAPI BOOL QueryFullProcessImageNameW(HANDLE, DWORD, LPWSTR, PDW #endif namespace duckdb { - #ifndef _WIN32 bool LocalFileSystem::FileExists(const string &filename, optional_ptr opener) { if (!filename.empty()) { - if (access(filename.c_str(), 0) == 0) { + auto normalized_file = NormalizeLocalPath(filename); + if (access(normalized_file, 0) == 0) { struct stat status; - stat(filename.c_str(), &status); + stat(normalized_file, &status); if (S_ISREG(status.st_mode)) { return true; } @@ -77,9 +77,10 @@ bool LocalFileSystem::FileExists(const string &filename, optional_ptr opener) { if (!filename.empty()) { - if (access(filename.c_str(), 0) == 0) { + auto normalized_file = 
NormalizeLocalPath(filename); + if (access(normalized_file, 0) == 0) { struct stat status; - stat(filename.c_str(), &status); + stat(normalized_file, &status); if (S_ISFIFO(status.st_mode)) { return true; } @@ -90,8 +91,21 @@ bool LocalFileSystem::IsPipe(const string &filename, optional_ptr op } #else +static std::wstring NormalizePathAndConvertToUnicode(const string &path) { + string normalized_path_copy; + const char *normalized_path; + if (StringUtil::StartsWith(path, "file:/")) { + normalized_path_copy = LocalFileSystem::NormalizeLocalPath(path); + normalized_path_copy = LocalFileSystem().ConvertSeparators(normalized_path_copy); + normalized_path = normalized_path_copy.c_str(); + } else { + normalized_path = path.c_str(); + } + return WindowsUtil::UTF8ToUnicode(normalized_path); +} + bool LocalFileSystem::FileExists(const string &filename, optional_ptr opener) { - auto unicode_path = WindowsUtil::UTF8ToUnicode(filename.c_str()); + auto unicode_path = NormalizePathAndConvertToUnicode(filename); const wchar_t *wpath = unicode_path.c_str(); if (_waccess(wpath, 0) == 0) { struct _stati64 status; @@ -103,7 +117,7 @@ bool LocalFileSystem::FileExists(const string &filename, optional_ptr opener) { - auto unicode_path = WindowsUtil::UTF8ToUnicode(filename.c_str()); + auto unicode_path = NormalizePathAndConvertToUnicode(filename); const wchar_t *wpath = unicode_path.c_str(); if (_waccess(wpath, 0) == 0) { struct _stati64 status; @@ -180,7 +194,7 @@ static string AdditionalProcessInfo(FileSystem &fs, pid_t pid) { } string process_name, process_owner; -// macOS >= 10.7 has PROC_PIDT_SHORTBSDINFO + // macOS >= 10.7 has PROC_PIDT_SHORTBSDINFO #ifdef PROC_PIDT_SHORTBSDINFO // try to find out more about the process holding the lock struct proc_bsdshortinfo proc; @@ -261,10 +275,11 @@ static string AdditionalProcessInfo(FileSystem &fs, pid_t pid) { bool LocalFileSystem::IsPrivateFile(const string &path_p, FileOpener *opener) { auto path = FileSystem::ExpandPath(path_p, opener); + auto normalized_path = NormalizeLocalPath(path); struct stat st; - if (lstat(path.c_str(), &st) != 0) { + if (lstat(normalized_path, &st) != 0) { throw IOException( "Failed to stat '%s' when checking file permissions, file may be missing or have incorrect permissions", path.c_str()); @@ -281,6 +296,7 @@ bool LocalFileSystem::IsPrivateFile(const string &path_p, FileOpener *opener) { unique_ptr LocalFileSystem::OpenFile(const string &path_p, FileOpenFlags flags, optional_ptr opener) { auto path = FileSystem::ExpandPath(path_p, opener); + auto normalized_path = NormalizeLocalPath(path); if (flags.Compression() != FileCompressionType::UNCOMPRESSED) { throw NotImplementedException("Unsupported compression type for default file system"); } @@ -338,7 +354,7 @@ unique_ptr LocalFileSystem::OpenFile(const string &path_p, FileOpenF } // Open the file - int fd = open(path.c_str(), open_flags, filesec); + int fd = open(normalized_path, open_flags, filesec); if (fd == -1) { if (flags.ReturnNullIfNotExists() && errno == ENOENT) { @@ -560,9 +576,10 @@ void LocalFileSystem::Truncate(FileHandle &handle, int64_t new_size) { bool LocalFileSystem::DirectoryExists(const string &directory, optional_ptr opener) { if (!directory.empty()) { - if (access(directory.c_str(), 0) == 0) { + auto normalized_dir = NormalizeLocalPath(directory); + if (access(normalized_dir, 0) == 0) { struct stat status; - stat(directory.c_str(), &status); + stat(normalized_dir, &status); if (status.st_mode & S_IFDIR) { return true; } @@ -575,9 +592,10 @@ bool 
LocalFileSystem::DirectoryExists(const string &directory, optional_ptr opener) { struct stat st; - if (stat(directory.c_str(), &st) != 0) { + auto normalized_dir = NormalizeLocalPath(directory); + if (stat(normalized_dir, &st) != 0) { /* Directory does not exist. EEXIST for race condition */ - if (mkdir(directory.c_str(), 0755) != 0 && errno != EEXIST) { + if (mkdir(normalized_dir, 0755) != 0 && errno != EEXIST) { throw IOException("Failed to create directory \"%s\": %s", {{"errno", std::to_string(errno)}}, directory, strerror(errno)); } @@ -628,11 +646,13 @@ int RemoveDirectoryRecursive(const char *path) { } void LocalFileSystem::RemoveDirectory(const string &directory, optional_ptr opener) { - RemoveDirectoryRecursive(directory.c_str()); + auto normalized_dir = NormalizeLocalPath(directory); + RemoveDirectoryRecursive(normalized_dir); } void LocalFileSystem::RemoveFile(const string &filename, optional_ptr opener) { - if (std::remove(filename.c_str()) != 0) { + auto normalized_file = NormalizeLocalPath(filename); + if (std::remove(normalized_file) != 0) { throw IOException("Could not remove file \"%s\": %s", {{"errno", std::to_string(errno)}}, filename, strerror(errno)); } @@ -640,7 +660,8 @@ void LocalFileSystem::RemoveFile(const string &filename, optional_ptr &callback, FileOpener *opener) { - auto dir = opendir(directory.c_str()); + auto normalized_dir = NormalizeLocalPath(directory); + auto dir = opendir(normalized_dir); if (!dir) { return false; } @@ -657,7 +678,7 @@ bool LocalFileSystem::ListFiles(const string &directory, const std::function opener) { + auto normalized_source = NormalizeLocalPath(source); + auto normalized_target = NormalizeLocalPath(target); //! FIXME: rename does not guarantee atomicity or overwriting target file if it exists - if (rename(source.c_str(), target.c_str()) != 0) { + if (rename(normalized_source, normalized_target) != 0) { throw IOException("Could not rename file!", {{"errno", std::to_string(errno)}}); } } @@ -808,6 +831,7 @@ bool LocalFileSystem::IsPrivateFile(const string &path_p, FileOpener *opener) { unique_ptr LocalFileSystem::OpenFile(const string &path_p, FileOpenFlags flags, optional_ptr opener) { auto path = FileSystem::ExpandPath(path_p, opener); + auto unicode_path = NormalizePathAndConvertToUnicode(path); if (flags.Compression() != FileCompressionType::UNCOMPRESSED) { throw NotImplementedException("Unsupported compression type for default file system"); } @@ -841,7 +865,6 @@ unique_ptr LocalFileSystem::OpenFile(const string &path_p, FileOpenF if (flags.DirectIO()) { flags_and_attributes |= FILE_FLAG_NO_BUFFERING; } - auto unicode_path = WindowsUtil::UTF8ToUnicode(path.c_str()); HANDLE hFile = CreateFileW(unicode_path.c_str(), desired_access, share_mode, NULL, creation_disposition, flags_and_attributes, NULL); if (hFile == INVALID_HANDLE_VALUE) { @@ -1016,10 +1039,14 @@ void LocalFileSystem::Truncate(FileHandle &handle, int64_t new_size) { } static DWORD WindowsGetFileAttributes(const string &filename) { - auto unicode_path = WindowsUtil::UTF8ToUnicode(filename.c_str()); + auto unicode_path = NormalizePathAndConvertToUnicode(filename); return GetFileAttributesW(unicode_path.c_str()); } +static DWORD WindowsGetFileAttributes(const std::wstring &filename) { + return GetFileAttributesW(filename.c_str()); +} + bool LocalFileSystem::DirectoryExists(const string &directory, optional_ptr opener) { DWORD attrs = WindowsGetFileAttributes(directory); return (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_DIRECTORY)); @@ -1029,7 +1056,7 
@@ void LocalFileSystem::CreateDirectory(const string &directory, optional_ptr opener) { - auto unicode_path = WindowsUtil::UTF8ToUnicode(filename.c_str()); + auto unicode_path = NormalizePathAndConvertToUnicode(filename); if (!DeleteFileW(unicode_path.c_str())) { auto error = LocalFileSystem::GetLastErrorAsString(); throw IOException("Failed to delete file \"%s\": %s", filename, error); @@ -1072,8 +1099,7 @@ void LocalFileSystem::RemoveFile(const string &filename, optional_ptr &callback, FileOpener *opener) { string search_dir = JoinPath(directory, "*"); - - auto unicode_path = WindowsUtil::UTF8ToUnicode(search_dir.c_str()); + auto unicode_path = NormalizePathAndConvertToUnicode(search_dir); WIN32_FIND_DATAW ffd; HANDLE hFind = FindFirstFileW(unicode_path.c_str(), &ffd); @@ -1106,8 +1132,9 @@ void LocalFileSystem::FileSync(FileHandle &handle) { } void LocalFileSystem::MoveFile(const string &source, const string &target, optional_ptr opener) { - auto source_unicode = WindowsUtil::UTF8ToUnicode(source.c_str()); - auto target_unicode = WindowsUtil::UTF8ToUnicode(target.c_str()); + auto source_unicode = NormalizePathAndConvertToUnicode(source); + auto target_unicode = NormalizePathAndConvertToUnicode(target); + if (!MoveFileW(source_unicode.c_str(), target_unicode.c_str())) { throw IOException("Could not move file: %s", GetLastErrorAsString()); } @@ -1119,7 +1146,8 @@ FileType LocalFileSystem::GetFileType(FileHandle &handle) { if (strncmp(path.c_str(), PIPE_PREFIX, strlen(PIPE_PREFIX)) == 0) { return FileType::FILE_TYPE_FIFO; } - DWORD attrs = WindowsGetFileAttributes(path.c_str()); + auto normalized_path = NormalizePathAndConvertToUnicode(path); + DWORD attrs = WindowsGetFileAttributes(normalized_path); if (attrs != INVALID_FILE_ATTRIBUTES) { if (attrs & FILE_ATTRIBUTE_DIRECTORY) { return FileType::FILE_TYPE_DIR; @@ -1161,9 +1189,10 @@ static bool HasMultipleCrawl(const vector &splits) { return std::count(splits.begin(), splits.end(), "**") > 1; } static bool IsSymbolicLink(const string &path) { + auto normalized_path = LocalFileSystem::NormalizeLocalPath(path); #ifndef _WIN32 struct stat status; - return (lstat(path.c_str(), &status) != -1 && S_ISLNK(status.st_mode)); + return (lstat(normalized_path, &status) != -1 && S_ISLNK(status.st_mode)); #else auto attributes = WindowsGetFileAttributes(path); if (attributes == INVALID_FILE_ATTRIBUTES) @@ -1230,14 +1259,59 @@ vector LocalFileSystem::FetchFileWithoutGlob(const string &path, FileOpe return result; } +// Helper function to handle file:/ URLs +static idx_t GetFileUrlOffset(const string &path) { + if (!StringUtil::StartsWith(path, "file:/")) { + return 0; + } + + // Url without host: file:/some/path + if (path[6] != '/') { +#ifdef _WIN32 + return 6; +#else + return 5; +#endif + } + + // Url with empty host: file:///some/path + if (path[7] == '/') { +#ifdef _WIN32 + return 8; +#else + return 7; +#endif + } + + // Url with localhost: file://localhost/some/path + if (path.compare(7, 10, "localhost/") == 0) { +#ifdef _WIN32 + return 17; +#else + return 16; +#endif + } + + // unknown file:/ url format + return 0; +} + +const char *LocalFileSystem::NormalizeLocalPath(const string &path) { + return path.c_str() + GetFileUrlOffset(path); +} + vector LocalFileSystem::Glob(const string &path, FileOpener *opener) { if (path.empty()) { return vector(); } // split up the path into separate chunks vector splits; + + bool is_file_url = StringUtil::StartsWith(path, "file:/"); + idx_t file_url_path_offset = GetFileUrlOffset(path); + idx_t last_pos = 0; - for 
(idx_t i = 0; i < path.size(); i++) { + for (idx_t i = file_url_path_offset; i < path.size(); i++) { if (path[i] == '\\' || path[i] == '/') { if (i == last_pos) { // empty: skip this position @@ -1245,6 +1319,7 @@ vector LocalFileSystem::Glob(const string &path, FileOpener *opener) { continue; } if (splits.empty()) { + // splits.push_back(path.substr(file_url_path_offset, i-file_url_path_offset)); splits.push_back(path.substr(0, i)); } else { splits.push_back(path.substr(last_pos, i - last_pos)); @@ -1255,10 +1330,10 @@ vector LocalFileSystem::Glob(const string &path, FileOpener *opener) { splits.push_back(path.substr(last_pos, path.size() - last_pos)); // handle absolute paths bool absolute_path = false; - if (path[0] == '/') { + if (IsPathAbsolute(path)) { // first character is a slash - unix absolute path absolute_path = true; - } else if (StringUtil::Contains(splits[0], ":")) { + } else if (StringUtil::Contains(splits[0], ":")) { // TODO: this is weird? shouldn't IsPathAbsolute handle this? // first split has a colon - windows absolute path absolute_path = true; } else if (splits[0] == "~") { @@ -1298,7 +1373,16 @@ vector LocalFileSystem::Glob(const string &path, FileOpener *opener) { throw IOException("Cannot use multiple \'**\' in one path"); } - for (idx_t i = absolute_path ? 1 : 0; i < splits.size(); i++) { + idx_t start_index; + if (is_file_url) { + start_index = 1; + } else if (absolute_path) { + start_index = 1; + } else { + start_index = 0; + } + + for (idx_t i = start_index ? 1 : 0; i < splits.size(); i++) { bool is_last_chunk = i + 1 == splits.size(); bool has_glob = HasGlob(splits[i]); // if it's the last chunk we need to find files, otherwise we find directories diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 5f0897e3d..41709636d 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4230" +#define DUCKDB_PATCH_VERSION "4-dev4241" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4230" +#define DUCKDB_VERSION "v1.1.4-dev4241" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "00cbb18d63" +#define DUCKDB_SOURCE_ID "f850786dde" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/local_file_system.hpp b/src/duckdb/src/include/duckdb/common/local_file_system.hpp index fe4e02ad8..c2d38152a 100644 --- a/src/duckdb/src/include/duckdb/common/local_file_system.hpp +++ b/src/duckdb/src/include/duckdb/common/local_file_system.hpp @@ -96,6 +96,9 @@ class LocalFileSystem : public FileSystem { //! Checks a file is private (checks for 600 on linux/macos, TODO: currently always returns true on windows) static bool IsPrivateFile(const string &path_p, FileOpener *opener); + // returns a C-string of the path that trims any file:/ prefix + static const char *NormalizeLocalPath(const string &path); + private: //! Set the file pointer of a file handle to a specified location. 
Reads and writes will happen from this location void SetFilePointer(FileHandle &handle, idx_t location); From c2d7dd38d314d6394841ae11c35770364f5bdbad Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 19:44:40 +0000 Subject: [PATCH 18/21] vendor: Update vendored sources to duckdb/duckdb@acea8cde22f802b792adc8cfdeea335103edc5fe (#994) Issue duckdb/duckdb#15597: Temporal Error Messages (duckdb/duckdb#15635) Fix an if statement that is always True (duckdb/duckdb#15630) Co-authored-by: krlmlr --- .../core_functions/scalar/date/epoch.cpp | 2 +- src/duckdb/src/common/enum_util.cpp | 27 ++++++- .../src/common/operator/cast_operators.cpp | 33 ++++++--- src/duckdb/src/common/types/date.cpp | 64 +++++++++------- src/duckdb/src/common/types/timestamp.cpp | 74 +++++++++++-------- .../scanner/string_value_scanner.cpp | 5 +- .../csv_scanner/sniffer/type_detection.cpp | 2 +- .../function/table/version/pragma_version.cpp | 6 +- .../src/include/duckdb/common/enum_util.hpp | 8 ++ .../src/include/duckdb/common/types/date.hpp | 13 +++- .../include/duckdb/common/types/timestamp.hpp | 13 ++-- 11 files changed, 164 insertions(+), 83 deletions(-) diff --git a/src/duckdb/extension/core_functions/scalar/date/epoch.cpp b/src/duckdb/extension/core_functions/scalar/date/epoch.cpp index c30530341..cda3232a4 100644 --- a/src/duckdb/extension/core_functions/scalar/date/epoch.cpp +++ b/src/duckdb/extension/core_functions/scalar/date/epoch.cpp @@ -11,7 +11,7 @@ struct EpochSecOperator { static RESULT_TYPE Operation(INPUT_TYPE sec) { int64_t result; if (!TryCast::Operation(sec * Interval::MICROS_PER_SEC, result)) { - throw ConversionException("Could not convert epoch seconds to TIMESTAMP WITH TIME ZONE"); + throw ConversionException("Epoch seconds out of range for TIMESTAMP WITH TIME ZONE"); } return timestamp_t(result); } diff --git a/src/duckdb/src/common/enum_util.cpp b/src/duckdb/src/common/enum_util.cpp index 6baa6854f..1fd91919b 100644 --- a/src/duckdb/src/common/enum_util.cpp +++ b/src/duckdb/src/common/enum_util.cpp @@ -73,6 +73,7 @@ #include "duckdb/common/types/column/column_data_scan_states.hpp" #include "duckdb/common/types/column/partitioned_column_data.hpp" #include "duckdb/common/types/conflict_manager.hpp" +#include "duckdb/common/types/date.hpp" #include "duckdb/common/types/hyperloglog.hpp" #include "duckdb/common/types/row/partitioned_tuple_data.hpp" #include "duckdb/common/types/row/tuple_data_states.hpp" @@ -1009,6 +1010,25 @@ DataFileType EnumUtil::FromString(const char *value) { return static_cast(StringUtil::StringToEnum(GetDataFileTypeValues(), 4, "DataFileType", value)); } +const StringUtil::EnumStringLiteral *GetDateCastResultValues() { + static constexpr StringUtil::EnumStringLiteral values[] { + { static_cast(DateCastResult::SUCCESS), "SUCCESS" }, + { static_cast(DateCastResult::ERROR_INCORRECT_FORMAT), "ERROR_INCORRECT_FORMAT" }, + { static_cast(DateCastResult::ERROR_RANGE), "ERROR_RANGE" } + }; + return values; +} + +template<> +const char* EnumUtil::ToChars(DateCastResult value) { + return StringUtil::EnumToString(GetDateCastResultValues(), 3, "DateCastResult", static_cast(value)); +} + +template<> +DateCastResult EnumUtil::FromString(const char *value) { + return static_cast(StringUtil::StringToEnum(GetDateCastResultValues(), 3, "DateCastResult", value)); +} + const StringUtil::EnumStringLiteral *GetDatePartSpecifierValues() { static constexpr StringUtil::EnumStringLiteral values[] { { 
static_cast(DatePartSpecifier::YEAR), "YEAR" }, @@ -3880,19 +3900,20 @@ const StringUtil::EnumStringLiteral *GetTimestampCastResultValues() { static constexpr StringUtil::EnumStringLiteral values[] { { static_cast(TimestampCastResult::SUCCESS), "SUCCESS" }, { static_cast(TimestampCastResult::ERROR_INCORRECT_FORMAT), "ERROR_INCORRECT_FORMAT" }, - { static_cast(TimestampCastResult::ERROR_NON_UTC_TIMEZONE), "ERROR_NON_UTC_TIMEZONE" } + { static_cast(TimestampCastResult::ERROR_NON_UTC_TIMEZONE), "ERROR_NON_UTC_TIMEZONE" }, + { static_cast(TimestampCastResult::ERROR_RANGE), "ERROR_RANGE" } }; return values; } template<> const char* EnumUtil::ToChars(TimestampCastResult value) { - return StringUtil::EnumToString(GetTimestampCastResultValues(), 3, "TimestampCastResult", static_cast(value)); + return StringUtil::EnumToString(GetTimestampCastResultValues(), 4, "TimestampCastResult", static_cast(value)); } template<> TimestampCastResult EnumUtil::FromString(const char *value) { - return static_cast(StringUtil::StringToEnum(GetTimestampCastResultValues(), 3, "TimestampCastResult", value)); + return static_cast(StringUtil::StringToEnum(GetTimestampCastResultValues(), 4, "TimestampCastResult", value)); } const StringUtil::EnumStringLiteral *GetTransactionModifierTypeValues() { diff --git a/src/duckdb/src/common/operator/cast_operators.cpp b/src/duckdb/src/common/operator/cast_operators.cpp index fe1ce26af..9742c6087 100644 --- a/src/duckdb/src/common/operator/cast_operators.cpp +++ b/src/duckdb/src/common/operator/cast_operators.cpp @@ -1470,8 +1470,16 @@ bool TryCastToUUID::Operation(string_t input, hugeint_t &result, Vector &result_ //===--------------------------------------------------------------------===// template <> bool TryCastErrorMessage::Operation(string_t input, date_t &result, CastParameters ¶meters) { - if (!TryCast::Operation(input, result, parameters.strict)) { - HandleCastError::AssignError(Date::ConversionError(input), parameters); + idx_t pos; + bool special = false; + switch (Date::TryConvertDate(input.GetData(), input.GetSize(), pos, result, special, parameters.strict)) { + case DateCastResult::SUCCESS: + break; + case DateCastResult::ERROR_INCORRECT_FORMAT: + HandleCastError::AssignError(Date::FormatError(input), parameters); + return false; + case DateCastResult::ERROR_RANGE: + HandleCastError::AssignError(Date::RangeError(input), parameters); return false; } return true; @@ -1481,7 +1489,8 @@ template <> bool TryCast::Operation(string_t input, date_t &result, bool strict) { idx_t pos; bool special = false; - return Date::TryConvertDate(input.GetData(), input.GetSize(), pos, result, special, strict); + return Date::TryConvertDate(input.GetData(), input.GetSize(), pos, result, special, strict) == + DateCastResult::SUCCESS; } template <> @@ -1545,14 +1554,18 @@ dtime_tz_t Cast::Operation(string_t input) { //===--------------------------------------------------------------------===// template <> bool TryCastErrorMessage::Operation(string_t input, timestamp_t &result, CastParameters ¶meters) { - auto cast_result = Timestamp::TryConvertTimestamp(input.GetData(), input.GetSize(), result); - if (cast_result == TimestampCastResult::SUCCESS) { + switch (Timestamp::TryConvertTimestamp(input.GetData(), input.GetSize(), result)) { + case TimestampCastResult::SUCCESS: return true; - } - if (cast_result == TimestampCastResult::ERROR_INCORRECT_FORMAT) { - HandleCastError::AssignError(Timestamp::ConversionError(input), parameters); - } else { + case TimestampCastResult::ERROR_INCORRECT_FORMAT: 
+ HandleCastError::AssignError(Timestamp::FormatError(input), parameters); + break; + case TimestampCastResult::ERROR_NON_UTC_TIMEZONE: HandleCastError::AssignError(Timestamp::UnsupportedTimezoneError(input), parameters); + break; + case TimestampCastResult::ERROR_RANGE: + HandleCastError::AssignError(Timestamp::RangeError(input), parameters); + break; } return false; } @@ -1578,7 +1591,7 @@ timestamp_ns_t Cast::Operation(string_t input) { const auto ts = Timestamp::FromCString(input.GetData(), input.GetSize(), &nanos); timestamp_ns_t result; if (!Timestamp::TryFromTimestampNanos(ts, nanos, result)) { - throw ConversionException(Timestamp::ConversionError(input)); + throw ConversionException(Timestamp::RangeError(input)); } return result; } diff --git a/src/duckdb/src/common/types/date.cpp b/src/duckdb/src/common/types/date.cpp index 7c9f78ca8..429ee0311 100644 --- a/src/duckdb/src/common/types/date.cpp +++ b/src/duckdb/src/common/types/date.cpp @@ -205,11 +205,12 @@ bool Date::TryConvertDateSpecial(const char *buf, idx_t len, idx_t &pos, const c return true; } -bool Date::TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result, bool &special, bool strict) { +DateCastResult Date::TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result, bool &special, + bool strict) { special = false; pos = 0; if (len == 0) { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } int32_t day = 0; @@ -224,13 +225,13 @@ bool Date::TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result } if (pos >= len) { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } if (buf[pos] == '-') { yearneg = true; pos++; if (pos >= len) { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } } if (!StringUtil::CharacterIsDigit(buf[pos])) { @@ -240,62 +241,62 @@ bool Date::TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result } else if (TryConvertDateSpecial(buf, len, pos, EPOCH)) { result = date_t::epoch(); } else { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } // skip trailing spaces - parsing must be strict here while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) { pos++; } special = true; - return pos == len; + return (pos == len) ? 
DateCastResult::SUCCESS : DateCastResult::ERROR_INCORRECT_FORMAT; } // first parse the year idx_t year_length = 0; for (; pos < len && StringUtil::CharacterIsDigit(buf[pos]); pos++) { if (year >= 100000000) { - return false; + return DateCastResult::ERROR_RANGE; } year = (buf[pos] - '0') + year * 10; year_length++; } if (year_length < 2 && strict) { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } if (yearneg) { year = -year; } if (pos >= len) { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } // fetch the separator sep = buf[pos++]; if (sep != ' ' && sep != '-' && sep != '/' && sep != '\\') { // invalid separator - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } // parse the month if (!Date::ParseDoubleDigit(buf, len, pos, month)) { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } if (pos >= len) { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } if (buf[pos++] != sep) { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } if (pos >= len) { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } // now parse the day if (!Date::ParseDoubleDigit(buf, len, pos, day)) { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } // check for an optional trailing " (BC)"" @@ -303,7 +304,7 @@ bool Date::TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result StringUtil::CharacterToLower(buf[pos + 2]) == 'b' && StringUtil::CharacterToLower(buf[pos + 3]) == 'c' && buf[pos + 4] == ')') { if (yearneg || year == 0) { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } year = -year + 1; pos += 5; @@ -317,34 +318,47 @@ bool Date::TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result } // check position. if end was not reached, non-space chars remaining if (pos < len) { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } } else { // in non-strict mode, check for any direct trailing digits if (pos < len && StringUtil::CharacterIsDigit(buf[pos])) { - return false; + return DateCastResult::ERROR_INCORRECT_FORMAT; } } - return Date::TryFromDate(year, month, day, result); + return Date::TryFromDate(year, month, day, result) ? 
DateCastResult::SUCCESS : DateCastResult::ERROR_RANGE; } -string Date::ConversionError(const string &str) { - return StringUtil::Format("date field value out of range: \"%s\", " +string Date::FormatError(const string &str) { + return StringUtil::Format("invalid date field format: \"%s\", " "expected format is (YYYY-MM-DD)", str); } -string Date::ConversionError(string_t str) { - return ConversionError(str.GetString()); +string Date::RangeError(const string &str) { + return StringUtil::Format("date field value out of range: \"%s\"", str); +} + +string Date::RangeError(string_t str) { + return RangeError(str.GetString()); +} + +string Date::FormatError(string_t str) { + return FormatError(str.GetString()); } date_t Date::FromCString(const char *buf, idx_t len, bool strict) { date_t result; idx_t pos; bool special = false; - if (!TryConvertDate(buf, len, pos, result, special, strict)) { - throw ConversionException(ConversionError(string(buf, len))); + switch (TryConvertDate(buf, len, pos, result, special, strict)) { + case DateCastResult::ERROR_INCORRECT_FORMAT: + throw ConversionException(FormatError(string(buf, len))); + case DateCastResult::ERROR_RANGE: + throw ConversionException(RangeError(string(buf, len))); + case DateCastResult::SUCCESS: + break; } return result; } diff --git a/src/duckdb/src/common/types/timestamp.cpp b/src/duckdb/src/common/types/timestamp.cpp index f90c0f77f..d596e7530 100644 --- a/src/duckdb/src/common/types/timestamp.cpp +++ b/src/duckdb/src/common/types/timestamp.cpp @@ -64,25 +64,31 @@ timestamp_t ×tamp_t::operator-=(const int64_t &delta) { return *this; } -bool Timestamp::TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &result, bool &has_offset, string_t &tz, - optional_ptr nanos) { +TimestampCastResult Timestamp::TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &result, bool &has_offset, + string_t &tz, optional_ptr nanos) { idx_t pos; date_t date; dtime_t time; has_offset = false; - if (!Date::TryConvertDate(str, len, pos, date, has_offset)) { - return false; + switch (Date::TryConvertDate(str, len, pos, date, has_offset)) { + case DateCastResult::ERROR_INCORRECT_FORMAT: + return TimestampCastResult::ERROR_INCORRECT_FORMAT; + case DateCastResult::ERROR_RANGE: + return TimestampCastResult::ERROR_RANGE; + default: + break; } if (pos == len) { // no time: only a date or special if (date == date_t::infinity()) { result = timestamp_t::infinity(); - return true; + return TimestampCastResult::SUCCESS; } else if (date == date_t::ninfinity()) { result = timestamp_t::ninfinity(); - return true; + return TimestampCastResult::SUCCESS; } - return Timestamp::TryFromDatetime(date, dtime_t(0), result); + return Timestamp::TryFromDatetime(date, dtime_t(0), result) ? TimestampCastResult::SUCCESS + : TimestampCastResult::ERROR_RANGE; } // try to parse a time field if (str[pos] == ' ' || str[pos] == 'T') { @@ -93,15 +99,15 @@ bool Timestamp::TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &r // operation. Note that we can't pass strict== true here because we // want to process any suffix. if (!Time::TryConvertInterval(str + pos, len - pos, time_pos, time, false, nanos)) { - return false; + return TimestampCastResult::ERROR_INCORRECT_FORMAT; } // We parsed an interval, so make sure it is in range. 
if (time.micros > Interval::MICROS_PER_DAY) { - return false; + return TimestampCastResult::ERROR_RANGE; } pos += time_pos; if (!Timestamp::TryFromDatetime(date, time, result)) { - return false; + return TimestampCastResult::ERROR_RANGE; } if (pos < len) { // skip a "Z" at the end (as per the ISO8601 specs) @@ -112,13 +118,13 @@ bool Timestamp::TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &r } else if (Timestamp::TryParseUTCOffset(str, pos, len, hour_offset, minute_offset)) { const int64_t delta = hour_offset * Interval::MICROS_PER_HOUR + minute_offset * Interval::MICROS_PER_MINUTE; if (!TrySubtractOperator::Operation(result.value, delta, result.value)) { - return false; + return TimestampCastResult::ERROR_RANGE; } has_offset = true; } else { // Parse a time zone: / [A-Za-z0-9/_]+/ if (str[pos++] != ' ') { - return false; + return TimestampCastResult::ERROR_NON_UTC_TIMEZONE; } auto tz_name = str + pos; for (; pos < len && CharacterIsTimeZone(str[pos]); ++pos) { @@ -136,10 +142,10 @@ bool Timestamp::TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &r pos++; } if (pos < len) { - return false; + return TimestampCastResult::ERROR_INCORRECT_FORMAT; } } - return true; + return TimestampCastResult::SUCCESS; } TimestampCastResult Timestamp::TryConvertTimestamp(const char *str, idx_t len, timestamp_t &result, @@ -148,8 +154,8 @@ TimestampCastResult Timestamp::TryConvertTimestamp(const char *str, idx_t len, t bool has_offset = false; // We don't understand TZ without an extension, so fail if one was provided. auto success = TryConvertTimestampTZ(str, len, result, has_offset, tz, nanos); - if (!success) { - return TimestampCastResult::ERROR_INCORRECT_FORMAT; + if (success != TimestampCastResult::SUCCESS) { + return success; } if (tz.GetSize() == 0) { // no timezone provided - success! 
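
A minimal caller-side sketch (illustrative only, not part of the vendored diff): with the fourth enum value, code that consumes TryConvertTimestamp can map each result code to the matching message helper instead of collapsing format and range failures into one error. Every name used here is declared in the patched timestamp.hpp further down.

// Sketch: fanning out over the four TimestampCastResult codes (duckdb types assumed in scope).
string ExplainTimestampFailure(const char *str, idx_t len) {
	timestamp_t ts;
	switch (Timestamp::TryConvertTimestamp(str, len, ts)) {
	case TimestampCastResult::SUCCESS:
		return string(); // parsed fine, nothing to report
	case TimestampCastResult::ERROR_INCORRECT_FORMAT:
		return Timestamp::FormatError(string(str, len)); // malformed input text
	case TimestampCastResult::ERROR_NON_UTC_TIMEZONE:
		return Timestamp::UnsupportedTimezoneError(string(str, len)); // needs the ICU extension
	case TimestampCastResult::ERROR_RANGE:
		return Timestamp::RangeError(string(str, len)); // well-formed, but out of range
	}
	return string();
}
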
@@ -195,8 +201,8 @@ TimestampCastResult Timestamp::TryConvertTimestamp(const char *str, idx_t len, t return TimestampCastResult::SUCCESS; } -string Timestamp::ConversionError(const string &str) { - return StringUtil::Format("timestamp field value out of range: \"%s\", " +string Timestamp::FormatError(const string &str) { + return StringUtil::Format("invalid timestamp field format: \"%s\", " "expected format is (YYYY-MM-DD HH:MM:SS[.US][±HH:MM| ZONE])", str); } @@ -207,25 +213,35 @@ string Timestamp::UnsupportedTimezoneError(const string &str) { str); } -string Timestamp::ConversionError(string_t str) { - return Timestamp::ConversionError(str.GetString()); +string Timestamp::RangeError(const string &str) { + return StringUtil::Format("timestamp field value out of range: \"%s\"", str); +} + +string Timestamp::FormatError(string_t str) { + return Timestamp::FormatError(str.GetString()); } string Timestamp::UnsupportedTimezoneError(string_t str) { return Timestamp::UnsupportedTimezoneError(str.GetString()); } +string Timestamp::RangeError(string_t str) { + return Timestamp::RangeError(str.GetString()); +} + timestamp_t Timestamp::FromCString(const char *str, idx_t len, optional_ptr nanos) { timestamp_t result; - auto cast_result = Timestamp::TryConvertTimestamp(str, len, result, nanos); - if (cast_result == TimestampCastResult::SUCCESS) { - return result; - } - if (cast_result == TimestampCastResult::ERROR_NON_UTC_TIMEZONE) { - throw ConversionException(Timestamp::UnsupportedTimezoneError(string(str, len))); - } else { - throw ConversionException(Timestamp::ConversionError(string(str, len))); + switch (Timestamp::TryConvertTimestamp(str, len, result, nanos)) { + case TimestampCastResult::SUCCESS: + break; + case TimestampCastResult::ERROR_NON_UTC_TIMEZONE: + throw ConversionException(UnsupportedTimezoneError(string(str, len))); + case TimestampCastResult::ERROR_INCORRECT_FORMAT: + throw ConversionException(FormatError(string(str, len))); + case TimestampCastResult::ERROR_RANGE: + throw ConversionException(RangeError(string(str, len))); } + return result; } bool Timestamp::TryParseUTCOffset(const char *str, idx_t &pos, idx_t len, int &hour_offset, int &minute_offset) { @@ -338,7 +354,7 @@ bool Timestamp::TryFromDatetime(date_t date, dtime_tz_t timetz, timestamp_t &res timestamp_t Timestamp::FromDatetime(date_t date, dtime_t time) { timestamp_t result; if (!TryFromDatetime(date, time, result)) { - throw ConversionException("Overflow exception in date/time -> timestamp conversion"); + throw ConversionException("Date and time not in timestamp range"); } return result; } diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index aeb305a55..c1ab79dfe 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -334,8 +334,9 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size } else { idx_t pos; bool special; - success = Date::TryConvertDate( - value_ptr, size, pos, static_cast(vector_ptr[chunk_col_id])[number_of_rows], special, false); + success = Date::TryConvertDate(value_ptr, size, pos, + static_cast(vector_ptr[chunk_col_id])[number_of_rows], special, + false) == DateCastResult::SUCCESS; } break; } diff --git a/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp 
b/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp index 49bf105a9..d6b2b1a6b 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp @@ -162,7 +162,7 @@ bool CSVSniffer::CanYouCastIt(ClientContext &context, const string_t value, cons idx_t pos; bool special; date_t dummy_value; - return Date::TryConvertDate(value_ptr, value_size, pos, dummy_value, special, true); + return Date::TryConvertDate(value_ptr, value_size, pos, dummy_value, special, true) == DateCastResult::SUCCESS; } case LogicalTypeId::TIMESTAMP: { timestamp_t dummy_value; diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 41709636d..c61c54141 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4241" +#define DUCKDB_PATCH_VERSION "4-dev4248" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4241" +#define DUCKDB_VERSION "v1.1.4-dev4248" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "f850786dde" +#define DUCKDB_SOURCE_ID "acea8cde22" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/enum_util.hpp b/src/duckdb/src/include/duckdb/common/enum_util.hpp index 6215e10fd..852f43436 100644 --- a/src/duckdb/src/include/duckdb/common/enum_util.hpp +++ b/src/duckdb/src/include/duckdb/common/enum_util.hpp @@ -116,6 +116,8 @@ enum class CopyToType : uint8_t; enum class DataFileType : uint8_t; +enum class DateCastResult : uint8_t; + enum class DatePartSpecifier : uint8_t; enum class DebugInitialize : uint8_t; @@ -507,6 +509,9 @@ const char* EnumUtil::ToChars(CopyToType value); template<> const char* EnumUtil::ToChars(DataFileType value); +template<> +const char* EnumUtil::ToChars(DateCastResult value); + template<> const char* EnumUtil::ToChars(DatePartSpecifier value); @@ -1030,6 +1035,9 @@ CopyToType EnumUtil::FromString(const char *value); template<> DataFileType EnumUtil::FromString(const char *value); +template<> +DateCastResult EnumUtil::FromString(const char *value); + template<> DatePartSpecifier EnumUtil::FromString(const char *value); diff --git a/src/duckdb/src/include/duckdb/common/types/date.hpp b/src/duckdb/src/include/duckdb/common/types/date.hpp index f2cb08d50..ea6b52ce7 100644 --- a/src/duckdb/src/include/duckdb/common/types/date.hpp +++ b/src/duckdb/src/include/duckdb/common/types/date.hpp @@ -83,6 +83,8 @@ struct date_t { // NOLINT } // NOLINT }; +enum class DateCastResult : uint8_t { SUCCESS, ERROR_INCORRECT_FORMAT, ERROR_RANGE }; + //! The Date class is a static class that holds helper functions for the Date type. class Date { public: @@ -127,8 +129,8 @@ class Date { DUCKDB_API static bool TryConvertDateSpecial(const char *buf, idx_t len, idx_t &pos, const char *special); //! Try to convert text in a buffer to a date; returns true if parsing was successful //! If the date was a "special" value, the special flag will be set. 
- DUCKDB_API static bool TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result, bool &special, - bool strict = false); + DUCKDB_API static DateCastResult TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result, + bool &special, bool strict = false); //! Create a string "YYYY-MM-DD" from a specified (year, month, day) //! combination @@ -202,8 +204,11 @@ class Date { //! Helper function to parse two digits from a string (e.g. "30" -> 30, "03" -> 3, "3" -> 3) DUCKDB_API static bool ParseDoubleDigit(const char *buf, idx_t len, idx_t &pos, int32_t &result); - DUCKDB_API static string ConversionError(const string &str); - DUCKDB_API static string ConversionError(string_t str); + DUCKDB_API static string FormatError(const string &str); + DUCKDB_API static string FormatError(string_t str); + + DUCKDB_API static string RangeError(const string &str); + DUCKDB_API static string RangeError(string_t str); private: static void ExtractYearOffset(int32_t &n, int32_t &year, int32_t &year_offset); diff --git a/src/duckdb/src/include/duckdb/common/types/timestamp.hpp b/src/duckdb/src/include/duckdb/common/types/timestamp.hpp index eaab803e8..c56168251 100644 --- a/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +++ b/src/duckdb/src/include/duckdb/common/types/timestamp.hpp @@ -112,7 +112,7 @@ struct timestamp_tz_t : public timestamp_t { // NOLINT } }; -enum class TimestampCastResult : uint8_t { SUCCESS, ERROR_INCORRECT_FORMAT, ERROR_NON_UTC_TIMEZONE }; +enum class TimestampCastResult : uint8_t { SUCCESS, ERROR_INCORRECT_FORMAT, ERROR_NON_UTC_TIMEZONE, ERROR_RANGE }; //! The static Timestamp class holds helper functions for the timestamp types. class Timestamp { @@ -128,8 +128,9 @@ class Timestamp { //! Convert a string where the offset can also be a time zone string: / [A_Za-z0-9/_]+/ //! If has_offset is true, then the result is an instant that was offset from UTC //! 
If the tz is not empty, the result is still an instant, but the parts can be extracted and applied to the TZ - DUCKDB_API static bool TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &result, bool &has_offset, - string_t &tz, optional_ptr nanos = nullptr); + DUCKDB_API static TimestampCastResult TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &result, + bool &has_offset, string_t &tz, + optional_ptr nanos = nullptr); DUCKDB_API static TimestampCastResult TryConvertTimestamp(const char *str, idx_t len, timestamp_t &result, optional_ptr nanos = nullptr); DUCKDB_API static TimestampCastResult TryConvertTimestamp(const char *str, idx_t len, timestamp_ns_t &result); @@ -200,10 +201,12 @@ class Timestamp { DUCKDB_API static bool TryParseUTCOffset(const char *str, idx_t &pos, idx_t len, int &hour_offset, int &minute_offset); - DUCKDB_API static string ConversionError(const string &str); - DUCKDB_API static string ConversionError(string_t str); + DUCKDB_API static string FormatError(const string &str); + DUCKDB_API static string FormatError(string_t str); DUCKDB_API static string UnsupportedTimezoneError(const string &str); DUCKDB_API static string UnsupportedTimezoneError(string_t str); + DUCKDB_API static string RangeError(const string &str); + DUCKDB_API static string RangeError(string_t str); }; } // namespace duckdb From 7b9fa6163282a77fe14025ce5390c3f2f7d65985 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 20:08:09 +0000 Subject: [PATCH 19/21] vendor: Update vendored sources to duckdb/duckdb@2c203c13b52f8d6073f5e03f3226eaadf0973654 (#996) [Tidy] create index info in static function for reusability (duckdb/duckdb#15633) Parallel `union_by_name` for `read_json` (duckdb/duckdb#15593) Co-authored-by: krlmlr --- .../function/table/version/pragma_version.cpp | 6 ++-- .../expression_binder/index_binder.hpp | 2 ++ .../expression_binder/index_binder.cpp | 33 ++++++++++--------- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index c61c54141..900d5146e 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4248" +#define DUCKDB_PATCH_VERSION "4-dev4253" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4248" +#define DUCKDB_VERSION "v1.1.4-dev4253" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "acea8cde22" +#define DUCKDB_SOURCE_ID "2c203c13b5" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp b/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp index b15948b34..f8972d13e 100644 --- a/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +++ b/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp @@ -30,6 +30,8 @@ class IndexBinder : public ExpressionBinder { TableCatalogEntry &table_entry, unique_ptr plan, unique_ptr alter_table_info); + static void InitCreateIndexInfo(LogicalGet &get, CreateIndexInfo &info, const string &schema); + protected: BindResult 
BindExpression(unique_ptr &expr_ptr, idx_t depth, bool root_expression = false) override; diff --git a/src/duckdb/src/planner/expression_binder/index_binder.cpp b/src/duckdb/src/planner/expression_binder/index_binder.cpp index 1d9d46bbc..950edb6e6 100644 --- a/src/duckdb/src/planner/expression_binder/index_binder.cpp +++ b/src/duckdb/src/planner/expression_binder/index_binder.cpp @@ -48,6 +48,23 @@ unique_ptr IndexBinder::BindIndex(const UnboundIndex &unbound_index) return index_type->create_instance(input); } +void IndexBinder::InitCreateIndexInfo(LogicalGet &get, CreateIndexInfo &info, const string &schema) { + auto &column_ids = get.GetColumnIds(); + for (auto &column_id : column_ids) { + if (column_id.IsRowIdColumn()) { + throw BinderException("cannot create an index on the rowid"); + } + auto col_id = column_id.GetPrimaryIndex(); + info.column_ids.push_back(col_id); + info.scan_types.push_back(get.returned_types[col_id]); + } + + info.scan_types.emplace_back(LogicalType::ROW_TYPE); + info.names = get.names; + info.schema = schema; + get.AddColumnId(COLUMN_IDENTIFIER_ROW_ID); +} + unique_ptr IndexBinder::BindCreateIndex(ClientContext &context, unique_ptr create_index_info, TableCatalogEntry &table_entry, @@ -70,23 +87,9 @@ unique_ptr IndexBinder::BindCreateIndex(ClientContext &context, } auto &get = plan->Cast(); - auto &column_ids = get.GetColumnIds(); - for (auto &column_id : column_ids) { - if (column_id.IsRowIdColumn()) { - throw BinderException("cannot create an index on the rowid"); - } - auto col_id = column_id.GetPrimaryIndex(); - create_index_info->column_ids.push_back(col_id); - create_index_info->scan_types.push_back(get.returned_types[col_id]); - } - - create_index_info->scan_types.emplace_back(LogicalType::ROW_TYPE); - create_index_info->names = get.names; - create_index_info->schema = table_entry.schema.name; - + InitCreateIndexInfo(get, *create_index_info, table_entry.schema.name); auto &bind_data = get.bind_data->Cast(); bind_data.is_create_index = true; - get.AddColumnId(COLUMN_IDENTIFIER_ROW_ID); auto result = make_uniq(std::move(create_index_info), std::move(expressions), table_entry, std::move(alter_table_info)); From 1189fa7784dcbfa3c4e7318a79242843904ec59c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 20:36:52 +0000 Subject: [PATCH 20/21] vendor: Update vendored sources to duckdb/duckdb@d3444ef1413102e5c8246a066b201c59ac5c4c04 (#997) [Arrow] Fix a bug related to ArrowArray lifetimes in the arrow scan code (duckdb/duckdb#15632) Co-authored-by: krlmlr --- src/duckdb/src/function/cast/struct_cast.cpp | 1 + src/duckdb/src/function/table/arrow_conversion.cpp | 10 +++++++++- .../src/function/table/version/pragma_version.cpp | 6 +++--- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/duckdb/src/function/cast/struct_cast.cpp b/src/duckdb/src/function/cast/struct_cast.cpp index e5902b752..051581c54 100644 --- a/src/duckdb/src/function/cast/struct_cast.cpp +++ b/src/duckdb/src/function/cast/struct_cast.cpp @@ -128,6 +128,7 @@ static bool StructToStructCast(Vector &source, Vector &result, idx_t count, Cast source.Flatten(count); auto &result_validity = FlatVector::Validity(result); result_validity = FlatVector::Validity(source); + result.Verify(count); return all_converted; } diff --git a/src/duckdb/src/function/table/arrow_conversion.cpp b/src/duckdb/src/function/table/arrow_conversion.cpp index 18daff2b9..e09f81aac 100644 --- 
a/src/duckdb/src/function/table/arrow_conversion.cpp +++ b/src/duckdb/src/function/table/arrow_conversion.cpp @@ -725,6 +725,9 @@ static void ColumnArrowToDuckDBRunEndEncoded(Vector &vector, const ArrowArray &a D_ASSERT(vector.GetType() == values_type.GetDuckType()); auto &scan_state = array_state.state; + if (vector.GetBuffer()) { + vector.GetBuffer()->SetAuxiliaryData(make_uniq(array_state.owned_data)); + } D_ASSERT(run_ends_array.length == values_array.length); auto compressed_size = NumericCast(run_ends_array.length); @@ -766,6 +769,9 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca auto &scan_state = array_state.state; D_ASSERT(!array.dictionary); + if (vector.GetBuffer()) { + vector.GetBuffer()->SetAuxiliaryData(make_uniq(array_state.owned_data)); + } switch (vector.GetType().id()) { case LogicalTypeId::SQLNULL: vector.Reference(Value()); @@ -1284,6 +1290,9 @@ static bool CanContainNull(const ArrowArray &array, const ValidityMask *parent_m static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size, const ArrowType &arrow_type, int64_t nested_offset, const ValidityMask *parent_mask, uint64_t parent_offset) { + if (vector.GetBuffer()) { + vector.GetBuffer()->SetAuxiliaryData(make_uniq(array_state.owned_data)); + } D_ASSERT(arrow_type.HasDictionary()); auto &scan_state = array_state.state; const bool has_nulls = CanContainNull(array, parent_mask); @@ -1384,7 +1393,6 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state, const ar if (!array_state.owned_data) { array_state.owned_data = scan_state.chunk; } - output.data[idx].GetBuffer()->SetAuxiliaryData(make_uniq(array_state.owned_data)); auto array_physical_type = GetArrowArrayPhysicalType(arrow_type); diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 900d5146e..ad6011e7f 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4253" +#define DUCKDB_PATCH_VERSION "4-dev4255" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4253" +#define DUCKDB_VERSION "v1.1.4-dev4255" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "2c203c13b5" +#define DUCKDB_SOURCE_ID "d3444ef141" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" From d732c1228fc2825c7187acab20a5167734c42d90 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 21:07:01 +0000 Subject: [PATCH 21/21] vendor: Update vendored sources to duckdb/duckdb@d707b4432b74b51f8a176c533b98e24d48f4d165 (#998) Not using Random Device in DuckDB Core (duckdb/duckdb#15540) Co-authored-by: krlmlr --- src/duckdb/src/common/random_engine.cpp | 18 +++++++++++++++++- src/duckdb/src/common/string_util.cpp | 9 +++------ .../function/table/version/pragma_version.cpp | 6 +++--- src/duckdb/third_party/httplib/httplib.hpp | 17 +++-------------- src/duckdb/third_party/skiplist/SkipList.h | 1 - 5 files changed, 26 insertions(+), 25 deletions(-) diff --git a/src/duckdb/src/common/random_engine.cpp b/src/duckdb/src/common/random_engine.cpp index 7741e8f4d..23eb253e3 100644 --- 
a/src/duckdb/src/common/random_engine.cpp +++ b/src/duckdb/src/common/random_engine.cpp @@ -1,8 +1,13 @@ #include "duckdb/common/random_engine.hpp" #include "duckdb/common/numeric_utils.hpp" #include "pcg_random.hpp" -#include +#ifdef __linux__ +#include +#include +#else +#include +#endif namespace duckdb { struct RandomState { @@ -14,7 +19,18 @@ struct RandomState { RandomEngine::RandomEngine(int64_t seed) : random_state(make_uniq()) { if (seed < 0) { +#ifdef __linux__ + idx_t random_seed; + auto result = syscall(SYS_getrandom, &random_seed, sizeof(random_seed), 0); + if (result == -1) { + // Something went wrong with the syscall, we use chrono + const auto now = std::chrono::high_resolution_clock::now(); + random_seed = now.time_since_epoch().count(); + } + random_state->pcg.seed(random_seed); +#else random_state->pcg.seed(pcg_extras::seed_seq_from()); +#endif } else { random_state->pcg.seed(NumericCast(seed)); } diff --git a/src/duckdb/src/common/string_util.cpp b/src/duckdb/src/common/string_util.cpp index f1bd1e038..f96e3d64e 100644 --- a/src/duckdb/src/common/string_util.cpp +++ b/src/duckdb/src/common/string_util.cpp @@ -6,6 +6,7 @@ #include "duckdb/common/to_string.hpp" #include "duckdb/common/helper.hpp" #include "duckdb/common/exception/parser_exception.hpp" +#include "duckdb/common/random_engine.hpp" #include "jaro_winkler.hpp" #include "utf8proc_wrapper.hpp" @@ -16,7 +17,6 @@ #include #include #include -#include #include #include "yyjson.hpp" @@ -26,13 +26,10 @@ using namespace duckdb_yyjson; // NOLINT namespace duckdb { string StringUtil::GenerateRandomName(idx_t length) { - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> dis(0, 15); - + RandomEngine engine; std::stringstream ss; for (idx_t i = 0; i < length; i++) { - ss << "0123456789abcdef"[dis(gen)]; + ss << "0123456789abcdef"[engine.NextRandomInteger(0, 15)]; } return ss.str(); } diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index ad6011e7f..a98a327c6 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev4255" +#define DUCKDB_PATCH_VERSION "4-dev4271" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev4255" +#define DUCKDB_VERSION "v1.1.4-dev4271" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "d3444ef141" +#define DUCKDB_SOURCE_ID "d707b4432b" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/third_party/httplib/httplib.hpp b/src/duckdb/third_party/httplib/httplib.hpp index c7da982b7..6432d5d1a 100644 --- a/src/duckdb/third_party/httplib/httplib.hpp +++ b/src/duckdb/third_party/httplib/httplib.hpp @@ -237,7 +237,6 @@ using socket_t = int; #include #include #include -#include #include #include #include @@ -246,9 +245,8 @@ using socket_t = int; #include #include #include -#include - #include "duckdb/common/re2_regex.hpp" +#include "duckdb/common/random_engine.hpp" #ifdef CPPHTTPLIB_OPENSSL_SUPPORT #ifdef _WIN32 @@ -4646,19 +4644,10 @@ inline std::string make_multipart_data_boundary() { static const char data[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - // std::random_device might actually be deterministic on some - 
// platforms, but due to lack of support in the c++ standard library, - // doing better requires either some ugly hacks or breaking portability. - std::random_device seed_gen; - - // Request 128 bits of entropy for initialization - std::seed_seq seed_sequence{seed_gen(), seed_gen(), seed_gen(), seed_gen()}; - std::mt19937 engine(seed_sequence); - std::string result = "--cpp-httplib-multipart-data-"; - + duckdb::RandomEngine engine; for (auto i = 0; i < 16; i++) { - result += data[engine() % (sizeof(data) - 1)]; + result += data[engine.NextRandomInteger32(0,sizeof(data) - 1)]; } return result; diff --git a/src/duckdb/third_party/skiplist/SkipList.h b/src/duckdb/third_party/skiplist/SkipList.h index 5cf27c204..5b015acc3 100644 --- a/src/duckdb/third_party/skiplist/SkipList.h +++ b/src/duckdb/third_party/skiplist/SkipList.h @@ -446,7 +446,6 @@ #include #include // Used for HeadNode::_lacksIntegrityNodeReferencesNotInList() #include // Used for class Exception -#include #include "pcg_random.hpp" #ifdef DEBUG
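
The seeding strategy in the random_engine.cpp hunk above, ask the kernel via SYS_getrandom and only fall back to a high-resolution clock if the syscall fails, can be read in isolation. The following is a self-contained sketch of that pattern (Linux-only, illustrative; it mirrors the vendored change but is not duckdb code). It sidesteps std::random_device, which the removed httplib comment notes can be deterministic on some platforms.

#include <chrono>
#include <cstdint>
#include <sys/syscall.h>
#include <unistd.h>

// Returns entropy from the kernel when available, a clock-derived value otherwise.
static uint64_t AcquireSeed() {
	uint64_t seed = 0;
	if (syscall(SYS_getrandom, &seed, sizeof(seed), 0) == -1) {
		// getrandom unavailable or interrupted: fall back to the clock,
		// just as the vendored RandomEngine constructor does
		seed = static_cast<uint64_t>(
		    std::chrono::high_resolution_clock::now().time_since_epoch().count());
	}
	return seed;
}
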