From 520f456c9b9e0b5584c4b980fbd83864f81ce8c4 Mon Sep 17 00:00:00 2001 From: ritwizsinha Date: Sun, 22 Dec 2024 12:44:54 +0530 Subject: [PATCH 1/4] Add support for BYTEAOID --- src/pgduckdb_filter.cpp | 2 ++ src/pgduckdb_types.cpp | 28 +++++++++++++++++++++++ test/regression/expected/type_support.out | 22 ++++++++++++++++++ test/regression/sql/type_support.sql | 13 +++++++++++ 4 files changed, 65 insertions(+) diff --git a/src/pgduckdb_filter.cpp b/src/pgduckdb_filter.cpp index 6ea6e032..7b0c4bd3 100644 --- a/src/pgduckdb_filter.cpp +++ b/src/pgduckdb_filter.cpp @@ -83,6 +83,8 @@ FilterOperationSwitch(const Datum &value, const duckdb::Value &constant, Oid typ case TEXTOID: case VARCHAROID: return StringFilterOperation(value, constant, type_oid == BPCHAROID); + case BYTEAOID: + return StringFilterOperation(value, constant, false); default: throw duckdb::InvalidTypeException( duckdb::string("(DuckDB/FilterOperationSwitch) Unsupported duckdb type: " + std::to_string(type_oid))); diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index ca234352..95fc012d 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -2,6 +2,7 @@ #include "duckdb/common/shared_ptr.hpp" #include "duckdb/common/extra_type_info.hpp" #include "duckdb/common/types/uuid.hpp" +#include "duckdb/common/types/blob.hpp" #include "pgduckdb/pgduckdb_types.hpp" #include "pgduckdb/pgduckdb_utils.hpp" @@ -199,6 +200,17 @@ ConvertVarCharDatum(const duckdb::Value &value) { return PointerGetDatum(result); } +static Datum +ConvertBinaryDatum(const duckdb::Value &value) { + auto str = value.GetValue(); + auto blob = str.c_str(); + auto blob_len = str.size(); + bytea* result = (bytea *)palloc0(blob_len + VARHDRSZ); + SET_VARSIZE(result, blob_len + VARHDRSZ); + memcpy(VARDATA(result), blob, blob_len); + return PointerGetDatum(result); +} + inline Datum ConvertDateDatum(const duckdb::Value &value) { duckdb::date_t date = value.GetValue(); @@ -733,6 +745,10 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col slot->tts_values[col] = ConvertUUIDDatum(value); break; } + case BYTEAOID: { + slot->tts_values[col] = ConvertBinaryDatum(value); + break; + } case BOOLARRAYOID: { ConvertDuckToPostgresArray(slot, value, col); break; @@ -866,6 +882,8 @@ ConvertPostgresToBaseDuckColumnType(Form_pg_attribute &attribute) { case REGCLASSOID: case REGCLASSARRAYOID: return duckdb::LogicalTypeId::UINTEGER; + case BYTEAOID: + return duckdb::LogicalTypeId::BLOB; default: return duckdb::LogicalType::USER("UnsupportedPostgresType (Oid=" + std::to_string(attribute->atttypid) + ")"); } @@ -974,6 +992,8 @@ GetPostgresDuckDBType(const duckdb::LogicalType &type) { } return GetPostgresArrayDuckDBType(*duck_type); } + case duckdb::LogicalTypeId::BLOB: + return BYTEAOID; default: { elog(WARNING, "(PGDuckDB/GetPostgresDuckDBType) Could not convert DuckDB type: %s to Postgres type", type.ToString().c_str()); @@ -1222,6 +1242,14 @@ ConvertPostgresToDuckValue(Oid attr_type, Datum value, duckdb::Vector &result, i Append(result, duckdb_uuid, offset); break; } + case duckdb::LogicalTypeId::BLOB: { + const char *bytea_data = VARDATA_ANY(value); + size_t bytea_length = VARSIZE_ANY_EXHDR(value); + const duckdb::string_t s(bytea_data, bytea_length); + auto data = duckdb::FlatVector::GetData(result); + data[offset] = duckdb::StringVector::AddString(result, s); + break; + } case duckdb::LogicalTypeId::LIST: { // Convert Datum to ArrayType auto array = DatumGetArrayTypeP(value); diff --git a/test/regression/expected/type_support.out b/test/regression/expected/type_support.out index c40ce762..f22919e0 100644 --- a/test/regression/expected/type_support.out +++ b/test/regression/expected/type_support.out @@ -308,6 +308,27 @@ SELECT * FROM json_tbl; {} (4 rows) +-- BLOB +CREATE TABLE blob_tbl(a bytea); +INSERT INTO blob_tbl SELECT CAST(a as bytea) FROM (VALUES + ('\x'), + ('\x110102030405060708090a0b0c0d0e0f'), + (''), + ('\x00'), + ('\x07'), + (NULL) +) t(a); +SELECT * from blob_tbl; + a +------------------------------------------------------------------------------------------------------------------------------------ + \x + \x5c7831315c7830315c7830325c7830335c7830345c7830355c7830365c7830375c7830385c7830395c7830415c7830425c7830435c7830445c7830455c783046 + \x + \x5c783030 + \x5c783037 + +(6 rows) + -- REGCLASSOID CREATE TABLE regclass_tbl (a REGCLASS); INSERT INTO regclass_tbl VALUES (42), (3000000000); @@ -337,4 +358,5 @@ DROP TABLE bigint_numeric; DROP TABLE hugeint_numeric; DROP TABLE uuid_tbl; DROP TABLE json_tbl; +DROP TABLE blob_tbl; DROP TABLE regclass_tbl; diff --git a/test/regression/sql/type_support.sql b/test/regression/sql/type_support.sql index 725ee143..d20c68c2 100644 --- a/test/regression/sql/type_support.sql +++ b/test/regression/sql/type_support.sql @@ -154,6 +154,18 @@ INSERT INTO json_tbl SELECT CAST(a as JSON) FROM (VALUES ) t(a); SELECT * FROM json_tbl; +-- BLOB +CREATE TABLE blob_tbl(a bytea); +INSERT INTO blob_tbl SELECT CAST(a as bytea) FROM (VALUES + ('\x'), + ('\x110102030405060708090a0b0c0d0e0f'), + (''), + ('\x00'), + ('\x07'), + (NULL) +) t(a); +SELECT * from blob_tbl; + -- REGCLASSOID CREATE TABLE regclass_tbl (a REGCLASS); INSERT INTO regclass_tbl VALUES (42), (3000000000); @@ -178,4 +190,5 @@ DROP TABLE bigint_numeric; DROP TABLE hugeint_numeric; DROP TABLE uuid_tbl; DROP TABLE json_tbl; +DROP TABLE blob_tbl; DROP TABLE regclass_tbl; From 26b54660bdd7161d0e8a9b323c53db1e255da358 Mon Sep 17 00:00:00 2001 From: ritwizsinha Date: Sun, 22 Dec 2024 13:21:02 +0530 Subject: [PATCH 2/4] Add support for byteaarrayoid --- src/pgduckdb_types.cpp | 21 +++++++++++++++++++ .../expected/array_type_support.out | 13 ++++++++++++ test/regression/sql/array_type_support.sql | 9 ++++++++ 3 files changed, 43 insertions(+) diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index 95fc012d..b6b1defa 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -517,6 +517,19 @@ struct PostgresTypeTraits { } }; +// BLOB type +template <> +struct PostgresTypeTraits { + static constexpr int16_t typlen = -1; // variable-length + static constexpr bool typbyval = false; + static constexpr char typalign = 'i'; + + static inline Datum + ToDatum(const duckdb::Value &val) { + return ConvertBinaryDatum(val); + } +}; + template struct PostgresOIDMapping { static constexpr int32_t postgres_oid = OID; @@ -557,6 +570,7 @@ using TimestampArray = PODArray>; using UUIDArray = PODArray>; using VarCharArray = PODArray>; using NumericArray = PODArray>; +using ByteArray = PODArray>; static idx_t GetDuckDBListDimensionality(const duckdb::LogicalType &list_type, idx_t depth = 0) { @@ -800,6 +814,10 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col ConvertDuckToPostgresArray(slot, value, col); break; } + case BYTEAARRAYOID: { + ConvertDuckToPostgresArray(slot, value, col); + break; + } default: elog(WARNING, "(PGDuckDB/ConvertDuckToPostgresValue) Unsuported pgduckdb type: %d", oid); return false; @@ -883,6 +901,7 @@ ConvertPostgresToBaseDuckColumnType(Form_pg_attribute &attribute) { case REGCLASSARRAYOID: return duckdb::LogicalTypeId::UINTEGER; case BYTEAOID: + case BYTEAARRAYOID: return duckdb::LogicalTypeId::BLOB; default: return duckdb::LogicalType::USER("UnsupportedPostgresType (Oid=" + std::to_string(attribute->atttypid) + ")"); @@ -938,6 +957,8 @@ GetPostgresArrayDuckDBType(const duckdb::LogicalType &type) { return NUMERICARRAYOID; case duckdb::LogicalTypeId::UUID: return UUIDARRAYOID; + case duckdb::LogicalTypeId::BLOB: + return BYTEAARRAYOID; default: { elog(WARNING, "(PGDuckDB/GetPostgresDuckDBType) Unsupported `LIST` subtype %d to Postgres type", static_cast(type.id())); diff --git a/test/regression/expected/array_type_support.out b/test/regression/expected/array_type_support.out index 35c31f3b..934c338b 100644 --- a/test/regression/expected/array_type_support.out +++ b/test/regression/expected/array_type_support.out @@ -318,6 +318,19 @@ SELECT * FROM varchar_array_2d; {{some,strings},{NULL,last}} (5 rows) +-- BYTEA (single dimension) +CREATE TABLE bytea_array_1d (a bytea[]); +INSERT INTO bytea_array_1d (a) +VALUES + (ARRAY[decode('01020304', 'hex'), decode('aabbccdd', 'hex')]), + (ARRAY[decode('11223344', 'hex'), decode('55667788', 'hex')]); +SELECT * FROM bytea_array_1d; + a +------------------------------------------------------------------------------- + {"\\x5c7830315c7830325c7830335c783034","\\x5c7841415c7842425c7843435c784444"} + {"\\x5c7831315c7832323344","\\x5566775c783838"} +(2 rows) + -- TIMESTAMP (two dimensions) CREATE TABLE timestamp_array_2d(a TIMESTAMP[][]); INSERT INTO timestamp_array_2d VALUES diff --git a/test/regression/sql/array_type_support.sql b/test/regression/sql/array_type_support.sql index 79c2e487..aec79467 100644 --- a/test/regression/sql/array_type_support.sql +++ b/test/regression/sql/array_type_support.sql @@ -195,6 +195,15 @@ INSERT INTO varchar_array_2d VALUES ('{{"some","strings"},{NULL,"last"}}'); SELECT * FROM varchar_array_2d; +-- BYTEA (single dimension) +CREATE TABLE bytea_array_1d (a bytea[]); + +INSERT INTO bytea_array_1d (a) +VALUES + (ARRAY[decode('01020304', 'hex'), decode('aabbccdd', 'hex')]), + (ARRAY[decode('11223344', 'hex'), decode('55667788', 'hex')]); +SELECT * FROM bytea_array_1d; + -- TIMESTAMP (two dimensions) CREATE TABLE timestamp_array_2d(a TIMESTAMP[][]); INSERT INTO timestamp_array_2d VALUES From c963bcd5f06332738b1ee7ce442c40e6cb55351b Mon Sep 17 00:00:00 2001 From: ritwizsinha Date: Thu, 2 Jan 2025 18:08:40 +0530 Subject: [PATCH 3/4] Fix implicit conversion to string for blob values --- src/pgduckdb_types.cpp | 6 +++--- test/regression/expected/array_type_support.out | 8 ++++---- test/regression/expected/type_support.out | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index b6b1defa..79255354 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -202,9 +202,9 @@ ConvertVarCharDatum(const duckdb::Value &value) { static Datum ConvertBinaryDatum(const duckdb::Value &value) { - auto str = value.GetValue(); - auto blob = str.c_str(); - auto blob_len = str.size(); + auto str = value.GetValueUnsafe(); + auto blob_len = str.GetSize(); + auto blob = str.GetDataUnsafe(); bytea* result = (bytea *)palloc0(blob_len + VARHDRSZ); SET_VARSIZE(result, blob_len + VARHDRSZ); memcpy(VARDATA(result), blob, blob_len); diff --git a/test/regression/expected/array_type_support.out b/test/regression/expected/array_type_support.out index 934c338b..e229f4c6 100644 --- a/test/regression/expected/array_type_support.out +++ b/test/regression/expected/array_type_support.out @@ -325,10 +325,10 @@ VALUES (ARRAY[decode('01020304', 'hex'), decode('aabbccdd', 'hex')]), (ARRAY[decode('11223344', 'hex'), decode('55667788', 'hex')]); SELECT * FROM bytea_array_1d; - a -------------------------------------------------------------------------------- - {"\\x5c7830315c7830325c7830335c783034","\\x5c7841415c7842425c7843435c784444"} - {"\\x5c7831315c7832323344","\\x5566775c783838"} + a +------------------------------- + {"\\x01020304","\\xaabbccdd"} + {"\\x11223344","\\x55667788"} (2 rows) -- TIMESTAMP (two dimensions) diff --git a/test/regression/expected/type_support.out b/test/regression/expected/type_support.out index f22919e0..56911a5f 100644 --- a/test/regression/expected/type_support.out +++ b/test/regression/expected/type_support.out @@ -319,13 +319,13 @@ INSERT INTO blob_tbl SELECT CAST(a as bytea) FROM (VALUES (NULL) ) t(a); SELECT * from blob_tbl; - a ------------------------------------------------------------------------------------------------------------------------------------- + a +------------------------------------ \x - \x5c7831315c7830315c7830325c7830335c7830345c7830355c7830365c7830375c7830385c7830395c7830415c7830425c7830435c7830445c7830455c783046 + \x110102030405060708090a0b0c0d0e0f \x - \x5c783030 - \x5c783037 + \x00 + \x07 (6 rows) From cc65dfbb12724257ca103805ed222eeca4b71605 Mon Sep 17 00:00:00 2001 From: ritwizsinha Date: Thu, 2 Jan 2025 19:27:13 +0530 Subject: [PATCH 4/4] Add where test --- test/regression/expected/type_support.out | 6 ++++++ test/regression/sql/type_support.sql | 1 + 2 files changed, 7 insertions(+) diff --git a/test/regression/expected/type_support.out b/test/regression/expected/type_support.out index 56911a5f..4d509658 100644 --- a/test/regression/expected/type_support.out +++ b/test/regression/expected/type_support.out @@ -329,6 +329,12 @@ SELECT * from blob_tbl; (6 rows) +SELECT * from blob_tbl where a = '\x07'; + a +------ + \x07 +(1 row) + -- REGCLASSOID CREATE TABLE regclass_tbl (a REGCLASS); INSERT INTO regclass_tbl VALUES (42), (3000000000); diff --git a/test/regression/sql/type_support.sql b/test/regression/sql/type_support.sql index d20c68c2..b42a9eb3 100644 --- a/test/regression/sql/type_support.sql +++ b/test/regression/sql/type_support.sql @@ -165,6 +165,7 @@ INSERT INTO blob_tbl SELECT CAST(a as bytea) FROM (VALUES (NULL) ) t(a); SELECT * from blob_tbl; +SELECT * from blob_tbl where a = '\x07'; -- REGCLASSOID CREATE TABLE regclass_tbl (a REGCLASS);