Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for BYTEA/BLOB #511

Merged
merged 5 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/pgduckdb_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ FilterOperationSwitch(const Datum &value, const duckdb::Value &constant, Oid typ
case TEXTOID:
case VARCHAROID:
return StringFilterOperation<OP>(value, constant, type_oid == BPCHAROID);
case BYTEAOID:
return StringFilterOperation<OP>(value, constant, false);
default:
throw duckdb::InvalidTypeException(
duckdb::string("(DuckDB/FilterOperationSwitch) Unsupported duckdb type: " + std::to_string(type_oid)));
Expand Down
49 changes: 49 additions & 0 deletions src/pgduckdb_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "duckdb/common/shared_ptr.hpp"
#include "duckdb/common/extra_type_info.hpp"
#include "duckdb/common/types/uuid.hpp"
#include "duckdb/common/types/blob.hpp"

#include "pgduckdb/pgduckdb_types.hpp"
#include "pgduckdb/pgduckdb_utils.hpp"
Expand Down Expand Up @@ -199,6 +200,17 @@ ConvertVarCharDatum(const duckdb::Value &value) {
return PointerGetDatum(result);
}

// Builds a Postgres `bytea` Datum from a DuckDB value.
//
// The value's payload is read as a duckdb::string_t (pointer + size), a buffer
// of `size + VARHDRSZ` bytes is obtained with palloc0, the total size is
// recorded with SET_VARSIZE, and the payload bytes are copied in after the
// header. The length comes from GetSize(), not from a terminator, so embedded
// zero bytes are copied like any other byte.
//
// @param value DuckDB value to convert.
//              NOTE(review): no NULL or type check is performed here and the
//              unchecked GetValueUnsafe accessor is used -- presumably the
//              caller only passes non-NULL BLOB values; confirm at call sites.
// @return Datum wrapping the pointer to the newly allocated bytea.
static Datum
ConvertBinaryDatum(const duckdb::Value &value) {
auto str = value.GetValueUnsafe<duckdb::string_t>();
auto blob_len = str.GetSize();
auto blob = str.GetDataUnsafe();
// Room for the header (VARHDRSZ) followed by the raw payload bytes.
bytea* result = (bytea *)palloc0(blob_len + VARHDRSZ);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: make format will re-format this line as

bytea *result = (bytea *)palloc0(blob_len + VARHDRSZ);

Copy link
Collaborator

@JelteF JelteF Jan 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the report. Fixed by: #518

// The size stored in the header is the total size: payload plus VARHDRSZ.
SET_VARSIZE(result, blob_len + VARHDRSZ);
memcpy(VARDATA(result), blob, blob_len);
return PointerGetDatum(result);
}

inline Datum
ConvertDateDatum(const duckdb::Value &value) {
duckdb::date_t date = value.GetValue<duckdb::date_t>();
Expand Down Expand Up @@ -505,6 +517,19 @@ struct PostgresTypeTraits<VARCHAROID> {
}
};

// Type traits for Postgres BYTEA (the Postgres-side counterpart of a DuckDB BLOB).
template <>
struct PostgresTypeTraits<BYTEAOID> {
    // -1 marks a variable-length type.
    static constexpr int16_t typlen = -1;
    // Not passed by value.
    static constexpr bool typbyval = false;
    // NOTE(review): presumably mirrors pg_type.typalign for bytea -- confirm against the catalog.
    static constexpr char typalign = 'i';

    // Converts a single DuckDB value into a bytea Datum by delegating to ConvertBinaryDatum.
    static inline Datum
    ToDatum(const duckdb::Value &blob_value) {
        return ConvertBinaryDatum(blob_value);
    }
};

template <int32_t OID>
struct PostgresOIDMapping {
static constexpr int32_t postgres_oid = OID;
Expand Down Expand Up @@ -545,6 +570,7 @@ using TimestampArray = PODArray<PostgresOIDMapping<TIMESTAMPOID>>;
using UUIDArray = PODArray<PostgresOIDMapping<UUIDOID>>;
using VarCharArray = PODArray<PostgresOIDMapping<VARCHAROID>>;
using NumericArray = PODArray<PostgresOIDMapping<NUMERICOID>>;
using ByteArray = PODArray<PostgresOIDMapping<BYTEAOID>>;

static idx_t
GetDuckDBListDimensionality(const duckdb::LogicalType &list_type, idx_t depth = 0) {
Expand Down Expand Up @@ -733,6 +759,10 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
slot->tts_values[col] = ConvertUUIDDatum(value);
break;
}
case BYTEAOID: {
slot->tts_values[col] = ConvertBinaryDatum(value);
break;
}
case BOOLARRAYOID: {
ConvertDuckToPostgresArray<BoolArray>(slot, value, col);
break;
Expand Down Expand Up @@ -784,6 +814,10 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
ConvertDuckToPostgresArray<UUIDArray>(slot, value, col);
break;
}
case BYTEAARRAYOID: {
ConvertDuckToPostgresArray<ByteArray>(slot, value, col);
break;
}
default:
elog(WARNING, "(PGDuckDB/ConvertDuckToPostgresValue) Unsuported pgduckdb type: %d", oid);
return false;
Expand Down Expand Up @@ -866,6 +900,9 @@ ConvertPostgresToBaseDuckColumnType(Form_pg_attribute &attribute) {
case REGCLASSOID:
case REGCLASSARRAYOID:
return duckdb::LogicalTypeId::UINTEGER;
case BYTEAOID:
case BYTEAARRAYOID:
return duckdb::LogicalTypeId::BLOB;
default:
return duckdb::LogicalType::USER("UnsupportedPostgresType (Oid=" + std::to_string(attribute->atttypid) + ")");
}
Expand Down Expand Up @@ -920,6 +957,8 @@ GetPostgresArrayDuckDBType(const duckdb::LogicalType &type) {
return NUMERICARRAYOID;
case duckdb::LogicalTypeId::UUID:
return UUIDARRAYOID;
case duckdb::LogicalTypeId::BLOB:
return BYTEAARRAYOID;
default: {
elog(WARNING, "(PGDuckDB/GetPostgresDuckDBType) Unsupported `LIST` subtype %d to Postgres type",
static_cast<uint8_t>(type.id()));
Expand Down Expand Up @@ -974,6 +1013,8 @@ GetPostgresDuckDBType(const duckdb::LogicalType &type) {
}
return GetPostgresArrayDuckDBType(*duck_type);
}
case duckdb::LogicalTypeId::BLOB:
return BYTEAOID;
default: {
elog(WARNING, "(PGDuckDB/GetPostgresDuckDBType) Could not convert DuckDB type: %s to Postgres type",
type.ToString().c_str());
Expand Down Expand Up @@ -1222,6 +1263,14 @@ ConvertPostgresToDuckValue(Oid attr_type, Datum value, duckdb::Vector &result, i
Append(result, duckdb_uuid, offset);
break;
}
case duckdb::LogicalTypeId::BLOB: {
const char *bytea_data = VARDATA_ANY(value);
size_t bytea_length = VARSIZE_ANY_EXHDR(value);
const duckdb::string_t s(bytea_data, bytea_length);
auto data = duckdb::FlatVector::GetData<duckdb::string_t>(result);
data[offset] = duckdb::StringVector::AddString(result, s);
break;
}
case duckdb::LogicalTypeId::LIST: {
// Convert Datum to ArrayType
auto array = DatumGetArrayTypeP(value);
Expand Down
13 changes: 13 additions & 0 deletions test/regression/expected/array_type_support.out
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,19 @@ SELECT * FROM varchar_array_2d;
{{some,strings},{NULL,last}}
(5 rows)

-- BYTEA (single dimension)
CREATE TABLE bytea_array_1d (a bytea[]);
INSERT INTO bytea_array_1d (a)
VALUES
(ARRAY[decode('01020304', 'hex'), decode('aabbccdd', 'hex')]),
(ARRAY[decode('11223344', 'hex'), decode('55667788', 'hex')]);
SELECT * FROM bytea_array_1d;
a
-------------------------------
{"\\x01020304","\\xaabbccdd"}
{"\\x11223344","\\x55667788"}
(2 rows)

-- TIMESTAMP (two dimensions)
CREATE TABLE timestamp_array_2d(a TIMESTAMP[][]);
INSERT INTO timestamp_array_2d VALUES
Expand Down
28 changes: 28 additions & 0 deletions test/regression/expected/type_support.out
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,33 @@ SELECT * FROM json_tbl;
{}
(4 rows)

-- BLOB
CREATE TABLE blob_tbl(a bytea);
INSERT INTO blob_tbl SELECT CAST(a as bytea) FROM (VALUES
('\x'),
('\x110102030405060708090a0b0c0d0e0f'),
(''),
('\x00'),
('\x07'),
(NULL)
) t(a);
SELECT * from blob_tbl;
a
------------------------------------
\x
\x110102030405060708090a0b0c0d0e0f
\x
\x00
\x07

(6 rows)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comparison test too? Something like:

SELECT * FROM blob_tbl WHERE a = '\x00';

SELECT * from blob_tbl where a = '\x07';
a
------
\x07
(1 row)

-- REGCLASSOID
CREATE TABLE regclass_tbl (a REGCLASS);
INSERT INTO regclass_tbl VALUES (42), (3000000000);
Expand Down Expand Up @@ -337,4 +364,5 @@ DROP TABLE bigint_numeric;
DROP TABLE hugeint_numeric;
DROP TABLE uuid_tbl;
DROP TABLE json_tbl;
DROP TABLE blob_tbl;
DROP TABLE regclass_tbl;
9 changes: 9 additions & 0 deletions test/regression/sql/array_type_support.sql
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,15 @@ INSERT INTO varchar_array_2d VALUES
('{{"some","strings"},{NULL,"last"}}');
SELECT * FROM varchar_array_2d;

-- BYTEA (single dimension)
CREATE TABLE bytea_array_1d (a bytea[]);

INSERT INTO bytea_array_1d (a)
VALUES
(ARRAY[decode('01020304', 'hex'), decode('aabbccdd', 'hex')]),
(ARRAY[decode('11223344', 'hex'), decode('55667788', 'hex')]);
SELECT * FROM bytea_array_1d;

-- TIMESTAMP (two dimensions)
CREATE TABLE timestamp_array_2d(a TIMESTAMP[][]);
INSERT INTO timestamp_array_2d VALUES
Expand Down
14 changes: 14 additions & 0 deletions test/regression/sql/type_support.sql
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,19 @@ INSERT INTO json_tbl SELECT CAST(a as JSON) FROM (VALUES
) t(a);
SELECT * FROM json_tbl;

-- BLOB
CREATE TABLE blob_tbl(a bytea);
INSERT INTO blob_tbl SELECT CAST(a as bytea) FROM (VALUES
('\x'),
('\x110102030405060708090a0b0c0d0e0f'),
(''),
('\x00'),
('\x07'),
(NULL)
) t(a);
SELECT * from blob_tbl;
SELECT * from blob_tbl where a = '\x07';

-- REGCLASSOID
CREATE TABLE regclass_tbl (a REGCLASS);
INSERT INTO regclass_tbl VALUES (42), (3000000000);
Expand All @@ -178,4 +191,5 @@ DROP TABLE bigint_numeric;
DROP TABLE hugeint_numeric;
DROP TABLE uuid_tbl;
DROP TABLE json_tbl;
DROP TABLE blob_tbl;
DROP TABLE regclass_tbl;