Skip to content

Commit

Permalink
Merge pull request #157 from Maxxen/dev
Browse files Browse the repository at this point in the history
Add `ST_Dump`, bump duckdb, wrap yyjson in namespace
  • Loading branch information
Maxxen authored Oct 25, 2023
2 parents b5d031d + d03f053 commit cc560e6
Show file tree
Hide file tree
Showing 11 changed files with 256 additions and 4 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ set(CMAKE_CXX_STANDARD 11)

project(${TARGET_NAME})

add_definitions(-DDUCKDB_MAJOR_VERSION=${DUCKDB_MAJOR_VERSION})
add_definitions(-DDUCKDB_MINOR_VERSION=${DUCKDB_MINOR_VERSION})
add_definitions(-DDUCKDB_PATCH_VERSION=${DUCKDB_PATCH_VERSION})

# Options

# Enable network functionality (OpenSSL and GDAL's CURL based fs/drivers)
Expand Down
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 122 files
4 changes: 4 additions & 0 deletions spatial/include/spatial/core/functions/scalar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ struct CoreScalarFunctions {
RegisterStContains(db);
RegisterStDimension(db);
RegisterStDistance(db);
RegisterStDump(db);
RegisterStEndPoint(db);
RegisterStExtent(db);
RegisterStExteriorRing(db);
Expand Down Expand Up @@ -83,6 +84,9 @@ struct CoreScalarFunctions {
// ST_Distance
static void RegisterStDistance(DatabaseInstance &db);

// ST_Dump
static void RegisterStDump(DatabaseInstance &db);

// ST_EndPoint
static void RegisterStEndPoint(DatabaseInstance &db);

Expand Down
1 change: 1 addition & 0 deletions spatial/src/spatial/core/functions/scalar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(EXTENSION_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/st_contains.cpp
${CMAKE_CURRENT_SOURCE_DIR}/st_dimension.cpp
${CMAKE_CURRENT_SOURCE_DIR}/st_distance.cpp
${CMAKE_CURRENT_SOURCE_DIR}/st_dump.cpp
${CMAKE_CURRENT_SOURCE_DIR}/st_endpoint.cpp
${CMAKE_CURRENT_SOURCE_DIR}/st_extent.cpp
${CMAKE_CURRENT_SOURCE_DIR}/st_exteriorring.cpp
Expand Down
2 changes: 2 additions & 0 deletions spatial/src/spatial/core/functions/scalar/st_asgeojson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ namespace spatial {

namespace core {

using namespace duckdb_yyjson_spatial;

class JSONAllocator {
// Stolen from the JSON extension :)
public:
Expand Down
155 changes: 155 additions & 0 deletions spatial/src/spatial/core/functions/scalar/st_dump.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#include "spatial/common.hpp"
#include "spatial/core/types.hpp"
#include "spatial/core/functions/scalar.hpp"
#include "spatial/core/functions/common.hpp"
#include "spatial/core/geometry/geometry.hpp"

#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
#include "duckdb/common/vector_operations/unary_executor.hpp"
#include "duckdb/common/vector_operations/binary_executor.hpp"

#include <algorithm>
#include <tuple>
#include <utility>

namespace spatial {

namespace core {

static void DumpFunction(DataChunk &args, ExpressionState &state, Vector &result) {
auto &lstate = GeometryFunctionLocalState::ResetAndGet(state);
auto count = args.size();

auto &geom_vec = args.data[0];
UnifiedVectorFormat geom_format;
geom_vec.ToUnifiedFormat(count, geom_format);

idx_t total_geom_count = 0;
idx_t total_path_count = 0;

for(idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) {
auto in_row_idx = geom_format.sel->get_index(out_row_idx);

if (!geom_format.validity.RowIsValid(in_row_idx)) {
FlatVector::SetNull(result, out_row_idx, true);
continue;
}

auto geometry_blob = UnifiedVectorFormat::GetData<string_t>(geom_format)[in_row_idx];
auto geometry = lstate.factory.Deserialize(geometry_blob);

vector<std::tuple<Geometry, vector<int32_t>>> stack;
vector<std::tuple<Geometry, vector<int32_t>>> items;

stack.emplace_back(geometry, vector<int32_t>());

while(!stack.empty()) {
auto current = stack.back();
auto current_geom = std::get<0>(current);
auto current_path = std::get<1>(current);

stack.pop_back();

if(current_geom.Type() == GeometryType::MULTIPOINT) {
auto mpoint = current_geom.GetMultiPoint();
for (int32_t i = 0; i < mpoint.Count(); i++) {
auto path = current_path;
path.push_back(i + 1); // path is 1-indexed
stack.emplace_back(mpoint[i], path);
}
}
else if(current_geom.Type() == GeometryType::MULTILINESTRING) {
auto mline = current_geom.GetMultiLineString();
for (int32_t i = 0; i < mline.Count(); i++) {
auto path = current_path;
path.push_back(i + 1);
stack.emplace_back(mline[i], path);
}
}
else if(current_geom.Type() == GeometryType::MULTIPOLYGON) {
auto mpoly = current_geom.GetMultiPolygon();
for (int32_t i = 0; i < mpoly.Count(); i++) {
auto path = current_path;
path.push_back(i + 1);
stack.emplace_back(mpoly[i], path);
}
}
else if (current_geom.Type() == GeometryType::GEOMETRYCOLLECTION) {
auto collection = current_geom.GetGeometryCollection();
for (int32_t i = 0; i < collection.Count(); i++) {
auto path = current_path;
path.push_back(i + 1);
stack.emplace_back(collection[i], path);
}
} else {
items.push_back(current);
}
}

// Finally reverse the results
std::reverse(items.begin(), items.end());

// Push to the result vector
auto result_entries = ListVector::GetData(result);

auto geom_offset = total_geom_count;
auto geom_length = items.size();

result_entries[out_row_idx].length = geom_length;
result_entries[out_row_idx].offset = geom_offset;

total_geom_count += geom_length;

ListVector::Reserve(result, total_geom_count);
ListVector::SetListSize(result, total_geom_count);

auto &result_list = ListVector::GetEntry(result);
auto &result_list_children = StructVector::GetEntries(result_list);
auto &result_geom_vec = result_list_children[0];
auto &result_path_vec = result_list_children[1];

auto geom_data = FlatVector::GetData<string_t>(*result_geom_vec);
for(idx_t i = 0; i < geom_length; i++) {
// Write the geometry
auto &item_blob = std::get<0>(items[i]);
geom_data[geom_offset + i] = lstate.factory.Serialize(*result_geom_vec, item_blob);

// Now write the paths
auto &path = std::get<1>(items[i]);
auto path_offset = total_path_count;
auto path_length = path.size();

total_path_count += path_length;

ListVector::Reserve(*result_path_vec, total_path_count);
ListVector::SetListSize(*result_path_vec, total_path_count);

auto path_entries = ListVector::GetData(*result_path_vec);

path_entries[geom_offset + i].offset = path_offset;
path_entries[geom_offset + i].length = path_length;

auto &path_data_vec = ListVector::GetEntry(*result_path_vec);
auto path_data = FlatVector::GetData<int32_t>(path_data_vec);

for(idx_t j = 0; j < path_length; j++) {
path_data[path_offset + j] = path[j];
}
}
}

if(count == 1) {
result.SetVectorType(VectorType::CONSTANT_VECTOR);
}
}

// Registers the scalar function ST_Dump(GEOMETRY) -> LIST(STRUCT(geom GEOMETRY, path LIST(INTEGER)))
// with the given database instance.
void CoreScalarFunctions::RegisterStDump(DatabaseInstance &db) {
	// Each dumped element carries the component geometry and its path.
	auto path_type = LogicalType::LIST(LogicalType::INTEGER);
	auto element_type = LogicalType::STRUCT({{"geom", GeoTypes::GEOMETRY()}, {"path", path_type}});
	auto return_type = LogicalType::LIST(element_type);

	ScalarFunction dump_function({GeoTypes::GEOMETRY()}, return_type, DumpFunction, nullptr, nullptr, nullptr,
	                             GeometryFunctionLocalState::Init);

	ScalarFunctionSet function_set("ST_Dump");
	function_set.AddFunction(dump_function);

	ExtensionUtil::RegisterFunction(db, function_set);
}

} // namespace core

} // namespace spatial
9 changes: 7 additions & 2 deletions spatial/src/spatial/gdal/functions/st_write.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,14 @@ struct GlobalState : public GlobalFunctionData {
//===--------------------------------------------------------------------===//
// Bind
//===--------------------------------------------------------------------===//
static unique_ptr<FunctionData> Bind(ClientContext &context, CopyInfo &info, vector<string> &names,
// The parameters are const in duckdb > 0.9.1, ifdef so we can build for both versions for now.
#if DUCKDB_PATCH_VERSION == 1
static unique_ptr<FunctionData> Bind(ClientContext &context, CopyInfo &info, vector<string> &names,
vector<LogicalType> &sql_types) {

#else
static unique_ptr<FunctionData> Bind(ClientContext &context, const CopyInfo &info, const vector<string> &names,
const vector<LogicalType> &sql_types) {
#endif
GdalFileHandler::SetLocalClientContext(context);

auto bind_data = make_uniq<BindData>(info.file_path, sql_types, names);
Expand Down
2 changes: 1 addition & 1 deletion spatial/third_party/yyjson/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

set(EXTENSION_SOURCES
${EXTENSION_SOURCES}
${CMAKE_CURRENT_SOURCE_DIR}/yyjson.c
${CMAKE_CURRENT_SOURCE_DIR}/yyjson.cpp
PARENT_SCOPE
)
4 changes: 4 additions & 0 deletions spatial/third_party/yyjson/include/yyjson.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

/** @file yyjson.h */


#ifndef YYJSON_H
#define YYJSON_H

Expand All @@ -24,6 +25,7 @@
#include <float.h>


namespace duckdb_yyjson_spatial {

/*==============================================================================
* Compile-time Options
Expand Down Expand Up @@ -6243,3 +6245,5 @@ yyjson_api_inline bool yyjson_get_str_pointer(
#endif /* extern "C" end */

#endif /* YYJSON_H */

} // namespace duckdb_yyjson_spatial
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <stdio.h>
#include <math.h>

namespace duckdb_yyjson_spatial {


/*==============================================================================
Expand Down Expand Up @@ -8410,3 +8411,5 @@ bool yyjson_mut_write_file(const char *path,
#elif defined(_MSC_VER)
# pragma warning(pop)
#endif /* warning suppress end */

} // namespace duckdb_yyjson_spatial
74 changes: 74 additions & 0 deletions test/sql/geometry/st_dump.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Tests for ST_Dump: flattening multi-geometries and geometry collections into
# a list of {geom, path} structs, where path holds 1-based child indices.
require spatial

# Basic test: nested collection as the last child; nested elements get a two-level path
query II
SELECT UNNEST(st_dump(ST_GeomFromText('GEOMETRYCOLLECTION (POINT (1 1), POINT (2 2), GEOMETRYCOLLECTION(POINT (3 3)))')), recursive := true);
----
POINT (1 1)	[1]
POINT (2 2)	[2]
POINT (3 3)	[3, 1]

# Nested collection in the middle: output follows the order of the input children
query II
SELECT UNNEST(st_dump(ST_GeomFromText('GEOMETRYCOLLECTION (POINT (1 1), GEOMETRYCOLLECTION(POINT (3 3)), POINT (2 2))')), recursive := true);
----
POINT (1 1)	[1]
POINT (3 3)	[2, 1]
POINT (2 2)	[3]

# Test empty collection: yields an empty list rather than NULL
query I
SElECT ST_Dump(ST_GeomFromText('GEOMETRYCOLLECTION EMPTY'));
----
[]

# Test collection with one point
query I
SElECT ST_Dump(ST_GeomFromText('GEOMETRYCOLLECTION (POINT (0 0))'));
----
[{'geom': POINT (0 0), 'path': [1]}]

# Test with multipoint: each member is dumped as a POINT
query II
SELECT UNNEST(ST_Dump(ST_GeomFromText('MULTIPOINT ((0 0), (1 1)))')), recursive := true);
----
POINT (0 0)	[1]
POINT (1 1)	[2]

# Test with multilinestring: each member is dumped as a LINESTRING
query II
SELECT UNNEST (ST_Dump(ST_GeomFromText('MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))')), recursive := true);
----
LINESTRING (0 0, 1 1)	[1]
LINESTRING (2 2, 3 3)	[2]

# Test with multipolygon: each member is dumped as a POLYGON
query II
SELECT UNNEST(ST_Dump(ST_GeomFromText('MULTIPOLYGON (((0 0, 1 1, 1 0, 0 0)), ((2 2, 3 3, 3 2, 2 2))))')), recursive := true);
----
POLYGON ((0 0, 1 1, 1 0, 0 0))	[1]
POLYGON ((2 2, 3 3, 3 2, 2 2))	[2]

# Test complex: mixed simple, multi and nested collection children (rows are sorted by the runner)
query II rowsort
SELECT UNNEST(ST_Dump(ST_GeomFromText('GEOMETRYCOLLECTION (POINT (1 1), LINESTRING (0 0, 1 1), POLYGON ((0 0, 1 1, 1 0, 0 0)), MULTIPOLYGON (((0 0, 1 1, 1 0, 0 0)), ((2 2, 3 3, 3 2, 2 2))), GEOMETRYCOLLECTION (POINT (3 3)))')), recursive := true);
----
LINESTRING (0 0, 1 1)	[2]
POINT (1 1)	[1]
POINT (3 3)	[5, 1]
POLYGON ((0 0, 1 1, 1 0, 0 0))	[3]
POLYGON ((0 0, 1 1, 1 0, 0 0))	[4, 1]
POLYGON ((2 2, 3 3, 3 2, 2 2))	[4, 2]


# Test with intermittent nulls: a NULL input row yields NULL and does not disturb neighbouring rows
query I
SELECT ST_Dump(geom)
FROM (VALUES
(ST_GeomFromText('GEOMETRYCOLLECTION (POINT (1 1), POINT (2 2), GEOMETRYCOLLECTION(POINT (3 3)))')),
(NULL),
(ST_GeomFromText('GEOMETRYCOLLECTION (POINT (1 1), GEOMETRYCOLLECTION(POINT (3 3)), POINT (2 2))'))
) as t(geom)
----
[{'geom': POINT (1 1), 'path': [1]}, {'geom': POINT (2 2), 'path': [2]}, {'geom': POINT (3 3), 'path': [3, 1]}]
NULL
[{'geom': POINT (1 1), 'path': [1]}, {'geom': POINT (3 3), 'path': [2, 1]}, {'geom': POINT (2 2), 'path': [3]}]

0 comments on commit cc560e6

Please sign in to comment.