Skip to content

Commit

Permalink
use index plan callback from duckdb when creating rtree indexes
Browse files Browse the repository at this point in the history
  • Loading branch information
Maxxen committed Nov 1, 2024
1 parent bd27be4 commit e079db5
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 74 deletions.
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 695 files
12 changes: 12 additions & 0 deletions spatial/include/spatial/core/index/rtree/rtree_index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
#include "spatial/core/index/rtree/rtree_node.hpp"
#include "spatial/core/index/rtree/rtree.hpp"

// Forward declaration: this header only names PhysicalOperator as the pointee of a
// unique_ptr return type (see RTreeIndex::CreatePlan), so a declaration is sufficient here.
namespace duckdb {
class PhysicalOperator;
}

namespace spatial {

namespace core {
Expand All @@ -28,6 +32,14 @@ class RTreeIndex final : public BoundIndex {
unique_ptr<IndexScanState> InitializeScan(const Box2D<float> &query) const;
idx_t Scan(IndexScanState &state, Vector &result) const;

//! Factory for RTreeIndex instances: forwards the fields of `input` to the RTreeIndex
//! constructor and hands the new index back as a BoundIndex.
static unique_ptr<BoundIndex> Create(CreateIndexInput &input) {
	return make_uniq<RTreeIndex>(input.name, input.constraint_type, input.column_ids, input.table_io_manager,
	                             input.unbound_expressions, input.db, input.options, input.storage_info);
}

//! Builds the physical operator plan used to create an RTree index from `input`
//! (which carries the logical create-index operator, the table scan and the client context).
//! Defined out of line in the implementation file.
static unique_ptr<PhysicalOperator> CreatePlan(PlanIndexInput &input);

public:
//! Called when data is appended to the index. The lock obtained from InitializeLock must be held
ErrorData Append(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once
#include "duckdb/planner/operator/logical_extension_operator.hpp"
#include "duckdb/parser/parsed_data/create_index_info.hpp"
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"

#include "spatial/common.hpp"
namespace spatial {
Expand Down
7 changes: 2 additions & 5 deletions spatial/src/spatial/core/index/rtree/rtree_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,11 +310,8 @@ void RTreeModule::RegisterIndex(DatabaseInstance &db) {
IndexType index_type;

index_type.name = RTreeIndex::TYPE_NAME;
index_type.create_instance = [](CreateIndexInput &input) -> unique_ptr<BoundIndex> {
auto res = make_uniq<RTreeIndex>(input.name, input.constraint_type, input.column_ids, input.table_io_manager,
input.unbound_expressions, input.db, input.options, input.storage_info);
return std::move(res);
};
index_type.create_instance = RTreeIndex::Create;
index_type.create_plan = RTreeIndex::CreatePlan;

// Register the index type
db.config.GetIndexTypes().RegisterIndexType(index_type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void LogicalCreateRTreeIndex::ResolveColumnBindings(ColumnBindingResolver &res,
[&](unique_ptr<Expression> *child) { res.VisitExpression(child); });
}

static unique_ptr<PhysicalOperator> CreateNullFilter(const LogicalCreateRTreeIndex &op,
static unique_ptr<PhysicalOperator> CreateNullFilter(const LogicalOperator &op,
const vector<LogicalType> &types, ClientContext &context) {
vector<unique_ptr<Expression>> filter_select_list;

Expand Down Expand Up @@ -75,7 +75,7 @@ static unique_ptr<PhysicalOperator> CreateNullFilter(const LogicalCreateRTreeInd
return make_uniq<PhysicalFilter>(types, std::move(filter_select_list), op.estimated_cardinality);
}

static unique_ptr<PhysicalOperator> CreateBoundingBoxProjection(const LogicalCreateRTreeIndex &op,
static unique_ptr<PhysicalOperator> CreateBoundingBoxProjection(const LogicalOperator &op,
const vector<LogicalType> &types,
ClientContext &context) {
auto &catalog = Catalog::GetSystemCatalog(context);
Expand Down Expand Up @@ -103,7 +103,7 @@ static unique_ptr<PhysicalOperator> CreateBoundingBoxProjection(const LogicalCre
return make_uniq<PhysicalProjection>(types, std::move(select_list), op.estimated_cardinality);
}

static unique_ptr<PhysicalOperator> CreateOrderByMinX(const LogicalCreateRTreeIndex &op,
static unique_ptr<PhysicalOperator> CreateOrderByMinX(const LogicalOperator &op,
const vector<LogicalType> &types, ClientContext &context) {
auto &catalog = Catalog::GetSystemCatalog(context);

Expand Down Expand Up @@ -137,15 +137,87 @@ static unique_ptr<PhysicalOperator> CreateOrderByMinX(const LogicalCreateRTreeIn
return make_uniq<PhysicalOrder>(types, std::move(orders), projections, op.estimated_cardinality);
}


//! Builds the physical plan that creates an RTree index.
//!
//! Operator chain, from the bottom up:
//!   table scan -> projection (key expression + row id) -> filter
//!   -> bounding box projection -> order by -> create rtree index
//!
//! Throws a BinderException unless the index is defined over exactly one GEOMETRY
//! expression that is consistent (i.e. has no side effects).
unique_ptr<PhysicalOperator> RTreeIndex::CreatePlan(PlanIndexInput &input) {
	auto &create_op = input.op;
	auto &ctx = input.context;

	// The logical create-index operator is expected to carry exactly one child
	D_ASSERT(create_op.children.size() == 1);

	// --- Validate the index key -------------------------------------------------
	auto &keys = create_op.unbound_expressions;

	// Exactly one key expression is supported
	if (keys.size() != 1) {
		throw BinderException("RTree indexes can only be created over a single column.");
	}
	auto &key = keys[0];

	// The key has to be of type GEOMETRY
	if (key->return_type != GeoTypes::GEOMETRY()) {
		throw BinderException("RTree indexes can only be created over GEOMETRY columns.");
	}

	// The key must not have side effects
	if (!key->IsConsistent()) {
		throw BinderException("RTree index keys cannot contain expressions with side effects.");
	}

	// --- Projection: (geometry, row id) -----------------------------------------
	vector<LogicalType> key_types;
	vector<unique_ptr<Expression>> key_exprs;

	// Geometry expression first
	auto geometry = create_op.expressions[0]->Copy();
	key_types.push_back(geometry->return_type);
	key_exprs.push_back(std::move(geometry));

	// Row id second: referenced through the last entry of the scan types
	const auto row_id_index = create_op.info->scan_types.size() - 1;
	key_types.emplace_back(LogicalType::ROW_TYPE);
	key_exprs.push_back(make_uniq<BoundReferenceExpression>(LogicalType::ROW_TYPE, row_id_index));

	auto key_projection =
	    make_uniq<PhysicalProjection>(key_types, std::move(key_exprs), create_op.estimated_cardinality);
	key_projection->children.push_back(std::move(input.table_scan));

	// --- Filter on the geometry column ------------------------------------------
	// (per the helper's name this drops NULL keys; its body is defined elsewhere)
	auto filtered = CreateNullFilter(create_op, key_types, ctx);
	filtered->children.push_back(std::move(key_projection));

	// --- Projection: (bounding box, row id) -------------------------------------
	vector<LogicalType> bbox_types = {GeoTypes::BOX_2DF(), LogicalType::ROW_TYPE};
	auto bounding_boxes = CreateBoundingBoxProjection(create_op, bbox_types, ctx);
	bounding_boxes->children.push_back(std::move(filtered));

	// --- Order by: sort the bounding boxes (by min x, going by the helper's name) --
	auto sorted = CreateOrderByMinX(create_op, bbox_types, ctx);
	sorted->children.push_back(std::move(bounding_boxes));

	// --- Index construction -----------------------------------------------------
	// NOTE(review): `info->column_ids` is read and `info` is moved within the same argument
	// list; keep the arguments exactly in this form unless the constructor signature is checked.
	auto create_index = make_uniq<PhysicalCreateRTreeIndex>(
	    create_op, create_op.table, create_op.info->column_ids, std::move(create_op.info),
	    std::move(create_op.unbound_expressions), create_op.estimated_cardinality);
	create_index->children.push_back(std::move(sorted));

	// Explicit move: the return type is a pointer to the base operator type
	return std::move(create_index);
}

// TODO: Remove this
unique_ptr<PhysicalOperator> LogicalCreateRTreeIndex::CreatePlan(ClientContext &context,
PhysicalPlanGenerator &generator) {

auto table_scan = generator.CreatePlan(std::move(children[0]));

auto &op = *this;

// generate a physical plan for the parallel index creation which consists of the following operators
// table scan - projection (for expression execution) - filter (NOT NULL) - order - create index
D_ASSERT(op.children.size() == 1);
auto table_scan = generator.CreatePlan(std::move(op.children[0]));

// Validate that we only have one expression
if (op.unbound_expressions.size() != 1) {
Expand Down
64 changes: 0 additions & 64 deletions spatial/src/spatial/core/index/rtree/rtree_index_plan_create.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
#include "duckdb/optimizer/optimizer_extension.hpp"
#include "duckdb/planner/operator/logical_create_index.hpp"
#include "duckdb/parser/parsed_data/create_index_info.hpp"
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"

#include "spatial/core/types.hpp"
#include "spatial/core/index/rtree/rtree_module.hpp"
#include "spatial/core/index/rtree/rtree_index.hpp"
#include "spatial/core/index/rtree/rtree_index_create_logical.hpp"
Expand All @@ -12,69 +7,10 @@ namespace spatial {

namespace core {

//-----------------------------------------------------------------------------
// Plan rewriter
//-----------------------------------------------------------------------------
//! Optimizer extension that walks a logical plan and swaps every CREATE INDEX operator
//! whose index type is RTreeIndex::TYPE_NAME for the extension's own LogicalCreateRTreeIndex.
class RTreeIndexInsertionRewriter : public OptimizerExtension {
public:
	RTreeIndexInsertionRewriter() {
		optimize_function = &RTreeIndexInsertionRewriter::Optimize;
	}

	//! Rewrites `plan` in place if (and only if) it is a CREATE INDEX operator for an RTree index.
	//! Throws a BinderException when the index is not defined over a single GEOMETRY expression.
	static void TryOptimize(ClientContext &context, unique_ptr<LogicalOperator> &plan) {
		// Only CREATE INDEX operators are of interest
		if (plan->type != LogicalOperatorType::LOGICAL_CREATE_INDEX) {
			return;
		}

		// ... and only those that create an index of our type
		auto &index_op = plan->Cast<LogicalCreateIndex>();
		if (index_op.info->index_type != RTreeIndex::TYPE_NAME) {
			return;
		}

		auto &key_exprs = index_op.expressions;

		// Exactly one key expression is allowed
		if (key_exprs.size() != 1) {
			throw BinderException("RTree indexes can only be created over a single column of keys.");
		}

		// The key expression has to be of type GEOMETRY
		if (key_exprs[0]->return_type != GeoTypes::GEOMETRY()) {
			throw BinderException("RTree indexes can only be created over GEOMETRY columns.");
		}

		// Build the custom logical operator, taking over the info and expressions of the original
		auto replacement =
		    make_uniq<LogicalCreateRTreeIndex>(std::move(index_op.info), std::move(key_exprs), index_op.table);

		// Take over the children as well
		replacement->children = std::move(index_op.children);

		// Swap the operator into the plan; this must stay last because it replaces the
		// operator that `index_op` refers to
		plan = std::move(replacement);
	}

	//! Applies TryOptimize to `plan` and then recurses into each of its children.
	static void Optimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
		TryOptimize(input.context, plan);

		for (auto &subplan : plan->children) {
			Optimize(input, subplan);
		}
	}
};

//-------------------------------------------------------------
// Register
//-------------------------------------------------------------
//! Registers the planning hooks for RTree index creation on the configuration of `db`.
//! NOTE(review): this listing comes from a diff view without +/- markers; the optimizer
//! extension registration below may be a line removed by the commit - confirm against the
//! actual file.
void RTreeModule::RegisterIndexPlanCreate(DatabaseInstance &db) {
// Register the optimizer extension
db.config.optimizer_extensions.push_back(RTreeIndexInsertionRewriter());

// Register the operator extension for the custom logical create-index operator
db.config.operator_extensions.push_back(make_uniq<LogicalCreateRTreeIndexOperatorExtension>());
}
Expand Down

0 comments on commit e079db5

Please sign in to comment.