Skip to content
This repository has been archived by the owner on Sep 27, 2019. It is now read-only.

Add cardinality estimate to AbstractPlan object #1475

Merged
merged 11 commits into from
Sep 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/include/optimizer/optimizer_task.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ class OptimizeInputs : public OptimizerTask {
GroupExpression *group_expr_;
double cur_total_cost_;
int cur_child_idx_ = -1;
int pre_child_idx_ = -1;
int prev_child_idx_ = -1;
int cur_prop_pair_idx_ = 0;
};

Expand Down
9 changes: 5 additions & 4 deletions src/include/optimizer/plan_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
//
// Peloton
//
// operator_to_plan_transformer.h
// plan_generator.h
//
// Identification: src/include/optimizer/operator_to_plan_transformer.h
// Identification: src/include/optimizer/plan_generator.h
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -46,7 +46,8 @@ class PlanGenerator : public OperatorVisitor {
std::vector<expression::AbstractExpression *> required_cols,
std::vector<expression::AbstractExpression *> output_cols,
std::vector<std::unique_ptr<planner::AbstractPlan>> &children_plans,
std::vector<ExprMap> children_expr_map);
std::vector<ExprMap> children_expr_map,
int estimated_cardinality);

void Visit(const DummyScan *) override;

Expand Down
8 changes: 4 additions & 4 deletions src/include/optimizer/stats_calculator.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
//
// Peloton
//
// cost_and_stats_calculator.h
// stats_calculator.h
//
// Identification: src/include/optimizer/stats_calculator.h
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -56,13 +56,13 @@ class StatsCalculator : public OperatorVisitor {
std::unordered_map<std::string, std::shared_ptr<ColumnStats>> &stats,
bool copy);
/**
* @brief Update selectivity for predicate evaluation
* @brief Return estimated cardinality for a filter
*
* @param num_rows Number of rows of base table
* @param predicate_stats The stats for columns in the expression
* @param predicates conjunction predicates
*/
void UpdateStatsForFilter(
size_t EstimateCardinalityForFilter(
size_t num_rows,
std::unordered_map<std::string, std::shared_ptr<ColumnStats>>
&predicate_stats,
Expand Down
8 changes: 3 additions & 5 deletions src/include/planner/abstract_plan.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ class AbstractPlan : public Printable {
// Get the estimated cardinality of this plan.
// Returns the value currently stored in estimated_cardinality_.
int GetCardinality() const { return estimated_cardinality_; }

// TODO: This is only for testing now. When the optimizer is ready, we should
// delete this function and pass this information to constructor
// Overwrite the estimated cardinality of this plan.
// Intended to be called only while the plan is being constructed
// (PlanGenerator::ConvertOpExpression) or from tests.
void SetCardinality(int cardinality) { estimated_cardinality_ = cardinality; }

//===--------------------------------------------------------------------===//
Expand Down Expand Up @@ -152,9 +152,7 @@ class AbstractPlan : public Printable {
std::vector<std::unique_ptr<AbstractPlan>> children_;

AbstractPlan *parent_ = nullptr;

// TODO: This field is hard-coded for now. This needs to be changed when
// the optimizer has the cost model and cardinality estimation

int estimated_cardinality_ = 500000;

private:
Expand Down
8 changes: 4 additions & 4 deletions src/optimizer/child_stats_deriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
//
// Peloton
//
// cost_and_stats_calculator.h
// child_stats_deriver.cpp
//
// Identification: src/optimizer/stats_calculator.cpp
// Identification: src/optimizer/child_stats_deriver.cpp
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -67,7 +67,7 @@ void ChildStatsDeriver::PassDownColumn(expression::AbstractExpression *col) {
auto child_group = memo_->GetGroupByID(gexpr_->GetChildGroupId(idx));
if (child_group->GetTableAliases().count(tv_expr->GetTableName()) &&
// If we have not derived the column stats yet
child_group->HasColumnStats(tv_expr->GetColFullName())) {
!child_group->HasColumnStats(tv_expr->GetColFullName())) {
output_[idx].insert(col);
break;
}
Expand Down
5 changes: 2 additions & 3 deletions src/optimizer/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
//
// Identification: src/optimizer/optimizer.cpp
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -366,8 +366,7 @@ unique_ptr<planner::AbstractPlan> Optimizer::ChooseBestPlan(
PlanGenerator generator;
auto plan = generator.ConvertOpExpression(op, required_props, required_cols,
output_cols, children_plans,
children_expr_map);

children_expr_map, group->GetNumRows());
LOG_TRACE("Finish Choosing best plan for group %d", id);
return plan;
}
Expand Down
13 changes: 6 additions & 7 deletions src/optimizer/optimizer_task.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// Peloton
//
// rule.h
// optimizer_task.cpp
//
// Identification: src/optimizer/optimizer_task.cpp
//
Expand Down Expand Up @@ -313,14 +313,13 @@ void OptimizeInputs::execute() {
cur_total_cost_ += child_best_expr->GetCost(i_prop);
// Pruning
if (cur_total_cost_ > context_->cost_upper_bound) break;
} else if (pre_child_idx_ !=
cur_child_idx_) { // First time to optimize child group
pre_child_idx_ = cur_child_idx_;
} else if (prev_child_idx_ !=
cur_child_idx_) { // We haven't optimized child group
prev_child_idx_ = cur_child_idx_;
PushTask(new OptimizeInputs(this));
PushTask(new OptimizeGroup(
child_group, std::make_shared<OptimizeContext>(
context_->metadata, i_prop,
context_->cost_upper_bound - cur_total_cost_)));
context_->metadata, i_prop, context_->cost_upper_bound - cur_total_cost_)));
return;
} else { // If we return from OptimizeGroup, then there is no expr for
// the context
Expand Down Expand Up @@ -401,7 +400,7 @@ void OptimizeInputs::execute() {
}

// Reset child idx and total cost
pre_child_idx_ = -1;
prev_child_idx_ = -1;
cur_child_idx_ = 0;
cur_total_cost_ = 0;
}
Expand Down
8 changes: 5 additions & 3 deletions src/optimizer/plan_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
//
// Peloton
//
// operator_to_plan_transformer.cpp
// plan_generator.cpp
//
// Identification: src/optimizer/operator_to_plan_transformer.cpp
// Identification: src/optimizer/plan_generator.cpp
//
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
Expand Down Expand Up @@ -59,14 +59,16 @@ unique_ptr<planner::AbstractPlan> PlanGenerator::ConvertOpExpression(
vector<expression::AbstractExpression *> required_cols,
vector<expression::AbstractExpression *> output_cols,
vector<unique_ptr<planner::AbstractPlan>> &children_plans,
vector<ExprMap> children_expr_map) {
vector<ExprMap> children_expr_map,
int estimated_cardinality) {
required_props_ = move(required_props);
required_cols_ = move(required_cols);
output_cols_ = move(output_cols);
children_plans_ = move(children_plans);
children_expr_map_ = move(children_expr_map);
op->Op().Accept(this);
BuildProjectionPlan();
output_plan_->SetCardinality(estimated_cardinality);
GustavoAngulo marked this conversation as resolved.
Show resolved Hide resolved
return move(output_plan_);
}

Expand Down
6 changes: 2 additions & 4 deletions src/optimizer/stats/table_stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,10 @@ double TableStats::GetCardinality(const std::string column_name) {
return column_stats->cardinality;
}

// Returns true if we have column stats for a specific column.
//
// @param col_name name of the column to look up
// @return true when col_name is a key of col_name_to_stats_map_,
//         false otherwise
bool TableStats::HasColumnStats(const std::string col_name) {
  // The column has stats exactly when the lookup does not hit end().
  return col_name_to_stats_map_.find(col_name) != col_name_to_stats_map_.end();
}

std::shared_ptr<ColumnStats> TableStats::GetColumnStats(
Expand Down
21 changes: 11 additions & 10 deletions src/optimizer/stats_calculator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
//
// Peloton
//
// cost_and_stats_calculator.h
// stats_calculator.cpp
//
// Identification: src/optimizer/stats_calculator.cpp
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -64,18 +64,19 @@ void StatsCalculator::Visit(const LogicalGet *op) {
AddBaseTableStats(col, table_stats, predicate_stats, false);
}
}
// Use predicates to update the stats accordingly
UpdateStatsForFilter(
table_stats->GetColumnCount() == 0 ? 0 : table_stats->num_rows,
predicate_stats, op->predicates);
// Use predicates to estimate cardinality. If we were unable to find any column stats from the catalog, default to 0
if (table_stats->GetColumnCount() == 0) {
root_group->SetNumRows(0);
} else {
root_group->SetNumRows(EstimateCardinalityForFilter(table_stats->num_rows, predicate_stats, op->predicates));
}
}
// Add the stats to the group
for (auto &column_name_stats_pair : required_stats) {
auto &column_name = column_name_stats_pair.first;
auto &column_stats = column_name_stats_pair.second;
column_stats->num_rows = root_group->GetNumRows();
memo_->GetGroupByID(gexpr_->GetGroupID())
->AddStats(column_name, column_stats);
root_group->AddStats(column_name, column_stats);
}
}

Expand Down Expand Up @@ -233,7 +234,7 @@ void StatsCalculator::AddBaseTableStats(
}
}

void StatsCalculator::UpdateStatsForFilter(
size_t StatsCalculator::EstimateCardinalityForFilter(
size_t num_rows,
std::unordered_map<std::string, std::shared_ptr<ColumnStats>>
&predicate_stats,
Expand All @@ -255,7 +256,7 @@ void StatsCalculator::UpdateStatsForFilter(
annotated_expr.expr.get());
}
// Update selectivity
memo_->GetGroupByID(gexpr_->GetGroupID())->SetNumRows(num_rows * selectivity);
return num_rows * selectivity;
}

// Calculate the selectivity given the predicate and the stats of columns in the
Expand Down
5 changes: 3 additions & 2 deletions src/planner/abstract_plan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
//
// Identification: src/planner/abstract_plan.cpp
//
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -50,7 +50,8 @@ std::ostream &operator<<(std::ostream &os, const AbstractPlan &plan) {
/**
 * @brief Build a one-line textual description of this plan node.
 *
 * @return The plan node type string followed by "[NumChildren=...]" and
 *         "[Estimated Cardinality=...]".
 */
const std::string AbstractPlan::GetInfo() const {
  std::ostringstream os;
  // Single stream expression: node type, child count, then the estimated
  // cardinality reported by GetCardinality().
  os << PlanNodeTypeToString(GetPlanNodeType())
     << " [NumChildren=" << children_.size() << "]"
     << " [Estimated Cardinality=" << GetCardinality() << "]";
  return os.str();
}

Expand Down
6 changes: 4 additions & 2 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ set(TESTING_UTIL_TXN ${PROJECT_SOURCE_DIR}/test/concurrency/testing_trans
set(TESTING_UTIL_STATS ${PROJECT_SOURCE_DIR}/test/statistics/testing_stats_util.cpp)
set(TESTING_UTIL_SQL ${PROJECT_SOURCE_DIR}/test/sql/testing_sql_util.cpp)
set(TESTING_UTIL_INDEX ${PROJECT_SOURCE_DIR}/test/index/testing_index_util.cpp)
set(TESTING_UTIL_CODEGEN ${PROJECT_SOURCE_DIR}/test/codegen/testing_codegen_util.cpp)
set(TESTING_UTIL_FORECAST ${PROJECT_SOURCE_DIR}/test/brain/testing_forecast_util.cpp)
set(TESTING_UTIL_CODEGEN ${PROJECT_SOURCE_DIR}/test/codegen/testing_codegen_util.cpp)
set(TESTING_UTIL_FORECAST ${PROJECT_SOURCE_DIR}/test/brain/testing_forecast_util.cpp)
set(TESTING_UTIL_OPTIMIZER ${PROJECT_SOURCE_DIR}/test/optimizer/optimizer_test_util.cpp)

add_library(peloton-test-common EXCLUDE_FROM_ALL ${gmock_srcs} ${HARNESS}
${TESTING_UTIL_EXECUTOR}
Expand All @@ -60,6 +61,7 @@ add_library(peloton-test-common EXCLUDE_FROM_ALL ${gmock_srcs} ${HARNESS}
${TESTING_UTIL_SQL}
${TESTING_UTIL_CODEGEN}
${TESTING_UTIL_FORECAST}
${TESTING_UTIL_OPTIMIZER}
)

# --[ Add "make check" target
Expand Down
45 changes: 45 additions & 0 deletions test/optimizer/cardinality_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//===----------------------------------------------------------------------===//
//
// Peloton
//
// cardinality_test.cpp
//
// Identification: test/optimizer/cardinality_test.cpp
//
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#include "optimizer_test_util.cpp"
#include <chrono>


namespace peloton {
namespace test {

// Test fixture for the cardinality tests below. It adds no members of its
// own; everything is inherited from OptimizerTestUtil.
class CardinalityTest : public OptimizerTestUtil {};

// For a query without a predicate, the estimated cardinality of the generated
// plan is expected to equal the row count passed to CreateTable.
TEST_F(CardinalityTest, EstimatedCardinalityTest) {
  const int row_count = 10;
  const std::string table = "testtable";
  OptimizerTestUtil::CreateTable(table, row_count);

  auto scan_plan = GeneratePlan("SELECT a from " + table + ";");
  const auto estimate = scan_plan->GetCardinality();

  EXPECT_EQ(row_count, estimate);
}

// With a WHERE predicate, the estimated cardinality of the generated plan
// must not exceed the row count passed to CreateTable.
TEST_F(CardinalityTest, EstimatedCardinalityTestWithPredicate) {
  const int row_count = 10;
  const std::string table = "testtable";
  OptimizerTestUtil::CreateTable(table, row_count);

  auto filtered_plan =
      GeneratePlan("SELECT a from " + table + " WHERE " + "a < 10;");
  const auto estimate = filtered_plan->GetCardinality();

  EXPECT_GE(row_count, estimate);
}

}
}
Loading