Skip to content

Commit

Permalink
Run optimizer rules on subqueries by default
Browse files Browse the repository at this point in the history
This patch makes it so that rules that configure an `apply_order` will
also include subqueries in their traversal.

This is a step towards being able to run TPC-DS q41 (apache#4763) which has
an expression that needs simplification before we can decorrelate the
subquery.

This closes apache#3770 and maybe apache#2480
  • Loading branch information
eejbyfeldt committed Oct 22, 2024
1 parent 818ce3f commit bf8b752
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 22 deletions.
8 changes: 2 additions & 6 deletions datafusion/optimizer/src/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use log::{debug, warn};
use datafusion_common::alias::AliasGenerator;
use datafusion_common::config::ConfigOptions;
use datafusion_common::instant::Instant;
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
use datafusion_common::{internal_err, DFSchema, DataFusionError, Result};
use datafusion_expr::logical_plan::LogicalPlan;

Expand Down Expand Up @@ -250,10 +250,6 @@ impl Optimizer {
Arc::new(DecorrelatePredicateSubquery::new()),
Arc::new(ScalarSubqueryToJoin::new()),
Arc::new(ExtractEquijoinPredicate::new()),
// simplify expressions does not simplify expressions in subqueries, so we
// run it again after running the optimizations that potentially converted
// subqueries to joins
Arc::new(SimplifyExpressions::new()),
Arc::new(EliminateDuplicatedExpr::new()),
Arc::new(EliminateFilter::new()),
Arc::new(EliminateCrossJoin::new()),
Expand Down Expand Up @@ -384,7 +380,7 @@ impl Optimizer {

let result = match rule.apply_order() {
// optimizer handles recursion
Some(apply_order) => new_plan.rewrite(&mut Rewriter::new(
Some(apply_order) => new_plan.rewrite_with_subqueries(&mut Rewriter::new(
apply_order,
rule.as_ref(),
config,
Expand Down
2 changes: 0 additions & 2 deletions datafusion/sqllogictest/test_files/explain.slt
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ logical_plan after eliminate_join SAME TEXT AS ABOVE
logical_plan after decorrelate_predicate_subquery SAME TEXT AS ABOVE
logical_plan after scalar_subquery_to_join SAME TEXT AS ABOVE
logical_plan after extract_equijoin_predicate SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after eliminate_duplicated_expr SAME TEXT AS ABOVE
logical_plan after eliminate_filter SAME TEXT AS ABOVE
logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
Expand All @@ -214,7 +213,6 @@ logical_plan after eliminate_join SAME TEXT AS ABOVE
logical_plan after decorrelate_predicate_subquery SAME TEXT AS ABOVE
logical_plan after scalar_subquery_to_join SAME TEXT AS ABOVE
logical_plan after extract_equijoin_predicate SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after eliminate_duplicated_expr SAME TEXT AS ABOVE
logical_plan after eliminate_filter SAME TEXT AS ABOVE
logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
Expand Down
28 changes: 14 additions & 14 deletions datafusion/sqllogictest/test_files/subquery.slt
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ logical_plan
01)Filter: EXISTS (<subquery>)
02)--Subquery:
03)----Projection: t1.t1_int
04)------Filter: t1.t1_id > t1.t1_int
04)------Filter: t1.t1_int < t1.t1_id
05)--------TableScan: t1
06)--TableScan: t1 projection=[t1_id, t1_name, t1_int]

Expand Down Expand Up @@ -462,8 +462,8 @@ explain SELECT t1_id, (SELECT t2_int FROM t2 WHERE t2.t2_int = t1.t1_int limit 1
logical_plan
01)Projection: t1.t1_id, (<subquery>) AS t2_int
02)--Subquery:
03)----Limit: skip=0, fetch=1
04)------Projection: t2.t2_int
03)----Projection: t2.t2_int
04)------Limit: skip=0, fetch=1
05)--------Filter: t2.t2_int = outer_ref(t1.t1_int)
06)----------TableScan: t2
07)--TableScan: t1 projection=[t1_id, t1_int]
Expand All @@ -475,8 +475,8 @@ logical_plan
01)Projection: t1.t1_id
02)--Filter: t1.t1_int = (<subquery>)
03)----Subquery:
04)------Limit: skip=0, fetch=1
05)--------Projection: t2.t2_int
04)------Projection: t2.t2_int
05)--------Limit: skip=0, fetch=1
06)----------Filter: t2.t2_int = outer_ref(t1.t1_int)
07)------------TableScan: t2
08)----TableScan: t1 projection=[t1_id, t1_int]
Expand Down Expand Up @@ -542,13 +542,13 @@ query TT
explain SELECT t0_id, t0_name FROM t0 WHERE EXISTS (SELECT 1 FROM t1 INNER JOIN t2 ON(t1.t1_id = t2.t2_id and t1.t1_name = t0.t0_name))
----
logical_plan
01)Filter: EXISTS (<subquery>)
02)--Subquery:
03)----Projection: Int64(1)
04)------Inner Join: Filter: t1.t1_id = t2.t2_id AND t1.t1_name = outer_ref(t0.t0_name)
05)--------TableScan: t1
06)--------TableScan: t2
07)--TableScan: t0 projection=[t0_id, t0_name]
01)LeftSemi Join: t0.t0_name = __correlated_sq_2.t1_name
02)--TableScan: t0 projection=[t0_id, t0_name]
03)--SubqueryAlias: __correlated_sq_2
04)----Projection: t1.t1_name
05)------Inner Join: t1.t1_id = t2.t2_id
06)--------TableScan: t1 projection=[t1_id, t1_name]
07)--------TableScan: t2 projection=[t2_id]

#subquery_contains_join_contains_correlated_columns
query TT
Expand Down Expand Up @@ -656,8 +656,8 @@ explain SELECT t1_id, t1_name FROM t1 WHERE t1_id in (SELECT t2_id FROM t2 where
logical_plan
01)Filter: t1.t1_id IN (<subquery>)
02)--Subquery:
03)----Limit: skip=0, fetch=10
04)------Projection: t2.t2_id
03)----Projection: t2.t2_id
04)------Limit: skip=0, fetch=10
05)--------Filter: outer_ref(t1.t1_name) = t2.t2_name
06)----------TableScan: t2
07)--TableScan: t1 projection=[t1_id, t1_name]
Expand Down

0 comments on commit bf8b752

Please sign in to comment.