From f3dd685645bb5f6ea6ae54b8ae15c6d6497154ef Mon Sep 17 00:00:00 2001 From: Pxl Date: Tue, 13 Aug 2024 14:22:56 +0800 Subject: [PATCH] [Improvement](runtime-filter) do not use bloom to replace in_or_bloom when rf need merge (#39147) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Proposed changes do not use bloom to replace in_or_bloom when rf need merge Because in some cases, this will lead to poor performance 图片 图片 --- .../post/RuntimeFilterPushDownVisitor.java | 6 ---- .../shape/query24.out | 4 +-- .../shape/query24.out | 4 +-- .../hint_tpcds/shape/query64.out | 28 +++++++++---------- .../tpcds_sf100/shape/query24.out | 4 +-- .../tpcds_sf1000/shape/query24.out | 4 +-- 6 files changed, 22 insertions(+), 28 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPushDownVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPushDownVisitor.java index 6ae5532ea8117e..f1b488c6e68ae8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPushDownVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPushDownVisitor.java @@ -203,12 +203,6 @@ public Boolean visitPhysicalRelation(PhysicalRelation scan, PushDownContext ctx) } TRuntimeFilterType type = ctx.type; - if (type == TRuntimeFilterType.IN_OR_BLOOM - && RuntimeFilterGenerator.hasRemoteTarget(ctx.builderNode, scan) - && !ctx.builderNode.isBroadCastJoin()) { - type = TRuntimeFilterType.BLOOM; - } - RuntimeFilter filter = ctx.rfContext.getRuntimeFilterBySrcAndType(ctx.srcExpr, type, ctx.builderNode); if (filter != null) { if (!filter.hasTargetScan(scan)) { diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query24.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query24.out index 6d93096d40594f..b1e5bf29828394 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query24.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query24.out @@ -7,7 +7,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[ss_item_sk];RF7 sr_item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[i_item_sk,ss_item_sk] ----------------PhysicalProject ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ss_item_sk] --------------------PhysicalProject @@ -26,7 +26,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------------PhysicalProject ------------------------------PhysicalOlapScan[customer] --------------------PhysicalProject -----------------------PhysicalOlapScan[item] apply RFs: RF7 +----------------------PhysicalOlapScan[item] apply RFs: RF6 ----------------PhysicalProject ------------------PhysicalOlapScan[store_returns] --PhysicalResultSink diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out index a13884492fae43..14dbf92e98e5fd 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out @@ -7,7 +7,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[ss_item_sk];RF7 sr_item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[i_item_sk,ss_item_sk] ----------------PhysicalProject ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ss_item_sk] --------------------PhysicalProject @@ -26,7 +26,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------------PhysicalProject ------------------------------PhysicalOlapScan[customer] --------------------PhysicalProject -----------------------PhysicalOlapScan[item] apply RFs: RF7 +----------------------PhysicalOlapScan[item] apply RFs: RF6 ----------------PhysicalProject ------------------PhysicalOlapScan[store_returns] --PhysicalResultSink diff --git a/regression-test/data/new_shapes_p0/hint_tpcds/shape/query64.out b/regression-test/data/new_shapes_p0/hint_tpcds/shape/query64.out index 9c731c9fba36df..b33dcb2a77ae65 100644 --- a/regression-test/data/new_shapes_p0/hint_tpcds/shape/query64.out +++ b/regression-test/data/new_shapes_p0/hint_tpcds/shape/query64.out @@ -7,23 +7,23 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF20 cs_item_sk->[i_item_sk,sr_item_sk,ss_item_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF19 cs_item_sk->[i_item_sk,sr_item_sk,ss_item_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF19 p_promo_sk->[ss_promo_sk] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF18 p_promo_sk->[ss_promo_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF18 s_store_sk->[ss_store_sk] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF17 s_store_sk->[ss_store_sk] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF17 d_date_sk->[ss_sold_date_sk] +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF16 d_date_sk->[ss_sold_date_sk] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF16 ib_income_band_sk->[hd_income_band_sk] +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF15 ib_income_band_sk->[hd_income_band_sk] --------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF15 ca_address_sk->[c_current_addr_sk] +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF14 ca_address_sk->[c_current_addr_sk] ------------------------------------PhysicalProject ---------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF14 hd_demo_sk->[c_current_hdemo_sk] +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF13 hd_demo_sk->[c_current_hdemo_sk] ----------------------------------------PhysicalProject -------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF13 ca_address_sk->[ss_addr_sk] +------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF12 ca_address_sk->[ss_addr_sk] --------------------------------------------PhysicalProject -----------------------------------------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF10 sr_item_sk->[ss_item_sk];RF11 sr_item_sk->[i_item_sk];RF12 sr_ticket_number->[ss_ticket_number] +----------------------------------------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF10 sr_item_sk->[i_item_sk,ss_item_sk];RF11 sr_ticket_number->[ss_ticket_number] ------------------------------------------------PhysicalProject --------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF9 hd_demo_sk->[ss_hdemo_sk] ----------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 i_item_sk->[ss_item_sk] @@ -34,13 +34,13 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) --------------------------------------------------------------PhysicalProject ----------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] ------------------------------------------------------------------PhysicalProject ---------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 RF8 RF9 RF10 RF12 RF13 RF17 RF18 RF19 RF20 +--------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 RF8 RF9 RF10 RF11 RF12 RF16 RF17 RF18 RF19 ------------------------------------------------------------------PhysicalProject --------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() build RFs:RF4 cd_demo_sk->[c_current_cdemo_sk] ----------------------------------------------------------------------PhysicalProject ------------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[c_first_sales_date_sk] --------------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------------PhysicalOlapScan[customer] apply RFs: RF3 RF4 RF7 RF14 RF15 +----------------------------------------------------------------------------PhysicalOlapScan[customer] apply RFs: RF3 RF4 RF7 RF13 RF14 --------------------------------------------------------------------------PhysicalProject ----------------------------------------------------------------------------PhysicalOlapScan[date_dim] ----------------------------------------------------------------------PhysicalProject @@ -51,7 +51,7 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ------------------------------------------------------------PhysicalOlapScan[date_dim] ------------------------------------------------------PhysicalProject --------------------------------------------------------filter((item.i_current_price <= 58.00) and (item.i_current_price >= 49.00) and i_color IN ('blush', 'lace', 'lawn', 'misty', 'orange', 'pink')) -----------------------------------------------------------PhysicalOlapScan[item] apply RFs: RF11 RF20 +----------------------------------------------------------PhysicalOlapScan[item] apply RFs: RF10 RF19 ----------------------------------------------------PhysicalProject ------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF2 ib_income_band_sk->[hd_income_band_sk] --------------------------------------------------------PhysicalProject @@ -59,11 +59,11 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) --------------------------------------------------------PhysicalProject ----------------------------------------------------------PhysicalOlapScan[income_band] ------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF20 +--------------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF19 --------------------------------------------PhysicalProject ----------------------------------------------PhysicalOlapScan[customer_address] ----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF16 +------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF15 ------------------------------------PhysicalProject --------------------------------------PhysicalOlapScan[customer_address] --------------------------------PhysicalProject diff --git a/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query24.out b/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query24.out index cb8e44d35b32a8..0dc2c851744de2 100644 --- a/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query24.out +++ b/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query24.out @@ -7,7 +7,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[ss_item_sk];RF7 sr_item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[i_item_sk,ss_item_sk] ----------------PhysicalProject ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ss_item_sk] --------------------PhysicalProject @@ -26,7 +26,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------------PhysicalProject ------------------------------PhysicalOlapScan[customer_address] --------------------PhysicalProject -----------------------PhysicalOlapScan[item] apply RFs: RF7 +----------------------PhysicalOlapScan[item] apply RFs: RF6 ----------------PhysicalProject ------------------PhysicalOlapScan[store_returns] --PhysicalResultSink diff --git a/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query24.out b/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query24.out index a64d7f776a1546..d93b73ade16ecb 100644 --- a/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query24.out +++ b/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query24.out @@ -7,7 +7,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[ss_item_sk];RF7 sr_item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[i_item_sk,ss_item_sk] ----------------PhysicalProject ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ss_item_sk] --------------------PhysicalProject @@ -26,7 +26,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------------PhysicalProject ------------------------------PhysicalOlapScan[customer_address] --------------------PhysicalProject -----------------------PhysicalOlapScan[item] apply RFs: RF7 +----------------------PhysicalOlapScan[item] apply RFs: RF6 ----------------PhysicalProject ------------------PhysicalOlapScan[store_returns] --PhysicalResultSink