From 129e45377643a2fe989dd465aa5d574a6b63b0b0 Mon Sep 17 00:00:00 2001 From: Sanath Kumar Vobilisetty Date: Sat, 6 Aug 2022 01:15:10 +0700 Subject: [PATCH] [ORCA] Implemented InPlaceUpdate to be used for updates made on non-distribution columns. (#13889) Implemented InPlaceUpdate to be used for updates made on non-distribution columns. Currently, ORCA uses Split-Update for updates on both distribution and non-distribution columns. With this commit, ORCA uses an InPlaceUpdate whenever updates are made to non-distribution columns or non-partition keys, and Split Update if any of the modified columns are either distribution or partition keys. Consider the setup below, where we are updating a non-distribution column, b, in the table foo. ` create table foo(a int, b int); explain update foo set b=4; ` ORCA produces a plan with Split and Update nodes ``` Update on public.foo -> Result Output: foo_1.a, foo_1.b, (DMLAction), foo_1.ctid, foo_1.gp_segment_id -> Split Output: foo_1.a, foo_1.b, foo_1.ctid, foo_1.gp_segment_id, DMLAction -> Seq Scan on public.foo foo_1 Output: foo_1.a, foo_1.b, 4, foo_1.ctid, foo_1.gp_segment_id ``` There is no point in using a Split and Update for this, as we are updating a non-distribution column, which does not require any redistribution. This commit uses an InPlace Update to perform updates on non-distribution columns, as the Planner does. Below is the new plan produced with this commit. 
New Plan ``` Update on public.foo -> Seq Scan on public.foo foo_1 Output: foo_1.a, 4, foo_1.ctid, foo_1.gp_segment_id Optimizer: Pivotal Optimizer (GPORCA) ``` --- src/backend/executor/nodeDML.c | 43 +- .../translate/CTranslatorDXLToPlStmt.cpp | 24 +- .../gporca/data/dxl/minidump/SelfUpdate.mdp | 4 +- .../dxl/minidump/UpdateCheckConstraint.mdp | 127 ++---- .../minidump/UpdateNoEnforceConstraints.mdp | 103 ++--- .../data/dxl/minidump/UpdateRandomDistr.mdp | 2 +- .../dxl/minidump/UpdateUniqueConstraint-2.mdp | 186 +++------ .../dxl/minidump/UpdateWindowGatherMerge.mdp | 2 +- .../data/dxl/minidump/UpdateWithHashJoin.mdp | 136 +++--- .../data/dxl/minidump/UpdateWithTriggers.mdp | 117 ++---- .../data/dxl/minidump/UpdateZeroRows.mdp | 387 ++++++++---------- .../minidump/UpdatingNonDistColSameTable.mdp | 73 ++-- .../UpdatingNonDistributionColumnFunc.mdp | 100 ++--- .../data/dxl/parse_tests/q60-DMLUpdate.xml | 284 ++++++------- .../gpopt/operators/CExpressionPreprocessor.h | 3 + .../include/gpopt/operators/CLogicalDML.h | 22 +- .../include/gpopt/operators/CLogicalUpdate.h | 13 +- .../include/gpopt/operators/CPhysicalDML.h | 13 +- .../src/operators/CExpressionPreprocessor.cpp | 102 ++++- .../libgpopt/src/operators/CLogicalDML.cpp | 61 ++- .../libgpopt/src/operators/CLogicalUpdate.cpp | 21 +- .../libgpopt/src/operators/CPhysicalDML.cpp | 48 +-- .../src/translate/CTranslatorDXLToExpr.cpp | 12 +- .../src/translate/CTranslatorExprToDXL.cpp | 7 +- .../src/xforms/CXformImplementDML.cpp | 8 +- .../libgpopt/src/xforms/CXformUpdate2DML.cpp | 93 +++-- .../libgpopt/src/xforms/CXformUtils.cpp | 8 +- .../naucrates/dxl/operators/CDXLPhysicalDML.h | 19 +- .../dxl/parser/CParseHandlerPhysicalDML.h | 3 + .../include/naucrates/dxl/xml/dxltokens.h | 2 + .../src/operators/CDXLPhysicalDML.cpp | 20 +- .../src/parser/CParseHandlerPhysicalDML.cpp | 18 +- .../gporca/libnaucrates/src/xml/dxltokens.cpp | 2 + .../gporca/server/src/unittest/CTestUtils.cpp | 2 +- src/include/nodes/execnodes.h | 1 + 
.../isolation/expected/create_index_hot.out | 3 +- .../isolation/specs/create_index_hot.spec | 3 - .../modify_table_data_corrupt_optimizer.out | 78 ++-- .../expected/DML_over_joins_optimizer.out | 69 ++-- .../regress/expected/bfv_dml_optimizer.out | 13 +- .../regress/expected/bfv_legacy_optimizer.out | 2 - .../expected/gp_unique_rowid_optimizer.out | 140 +++---- src/test/regress/expected/qp_dml_joins.out | 4 +- .../expected/qp_dml_joins_optimizer.out | 4 +- .../expected/qp_orca_fallback_optimizer.out | 13 +- .../expected/updatable_views_optimizer.out | 52 ++- src/test/regress/sql/qp_dml_joins.sql | 4 +- 47 files changed, 1169 insertions(+), 1282 deletions(-) diff --git a/src/backend/executor/nodeDML.c b/src/backend/executor/nodeDML.c index d5dc775273ad..93c4b798f22b 100644 --- a/src/backend/executor/nodeDML.c +++ b/src/backend/executor/nodeDML.c @@ -57,20 +57,21 @@ ExecDML(DMLState *node) { return NULL; } + EvalPlanQualSetSlot(&node->mt_epqstate, slot); + bool isnull = false; - int action = DatumGetUInt32(slot_getattr(slot, plannode->actionColIdx, &isnull)); - Assert(!isnull); + DMLAction action = -1; + bool isUpdate = node->ps.state->es_plannedstmt->commandType == CMD_UPDATE; - bool isUpdate = false; - if (node->ps.state->es_plannedstmt->commandType == CMD_UPDATE) + // if it's not in place update + if(plannode->actionColIdx) { - isUpdate = true; + action = DatumGetUInt32(slot_getattr(slot, plannode->actionColIdx, &isnull)); + Assert(!isnull); + Assert(action == DML_INSERT || action == DML_DELETE); } - Assert(action == DML_INSERT || action == DML_DELETE); - - /* * Reset per-tuple memory context to free any expression evaluation * storage allocated in the previous tuple cycle. 
@@ -155,7 +156,7 @@ ExecDML(DMLState *node) isUpdate, InvalidOid); } - else /* DML_DELETE */ + else if (action == DML_DELETE) { int32 segid = GpIdentity.segindex; Datum ctid = slot_getattr(slot, plannode->ctidColIdx, &isnull); @@ -183,6 +184,26 @@ ExecDML(DMLState *node) PLANGEN_OPTIMIZER /* Plan origin */, isUpdate); } + else /* DML_UPDATE */ + { + int32 segid = GpIdentity.segindex; + + Datum ctid = slot_getattr(slot, plannode->ctidColIdx, &isnull); + + ItemPointer tupleid = (ItemPointer) DatumGetPointer(ctid); + ItemPointerData tuple_ctid = *tupleid; + tupleid = &tuple_ctid; + + ExecUpdate( + tupleid, + segid, + NULL, //oldtuple + node->cleanedUpSlot, + NULL, //planSlot + &node->mt_epqstate, + node->ps.state, + true); + } return slot; } @@ -218,6 +239,8 @@ ExecInitDML(DML *node, EState *estate, int eflags) Plan *outerPlan = outerPlan(node); outerPlanState(dmlstate) = ExecInitNode(outerPlan, estate, eflags); + EvalPlanQualInit(&dmlstate->mt_epqstate, estate, outerPlan, NIL, 0); + /* * ORCA Plan does not seem to set junk attribute for "gp_segment_id", else we * could call the simple code below. 
@@ -261,6 +284,7 @@ ExecInitDML(DML *node, EState *estate, int eflags) dmlstate->junkfilter = ExecInitJunkFilter(node->plan.targetlist, dmlstate->ps.state->es_result_relation_info->ri_RelationDesc->rd_att->tdhasoid, dmlstate->cleanedUpSlot); + estate->es_result_relation_info->ri_junkFilter = dmlstate->junkfilter; /* * We don't maintain typmod in the targetlist, so we should fixup the @@ -311,6 +335,7 @@ ExecEndDML(DMLState *node) ExecFreeExprContext(&node->ps); ExecClearTuple(node->ps.ps_ResultTupleSlot); ExecClearTuple(node->cleanedUpSlot); + EvalPlanQualEnd(&node->mt_epqstate); ExecEndNode(outerPlanState(node)); EndPlanStateGpmonPkt(&node->ps); } diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp index 46d35e256c99..c39558f710b9 100644 --- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp +++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp @@ -4100,6 +4100,7 @@ CTranslatorDXLToPlStmt::TranslateDXLDml( DML *dml = MakeNode(DML); Plan *plan = &(dml->plan); AclMode acl_mode = ACL_NO_RIGHTS; + BOOL isSplit = phy_dml_dxlop->FSplit(); switch (phy_dml_dxlop->GetDmlOpType()) { @@ -4186,14 +4187,25 @@ CTranslatorDXLToPlStmt::TranslateDXLDml( dml_target_list = target_list_with_dropped_cols; } - // Extract column numbers of the action and ctid columns from the - // target list. - dml->actionColIdx = AddTargetEntryForColId(&dml_target_list, &child_context, - phy_dml_dxlop->ActionColId(), - true /*is_resjunk*/); + // Doesn't needed for in place update + if (isSplit || CMD_UPDATE != m_cmd_type) + { + // Extract column numbers of the action and ctid columns from the + // target list. 
+ dml->actionColIdx = AddTargetEntryForColId( + &dml_target_list, &child_context, phy_dml_dxlop->ActionColId(), + true /*is_resjunk*/); + GPOS_ASSERT(0 != dml->actionColIdx); + } + else + { + dml->actionColIdx = 0; + } + dml->ctidColIdx = AddTargetEntryForColId(&dml_target_list, &child_context, phy_dml_dxlop->GetCtIdColId(), true /*is_resjunk*/); + if (phy_dml_dxlop->IsOidsPreserved()) { dml->tupleoidColIdx = AddTargetEntryForColId( @@ -4205,8 +4217,6 @@ CTranslatorDXLToPlStmt::TranslateDXLDml( dml->tupleoidColIdx = 0; } - GPOS_ASSERT(0 != dml->actionColIdx); - plan->targetlist = dml_target_list; plan->lefttree = child_plan; diff --git a/src/backend/gporca/data/dxl/minidump/SelfUpdate.mdp b/src/backend/gporca/data/dxl/minidump/SelfUpdate.mdp index 77cd608aec88..0f25dec726e7 100644 --- a/src/backend/gporca/data/dxl/minidump/SelfUpdate.mdp +++ b/src/backend/gporca/data/dxl/minidump/SelfUpdate.mdp @@ -15,7 +15,7 @@ update t1 set b = c; - + @@ -216,7 +216,7 @@ update t1 set b = c; - + diff --git a/src/backend/gporca/data/dxl/minidump/UpdateCheckConstraint.mdp b/src/backend/gporca/data/dxl/minidump/UpdateCheckConstraint.mdp index d57ae91617d9..13728135d504 100644 --- a/src/backend/gporca/data/dxl/minidump/UpdateCheckConstraint.mdp +++ b/src/backend/gporca/data/dxl/minidump/UpdateCheckConstraint.mdp @@ -294,17 +294,17 @@ - + - + - - + + @@ -330,14 +330,14 @@ - + - - + + @@ -351,9 +351,6 @@ - - - @@ -377,15 +374,12 @@ - + - - - @@ -398,8 +392,8 @@ - - + + @@ -411,17 +405,17 @@ - + - + + + + - - - @@ -434,33 +428,23 @@ - - - - - - - + + + - + - - - - - - @@ -468,56 +452,29 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/backend/gporca/data/dxl/minidump/UpdateNoEnforceConstraints.mdp b/src/backend/gporca/data/dxl/minidump/UpdateNoEnforceConstraints.mdp index 6d2e6ade0c3e..dad87dacbdde 100644 --- 
a/src/backend/gporca/data/dxl/minidump/UpdateNoEnforceConstraints.mdp +++ b/src/backend/gporca/data/dxl/minidump/UpdateNoEnforceConstraints.mdp @@ -238,21 +238,21 @@ update constraints_tab SET notnullcol = NULL, positivecol =-1; - - + + - + - - + + - - + + @@ -269,19 +269,19 @@ update constraints_tab SET notnullcol = NULL, positivecol =-1; - + - + - - + + - - + + @@ -289,30 +289,17 @@ update constraints_tab SET notnullcol = NULL, positivecol =-1; - - - - + + + - + - - - - - - - - - - - - @@ -321,46 +308,22 @@ update constraints_tab SET notnullcol = NULL, positivecol =-1; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + diff --git a/src/backend/gporca/data/dxl/minidump/UpdateRandomDistr.mdp b/src/backend/gporca/data/dxl/minidump/UpdateRandomDistr.mdp index e7979bb06aa8..48b35e85f9db 100644 --- a/src/backend/gporca/data/dxl/minidump/UpdateRandomDistr.mdp +++ b/src/backend/gporca/data/dxl/minidump/UpdateRandomDistr.mdp @@ -196,7 +196,7 @@ - + diff --git a/src/backend/gporca/data/dxl/minidump/UpdateUniqueConstraint-2.mdp b/src/backend/gporca/data/dxl/minidump/UpdateUniqueConstraint-2.mdp index f05bcb78f115..b7df53522f2e 100644 --- a/src/backend/gporca/data/dxl/minidump/UpdateUniqueConstraint-2.mdp +++ b/src/backend/gporca/data/dxl/minidump/UpdateUniqueConstraint-2.mdp @@ -764,18 +764,18 @@ - - + + - + - - + + @@ -791,16 +791,19 @@ - + - + - - + + + + + @@ -808,22 +811,12 @@ - - - - - - - - - - - - - + + + - + @@ -838,13 +831,18 @@ - - - - + + + + + + + + + - + @@ -853,12 +851,6 @@ - - - - - - @@ -867,93 +859,45 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/backend/gporca/data/dxl/minidump/UpdateWindowGatherMerge.mdp b/src/backend/gporca/data/dxl/minidump/UpdateWindowGatherMerge.mdp 
index 55d3f01242eb..d650c5cb9502 100644 --- a/src/backend/gporca/data/dxl/minidump/UpdateWindowGatherMerge.mdp +++ b/src/backend/gporca/data/dxl/minidump/UpdateWindowGatherMerge.mdp @@ -709,7 +709,7 @@ - + diff --git a/src/backend/gporca/data/dxl/minidump/UpdateWithHashJoin.mdp b/src/backend/gporca/data/dxl/minidump/UpdateWithHashJoin.mdp index e7e1b1db9797..da0d818009e8 100644 --- a/src/backend/gporca/data/dxl/minidump/UpdateWithHashJoin.mdp +++ b/src/backend/gporca/data/dxl/minidump/UpdateWithHashJoin.mdp @@ -300,10 +300,10 @@ - - + + - + @@ -327,9 +327,9 @@ - + - + @@ -344,13 +344,22 @@ - - - - + + + + + + + + + + + + + - + @@ -367,75 +376,44 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/backend/gporca/data/dxl/minidump/UpdateWithTriggers.mdp b/src/backend/gporca/data/dxl/minidump/UpdateWithTriggers.mdp index e626def55975..bc6c57727571 100644 --- a/src/backend/gporca/data/dxl/minidump/UpdateWithTriggers.mdp +++ b/src/backend/gporca/data/dxl/minidump/UpdateWithTriggers.mdp @@ -204,17 +204,17 @@ - + - + - - + + @@ -234,16 +234,16 @@ - + - + - - + + @@ -254,13 +254,10 @@ - - - - + @@ -273,7 +270,7 @@ - + @@ -284,23 +281,20 @@ - + - + - - + + - - - @@ -308,73 +302,24 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + - + diff --git a/src/backend/gporca/data/dxl/minidump/UpdateZeroRows.mdp b/src/backend/gporca/data/dxl/minidump/UpdateZeroRows.mdp index 8e063421b638..45e9098f9cb2 100644 --- a/src/backend/gporca/data/dxl/minidump/UpdateZeroRows.mdp +++ b/src/backend/gporca/data/dxl/minidump/UpdateZeroRows.mdpdiff --git a/src/backend/gporca/data/dxl/minidump/UpdatingNonDistColSameTable.mdp b/src/backend/gporca/data/dxl/minidump/UpdatingNonDistColSameTable.mdp index 
0c724aa138bc..532fbccf1c05 100644 --- a/src/backend/gporca/data/dxl/minidump/UpdatingNonDistColSameTable.mdp +++ b/src/backend/gporca/data/dxl/minidump/UpdatingNonDistColSameTable.mdp @@ -234,17 +234,17 @@ - + - + - - + + @@ -264,16 +264,16 @@ - + - + - - + + @@ -284,48 +284,23 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + diff --git a/src/backend/gporca/data/dxl/minidump/UpdatingNonDistributionColumnFunc.mdp b/src/backend/gporca/data/dxl/minidump/UpdatingNonDistributionColumnFunc.mdp index 30bd81e5f423..8439bf043967 100644 --- a/src/backend/gporca/data/dxl/minidump/UpdatingNonDistributionColumnFunc.mdp +++ b/src/backend/gporca/data/dxl/minidump/UpdatingNonDistributionColumnFunc.mdp @@ -238,18 +238,18 @@ - - + + - + - - + + @@ -265,16 +265,24 @@ - + - + - - + + + + + + + + + + @@ -282,32 +290,17 @@ - - - - + + + - + - - - - - - - - - - - - - - @@ -316,42 +309,21 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + diff --git a/src/backend/gporca/data/dxl/parse_tests/q60-DMLUpdate.xml b/src/backend/gporca/data/dxl/parse_tests/q60-DMLUpdate.xml index 038c6673722a..c66997b0007e 100644 --- a/src/backend/gporca/data/dxl/parse_tests/q60-DMLUpdate.xml +++ b/src/backend/gporca/data/dxl/parse_tests/q60-DMLUpdate.xmldiff --git a/src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionPreprocessor.h b/src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionPreprocessor.h index 003899edd69e..2f9d69008df6 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionPreprocessor.h +++ b/src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionPreprocessor.h @@ -213,6 +213,9 @@ class CExpressionPreprocessor static CExpression *PexprTransposeSelectAndProject(CMemoryPool *mp, CExpression *pexpr); + static CExpression *ConvertSplitUpdateToInPlaceUpdate(CMemoryPool *mp, + CExpression *expr); + static CExpression 
*CollapseSelectAndReplaceColref(CMemoryPool *mp, CExpression *expr, CColRef *pcolref, diff --git a/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalDML.h b/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalDML.h index fe2effe20be6..c7f7217026a1 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalDML.h +++ b/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalDML.h @@ -67,8 +67,8 @@ class CLogicalDML : public CLogical // tuple oid column if one exists CColRef *m_pcrTupleOid; - // private copy ctor - CLogicalDML(const CLogicalDML &); + // Split Update + BOOL m_fSplit; public: // ctor @@ -76,9 +76,11 @@ class CLogicalDML : public CLogical // ctor CLogicalDML(CMemoryPool *mp, EDMLOperator edmlop, - CTableDescriptor *ptabdesc, CColRefArray *colref_array, - CBitSet *pbsModified, CColRef *pcrAction, CColRef *pcrCtid, - CColRef *pcrSegmentId, CColRef *pcrTupleOid); + CTableDescriptor *ptabdesc, + CColRefArray *pdrgpcrSource, CBitSet *pbsModified, + CColRef *pcrAction, CColRef *pcrCtid, + CColRef *pcrSegmentId, CColRef *pcrTupleOid, + BOOL fSplit); // dtor virtual ~CLogicalDML(); @@ -153,6 +155,13 @@ class CLogicalDML : public CLogical return m_pcrTupleOid; } + // Is update using split + BOOL + FSplit() const + { + return m_fSplit; + } + // operator specific hash function virtual ULONG HashValue() const; @@ -245,6 +254,9 @@ class CLogicalDML : public CLogical // debug print virtual IOstream &OsPrint(IOstream &) const; + // Helper function to print DML operator type. 
+ static void PrintOperatorType(IOstream &os, EDMLOperator, BOOL fSplit); + }; // class CLogicalDML } // namespace gpopt diff --git a/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalUpdate.h b/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalUpdate.h index bf54f4c5abd8..af6e533bae49 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalUpdate.h +++ b/src/backend/gporca/libgpopt/include/gpopt/operators/CLogicalUpdate.h @@ -49,8 +49,8 @@ class CLogicalUpdate : public CLogical // tuple oid column CColRef *m_pcrTupleOid; - // private copy ctor - CLogicalUpdate(const CLogicalUpdate &); + // Is Split Update + BOOL m_fSplit; public: // ctor @@ -60,7 +60,7 @@ class CLogicalUpdate : public CLogical CLogicalUpdate(CMemoryPool *mp, CTableDescriptor *ptabdesc, CColRefArray *pdrgpcrDelete, CColRefArray *pdrgpcrInsert, CColRef *pcrCtid, CColRef *pcrSegmentId, - CColRef *pcrTupleOid); + CColRef *pcrTupleOid, BOOL fSplit); // dtor virtual ~CLogicalUpdate(); @@ -121,6 +121,13 @@ class CLogicalUpdate : public CLogical return m_ptabdesc; } + // Is update using split + BOOL + FSplit() const + { + return m_fSplit; + } + // operator specific hash function virtual ULONG HashValue() const; diff --git a/src/backend/gporca/libgpopt/include/gpopt/operators/CPhysicalDML.h b/src/backend/gporca/libgpopt/include/gpopt/operators/CPhysicalDML.h index d1e9f432ae1c..b649260a545d 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/operators/CPhysicalDML.h +++ b/src/backend/gporca/libgpopt/include/gpopt/operators/CPhysicalDML.h @@ -69,6 +69,9 @@ class CPhysicalDML : public CPhysical // compute required order spec COrderSpec *PosComputeRequired(CMemoryPool *mp, CTableDescriptor *ptabdesc); + // Split Update + BOOL m_fSplit; + // compute local required columns void ComputeRequiredLocalColumns(CMemoryPool *mp); @@ -80,7 +83,8 @@ class CPhysicalDML : public CPhysical CPhysicalDML(CMemoryPool *mp, CLogicalDML::EDMLOperator edmlop, CTableDescriptor *ptabdesc, 
CColRefArray *pdrgpcrSource, CBitSet *pbsModified, CColRef *pcrAction, CColRef *pcrCtid, - CColRef *pcrSegmentId, CColRef *pcrTupleOid); + CColRef *pcrSegmentId, CColRef *pcrTupleOid, + BOOL fSplit); // dtor virtual ~CPhysicalDML(); @@ -148,6 +152,13 @@ class CPhysicalDML : public CPhysical return m_pdrgpcrSource; } + // Is update using split + BOOL + FSplit() const + { + return m_fSplit; + } + // match function virtual BOOL Matches(COperator *pop) const; diff --git a/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp b/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp index 081758d0dbc2..45c282c339b5 100644 --- a/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp @@ -39,6 +39,7 @@ #include "gpopt/operators/CLogicalSetOp.h" #include "gpopt/operators/CLogicalUnion.h" #include "gpopt/operators/CLogicalUnionAll.h" +#include "gpopt/operators/CLogicalUpdate.h" #include "gpopt/operators/CNormalizer.h" #include "gpopt/operators/COrderedAggPreprocessor.h" #include "gpopt/operators/CPredicateUtils.h" @@ -3057,6 +3058,95 @@ CExpressionPreprocessor::PexprTransposeSelectAndProject(CMemoryPool *mp, } } +// Preprocessor function to decide if the Update operator to be proceeded with +// split update or inplace update based on the columns modified by the update +// operation. + +// Split Update if any of the modified columns is a distribution column or a partition key, +// InPlace Update if all the modified columns are non-distribution columns and not partition keys. + +// Example: Update Modified non-distribution columns. 
+// Input: +// +--CLogicalUpdate ("foo"), Split Update, Delete Columns: ["a" (0), "b" (1)], Insert Columns: ["a" (0), "b" (9)], "ctid" (2), "gp_segment_id" (8) +// +--CLogicalProject +// |--CLogicalGet +// +--CScalarProjectList +// +// Output: +// +--CLogicalUpdate ("foo"), In-place Update, Delete Columns: ["a" (0), "b" (1)], Insert Columns: ["a" (0), "b" (9)], "ctid" (2), "gp_segment_id" (8) +// +--CLogicalProject +// |--CLogicalGet +// +--CScalarProjectList + +CExpression * +CExpressionPreprocessor::ConvertSplitUpdateToInPlaceUpdate(CMemoryPool *mp, + CExpression *pexpr) +{ + GPOS_ASSERT(NULL != mp); + GPOS_ASSERT(NULL != pexpr); + COperator *pop = pexpr->Pop(); + if (COperator::EopLogicalUpdate != pop->Eopid()) + { + pexpr->AddRef(); + return pexpr; + } + CLogicalUpdate *popUpdate = CLogicalUpdate::PopConvert(pop); + CTableDescriptor *tabdesc = popUpdate->Ptabdesc(); + CColRefArray *pdrgpcrInsert = popUpdate->PdrgpcrInsert(); + CColRefArray *pdrgpcrDelete = popUpdate->PdrgpcrDelete(); + const ULONG num_cols = pdrgpcrInsert->Size(); + BOOL split_update = false; + CColRefArray *ppartColRefs = GPOS_NEW(mp) CColRefArray(mp); + const ULongPtrArray *pdrgpulPart = tabdesc->PdrgpulPart(); + const ULONG ulPartKeys = pdrgpulPart->Size(); + + if (tabdesc->GetRelDistribution() == IMDRelation::EreldistrRandom) + { + split_update = true; + } + + // Uses split update if any of the modified columns are either + // distribution or partition keys. + for (ULONG ul = 0; ul < ulPartKeys; ul++) + { + ULONG *pulPartKey = (*pdrgpulPart)[ul]; + CColRef *colref = (*pdrgpcrInsert)[*pulPartKey]; + ppartColRefs->Append(colref); + } + + for (ULONG ul = 0; ul < num_cols; ul++) + { + CColRef *pcrInsert = (*pdrgpcrInsert)[ul]; + CColRef *pcrDelete = (*pdrgpcrDelete)[ul]; + + // Checking if column is either distribution or partition key. 
+ if ((pcrInsert != pcrDelete && pcrDelete->IsDistCol()) || + (ppartColRefs->Find(pcrInsert) != NULL)) + { + split_update = true; + break; + } + } + ppartColRefs->Release(); + if (!split_update) + { + CExpression *pexprChild = (*pexpr)[0]; + pexprChild->AddRef(); + pdrgpcrInsert->AddRef(); + pdrgpcrDelete->AddRef(); + tabdesc->AddRef(); + CExpression *pexprNew = GPOS_NEW(mp) CExpression( + mp, + GPOS_NEW(mp) CLogicalUpdate( + mp, tabdesc, pdrgpcrDelete, pdrgpcrInsert, popUpdate->PcrCtid(), + popUpdate->PcrSegmentId(), popUpdate->PcrTupleOid(), false), + pexprChild); + return pexprNew; + } + pexpr->AddRef(); + return pexpr; +} + // main driver, pre-processing of input logical expression CExpression * CExpressionPreprocessor::PexprPreprocess( @@ -3257,12 +3347,18 @@ CExpressionPreprocessor::PexprPreprocess( PexprTransposeSelectAndProject(mp, pexprExistWithPredFromINSubq); pexprExistWithPredFromINSubq->Release(); - // (28) normalize expression again - CExpression *pexprNormalized2 = - CNormalizer::PexprNormalize(mp, pexprTransposeSelectAndProject); + // (28) convert split update to inplace update + CExpression *pexprSplitUpdateToInplace = + ConvertSplitUpdateToInPlaceUpdate(mp, pexprTransposeSelectAndProject); GPOS_CHECK_ABORT; pexprTransposeSelectAndProject->Release(); + // (29) normalize expression again + CExpression *pexprNormalized2 = + CNormalizer::PexprNormalize(mp, pexprSplitUpdateToInplace); + GPOS_CHECK_ABORT; + pexprSplitUpdateToInplace->Release(); + return pexprNormalized2; } diff --git a/src/backend/gporca/libgpopt/src/operators/CLogicalDML.cpp b/src/backend/gporca/libgpopt/src/operators/CLogicalDML.cpp index ac37fda4f0fd..a84ea75a49f7 100644 --- a/src/backend/gporca/libgpopt/src/operators/CLogicalDML.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CLogicalDML.cpp @@ -41,7 +41,8 @@ CLogicalDML::CLogicalDML(CMemoryPool *mp) m_pcrAction(NULL), m_pcrCtid(NULL), m_pcrSegmentId(NULL), - m_pcrTupleOid(NULL) + m_pcrTupleOid(NULL), + m_fSplit(true) { 
m_fPattern = true; } @@ -58,7 +59,8 @@ CLogicalDML::CLogicalDML(CMemoryPool *mp, EDMLOperator edmlop, CTableDescriptor *ptabdesc, CColRefArray *pdrgpcrSource, CBitSet *pbsModified, CColRef *pcrAction, CColRef *pcrCtid, - CColRef *pcrSegmentId, CColRef *pcrTupleOid) + CColRef *pcrSegmentId, CColRef *pcrTupleOid, + BOOL fSplit) : CLogical(mp), m_edmlop(edmlop), m_ptabdesc(ptabdesc), @@ -67,7 +69,8 @@ CLogicalDML::CLogicalDML(CMemoryPool *mp, EDMLOperator edmlop, m_pcrAction(pcrAction), m_pcrCtid(pcrCtid), m_pcrSegmentId(pcrSegmentId), - m_pcrTupleOid(pcrTupleOid) + m_pcrTupleOid(pcrTupleOid), + m_fSplit(fSplit) { GPOS_ASSERT(EdmlSentinel != edmlop); GPOS_ASSERT(NULL != ptabdesc); @@ -134,7 +137,8 @@ CLogicalDML::Matches(COperator *pop) const m_pcrSegmentId == popDML->PcrSegmentId() && m_pcrTupleOid == popDML->PcrTupleOid() && m_ptabdesc->MDId()->Equals(popDML->Ptabdesc()->MDId()) && - m_pdrgpcrSource->Equals(popDML->PdrgpcrSource()); + m_pdrgpcrSource->Equals(popDML->PdrgpcrSource()) && + m_fSplit == popDML->FSplit(); } //--------------------------------------------------------------------------- @@ -210,7 +214,7 @@ CLogicalDML::PopCopyWithRemappedColumns(CMemoryPool *mp, return GPOS_NEW(mp) CLogicalDML(mp, m_edmlop, m_ptabdesc, colref_array, m_pbsModified, - pcrAction, pcrCtid, pcrSegmentId, pcrTupleOid); + pcrAction, pcrCtid, pcrSegmentId, pcrTupleOid, m_fSplit); } //--------------------------------------------------------------------------- @@ -345,14 +349,12 @@ CLogicalDML::OsPrint(IOstream &os) const return COperator::OsPrint(os); } - os << SzId() << " ("; - os << m_rgwszDml[m_edmlop] << ", "; m_ptabdesc->Name().OsPrint(os); - os << "), Affected Columns: ["; + CLogicalDML::PrintOperatorType(os, m_edmlop, m_fSplit); + os << "Affected Columns: ["; CUtils::OsPrintDrgPcr(os, m_pdrgpcrSource); os << "], Action: ("; m_pcrAction->OsPrint(os); - os << ")"; if (EdmlDelete == m_edmlop || EdmlUpdate == m_edmlop) { @@ -361,8 +363,47 @@ CLogicalDML::OsPrint(IOstream &os) 
const os << ", "; m_pcrSegmentId->OsPrint(os); } - return os; } +//--------------------------------------------------------------------------- +// @function: +// CLogicalDML::PrintOperatorType +// +// @doc: +// Helper function to print DML operator type based on the given operator +// enum, used in OSPrint to print objects. +// +//--------------------------------------------------------------------------- +void +CLogicalDML::PrintOperatorType(IOstream &os, EDMLOperator edmlOperator, + BOOL fSplit) +{ + switch (edmlOperator) + { + case EdmlInsert: + os << "), Insert, "; + break; + + case EdmlDelete: + os << "), Delete, "; + break; + + case EdmlUpdate: + if (fSplit) + { + os << "), Split Update, "; + } + else + { + os << "), In-place Update, "; + } + break; + + default: + GPOS_ASSERT(!"Unrecognized DML Operator"); + break; + } +} + // EOF diff --git a/src/backend/gporca/libgpopt/src/operators/CLogicalUpdate.cpp b/src/backend/gporca/libgpopt/src/operators/CLogicalUpdate.cpp index 0b469ebcf267..8bae0e1d56e7 100644 --- a/src/backend/gporca/libgpopt/src/operators/CLogicalUpdate.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CLogicalUpdate.cpp @@ -52,14 +52,16 @@ CLogicalUpdate::CLogicalUpdate(CMemoryPool *mp) CLogicalUpdate::CLogicalUpdate(CMemoryPool *mp, CTableDescriptor *ptabdesc, CColRefArray *pdrgpcrDelete, CColRefArray *pdrgpcrInsert, CColRef *pcrCtid, - CColRef *pcrSegmentId, CColRef *pcrTupleOid) + CColRef *pcrSegmentId, CColRef *pcrTupleOid, + BOOL fSplit) : CLogical(mp), m_ptabdesc(ptabdesc), m_pdrgpcrDelete(pdrgpcrDelete), m_pdrgpcrInsert(pdrgpcrInsert), m_pcrCtid(pcrCtid), m_pcrSegmentId(pcrSegmentId), - m_pcrTupleOid(pcrTupleOid) + m_pcrTupleOid(pcrTupleOid), + m_fSplit(fSplit) { GPOS_ASSERT(NULL != ptabdesc); @@ -118,7 +120,8 @@ CLogicalUpdate::Matches(COperator *pop) const m_pcrTupleOid == popUpdate->PcrTupleOid() && m_ptabdesc->MDId()->Equals(popUpdate->Ptabdesc()->MDId()) && m_pdrgpcrDelete->Equals(popUpdate->PdrgpcrDelete()) && - 
m_pdrgpcrInsert->Equals(popUpdate->PdrgpcrInsert()); + m_pdrgpcrInsert->Equals(popUpdate->PdrgpcrInsert()) && + m_fSplit == popUpdate->FSplit(); } //--------------------------------------------------------------------------- @@ -175,7 +178,7 @@ CLogicalUpdate::PopCopyWithRemappedColumns(CMemoryPool *mp, } return GPOS_NEW(mp) CLogicalUpdate(mp, m_ptabdesc, pdrgpcrDelete, pdrgpcrInsert, pcrCtid, - pcrSegmentId, pcrTupleOid); + pcrSegmentId, pcrTupleOid, m_fSplit); } //--------------------------------------------------------------------------- @@ -285,7 +288,15 @@ CLogicalUpdate::OsPrint(IOstream &os) const os << SzId() << " ("; m_ptabdesc->Name().OsPrint(os); - os << "), Delete Columns: ["; + if (m_fSplit) + { + os << "), Split Update"; + } + else + { + os << "), In-place Update"; + } + os << ", Delete Columns: ["; CUtils::OsPrintDrgPcr(os, m_pdrgpcrDelete); os << "], Insert Columns: ["; CUtils::OsPrintDrgPcr(os, m_pdrgpcrInsert); diff --git a/src/backend/gporca/libgpopt/src/operators/CPhysicalDML.cpp b/src/backend/gporca/libgpopt/src/operators/CPhysicalDML.cpp index 1546260dab5a..d408696f30d5 100644 --- a/src/backend/gporca/libgpopt/src/operators/CPhysicalDML.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CPhysicalDML.cpp @@ -38,7 +38,8 @@ CPhysicalDML::CPhysicalDML(CMemoryPool *mp, CLogicalDML::EDMLOperator edmlop, CTableDescriptor *ptabdesc, CColRefArray *pdrgpcrSource, CBitSet *pbsModified, CColRef *pcrAction, CColRef *pcrCtid, - CColRef *pcrSegmentId, CColRef *pcrTupleOid) + CColRef *pcrSegmentId, CColRef *pcrTupleOid, + BOOL fSplit) : CPhysical(mp), m_edmlop(edmlop), m_ptabdesc(ptabdesc), @@ -50,7 +51,8 @@ CPhysicalDML::CPhysicalDML(CMemoryPool *mp, CLogicalDML::EDMLOperator edmlop, m_pcrTupleOid(pcrTupleOid), m_pds(NULL), m_pos(NULL), - m_pcrsRequiredLocal(NULL) + m_pcrsRequiredLocal(NULL), + m_fSplit(fSplit) { GPOS_ASSERT(CLogicalDML::EdmlSentinel != edmlop); GPOS_ASSERT(NULL != ptabdesc); @@ -80,33 +82,8 @@ CPhysicalDML::CPhysicalDML(CMemoryPool *mp, 
CLogicalDML::EDMLOperator edmlop, // Update of the distribution key: This will be handled with a Split node below the DML node, // with the split deleting the existing rows and this DML node inserting the new rows, // so this is handled here like an insert, using hash distribution for all partitions. - BOOL is_update_without_changing_distribution_key = false; - if (CLogicalDML::EdmlUpdate == edmlop) - { - CDistributionSpecHashed *hashDistSpec = - CDistributionSpecHashed::PdsConvert(m_pds); - CColRefSet *updatedCols = GPOS_NEW(mp) CColRefSet(mp); - CColRefSet *distributionCols = hashDistSpec->PcrsUsed(mp); - - // compute a ColRefSet of the updated columns - for (ULONG c = 0; c < pdrgpcrSource->Size(); c++) - { - if (pbsModified->Get(c)) - { - updatedCols->Include((*pdrgpcrSource)[c]); - } - } - - is_update_without_changing_distribution_key = - !updatedCols->FIntersects(distributionCols); - - updatedCols->Release(); - distributionCols->Release(); - } - - if (CLogicalDML::EdmlDelete == edmlop || - is_update_without_changing_distribution_key) + if (CLogicalDML::EdmlDelete == edmlop || !fSplit) { m_pds->Release(); m_pds = GPOS_NEW(mp) CDistributionSpecRandom(); @@ -437,7 +414,8 @@ CPhysicalDML::Matches(COperator *pop) const m_pcrSegmentId == popDML->PcrSegmentId() && m_pcrTupleOid == popDML->PcrTupleOid() && m_ptabdesc->MDId()->Equals(popDML->Ptabdesc()->MDId()) && - m_pdrgpcrSource->Equals(popDML->PdrgpcrSource()); + m_pdrgpcrSource->Equals(popDML->PdrgpcrSource()) && + m_fSplit == popDML->FSplit(); } return false; @@ -499,7 +477,7 @@ CPhysicalDML::PosComputeRequired(CMemoryPool *mp, CTableDescriptor *ptabdesc) // the action column, see explanation in function's comment const ULONG ulKeySets = pdrgpbsKeys->Size(); BOOL fNeedsSort = false; - for (ULONG ul = 0; ul < ulKeySets; ul++) + for (ULONG ul = 0; ul < ulKeySets && !fNeedsSort; ul++) { CBitSet *pbs = (*pdrgpbsKeys)[ul]; if (!pbs->IsDisjoint(m_pbsModified)) @@ -538,7 +516,11 @@ 
CPhysicalDML::ComputeRequiredLocalColumns(CMemoryPool *mp) // include source columns m_pcrsRequiredLocal->Include(m_pdrgpcrSource); - m_pcrsRequiredLocal->Include(m_pcrAction); + // Action column is not required for InPlaceUpdate operator. + if (m_fSplit) + { + m_pcrsRequiredLocal->Include(m_pcrAction); + } if (CLogicalDML::EdmlDelete == m_edmlop || CLogicalDML::EdmlUpdate == m_edmlop) @@ -570,9 +552,9 @@ CPhysicalDML::OsPrint(IOstream &os) const } os << SzId() << " ("; - os << CLogicalDML::m_rgwszDml[m_edmlop] << ", "; m_ptabdesc->Name().OsPrint(os); - os << "), Source Columns: ["; + CLogicalDML::PrintOperatorType(os, m_edmlop, m_fSplit); + os << "Source Columns: ["; CUtils::OsPrintDrgPcr(os, m_pdrgpcrSource); os << "], Action: ("; m_pcrAction->OsPrint(os); diff --git a/src/backend/gporca/libgpopt/src/translate/CTranslatorDXLToExpr.cpp b/src/backend/gporca/libgpopt/src/translate/CTranslatorDXLToExpr.cpp index 95557eef6d88..3bd1335ddbaa 100644 --- a/src/backend/gporca/libgpopt/src/translate/CTranslatorDXLToExpr.cpp +++ b/src/backend/gporca/libgpopt/src/translate/CTranslatorDXLToExpr.cpp @@ -1490,12 +1490,12 @@ CTranslatorDXLToExpr::PexprLogicalUpdate(const CDXLNode *dxlnode) pcrTupleOid = LookupColRef(m_phmulcr, tuple_oid); } - return GPOS_NEW(m_mp) - CExpression(m_mp, - GPOS_NEW(m_mp) CLogicalUpdate(m_mp, ptabdesc, pdrgpcrDelete, - pdrgpcrInsert, pcrCtid, - pcrSegmentId, pcrTupleOid), - pexprChild); + return GPOS_NEW(m_mp) CExpression( + m_mp, + GPOS_NEW(m_mp) + CLogicalUpdate(m_mp, ptabdesc, pdrgpcrDelete, pdrgpcrInsert, + pcrCtid, pcrSegmentId, pcrTupleOid, true), + pexprChild); } //--------------------------------------------------------------------------- diff --git a/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXL.cpp b/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXL.cpp index db3d2a0f7c7c..bdaf9ec2bee0 100644 --- a/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXL.cpp +++ 
b/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXL.cpp @@ -5786,9 +5786,10 @@ CTranslatorExprToDXL::PdxlnDML(CExpression *pexpr, CDXLDirectDispatchInfo *dxl_direct_dispatch_info = GetDXLDirectDispatchInfo(pexpr); - CDXLPhysicalDML *pdxlopDML = GPOS_NEW(m_mp) CDXLPhysicalDML( - m_mp, dxl_dml_type, table_descr, pdrgpul, action_colid, ctid_colid, - segid_colid, preserve_oids, tuple_oid, dxl_direct_dispatch_info); + CDXLPhysicalDML *pdxlopDML = GPOS_NEW(m_mp) + CDXLPhysicalDML(m_mp, dxl_dml_type, table_descr, pdrgpul, action_colid, + ctid_colid, segid_colid, preserve_oids, tuple_oid, + dxl_direct_dispatch_info, popDML->FSplit()); // project list CColRefSet *pcrsOutput = pexpr->Prpp()->PcrsRequired(); diff --git a/src/backend/gporca/libgpopt/src/xforms/CXformImplementDML.cpp b/src/backend/gporca/libgpopt/src/xforms/CXformImplementDML.cpp index 3c5b920e4760..89c53b43c854 100644 --- a/src/backend/gporca/libgpopt/src/xforms/CXformImplementDML.cpp +++ b/src/backend/gporca/libgpopt/src/xforms/CXformImplementDML.cpp @@ -89,6 +89,7 @@ CXformImplementDML::Transform(CXformContext *pxfctxt, CXformResult *pxfres, CColRef *pcrCtid = popDML->PcrCtid(); CColRef *pcrSegmentId = popDML->PcrSegmentId(); CColRef *pcrTupleOid = popDML->PcrTupleOid(); + BOOL fSplit = popDML->FSplit(); // child of DML operator CExpression *pexprChild = (*pexpr)[0]; @@ -97,10 +98,9 @@ CXformImplementDML::Transform(CXformContext *pxfctxt, CXformResult *pxfres, // create physical DML CExpression *pexprAlt = GPOS_NEW(mp) CExpression( mp, - GPOS_NEW(mp) - CPhysicalDML(mp, edmlop, ptabdesc, pdrgpcrSource, pbsModified, - pcrAction, pcrCtid, pcrSegmentId, pcrTupleOid), - pexprChild); + GPOS_NEW(mp) CPhysicalDML(mp, edmlop, ptabdesc, pdrgpcrSource, pbsModified, + pcrAction, pcrCtid, pcrSegmentId, pcrTupleOid, fSplit), + pexprChild); // add alternative to transformation result pxfres->Add(pexprAlt); } diff --git a/src/backend/gporca/libgpopt/src/xforms/CXformUpdate2DML.cpp 
b/src/backend/gporca/libgpopt/src/xforms/CXformUpdate2DML.cpp index a1cd4328d20e..b1d38a7dee9f 100644 --- a/src/backend/gporca/libgpopt/src/xforms/CXformUpdate2DML.cpp +++ b/src/backend/gporca/libgpopt/src/xforms/CXformUpdate2DML.cpp @@ -87,6 +87,7 @@ CXformUpdate2DML::Transform(CXformContext *pxfctxt, CXformResult *pxfres, CColRef *pcrCtid = popUpdate->PcrCtid(); CColRef *pcrSegmentId = popUpdate->PcrSegmentId(); CColRef *pcrTupleOid = popUpdate->PcrTupleOid(); + BOOL fSplit = popUpdate->FSplit(); // child of update operator CExpression *pexprChild = (*pexpr)[0]; @@ -109,23 +110,32 @@ CXformUpdate2DML::Transform(CXformContext *pxfctxt, CXformResult *pxfres, CMDAccessor *md_accessor = poctxt->Pmda(); CColumnFactory *col_factory = poctxt->Pcf(); - pdrgpcrDelete->AddRef(); - pdrgpcrInsert->AddRef(); - const IMDType *pmdtype = md_accessor->PtMDType(); CColRef *pcrAction = col_factory->PcrCreate(pmdtype, default_type_modifier); - CExpression *pexprProjElem = GPOS_NEW(mp) CExpression( - mp, GPOS_NEW(mp) CScalarProjectElement(mp, pcrAction), - GPOS_NEW(mp) CExpression(mp, GPOS_NEW(mp) CScalarDMLAction(mp))); + CExpression *pexprSplit = NULL; + if (fSplit) + { + pdrgpcrDelete->AddRef(); + pdrgpcrInsert->AddRef(); + CExpression *pexprProjElem = GPOS_NEW(mp) CExpression( + mp, GPOS_NEW(mp) CScalarProjectElement(mp, pcrAction), + GPOS_NEW(mp) CExpression(mp, GPOS_NEW(mp) CScalarDMLAction(mp))); + + CExpression *pexprProjList = GPOS_NEW(mp) + CExpression(mp, GPOS_NEW(mp) CScalarProjectList(mp), pexprProjElem); + pexprSplit = GPOS_NEW(mp) CExpression( + mp, + GPOS_NEW(mp) + CLogicalSplit(mp, pdrgpcrDelete, pdrgpcrInsert, pcrCtid, + pcrSegmentId, pcrAction, pcrTupleOid), + pexprChild, pexprProjList); + } + else + { + pexprSplit = pexprChild; + } - CExpression *pexprProjList = GPOS_NEW(mp) - CExpression(mp, GPOS_NEW(mp) CScalarProjectList(mp), pexprProjElem); - CExpression *pexprSplit = GPOS_NEW(mp) CExpression( - mp, - GPOS_NEW(mp) CLogicalSplit(mp, pdrgpcrDelete, 
pdrgpcrInsert, pcrCtid, - pcrSegmentId, pcrAction, pcrTupleOid), - pexprChild, pexprProjList); // add assert checking that no NULL values are inserted for nullable columns or no check constraints are violated COptimizerConfig *optimizer_config = @@ -141,30 +151,47 @@ CXformUpdate2DML::Transform(CXformContext *pxfctxt, CXformResult *pxfres, pexprAssertConstraints = pexprSplit; } - const ULONG num_cols = pdrgpcrInsert->Size(); - - CBitSet *pbsModified = GPOS_NEW(mp) CBitSet(mp, ptabdesc->ColumnCount()); - for (ULONG ul = 0; ul < num_cols; ul++) + CExpression *pexprDML = NULL; + // create logical DML + ptabdesc->AddRef(); + if (fSplit) { - CColRef *pcrInsert = (*pdrgpcrInsert)[ul]; - CColRef *pcrDelete = (*pdrgpcrDelete)[ul]; - if (pcrInsert != pcrDelete) + const ULONG num_cols = pdrgpcrInsert->Size(); + + CBitSet *pbsModified = + GPOS_NEW(mp) CBitSet(mp, ptabdesc->ColumnCount()); + for (ULONG ul = 0; ul < num_cols; ul++) { - // delete columns refer to the original tuple's descriptor, if it's different - // from the corresponding insert column, then we're modifying the column - // at that position - pbsModified->ExchangeSet(ul); + CColRef *pcrInsert = (*pdrgpcrInsert)[ul]; + CColRef *pcrDelete = (*pdrgpcrDelete)[ul]; + if (pcrInsert != pcrDelete) + { + // delete columns refer to the original tuple's descriptor, if it's different + // from the corresponding insert column, then we're modifying the column + // at that position + pbsModified->ExchangeSet(ul); + } } + pdrgpcrDelete->AddRef(); + pexprDML = GPOS_NEW(mp) CExpression( + mp, + GPOS_NEW(mp) + CLogicalDML(mp, CLogicalDML::EdmlUpdate, ptabdesc, + pdrgpcrDelete, pbsModified, pcrAction, pcrCtid, + pcrSegmentId, pcrTupleOid, fSplit), + pexprAssertConstraints); + } + else + { + pdrgpcrInsert->AddRef(); + pexprDML = GPOS_NEW(mp) CExpression( + mp, + GPOS_NEW(mp) + CLogicalDML(mp, CLogicalDML::EdmlUpdate, ptabdesc, + pdrgpcrInsert, GPOS_NEW(mp) CBitSet(mp), pcrAction, + pcrCtid, pcrSegmentId, NULL, fSplit), + 
pexprAssertConstraints); } - // create logical DML - ptabdesc->AddRef(); - pdrgpcrDelete->AddRef(); - CExpression *pexprDML = GPOS_NEW(mp) CExpression( - mp, - GPOS_NEW(mp) CLogicalDML(mp, CLogicalDML::EdmlUpdate, ptabdesc, - pdrgpcrDelete, pbsModified, pcrAction, pcrCtid, - pcrSegmentId, pcrTupleOid), - pexprAssertConstraints); // TODO: - Oct 30, 2012; detect and handle AFTER triggers on update diff --git a/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp b/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp index 10e83c485e4f..2ea8c8f801e4 100644 --- a/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp +++ b/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp @@ -1356,10 +1356,10 @@ CXformUtils::PexprLogicalDMLOverProject(CMemoryPool *mp, CExpression *pexprDML = GPOS_NEW(mp) CExpression( mp, GPOS_NEW(mp) - CLogicalDML(mp, edmlop, ptabdesc, colref_array, - GPOS_NEW(mp) CBitSet(mp) /*pbsModified*/, pcrAction, - pcrCtid, pcrSegmentId, NULL /*pcrTupleOid*/), - pexprProject); + CLogicalDML(mp, edmlop, ptabdesc, colref_array, + GPOS_NEW(mp) CBitSet(mp) /*pbsModified*/, pcrAction, + pcrCtid, pcrSegmentId, NULL /*pcrTupleOid*/, true), + pexprProject); CExpression *pexprOutput = pexprDML; diff --git a/src/backend/gporca/libnaucrates/include/naucrates/dxl/operators/CDXLPhysicalDML.h b/src/backend/gporca/libnaucrates/include/naucrates/dxl/operators/CDXLPhysicalDML.h index e1a034a8a5b6..fd26f7c490d2 100644 --- a/src/backend/gporca/libnaucrates/include/naucrates/dxl/operators/CDXLPhysicalDML.h +++ b/src/backend/gporca/libnaucrates/include/naucrates/dxl/operators/CDXLPhysicalDML.h @@ -69,17 +69,19 @@ class CDXLPhysicalDML : public CDXLPhysical // direct dispatch info for insert statements CDXLDirectDispatchInfo *m_direct_dispatch_info; + // Is Split Update + BOOL m_fSplit; + // private copy ctor CDXLPhysicalDML(const CDXLPhysicalDML &); public: // ctor CDXLPhysicalDML(CMemoryPool *mp, const EdxlDmlType dxl_dml_type, - CDXLTableDescr *table_descr, - 
ULongPtrArray *src_colids_array, ULONG action_colid, - ULONG ctid_colid, ULONG segid_colid, BOOL preserve_oids, - ULONG tuple_oid, - CDXLDirectDispatchInfo *dxl_direct_dispatch_info); + CDXLTableDescr *table_descr, ULongPtrArray *src_colids_array, + ULONG action_colid, ULONG ctid_colid, ULONG segid_colid, BOOL preserve_oids, + ULONG tuple_oid, CDXLDirectDispatchInfo *dxl_direct_dispatch_info, + BOOL fSplit); // dtor virtual ~CDXLPhysicalDML(); @@ -153,6 +155,13 @@ class CDXLPhysicalDML : public CDXLPhysical return m_direct_dispatch_info; } + // Is update using split + BOOL + FSplit() const + { + return m_fSplit; + } + #ifdef GPOS_DEBUG // checks whether the operator has valid structure, i.e. number and // types of child nodes diff --git a/src/backend/gporca/libnaucrates/include/naucrates/dxl/parser/CParseHandlerPhysicalDML.h b/src/backend/gporca/libnaucrates/include/naucrates/dxl/parser/CParseHandlerPhysicalDML.h index 52bfd921b8ce..510f408f4e97 100644 --- a/src/backend/gporca/libnaucrates/include/naucrates/dxl/parser/CParseHandlerPhysicalDML.h +++ b/src/backend/gporca/libnaucrates/include/naucrates/dxl/parser/CParseHandlerPhysicalDML.h @@ -59,6 +59,9 @@ class CParseHandlerPhysicalDML : public CParseHandlerPhysicalOp // private copy ctor CParseHandlerPhysicalDML(const CParseHandlerPhysicalDML &); + // Split Update + BOOL m_fSplit; + // process the start of an element void StartElement( const XMLCh *const element_uri, // URI of element's namespace diff --git a/src/backend/gporca/libnaucrates/include/naucrates/dxl/xml/dxltokens.h b/src/backend/gporca/libnaucrates/include/naucrates/dxl/xml/dxltokens.h index 57595437b668..23322b051139 100644 --- a/src/backend/gporca/libnaucrates/include/naucrates/dxl/xml/dxltokens.h +++ b/src/backend/gporca/libnaucrates/include/naucrates/dxl/xml/dxltokens.h @@ -367,6 +367,8 @@ enum Edxltoken EdxltokenGpSegmentIdColId, EdxltokenTupleOidColId, EdxltokenUpdatePreservesOids, + EdxltokenInputSorted, + EdxltokenSplitUpdate, 
EdxltokenInputSegments, EdxltokenOutputSegments, diff --git a/src/backend/gporca/libnaucrates/src/operators/CDXLPhysicalDML.cpp b/src/backend/gporca/libnaucrates/src/operators/CDXLPhysicalDML.cpp index dee91cc08e30..67f7d6f08403 100644 --- a/src/backend/gporca/libnaucrates/src/operators/CDXLPhysicalDML.cpp +++ b/src/backend/gporca/libnaucrates/src/operators/CDXLPhysicalDML.cpp @@ -29,10 +29,11 @@ using namespace gpdxl; // //--------------------------------------------------------------------------- CDXLPhysicalDML::CDXLPhysicalDML( - CMemoryPool *mp, const EdxlDmlType dxl_dml_type, - CDXLTableDescr *table_descr, ULongPtrArray *src_colids_array, - ULONG action_colid, ULONG ctid_colid, ULONG segid_colid, BOOL preserve_oids, - ULONG tuple_oid, CDXLDirectDispatchInfo *dxl_direct_dispatch_info) + CMemoryPool *mp, const EdxlDmlType dxl_dml_type, + CDXLTableDescr *table_descr, ULongPtrArray *src_colids_array, + ULONG action_colid, ULONG ctid_colid, ULONG segid_colid, BOOL preserve_oids, + ULONG tuple_oid, CDXLDirectDispatchInfo *dxl_direct_dispatch_info, + BOOL fSplit) : CDXLPhysical(mp), m_dxl_dml_type(dxl_dml_type), m_dxl_table_descr(table_descr), @@ -42,7 +43,8 @@ CDXLPhysicalDML::CDXLPhysicalDML( m_segid_colid(segid_colid), m_preserve_oids(preserve_oids), m_tuple_oid(tuple_oid), - m_direct_dispatch_info(dxl_direct_dispatch_info) + m_direct_dispatch_info(dxl_direct_dispatch_info), + m_fSplit(fSplit) { GPOS_ASSERT(EdxldmlSentinel > dxl_dml_type); GPOS_ASSERT(NULL != table_descr); @@ -131,13 +133,19 @@ CDXLPhysicalDML::SerializeToDXL(CXMLSerializer *xml_serializer, CDXLTokens::GetDXLTokenStr(EdxltokenGpSegmentIdColId), m_segid_colid); if (Edxldmlupdate == m_dxl_dml_type) + { + xml_serializer->AddAttribute( + CDXLTokens::GetDXLTokenStr(EdxltokenSplitUpdate), m_fSplit); + } + + if (Edxldmlupdate == m_dxl_dml_type && !m_fSplit) { xml_serializer->AddAttribute( CDXLTokens::GetDXLTokenStr(EdxltokenUpdatePreservesOids), m_preserve_oids); } - if (m_preserve_oids) + if 
(m_preserve_oids && !m_fSplit) { xml_serializer->AddAttribute( CDXLTokens::GetDXLTokenStr(EdxltokenTupleOidColId), m_tuple_oid); diff --git a/src/backend/gporca/libnaucrates/src/parser/CParseHandlerPhysicalDML.cpp b/src/backend/gporca/libnaucrates/src/parser/CParseHandlerPhysicalDML.cpp index 8b0e90c3b202..524af9f7e24e 100644 --- a/src/backend/gporca/libnaucrates/src/parser/CParseHandlerPhysicalDML.cpp +++ b/src/backend/gporca/libnaucrates/src/parser/CParseHandlerPhysicalDML.cpp @@ -43,7 +43,8 @@ CParseHandlerPhysicalDML::CParseHandlerPhysicalDML( m_ctid_colid(0), m_segid_colid(0), m_preserve_oids(false), - m_tuple_oid_colid(0) + m_tuple_oid_colid(0), + m_fSplit(true) { } @@ -125,6 +126,15 @@ CParseHandlerPhysicalDML::StartElement(const XMLCh *const, // element_uri, EdxltokenTupleOidColId, EdxltokenPhysicalDMLUpdate); } + const XMLCh *fSplit = + attrs.getValue(CDXLTokens::XmlstrToken(EdxltokenSplitUpdate)); + if (NULL != fSplit) + { + m_fSplit = CDXLOperatorFactory::ConvertAttrValueToBool( + m_parse_handler_mgr->GetDXLMemoryManager(), fSplit, + EdxltokenSplitUpdate, EdxltokenPhysicalDMLUpdate); + } + // parse handler for physical operator CParseHandlerBase *child_parse_handler = CParseHandlerFactory::GetParseHandler( @@ -223,9 +233,9 @@ CParseHandlerPhysicalDML::EndElement(const XMLCh *const, // element_uri, direct_dispatch_parse_handler->GetDXLDirectDispatchInfo(); dxl_direct_dispatch_info->AddRef(); CDXLPhysicalDML *dxl_op = GPOS_NEW(m_mp) CDXLPhysicalDML( - m_mp, m_dxl_dml_type, table_descr, m_src_colids_array, m_action_colid, - m_ctid_colid, m_segid_colid, m_preserve_oids, m_tuple_oid_colid, - dxl_direct_dispatch_info); + m_mp, m_dxl_dml_type, table_descr, m_src_colids_array, m_action_colid, + m_ctid_colid, m_segid_colid, m_preserve_oids, m_tuple_oid_colid, + dxl_direct_dispatch_info, m_fSplit); m_dxl_node = GPOS_NEW(m_mp) CDXLNode(m_mp, dxl_op); // set statistics and physical properties diff --git
a/src/backend/gporca/libnaucrates/src/xml/dxltokens.cpp b/src/backend/gporca/libnaucrates/src/xml/dxltokens.cpp index e2bf166efd47..0d04ac7c2a64 100644 --- a/src/backend/gporca/libnaucrates/src/xml/dxltokens.cpp +++ b/src/backend/gporca/libnaucrates/src/xml/dxltokens.cpp @@ -416,6 +416,8 @@ CDXLTokens::Init(CMemoryPool *mp) {EdxltokenGpSegmentIdColId, GPOS_WSZ_LIT("SegmentIdCol")}, {EdxltokenTupleOidColId, GPOS_WSZ_LIT("TupleOidCol")}, {EdxltokenUpdatePreservesOids, GPOS_WSZ_LIT("PreserveOids")}, + {EdxltokenInputSorted, GPOS_WSZ_LIT("InputSorted")}, + {EdxltokenSplitUpdate, GPOS_WSZ_LIT("IsSplitUpdate")}, {EdxltokenInputSegments, GPOS_WSZ_LIT("InputSegments")}, {EdxltokenOutputSegments, GPOS_WSZ_LIT("OutputSegments")}, diff --git a/src/backend/gporca/server/src/unittest/CTestUtils.cpp b/src/backend/gporca/server/src/unittest/CTestUtils.cpp index c76a35ce4194..e66a83dd8cd4 100644 --- a/src/backend/gporca/server/src/unittest/CTestUtils.cpp +++ b/src/backend/gporca/server/src/unittest/CTestUtils.cpp @@ -1813,7 +1813,7 @@ CTestUtils::PexprLogicalUpdate(CMemoryPool *mp) return GPOS_NEW(mp) CExpression( mp, GPOS_NEW(mp) CLogicalUpdate(mp, ptabdesc, pdrgpcrDelete, pdrgpcrInsert, - colref, colref, NULL /*pcrTupleOid*/), + colref, colref, NULL /*pcrTupleOid*/, true), pexprGet); } diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 2ff11d3f587b..b882e9b73688 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -2831,6 +2831,7 @@ typedef struct DMLState JunkFilter *junkfilter; /* filter that removes junk and dropped attributes */ TupleTableSlot *cleanedUpSlot; /* holds 'final' tuple which matches the target relation schema */ AttrNumber segid_attno; /* attribute number of "gp_segment_id" */ + EPQState mt_epqstate; /* for evaluating EvalPlanQual rechecks */ } DMLState; /* diff --git a/src/test/isolation/expected/create_index_hot.out b/src/test/isolation/expected/create_index_hot.out index fb37c9a989d7..519318e1d9f0 
100644 --- a/src/test/isolation/expected/create_index_hot.out +++ b/src/test/isolation/expected/create_index_hot.out @@ -1,6 +1,6 @@ Parsed test spec with 2 sessions -starting permutation: s2begin s2select s1optimizeroff s1update s1createindexonc s2select s2forceindexscan s2select +starting permutation: s2begin s2select s1update s1createindexonc s2select s2forceindexscan s2select step s2begin: BEGIN ISOLATION LEVEL SERIALIZABLE; step s2select: select '#' as expected, c from hot where c = '#' union all @@ -8,7 +8,6 @@ step s2select: select '#' as expected, c from hot where c = '#' expected c # # -step s1optimizeroff: set optimizer = off; step s1update: update hot set c = '$' where c = '#'; step s1createindexonc: create index idx_c on hot (c); step s2select: select '#' as expected, c from hot where c = '#' diff --git a/src/test/isolation/specs/create_index_hot.spec b/src/test/isolation/specs/create_index_hot.spec index fe224c4917d1..bb80d8e3cdec 100644 --- a/src/test/isolation/specs/create_index_hot.spec +++ b/src/test/isolation/specs/create_index_hot.spec @@ -23,9 +23,7 @@ teardown # Update a row, and create an index on the updated column. This produces # a broken HOT chain. -#FIXME do not turn off the optimizer when ORCA stops always using Split Update. 
session "s1" -step "s1optimizeroff" { set optimizer = off; } step "s1update" { update hot set c = '$' where c = '#'; } step "s1createindexonc" { create index idx_c on hot (c); } @@ -41,7 +39,6 @@ permutation "s2begin" "s2select" - "s1optimizeroff" "s1update" "s1createindexonc" diff --git a/src/test/isolation2/expected/modify_table_data_corrupt_optimizer.out b/src/test/isolation2/expected/modify_table_data_corrupt_optimizer.out index b313e608b524..b07e3a4f0cff 100644 --- a/src/test/isolation2/expected/modify_table_data_corrupt_optimizer.out +++ b/src/test/isolation2/expected/modify_table_data_corrupt_optimizer.out @@ -107,25 +107,24 @@ ABORT -- TODO: this case is for planner, it will not error out on 6X now, -- because 6x does not remove explicit motion yet. explain (costs off) update tab1 set a = 999 from tab2, tab3 where tab1.a = tab2.a and tab1.b = tab3.b; - QUERY PLAN ---------------------------------------------------------------------------------- - Update - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: tab1.b - -> Split - -> Result - -> Hash Join - Hash Cond: (tab2.a = tab1.a) - -> Seq Scan on tab2 - -> Hash - -> Broadcast Motion 3:3 (slice1; segments: 3) - -> Hash Join - Hash Cond: (tab3.b = tab1.b) - -> Seq Scan on tab3 - -> Hash - -> Seq Scan on tab1 - Optimizer: Pivotal Optimizer (GPORCA) -(16 rows) + QUERY PLAN +--------------------------------------------------------------------------- + Update + -> Result + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: tab1.b + -> Hash Join + Hash Cond: (tab2.a = tab1.a) + -> Seq Scan on tab2 + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Hash Join + Hash Cond: (tab3.b = tab1.b) + -> Seq Scan on tab3 + -> Hash + -> Seq Scan on tab1 + Optimizer: Pivotal Optimizer (GPORCA) +(15 rows) begin; BEGIN update tab1 set a = 999 from tab2, tab3 where tab1.a = tab2.a and tab1.b = tab3.b; @@ -163,30 +162,29 @@ ABORT -- For orca, this will error out explain (costs off) update tab1 set a 
= 999 from tab2, tab3 where tab1.a = tab2.a and tab1.b = tab3.a; - QUERY PLAN ---------------------------------------------------------------------------------------------------- - Update - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: tab1.b - -> Split - -> Result - -> Hash Join - Hash Cond: (tab3.a = tab1.b) - -> Seq Scan on tab3 - -> Hash - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Hash Join - Hash Cond: (tab2.a = tab1.a) - -> Seq Scan on tab2 - -> Hash - -> Broadcast Motion 3:3 (slice1; segments: 3) - -> Seq Scan on tab1 - Optimizer: Pivotal Optimizer (GPORCA) -(17 rows) + QUERY PLAN +--------------------------------------------------------------------------------------------- + Update + -> Result + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: tab1.b + -> Hash Join + Hash Cond: (tab3.a = tab1.b) + -> Seq Scan on tab3 + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Hash Join + Hash Cond: (tab2.a = tab1.a) + -> Seq Scan on tab2 + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on tab1 + Optimizer: Pivotal Optimizer (GPORCA) +(16 rows) begin; BEGIN update tab1 set a = 999 from tab2, tab3 where tab1.a = tab2.a and tab1.b = tab3.a; -ERROR: distribution key of the tuple (0, 1) doesn't belong to current segment (actually from seg0) (nodeModifyTable.c:602) (seg1 127.0.1.1:6003 pid=78344) (nodeModifyTable.c:602) +UPDATE 1 abort; ABORT diff --git a/src/test/regress/expected/DML_over_joins_optimizer.out b/src/test/regress/expected/DML_over_joins_optimizer.out index a405ae22a33b..80b2579c273a 100644 --- a/src/test/regress/expected/DML_over_joins_optimizer.out +++ b/src/test/regress/expected/DML_over_joins_optimizer.out @@ -24,7 +24,6 @@ insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 1 from s where r.a = s.a; update r set b = r.b + 1 from s where r.a in (select a from 
s); -ERROR: multiple updates to a row by the same query is not allowed (seg0 rhel62-vm1:25432 pid=32303) delete from r using s where r.a = s.a; delete from r; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; @@ -56,22 +55,21 @@ delete from s; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; explain update s set b = b + 1 where exists (select 1 from r where s.a = r.b); - QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------- - Update (cost=0.00..868.53 rows=34 width=1) - -> Split (cost=0.00..862.80 rows=67 width=22) - -> Result (cost=0.00..862.80 rows=34 width=22) - -> Hash Semi Join (cost=0.00..862.80 rows=34 width=18) - Hash Cond: (s.a = r.b) - -> Seq Scan on s (cost=0.00..431.00 rows=34 width=18) - Filter: (NOT (a IS NULL)) - -> Hash (cost=431.15..431.15 rows=3334 width=4) - -> Result (cost=0.00..431.15 rows=3334 width=4) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.14 rows=3334 width=4) - Hash Key: r.b - -> Seq Scan on r (cost=0.00..431.07 rows=3334 width=4) + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Update (cost=0.00..865.14 rows=34 width=1) + -> Result (cost=0.00..862.80 rows=34 width=18) + -> Hash Semi Join (cost=0.00..862.80 rows=34 width=18) + Hash Cond: (s.a = r.b) + -> Seq Scan on s (cost=0.00..431.00 rows=34 width=18) + Filter: (NOT (a IS NULL)) + -> Hash (cost=431.15..431.15 rows=3334 width=4) + -> Result (cost=0.00..431.15 rows=3334 width=4) + -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.14 rows=3334 width=4) + Hash Key: r.b + -> Seq Scan on r (cost=0.00..431.07 rows=3334 width=4) Optimizer: Pivotal Optimizer (GPORCA) -(13 rows) +(12 rows) update s set b = b + 1 where exists (select 1 from r where s.a = r.b); 
explain delete from s where exists (select 1 from r where s.a = r.b); @@ -110,7 +108,6 @@ insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 1 from s where r.a = s.a; update r set b = r.b + 1 from s where r.a in (select a from s); -ERROR: multiple updates to a row by the same query is not allowed (seg2 rhel62-vm1:25434 pid=32307) delete from r using s where r.a = s.a; delete from r; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; @@ -162,7 +159,6 @@ insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 1 from s where r.a = s.a; update r set b = r.b + 1 from s where r.a in (select a from s); -ERROR: multiple updates to a row by the same query is not allowed (seg0 rhel62-vm1:25432 pid=32303) delete from r using s where r.a = s.a; delete from r; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; @@ -214,7 +210,6 @@ insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 1 from s where r.a = s.a; update r set b = r.b + 1 from s where r.a in (select a from s); -ERROR: multiple updates to a row by the same query is not allowed (seg2 rhel62-vm1:25434 pid=32307) delete from r using s where r.a = s.a; delete from r; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; @@ -266,7 +261,6 @@ insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 1 from s where r.a = s.a; update r set b = r.b + 1 from s where r.a in (select a from s); -ERROR: multiple updates to a row by the same query is not allowed (seg0 
rhel62-vm1:25432 pid=32303) delete from r using s where r.a = s.a; delete from r; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; @@ -318,7 +312,6 @@ insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 1 from s where r.a = s.a; update r set b = r.b + 1 from s where r.a in (select a from s); -ERROR: multiple updates to a row by the same query is not allowed (seg0 rhel62-vm1:25432 pid=32303) delete from r using s where r.a = s.a; delete from r; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; @@ -1346,32 +1339,16 @@ select r.* from r,s,sales where s.a = sales.day and sales.month = r.b; (6 rows) update r set b = r.b + 1 from s,sales where s.a = sales.day and sales.month = r.b; -ERROR: multiple updates to a row by the same query is not allowed (seg1 rhel62-vm1:25433 pid=32305) select r.* from r,s,sales where s.a = sales.day and sales.month = r.b-1; a | b ---+---- - 1 | 3 - 1 | 3 - 1 | 3 - 1 | 3 - 2 | 6 - 2 | 6 - 2 | 6 - 2 | 6 - 2 | 6 - 2 | 6 - 2 | 6 - 3 | 9 - 3 | 9 - 3 | 9 - 3 | 9 - 3 | 9 - 4 | 12 - 4 | 12 - 4 | 12 - 4 | 12 - 4 | 12 -(21 rows) + 2 | 7 + 3 | 10 + 3 | 10 + 4 | 13 + 1 | 4 + 1 | 4 +(6 rows) -- ---------------------------------------------------------------------- -- Test: query02.sql @@ -1662,7 +1639,7 @@ select distinct sales_par.* from sales_par,s where sales_par.id in (s.b, s.b+1) (4 rows) update sales_par set month = month+1 from s where sales_par.id in (s.b, s.b+1) and region = 'europe'; -ERROR: multiple updates to a row by the same query is not allowed (seg0 rhel62-vm1:25432 pid=32303) +ERROR: multiple updates to a row by the same query is not allowed select distinct sales_par.* from sales_par,s where sales_par.id in (s.b, s.b+1) and region='europe'; id | year | month | day | region ----+------+-------+-----+-------- @@ -1762,7 +1739,7 @@ select distinct sales_par.* 
from sales_par,s where sales_par.id in (s.b, s.b+1) PREPARE plan3 AS update sales_par set month = month+1 from s where sales_par.id in (s.b, s.b+1) and region = 'europe'; EXECUTE plan3; -ERROR: multiple updates to a row by the same query is not allowed (seg0 rhel62-vm1:25432 pid=32303) +ERROR: multiple updates to a row by the same query is not allowed select distinct sales_par.* from sales_par,s where sales_par.id in (s.b, s.b+1) and region='europe'; id | year | month | day | region ----+------+-------+-----+-------- diff --git a/src/test/regress/expected/bfv_dml_optimizer.out b/src/test/regress/expected/bfv_dml_optimizer.out index 5b53b1bce19f..7fe54476731e 100644 --- a/src/test/regress/expected/bfv_dml_optimizer.out +++ b/src/test/regress/expected/bfv_dml_optimizer.out @@ -183,14 +183,13 @@ drop table m; create table update_pk_test (a int primary key, b int) distributed by (a); insert into update_pk_test values(1,1); explain update update_pk_test set b = 5; - QUERY PLAN ------------------------------------------------------------------------------------ - Update (cost=0.00..431.06 rows=1 width=1) - -> Split (cost=0.00..431.00 rows=1 width=22) - -> Result (cost=0.00..431.00 rows=1 width=22) - -> Seq Scan on update_pk_test (cost=0.00..431.00 rows=1 width=18) + QUERY PLAN +----------------------------------------------------------------------------- + Update (cost=0.00..431.02 rows=1 width=1) + -> Result (cost=0.00..431.00 rows=1 width=18) + -> Seq Scan on update_pk_test (cost=0.00..431.00 rows=1 width=14) Optimizer: Pivotal Optimizer (GPORCA) -(5 rows) +(4 rows) update update_pk_test set b = 5; select * from update_pk_test order by 1,2; diff --git a/src/test/regress/expected/bfv_legacy_optimizer.out b/src/test/regress/expected/bfv_legacy_optimizer.out index 20024808293b..a7129568343b 100644 --- a/src/test/regress/expected/bfv_legacy_optimizer.out +++ b/src/test/regress/expected/bfv_legacy_optimizer.out @@ -86,10 +86,8 @@ HINT: The 'DISTRIBUTED BY' clause determines 
the distribution of data. Make sur insert into bfv_int4_tbl values(123456), (-2147483647), (0), (-123456), (2147483647); update bfv_s set c_v = 11 from bfv_int4_tbl a join bfv_int4_tbl b on (a.f1 = (select f1 from bfv_int4_tbl c where c.f1=b.f1)); -ERROR: multiple updates to a row by the same query is not allowed update bfv_s set c_v = 11 from bfv_int4_tbl a join bfv_int4_tbl b on (a.f1 = (select f1 from bfv_int4_tbl c where c.f1=b.f1)); -ERROR: multiple updates to a row by the same query is not allowed -- -- -- diff --git a/src/test/regress/expected/gp_unique_rowid_optimizer.out b/src/test/regress/expected/gp_unique_rowid_optimizer.out index 2babb807013f..6199f5ccf457 100644 --- a/src/test/regress/expected/gp_unique_rowid_optimizer.out +++ b/src/test/regress/expected/gp_unique_rowid_optimizer.out @@ -120,29 +120,28 @@ where e.x in select b from t2_12512 ) ; - QUERY PLAN ------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------ Update - -> Split - -> Result - -> Hash Semi Join - Hash Cond: ((sum(t1_12512.a)) = (t2_12512.b)::bigint) - -> Nested Loop - Join Filter: true - -> Seq Scan on t_12512 - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Result - -> HashAggregate - Group Key: t1_12512.b - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: t1_12512.b - -> Seq Scan on t1_12512 - -> Hash - -> Broadcast Motion 3:3 (slice3; segments: 3) - -> Seq Scan on t2_12512 + -> Result + -> Hash Semi Join + Hash Cond: ((sum(t1_12512.a)) = (t2_12512.b)::bigint) + -> Nested Loop + Join Filter: true + -> Seq Scan on t_12512 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Result + -> HashAggregate + Group Key: t1_12512.b + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: t1_12512.b + -> Seq Scan on t1_12512 + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) 
+ -> Seq Scan on t2_12512 Optimizer: Pivotal Optimizer (GPORCA) -(20 rows) +(19 rows) update t_12512 set b = 1 from @@ -155,7 +154,6 @@ where e.x in select b from t2_12512 ) ; -ERROR: multiple updates to a row by the same query is not allowed (seg0 127.0.1.1:6002 pid=31136) -- test fake ctid for functions explain (costs off) update t_12512 set b = 1 @@ -168,24 +166,23 @@ where e.x in select b from t2_12512 ) ; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------- Update - -> Split - -> Result - -> Hash Semi Join - Hash Cond: ("outer".x = t2_12512.b) - -> Nested Loop - Join Filter: true - -> Seq Scan on t_12512 - -> Materialize + -> Result + -> Hash Semi Join + Hash Cond: ("outer".x = t2_12512.b) + -> Nested Loop + Join Filter: true + -> Seq Scan on t_12512 + -> Materialize + -> Result -> Result - -> Result - -> Hash - -> Broadcast Motion 3:3 (slice1; segments: 3) - -> Seq Scan on t2_12512 + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on t2_12512 Optimizer: Pivotal Optimizer (GPORCA) -(15 rows) +(14 rows) update t_12512 set b = 1 from @@ -209,27 +206,21 @@ where e.x in select b from t2_12512 ) ; - QUERY PLAN ------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------- Update - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: t_12512.a - -> Split - -> Result - -> Hash Semi Join - Hash Cond: ("Values".column1 = t2_12512.b) - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: "Values".column1 - -> Nested Loop - Join Filter: true - -> Seq Scan on t_12512 - -> Values Scan on "Values" - -> Hash - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: t2_12512.b - -> Seq Scan on t2_12512 + -> Result + -> Hash Semi Join + Hash Cond: ("Values".column1 = t2_12512.b) + -> Nested Loop + Join Filter: 
true + -> Seq Scan on t_12512 + -> Values Scan on "Values" + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on t2_12512 Optimizer: Pivotal Optimizer (GPORCA) -(18 rows) +(12 rows) update t_12512 set b = 1 from @@ -241,7 +232,6 @@ where e.x in select b from t2_12512 ) ; -ERROR: multiple updates to a row by the same query is not allowed (seg0 127.0.1.1:6002 pid=31136) -- test fake ctid for external scan CREATE OR REPLACE FUNCTION write_to_file_12512() RETURNS integer AS '$libdir/gpextprotocol.so', 'demoprot_export' LANGUAGE C STABLE; @@ -269,26 +259,25 @@ where e.x in select b from t2_12512 ) ; - QUERY PLAN ---------------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------- Update - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: t_12512.a - -> Split - -> Result - -> Nested Loop - Join Filter: true - -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Seq Scan on t_12512 - -> Materialize - -> Hash Semi Join - Hash Cond: (ext_r_12512.a = t2_12512.b) - -> External Scan on ext_r_12512 - -> Hash - -> Broadcast Motion 3:3 (slice1; segments: 3) - -> Seq Scan on t2_12512 + -> Result + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: t_12512.a + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on t_12512 + -> Materialize + -> Hash Semi Join + Hash Cond: (ext_r_12512.a = t2_12512.b) + -> External Scan on ext_r_12512 + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on t2_12512 Optimizer: Pivotal Optimizer (GPORCA) -(17 rows) +(16 rows) update t_12512 set b = 1 from @@ -300,7 +289,6 @@ where e.x in select b from t2_12512 ) ; -ERROR: multiple updates to a row by the same query is not allowed (seg0 127.0.1.1:6002 pid=31136) -- reset fault injector select gp_inject_fault('low_unique_rowid_path_cost', 'reset', dbid) from 
gp_segment_configuration where role = 'p' and content = -1; gp_inject_fault diff --git a/src/test/regress/expected/qp_dml_joins.out b/src/test/regress/expected/qp_dml_joins.out index ee771430291c..2309bf400bf6 100644 --- a/src/test/regress/expected/qp_dml_joins.out +++ b/src/test/regress/expected/qp_dml_joins.out @@ -4050,8 +4050,8 @@ SELECT SUM(b) FROM dml_heap_r; (1 row) --Negative test - Update WHERE join returns more than one tuple with different values. -CREATE TABLE dml_heap_u as SELECT i as a, 1 as b FROM generate_series(1,10)i; -CREATE TABLE dml_heap_v as SELECT i as a ,i as b FROM generate_series(1,10)i; +CREATE TABLE dml_heap_u as SELECT i as a, 1 as b FROM generate_series(1,10)i distributed by (a); +CREATE TABLE dml_heap_v as SELECT i as a ,i as b FROM generate_series(1,10)i distributed by (a); SELECT SUM(a) FROM dml_heap_v; sum ----- diff --git a/src/test/regress/expected/qp_dml_joins_optimizer.out b/src/test/regress/expected/qp_dml_joins_optimizer.out index 221aac698eb4..6c4821eed73a 100644 --- a/src/test/regress/expected/qp_dml_joins_optimizer.out +++ b/src/test/regress/expected/qp_dml_joins_optimizer.out @@ -4068,8 +4068,8 @@ SELECT SUM(b) FROM dml_heap_r; (1 row) --Negative test - Update WHERE join returns more than one tuple with different values. 
-CREATE TABLE dml_heap_u as SELECT i as a, 1 as b FROM generate_series(1,10)i; -CREATE TABLE dml_heap_v as SELECT i as a ,i as b FROM generate_series(1,10)i; +CREATE TABLE dml_heap_u as SELECT i as a, 1 as b FROM generate_series(1,10)i distributed by (a); +CREATE TABLE dml_heap_v as SELECT i as a ,i as b FROM generate_series(1,10)i distributed by (a); SELECT SUM(a) FROM dml_heap_v; sum ----- diff --git a/src/test/regress/expected/qp_orca_fallback_optimizer.out b/src/test/regress/expected/qp_orca_fallback_optimizer.out index 2bee48b0aecd..1f5ac634ae52 100644 --- a/src/test/regress/expected/qp_orca_fallback_optimizer.out +++ b/src/test/regress/expected/qp_orca_fallback_optimizer.out @@ -57,14 +57,13 @@ DETAIL: Feature not supported: UPDATE with constraints set optimizer_enable_dml_constraints=on; explain update constr_tab set b = 10; - QUERY PLAN -------------------------------------------------------------------------------- - Update (cost=0.00..431.08 rows=1 width=1) - -> Split (cost=0.00..431.00 rows=1 width=30) - -> Result (cost=0.00..431.00 rows=1 width=30) - -> Seq Scan on constr_tab (cost=0.00..431.00 rows=1 width=26) + QUERY PLAN +------------------------------------------------------------------------- + Update (cost=0.00..431.03 rows=1 width=1) + -> Result (cost=0.00..431.00 rows=1 width=26) + -> Seq Scan on constr_tab (cost=0.00..431.00 rows=1 width=22) Optimizer: Pivotal Optimizer (GPORCA) -(5 rows) +(4 rows) -- Same, with NOT NULL constraint. 
DROP TABLE IF EXISTS constr_tab; diff --git a/src/test/regress/expected/updatable_views_optimizer.out b/src/test/regress/expected/updatable_views_optimizer.out index 8e10f46593d8..089d0b842689 100644 --- a/src/test/regress/expected/updatable_views_optimizer.out +++ b/src/test/regress/expected/updatable_views_optimizer.out @@ -642,21 +642,20 @@ SELECT * FROM rw_view2; (2 rows) EXPLAIN (costs off) UPDATE rw_view2 SET a=3 WHERE a=2; - QUERY PLAN -------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------- Update - -> Split - -> Hash Join - Hash Cond: (base_tbl.a = base_tbl_1.a) - -> Index Scan using base_tbl_pkey on base_tbl - Index Cond: (a = 2) - -> Hash - -> Result - Filter: ((base_tbl_1.a = 2) AND (base_tbl_1.a < 10)) - -> Index Scan using base_tbl_pkey on base_tbl base_tbl_1 - Index Cond: (a > 0) + -> Hash Join + Hash Cond: (base_tbl.a = base_tbl_1.a) + -> Index Scan using base_tbl_pkey on base_tbl + Index Cond: (a = 2) + -> Hash + -> Result + Filter: ((base_tbl_1.a = 2) AND (base_tbl_1.a < 10)) + -> Index Scan using base_tbl_pkey on base_tbl base_tbl_1 + Index Cond: (a > 0) Optimizer: Pivotal Optimizer (GPORCA) -(12 rows) +(11 rows) EXPLAIN (costs off) DELETE FROM rw_view2 WHERE a=2; QUERY PLAN @@ -2071,22 +2070,21 @@ EXPLAIN (costs off) INSERT INTO rw_view1 VALUES (2, 'New row 2'); Optimizer: Pivotal Optimizer (GPORCA) version 3.1.0 Update - -> Split - -> Result - -> Nested Loop Semi Join - Join Filter: true - -> Index Scan using base_tbl_pkey on base_tbl base_tbl_1 - Index Cond: (id = 2) - -> Materialize - -> Broadcast Motion 1:3 (slice2; segments: 1) + -> Result + -> Nested Loop Semi Join + Join Filter: true + -> Index Scan using base_tbl_pkey on base_tbl base_tbl_1 + Index Cond: (id = 2) + -> Materialize + -> Broadcast Motion 1:3 (slice2; segments: 1) + -> Result -> Result - -> Result - -> Limit - -> Gather Motion 3:1 (slice1; segments: 3) 
- -> Index Scan using base_tbl_pkey on base_tbl - Index Cond: (id = 2) + -> Limit + -> Gather Motion 3:1 (slice1; segments: 3) + -> Index Scan using base_tbl_pkey on base_tbl + Index Cond: (id = 2) Optimizer: Pivotal Optimizer (GPORCA) -(37 rows) +(36 rows) INSERT INTO rw_view1 VALUES (2, 'New row 2'); SELECT * FROM base_tbl; diff --git a/src/test/regress/sql/qp_dml_joins.sql b/src/test/regress/sql/qp_dml_joins.sql index 5550cab5b87b..01614f3a3f38 100644 --- a/src/test/regress/sql/qp_dml_joins.sql +++ b/src/test/regress/sql/qp_dml_joins.sql @@ -1735,8 +1735,8 @@ UPDATE dml_heap_r SET b = MAX(dml_heap_s.b) FROM dml_heap_s WHERE dml_heap_r.b = SELECT SUM(b) FROM dml_heap_r; --Negative test - Update WHERE join returns more than one tuple with different values. -CREATE TABLE dml_heap_u as SELECT i as a, 1 as b FROM generate_series(1,10)i; -CREATE TABLE dml_heap_v as SELECT i as a ,i as b FROM generate_series(1,10)i; +CREATE TABLE dml_heap_u as SELECT i as a, 1 as b FROM generate_series(1,10)i distributed by (a); +CREATE TABLE dml_heap_v as SELECT i as a ,i as b FROM generate_series(1,10)i distributed by (a); SELECT SUM(a) FROM dml_heap_v; UPDATE dml_heap_v SET a = dml_heap_u.a FROM dml_heap_u WHERE dml_heap_u.b = dml_heap_v.b; SELECT SUM(a) FROM dml_heap_v;