diff --git a/.abi-check/6.26.0/postgres.symbols.ignore b/.abi-check/6.26.0/postgres.symbols.ignore new file mode 100644 index 000000000000..aa4c33e2be25 --- /dev/null +++ b/.abi-check/6.26.0/postgres.symbols.ignore @@ -0,0 +1 @@ +ConfigureNamesBool_gp diff --git a/.github/workflows/greenplum-abi-tests.yml b/.github/workflows/greenplum-abi-tests.yml index 7bde532b21a5..e4561055497e 100644 --- a/.github/workflows/greenplum-abi-tests.yml +++ b/.github/workflows/greenplum-abi-tests.yml @@ -28,6 +28,7 @@ jobs: BASELINE_VERSION: ${{ steps.vars.outputs.BASELINE_VERSION }} ABI_LIBS: ${{ steps.vars.outputs.ABI_LIBS }} ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }} + EXCEPTION_LISTS_COUNT: ${{ steps.check_exception_lists.outputs.EXCEPTION_LISTS_COUNT }} steps: - name: Fetch source uses: actions/checkout@v3 @@ -35,7 +36,7 @@ jobs: - name: Get Greenplum version variables id: vars run: | - remote_repo='https://github.com/greenplum-db/gpdb.git' + remote_repo='https://github.com/arenadata/gpdb.git' git ls-remote --tags --refs --sort='v:refname' $remote_repo '6.*' | tail -n 1 > baseline_version_ref baseline_ref=$(cat baseline_version_ref | awk '{print $1}') baseline_version=$(cat baseline_version_ref | awk '{print $2}') @@ -44,7 +45,14 @@ jobs: echo "ABI_LIBS=postgres" | tee -a $GITHUB_OUTPUT echo "ABI_HEADERS=." | tee -a $GITHUB_OUTPUT + - name: Check if exception list exists + id: check_exception_lists + run: | + exception_lists_count=$(ls .abi-check/${{ steps.vars.outputs.BASELINE_VERSION }}/ 2> /dev/null | wc -l) + echo "EXCEPTION_LISTS_COUNT=${exception_lists_count}" | tee -a $GITHUB_OUTPUT + - name: Upload symbol/type checking exception list + if: steps.check_exception_lists.outputs.EXCEPTION_LISTS_COUNT != '0' uses: actions/upload-artifact@v3 with: name: exception_lists @@ -53,7 +61,7 @@ jobs: abi-dump: needs: abi-dump-setup runs-on: ubuntu-latest - container: gcr.io/data-gpdb-public-images/gpdb6-rocky8-build + container: gcr.io/data-gpdb-public-images/gpdb6-centos7-build strategy: matrix: name: @@ -61,7 +69,7 @@ jobs: - build-latest include: - name: build-baseline - repo: greenplum-db/gpdb + repo: arenadata/gpdb ref: ${{ needs.abi-dump-setup.outputs.BASELINE_VERSION }} - name: build-latest repo: ${{ github.repository }} @@ -76,6 +84,8 @@ jobs: tar -xf uctags-2023.07.05-linux-x86_64.tar.xz cp uctags-2023.07.05-linux-x86_64/bin/* /usr/bin/ which ctags + yum install -y https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm + yum install -y git - name: Download Greenplum source code uses: actions/checkout@v3 @@ -90,6 +100,7 @@ jobs: run: | yum install -y epel-release yum install -y abi-dumper + yum install -y libzstd-static - name: Build Greenplum run: | @@ -122,7 +133,7 @@ jobs: - abi-dump-setup - abi-dump runs-on: ubuntu-latest - container: gcr.io/data-gpdb-public-images/gpdb6-rocky8-build + container: gcr.io/data-gpdb-public-images/gpdb6-centos7-build steps: - name: Download baseline uses: actions/download-artifact@v3 @@ -136,6 +147,7 @@ jobs: path: build-latest/ - name: Download exception lists + if: needs.abi-dump-setup.outputs.EXCEPTION_LISTS_COUNT != '0' uses: actions/download-artifact@v3 with: name: exception_lists @@ -145,7 +157,7 @@ jobs: run: | yum install -y epel-release yum install -y abi-compliance-checker - yum install -y --enablerepo=powertools lynx + yum install -y lynx - name: Compare ABI run: | diff --git a/gpAux/gpperfmon/src/gpmon/gpmon_agg.c b/gpAux/gpperfmon/src/gpmon/gpmon_agg.c index 914707aaed49..c243e4cd004b 100644 --- 
a/gpAux/gpperfmon/src/gpmon/gpmon_agg.c +++ b/gpAux/gpperfmon/src/gpmon/gpmon_agg.c @@ -81,8 +81,6 @@ extern mmon_options_t opt; extern apr_queue_t* message_queue; extern void incremement_tail_bytes(apr_uint64_t bytes); -static bool is_query_not_active(apr_int32_t tmid, apr_int32_t ssid, - apr_int32_t ccnt, apr_hash_t *hash, apr_pool_t *pool); /** * Disk space check helper function @@ -167,43 +165,6 @@ static apr_status_t check_disk_space(mmon_fsinfo_t* rec) return 0; } -static bool is_query_not_active(apr_int32_t tmid, apr_int32_t ssid, apr_int32_t ccnt, apr_hash_t *hash, apr_pool_t *pool) -{ - // get active query of session - char *key = apr_psprintf(pool, "%d", ssid); - char *active_query = apr_hash_get(hash, key, APR_HASH_KEY_STRING); - if (active_query == NULL) - { - TR0(("Found orphan query, tmid:%d, ssid:%d, ccnt:%d\n", tmid, ssid, ccnt)); - return true; - } - - // read query text from q file - char *query = get_query_text(tmid, ssid, ccnt, pool); - if (query == NULL) - { - TR0(("Found error while reading query text in file '%sq%d-%d-%d.txt'\n", GPMON_DIR, tmid, ssid, ccnt)); - return true; - } - // if the current active query of session (ssid) is not the same - // as the one we are checking, we assume q(tmid)-(ssid)-(ccnt).txt - // has wrong status. This is a bug in execMain.c, which too hard to - // fix it there. - int qlen = strlen(active_query); - if (qlen > MAX_QUERY_COMPARE_LENGTH) - { - qlen = MAX_QUERY_COMPARE_LENGTH; - } - int res = strncmp(query, active_query, qlen); - if (res != 0) - { - TR0(("Found orphan query, tmid:%d, ssid:%d, ccnt:%d\n", tmid, ssid, ccnt)); - return true; - } - - return false; -} - static apr_status_t agg_put_fsinfo(agg_t* agg, const gpmon_fsinfo_t* met) { mmon_fsinfo_t* rec; @@ -481,8 +442,8 @@ apr_status_t agg_dup(agg_t** retagg, agg_t* oldagg, apr_pool_t* parent_pool, apr return e; } - apr_hash_t *active_query_tab = get_active_queries(newagg->pool); - if (! 
active_query_tab) + apr_hash_t *active_session_set = get_active_sessions(newagg->pool); + if (!active_session_set) { agg_destroy(newagg); return APR_EINVAL; @@ -508,8 +469,8 @@ apr_status_t agg_dup(agg_t** retagg, agg_t* oldagg, apr_pool_t* parent_pool, apr if ( (status != GPMON_QLOG_STATUS_SUBMIT && status != GPMON_QLOG_STATUS_CANCELING && status != GPMON_QLOG_STATUS_START) - || ((age % 5 == 0) /* don't call is_query_not_active every time because it's expensive */ - && is_query_not_active(dp->qlog.key.tmid, dp->qlog.key.ssid, dp->qlog.key.ccnt, active_query_tab, newagg->pool))) + || apr_hash_get(active_session_set, &dp->qlog.key.ssid, + sizeof(dp->qlog.key.ssid)) == NULL) { if (0 != strcmp(dp->qlog.db, GPMON_DB)) { diff --git a/gpAux/gpperfmon/src/gpmon/gpmondb.c b/gpAux/gpperfmon/src/gpmon/gpmondb.c index 38bb02037569..1aba2420b907 100644 --- a/gpAux/gpperfmon/src/gpmon/gpmondb.c +++ b/gpAux/gpperfmon/src/gpmon/gpmondb.c @@ -1335,24 +1335,22 @@ static void convert_tuples_to_hash(PGresult *result, apr_hash_t *hash, apr_pool_ int i = 0; for (; i < rowcount; i++) { - char* sessid = PQgetvalue(result, i, 0); - char* query = PQgetvalue(result, i, 1); - - char *sessid_copy = apr_pstrdup(pool, sessid); - char *query_copy = apr_pstrdup(pool, query); - if (sessid_copy == NULL || query_copy == NULL) + apr_int32_t* ssid = apr_palloc(pool, sizeof(apr_int32_t)); + if (ssid == NULL) { gpmon_warning(FLINE, "Out of memory"); continue; } - apr_hash_set(hash, sessid_copy, APR_HASH_KEY_STRING, query_copy); + *ssid = atoi(PQgetvalue(result, i, 0)); + + apr_hash_set(hash, ssid, sizeof(apr_int32_t), ""); } } -apr_hash_t *get_active_queries(apr_pool_t *pool) +apr_hash_t *get_active_sessions(apr_pool_t *pool) { PGresult *result = NULL; - apr_hash_t *active_query_tab = NULL; + apr_hash_t *active_session_set = NULL; PGconn *conn = PQconnectdb(GPDB_CONNECTION_STRING); if (PQstatus(conn) != CONNECTION_OK) @@ -1366,7 +1364,7 @@ apr_hash_t *get_active_queries(apr_pool_t *pool) return NULL; } - const char *qry= "SELECT sess_id, query FROM pg_stat_activity;"; + const char *qry= "SELECT sess_id FROM pg_stat_activity;"; const char *errmsg = gpdb_exec_only(conn, &result, qry); if (errmsg) { @@ -1374,21 +1372,21 @@ apr_hash_t *get_active_queries(apr_pool_t *pool) } else { - active_query_tab = apr_hash_make(pool); - if (! 
active_query_tab) + active_session_set = apr_hash_make(pool); + if (!active_session_set) { gpmon_warning(FLINE, "Out of memory"); } else { - convert_tuples_to_hash(result, active_query_tab, pool); + convert_tuples_to_hash(result, active_session_set, pool); } } PQclear(result); PQfinish(conn); - return active_query_tab; + return active_session_set; } const char *iconv_encodings[] = { diff --git a/gpAux/gpperfmon/src/gpmon/gpmondb.h b/gpAux/gpperfmon/src/gpmon/gpmondb.h index 76eecd965ba8..7d8d6a433520 100644 --- a/gpAux/gpperfmon/src/gpmon/gpmondb.h +++ b/gpAux/gpperfmon/src/gpmon/gpmondb.h @@ -88,7 +88,7 @@ APR_DECLARE (apr_status_t) gpdb_harvest_one(const char* table); APR_DECLARE (apr_status_t) remove_segid_constraint(void); -APR_DECLARE (apr_hash_t *) get_active_queries(apr_pool_t* pool); +APR_DECLARE (apr_hash_t *) get_active_sessions(apr_pool_t* pool); APR_DECLARE (void) create_log_alert_table(void); diff --git a/gpMgmt/test/behave/mgmt_utils/gpperfmon.feature b/gpMgmt/test/behave/mgmt_utils/gpperfmon.feature index cca923a31742..363eeb324186 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpperfmon.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpperfmon.feature @@ -90,6 +90,17 @@ Feature: gpperfmon Then wait until the results from boolean sql "SELECT count(*) = 0 FROM queries_history WHERE query_text like '--alter distributed by%'" is "true" And wait until the results from boolean sql "SELECT count(*) = 1 FROM queries_history WHERE query_text like '--end flag%'" is "true" + @gpperfmon_query_history + Scenario: gpperfmon does not lose the query text if its text differs from the text in pg_stat_activity + Given gpperfmon is configured and running in qamode + When the user truncates "queries_history" tables in "gpperfmon" + When below sql is executed in "gptest" db + """ + SET log_min_messages = "debug4"; + DO $$ BEGIN PERFORM pg_sleep(80); END$$; + """ + Then wait until the results from boolean sql "SELECT count(*) > 0 FROM queries_history WHERE query_text = 'SELECT pg_sleep(80)'" is "true" + @gpperfmon_system_history Scenario: gpperfmon adds to system_history table Given gpperfmon is configured and running in qamode diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index 73be0f430eb2..7d251226010c 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -590,8 +590,16 @@ ParallelizeCorrelatedSubPlanMutator(Node *node, ParallelizeCorrelatedPlanWalkerC scanPlan->flow->flotype = FLOW_SINGLETON; } - broadcastPlan(scanPlan, false /* stable */ , false /* rescannable */, - ctx->currentPlanFlow->numsegments /* numsegments */); + /* + * Broadcasting Replicated locus leads to data duplicates. + */ + if (scanPlan->flow->locustype == CdbLocusType_Replicated && + scanPlan->flow->numsegments != ctx->currentPlanFlow->numsegments) + elog(ERROR, "could not parallelize SubPlan"); + + if (scanPlan->flow->locustype != CdbLocusType_Replicated) + broadcastPlan(scanPlan, false /* stable */ , false /* rescannable */ , + ctx->currentPlanFlow->numsegments /* numsegments */ ); } else { @@ -758,8 +766,17 @@ ParallelizeSubplan(SubPlan *spExpr, PlanProfile *context) if (containingPlanDistributed) { Assert(NULL != context->currentPlanFlow); - broadcastPlan(newPlan, false /* stable */ , false /* rescannable */, - context->currentPlanFlow->numsegments /* numsegments */); + + /* + * Broadcasting Replicated locus leads to data duplicates. 
+ */ + if (newPlan->flow->locustype == CdbLocusType_Replicated && + newPlan->flow->numsegments != context->currentPlanFlow->numsegments) + elog(ERROR, "could not parallelize SubPlan"); + + if (newPlan->flow->locustype != CdbLocusType_Replicated) + broadcastPlan(newPlan, false /* stable */ , false /* rescannable */, + context->currentPlanFlow->numsegments /* numsegments */); } else { diff --git a/src/backend/cdb/cdbmutate.c b/src/backend/cdb/cdbmutate.c index 21da452b71a2..67d3535a1011 100644 --- a/src/backend/cdb/cdbmutate.c +++ b/src/backend/cdb/cdbmutate.c @@ -621,7 +621,8 @@ apply_motion(PlannerInfo *root, Plan *plan, Query *query) { if ((plan->flow->flotype == FLOW_PARTITIONED || (plan->flow->flotype == FLOW_SINGLETON && - plan->flow->locustype == CdbLocusType_SegmentGeneral)) && + plan->flow->locustype == CdbLocusType_SegmentGeneral) || + plan->flow->flotype == FLOW_REPLICATED) && !root->glob->is_parallel_cursor) bringResultToDispatcher = true; diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index 9f81a2f43f92..d5c78e6db629 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -225,17 +225,16 @@ cdbpath_create_motion_path(PlannerInfo *root, return (Path *) pathnode; } - /* replicated-->singleton would give redundant copies of the rows. */ - if (CdbPathLocus_IsReplicated(subpath->locus)) - goto invalid_motion_request; - /* - * Must be partitioned-->singleton. If caller gave pathkeys, they'll + * Partitioned-->singleton. If caller gave pathkeys, they'll * be used for Merge Receive. If no pathkeys, Union Receive will * arbitrarily interleave the rows from the subpath partitions in no * special order. + * Replicated-->singleton is allowed, because Explicit Gather Motion + * exists, which doesn't give redundant copies of rows. */ - if (!CdbPathLocus_IsPartitioned(subpath->locus)) + else if (!CdbPathLocus_IsPartitioned(subpath->locus) && + !CdbPathLocus_IsReplicated(subpath->locus)) goto invalid_motion_request; } @@ -345,12 +344,18 @@ cdbpath_create_motion_path(PlannerInfo *root, else if (CdbPathLocus_IsReplicated(locus)) { /* - * Assume that this case only can be generated in - * UPDATE/DELETE statement + * No motion is needed for the SegmentGeneral --> Replicated case + * if the number of SegmentGeneral's segments is greater than or + * equal to Replicated's. Otherwise, SegmentGeneral will be + * broadcast from a single segment. Currently, the only such case + * is a UNION ALL command where one of the operands has Replicated locus. */ - if (root->upd_del_replicated_table == 0) - goto invalid_motion_request; - + if (CdbPathLocus_NumSegments(subpath->locus) >= + CdbPathLocus_NumSegments(locus)) + { + subpath->locus.numsegments = locus.numsegments; + return subpath; + } } else if (CdbPathLocus_IsSegmentGeneral(locus)) { @@ -1109,157 +1114,183 @@ cdbpath_motion_for_join(PlannerInfo *root, if (CdbPathLocus_IsReplicated(other->locus)) { - Assert(root->upd_del_replicated_table > 0); - /* - * It only appear when we UPDATE a replicated table. - * All the segment which replicated table storaged must execute - * the plan to delete tuple on himself, so if the segments count - * of broadcast(locus is Replicated) if less than the replicated - * table, we can not execute the plan correctly. - * - * TODO:Can we modify(or add) the broadcast motion for this case? + * The case when an UPDATE/DELETE operation on a replicated table + * also has a join operand with Replicated locus is unknown.
*/ - Assert(CdbPathLocus_NumSegments(segGeneral->locus) <= - CdbPathLocus_NumSegments(other->locus)); + Assert(root->upd_del_replicated_table == 0); + + int numsegments = CdbPathLocus_CommonSegments(segGeneral->locus, + other->locus); /* - * Only need to broadcast other to the segments of the - * replicated table. + * Replicated locus corresponds to the result of + * a CTE with a modifying DML operation over a replicated + * table inside it. When the SegmentGeneral locus is + * propagated on more segments than the Replicated locus, + * it is appropriate to reduce SegmentGeneral's number of + * segments and perform the join on the segments. Otherwise, + * the join is performed at a SingleQE. */ - if (CdbPathLocus_NumSegments(segGeneral->locus) < - CdbPathLocus_NumSegments(other->locus)) + if (segGeneral->locus.numsegments >= other->locus.numsegments) { - other->locus.numsegments = - CdbPathLocus_NumSegments(segGeneral->locus); + segGeneral->locus.numsegments = numsegments; + return other->locus; + } + else + { + CdbPathLocus_MakeSingleQE(&segGeneral->move_to, numsegments); + CdbPathLocus_MakeSingleQE(&other->move_to, numsegments); } - - return other->locus; - } - - Assert(CdbPathLocus_IsBottleneck(other->locus) || - CdbPathLocus_IsSegmentGeneral(other->locus) || - CdbPathLocus_IsPartitioned(other->locus)); - - /* - * For UPDATE/DELETE, replicated table can't guarantee a logic row has - * same ctid or item pointer on each copy. If we broadcast matched tuples - * to all segments, the segments may update the wrong tuples or can't - * find a valid tuple according to ctid or item pointer. - * - * So For UPDATE/DELETE on replicated table, we broadcast other path so - * all target tuples can be selected on all copys and then be updated - * locally. - */ - if (root->upd_del_replicated_table > 0 && - bms_is_member(root->upd_del_replicated_table, segGeneral->path->parent->relids)) - { - CdbPathLocus_MakeReplicated(&other->move_to, - CdbPathLocus_NumSegments(segGeneral->locus)); - } - /* - * other is bottleneck, move inner to other - */ - else if (CdbPathLocus_IsBottleneck(other->locus)) - { - /* - * if the locus type is equal and segment count is unequal, - * we will dispatch the one on more segments to the other - */ - numsegments = CdbPathLocus_CommonSegments(segGeneral->locus, - other->locus); - segGeneral->move_to = other->locus; - segGeneral->move_to.numsegments = numsegments; - } - else if (!segGeneral->ok_to_replicate) - { - int numsegments = CdbPathLocus_CommonSegments(segGeneral->locus, - other->locus); - /* put both inner and outer to single QE */ - CdbPathLocus_MakeSingleQE(&segGeneral->move_to, numsegments); - CdbPathLocus_MakeSingleQE(&other->move_to, numsegments); } else { + Assert(CdbPathLocus_IsBottleneck(other->locus) || + CdbPathLocus_IsSegmentGeneral(other->locus) || + CdbPathLocus_IsPartitioned(other->locus)); + /* - * If all other's segments have segGeneral stored, then no motion - * is needed. + * For UPDATE/DELETE, replicated table can't guarantee a logic row has + * same ctid or item pointer on each copy. If we broadcast matched tuples + * to all segments, the segments may update the wrong tuples or can't + * find a valid tuple according to ctid or item pointer. * - * A sql to reach here: - * select * from d2 a join r1 b using (c1); - * where d2 is a replicated table on 2 segment, - * r1 is a random table on 1 segments. + * So For UPDATE/DELETE on replicated table, we broadcast other path so + * all target tuples can be selected on all copys and then be updated + * locally.
*/ - if (CdbPathLocus_NumSegments(segGeneral->locus) >= - CdbPathLocus_NumSegments(other->locus)) + if (root->upd_del_replicated_table > 0 && + bms_is_member(root->upd_del_replicated_table, segGeneral->path->parent->relids)) { - return other->locus; + CdbPathLocus_MakeReplicated(&other->move_to, + CdbPathLocus_NumSegments(segGeneral->locus)); } - - /* - * Otherwise there is some segments where other is on but - * segGeneral is not, in such a case motions are needed. - */ - /* - * For the case that other is a Hashed table and redistribute - * clause matches other's distribute keys, we could redistribute - * segGeneral to other. + * other is bottleneck, move inner to other */ - if (CdbPathLocus_IsHashed(other->locus) && - cdbpath_match_preds_to_distkey(root, - redistribution_clauses, - other->path, - other->locus, - &segGeneral->move_to)) /* OUT */ + else if (CdbPathLocus_IsBottleneck(other->locus)) { - /* the result is distributed on the same segments with other */ - AssertEquivalent(CdbPathLocus_NumSegments(other->locus), - CdbPathLocus_NumSegments(segGeneral->move_to)); + /* + * if the locus type is equal and segment count is unequal, + * we will dispatch the one on more segments to the other + */ + numsegments = CdbPathLocus_CommonSegments(segGeneral->locus, + other->locus); + segGeneral->move_to = other->locus; + segGeneral->move_to.numsegments = numsegments; + } + else if (!segGeneral->ok_to_replicate) + { + int numsegments = CdbPathLocus_CommonSegments(segGeneral->locus, + other->locus); + /* put both inner and outer to single QE */ + CdbPathLocus_MakeSingleQE(&segGeneral->move_to, numsegments); + CdbPathLocus_MakeSingleQE(&other->move_to, numsegments); } - /* - * Otherwise gather both of them to a SingleQE, this is not usually - * a best choice as the SingleQE might be on QD, so although the - * overall cost is low it increases the load on QD. - * - * FIXME: is it possible to only gather other to segGeneral? - */ else { - int numsegments = CdbPathLocus_NumSegments(segGeneral->locus); + /* + * If all other's segments have segGeneral stored, then no motion + * is needed. + * + * A sql to reach here: + * select * from d2 a join r1 b using (c1); + * where d2 is a replicated table on 2 segment, + * r1 is a random table on 1 segments. + */ + if (CdbPathLocus_NumSegments(segGeneral->locus) >= + CdbPathLocus_NumSegments(other->locus)) + { + return other->locus; + } - Assert(CdbPathLocus_NumSegments(segGeneral->locus) < - CdbPathLocus_NumSegments(other->locus)); + /* + * Otherwise there is some segments where other is on but + * segGeneral is not, in such a case motions are needed. + */ - CdbPathLocus_MakeSingleQE(&segGeneral->move_to, numsegments); - CdbPathLocus_MakeSingleQE(&other->move_to, numsegments); + /* + * For the case that other is a Hashed table and redistribute + * clause matches other's distribute keys, we could redistribute + * segGeneral to other. + */ + if (CdbPathLocus_IsHashed(other->locus) && + cdbpath_match_preds_to_distkey(root, + redistribution_clauses, + other->path, + other->locus, + &segGeneral->move_to)) /* OUT */ + { + /* the result is distributed on the same segments with other */ + AssertEquivalent(CdbPathLocus_NumSegments(other->locus), + CdbPathLocus_NumSegments(segGeneral->move_to)); + } + /* + * Otherwise gather both of them to a SingleQE, this is not usually + * a best choice as the SingleQE might be on QD, so although the + * overall cost is low it increases the load on QD. + * + * FIXME: is it possible to only gather other to segGeneral? 
+ */ + else + { + int numsegments = CdbPathLocus_NumSegments(segGeneral->locus); + + Assert(CdbPathLocus_NumSegments(segGeneral->locus) < + CdbPathLocus_NumSegments(other->locus)); + + CdbPathLocus_MakeSingleQE(&segGeneral->move_to, numsegments); + CdbPathLocus_MakeSingleQE(&other->move_to, numsegments); + } } } } } - /* - * Replicated paths shouldn't occur except UPDATE/DELETE on replicated table. - */ - else if (CdbPathLocus_IsReplicated(outer.locus)) + else if (CdbPathLocus_IsReplicated(outer.locus) || + CdbPathLocus_IsReplicated(inner.locus)) { - if (root->upd_del_replicated_table > 0) - CdbPathLocus_MakeReplicated(&inner.move_to, - CdbPathLocus_NumSegments(outer.locus)); - else + /* + * Replicated paths shouldn't occur except ones including + * modifying CTEs with DML operations on replicated table. + */ + Assert(root->upd_del_replicated_table == 0); + + CdbpathMfjRel *replicated; + CdbpathMfjRel *other; + + if (CdbPathLocus_IsReplicated(outer.locus)) { - Assert(false); - goto fail; + replicated = &outer; + other = &inner; } - } - else if (CdbPathLocus_IsReplicated(inner.locus)) - { - if (root->upd_del_replicated_table > 0) - CdbPathLocus_MakeReplicated(&outer.move_to, - CdbPathLocus_NumSegments(inner.locus)); else { - Assert(false); - goto fail; + replicated = &inner; + other = &outer; + } + + int numsegments = CdbPathLocus_CommonSegments(replicated->locus, + other->locus); + + /* + * If Replicated locus is joined with Partitioned locus group + * it will be possible to perform join locally (if number of segments + * is equal). Otherwise, join must be performed at single segment. + */ + if (CdbPathLocus_IsBottleneck(other->locus)) + CdbPathLocus_MakeSimple(&replicated->move_to, + other->locus.locustype, numsegments); + else if (CdbPathLocus_IsPartitioned(other->locus)) + { + if (replicated->ok_to_replicate && + CdbPathLocus_NumSegments(replicated->locus) == + CdbPathLocus_NumSegments(other->locus)) + return other->locus; + else + { + CdbPathLocus_MakeSingleQE(&replicated->move_to, numsegments); + CdbPathLocus_MakeSingleQE(&other->move_to, numsegments); + } } } /* @@ -2079,7 +2110,9 @@ has_redistributable_clause(RestrictInfo *restrictinfo) Path * turn_volatile_seggen_to_singleqe(PlannerInfo *root, Path *path, Node *node) { - if ((CdbPathLocus_IsSegmentGeneral(path->locus) || CdbPathLocus_IsGeneral(path->locus)) && + if ((CdbPathLocus_IsSegmentGeneral(path->locus) || + CdbPathLocus_IsGeneral(path->locus) || + CdbPathLocus_IsReplicated(path->locus)) && (contain_volatile_functions(node))) { CdbPathLocus singleQE; @@ -2097,6 +2130,14 @@ turn_volatile_seggen_to_singleqe(PlannerInfo *root, Path *path, Node *node) getgpsegmentCount()); return path; } + else if (CdbPathLocus_IsReplicated(path->locus)) + { + /* + * Replicated locus is not supported yet in context of + * volatile functions handling. 
+ */ + elog(ERROR, "could not devise a plan"); + } CdbPathLocus_MakeSingleQE(&singleQE, CdbPathLocus_NumSegments(path->locus)); diff --git a/src/backend/cdb/cdbsetop.c b/src/backend/cdb/cdbsetop.c index 8e157ae32cc6..f11f6f77288d 100644 --- a/src/backend/cdb/cdbsetop.c +++ b/src/backend/cdb/cdbsetop.c @@ -323,8 +323,7 @@ make_motion_gather(PlannerInfo *root, Plan *subplan, List *sortPathKeys, CdbLocu Motion *motion; Assert(subplan->flow != NULL); - Assert(subplan->flow->flotype == FLOW_PARTITIONED || - subplan->flow->flotype == FLOW_SINGLETON); + Assert(subplan->flow->flotype != FLOW_UNDEFINED); if (sortPathKeys) { diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index e0b998b9671f..1e5fa850d039 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -908,6 +908,15 @@ subquery_planner(PlannerGlobal *glob, Query *parse, list_make1_int(root->is_split_update), rowMarks, SS_assign_special_param(root)); + + /* + * Currently, we prohibit applying volatile functions + * to the result of a modifying CTE with Replicated locus. + */ + if (parent_root && parent_root->parse->hasModifyingCTE && + plan->flow->locustype == CdbLocusType_Replicated && + contain_volatile_functions((Node *) parse->returningList)) + elog(ERROR, "could not devise a plan"); } } @@ -940,6 +949,16 @@ subquery_planner(PlannerGlobal *glob, Query *parse, { plan = (Plan *) make_motion_gather(root, plan, NIL, CdbLocusType_SingleQE); } + else if (plan->flow->locustype == CdbLocusType_Replicated && + (contain_volatile_functions((Node *) plan->targetlist) || + contain_volatile_functions(parse->havingQual))) + { + /* + * Replicated locus is not yet supported in the context of volatile + * function handling. + */ + elog(ERROR, "could not devise a plan"); + } /* Return internal info if caller wants it */ if (subroot) diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 79233aab1560..4806e59ed409 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1522,8 +1522,14 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel, /* * Align numsegments to be the common segments among the children. * Partitioned children will need to be motioned, so ignore them. + * If the target locus type is Replicated, we align numsegments + * only to subpaths with Replicated locus, because a Replicated + * locus is executed strictly on its own number of + * segments.
*/ - if (!CdbPathLocus_IsPartitioned(subpath->locus)) + if (!CdbPathLocus_IsPartitioned(subpath->locus) && + (targetlocustype != CdbLocusType_Replicated || + CdbPathLocus_IsReplicated(subpath->locus))) { /* When there are multiple SingleQE, use the common segments */ numsegments = Min(numsegments, diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index 920c1a7b2af4..037694028337 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -15,6 +15,8 @@ test: select_dropped_table test: update_hash_col_utilitymode execute_on_utilitymode # Tests for crash recovery +test: uao_crash_compaction_column +test: uao_crash_compaction_row test: crash_recovery test: crash_recovery_redundant_dtx test: crash_recovery_dtm @@ -24,8 +26,6 @@ test: udf_exception_blocks_panic_scenarios test: ao_same_trans_truncate_crash test: frozen_insert_crash test: ao_fsync_panic -test: uao_crash_compaction_column -test: uao_crash_compaction_row test: prevent_ao_wal diff --git a/src/test/regress/expected/with_clause.out b/src/test/regress/expected/with_clause.out index 0822760c5ec0..fb46855a827a 100644 --- a/src/test/regress/expected/with_clause.out +++ b/src/test/regress/expected/with_clause.out @@ -3,6 +3,15 @@ -- m/ERROR: Too much references to non-SELECT CTE \(allpaths\.c:\d+\)/ -- s/\d+/XXX/g -- +-- m/ERROR: could not devise a plan \(planner\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- m/ERROR: could not devise a plan \(cdbpath\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- m/ERROR: could not parallelize SubPlan \(cdbllize\.c:\d+\)/ +-- s/\d+/XXX/g +-- -- end_matchsubs drop table if exists with_test1 cascade; NOTICE: table "with_test1" does not exist, skipping @@ -2378,3 +2387,909 @@ create table t_new as (with cte as select * from cte); ERROR: cannot create plan with several writing gangs drop table with_dml; +-- Test various SELECT statements from CTE with +-- modifying DML operations over replicated tables +--start_ignore +drop table if exists with_dml_dr; +NOTICE: table "with_dml_dr" does not exist, skipping +--end_ignore +create table with_dml_dr(i int, j int) distributed replicated; +-- Test plain SELECT from CTE with modifying DML queries on replicated table. +-- Explicit Gather Motion should present at the top of the plan. 
+explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte; + QUERY PLAN +------------------------------------------------------ + Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(5 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + update with_dml_dr + set j = j + 1 where i <= 5 + returning j +) select count(*) from cte; + QUERY PLAN +--------------------------------------------------- + Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Update on with_dml_dr + -> Seq Scan on with_dml_dr + Filter: (i <= 5) + Optimizer: Postgres query optimizer +(6 rows) + +with cte as ( + update with_dml_dr + set j = j + 1 where i <= 5 + returning j +) select count(*) from cte; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + delete from with_dml_dr where i > 0 + returning i +) select count(*) from cte; + QUERY PLAN +--------------------------------------------------- + Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Delete on with_dml_dr + -> Seq Scan on with_dml_dr + Filter: (i > 0) + Optimizer: Postgres query optimizer +(6 rows) + +with cte as ( + delete from with_dml_dr where i > 0 + returning i +) select count(*) from cte; + count +------- + 5 +(1 row) + +-- Test ORDER BY clause is applied correctly to the result of modifying +-- CTE over replicated table. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select * from cte order by i; + QUERY PLAN +------------------------------------------------------ + Explicit Gather Motion 3:1 (slice1; segments: 3) + Merge Key: with_dml_dr.i + -> Sort + Sort Key: with_dml_dr.i + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(7 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select * from cte order by i; + i +--- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +-- Test join operations between CTE conaining various modifying DML operations +-- over replicated table and other tables. Ensure that CdbLocusType_Replicated +-- is compatible with other type of locuses during joins. +-- Test join CdbLocusType_Replicated with CdbLocusType_SegmentGeneral. 
+--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +--end_ignore +create table t_repl (i int, j int) distributed replicated; +insert into t_repl values (1, 1), (2, 2), (3, 3); +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + QUERY PLAN +------------------------------------------------------------ + Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Hash Join + Hash Cond: (with_dml_dr.i = t_repl.i) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Seq Scan on t_repl + Optimizer: Postgres query optimizer +(9 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + count +------- + 3 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_SegmentGeneral +-- in case when relations are propagated on different number of segments. +--start_ignore +drop table if exists with_dml_dr_seg2; +NOTICE: table "with_dml_dr_seg2" does not exist, skipping +--end_ignore +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table with_dml_dr_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +-- SegmentGeneral's number of segments is larger than Replicated's, +-- the join is performed at number of segments of Replicated locus. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + QUERY PLAN +------------------------------------------------------------ + Explicit Gather Motion 2:1 (slice1; segments: 2) + -> Aggregate + -> Hash Join + Hash Cond: (with_dml_dr_seg2.i = t_repl.i) + -> Insert on with_dml_dr_seg2 + -> Function Scan on generate_series i + -> Hash + -> Seq Scan on t_repl + Optimizer: Postgres query optimizer +(9 rows) + +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + count +------- + 3 +(1 row) + +-- SegmentGeneral's number of segments is less than Replicated's, +-- the join is performed at SingleQE. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join with_dml_dr_seg2 using (i); + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (with_dml_dr_seg2.i = with_dml_dr.i) + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on with_dml_dr_seg2 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join with_dml_dr_seg2 using (i); + count +------- + 5 +(1 row) + +drop table with_dml_dr_seg2; +drop table t_repl; +-- Test join CdbLocusType_Replicated with CdbLocusType_SingleQE. 
+explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join + (select random() * 0 v from generate_series(1,5)) x on cte.i = x.v; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Aggregate + -> Hash Left Join + Hash Cond: ((with_dml_dr.i)::double precision = ((random() * '0'::double precision))) + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Function Scan on generate_series + Optimizer: Postgres query optimizer +(9 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join + (select random() * 0 v from generate_series(1,5)) x on cte.i = x.v; + count +------- + 5 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_Entry. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(-5,-1) i + returning i +) select count(*) from cte left join gp_segment_configuration on cte.i = port; + QUERY PLAN +-------------------------------------------------------------------- + Aggregate + -> Hash Left Join + Hash Cond: (with_dml_dr.i = gp_segment_configuration.port) + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Seq Scan on gp_segment_configuration + Optimizer: Postgres query optimizer +(9 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(-5,-1) i + returning i +) select count(*) from cte left join gp_segment_configuration on cte.i = port; + count +------- + 5 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_General. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i) +select count(*) from cte join +(select a from generate_series(1,5) a) x on cte.i = x.a; + QUERY PLAN +------------------------------------------------------------ + Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Hash Join + Hash Cond: (with_dml_dr.i = a.a) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Function Scan on generate_series a + Optimizer: Postgres query optimizer +(9 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i) +select count(*) from cte join +(select a from generate_series(1,5) a) x on cte.i = x.a; + count +------- + 5 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_Hashed +-- and CdbLocusType_Strewn. 
+--start_ignore +drop table if exists t_hashed; +NOTICE: table "t_hashed" does not exist, skipping +drop table if exists t_strewn; +NOTICE: table "t_strewn" does not exist, skipping +--end_ignore +create table t_hashed (i int, j int) distributed by (i); +create table t_strewn (i int, j int) distributed randomly; +insert into t_hashed select i, i * 2 from generate_series(1, 10) i; +insert into t_strewn select i, i * 2 from generate_series(1, 10) i; +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed on cte.i = t_hashed.i; + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Hash Join + Hash Cond: (with_dml_dr.i = t_hashed.i) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Seq Scan on t_hashed + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed on cte.i = t_hashed.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_hashed on cte.i = t_hashed.i; + QUERY PLAN +--------------------------------------------------------------- + Aggregate + -> Hash Left Join + Hash Cond: (with_dml_dr.i = t_hashed.i) + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on t_hashed + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_hashed on cte.i = t_hashed.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn on cte.i = t_strewn.i; + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Hash Join + Hash Cond: (with_dml_dr.i = t_strewn.i) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Seq Scan on t_strewn + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn on cte.i = t_strewn.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_strewn on cte.i = t_strewn.i; + QUERY PLAN +--------------------------------------------------------------- + Aggregate + -> Hash Left Join + Hash Cond: (with_dml_dr.i = t_strewn.i) + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on t_strewn + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_strewn on cte.i = t_strewn.i; + count +------- + 5 
+(1 row) + +drop table t_strewn; +drop table t_hashed; +-- Test join CdbLocusType_Replicated with CdbLocusType_Hashed and +-- CdbLocusType_Strewn in case when relations are propagated on +-- different number of segments. +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table t_hashed_seg2 (i int, j int) distributed by (i); +create table t_strewn_seg2 (i int, j int) distributed randomly; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +insert into t_hashed_seg2 select i, i * 2 from generate_series(1, 10) i; +insert into t_strewn_seg2 select i, i * 2 from generate_series(1, 10) i; +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed_seg2 on cte.i = t_hashed_seg2.i; + QUERY PLAN +--------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (with_dml_dr.i = t_hashed_seg2.i) + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Gather Motion 2:1 (slice2; segments: 2) + -> Seq Scan on t_hashed_seg2 + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed_seg2 on cte.i = t_hashed_seg2.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn_seg2 on cte.i = t_strewn_seg2.i; + QUERY PLAN +--------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (with_dml_dr.i = t_strewn_seg2.i) + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Gather Motion 2:1 (slice2; segments: 2) + -> Seq Scan on t_strewn_seg2 + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn_seg2 on cte.i = t_strewn_seg2.i; + count +------- + 5 +(1 row) + +drop table t_strewn_seg2; +drop table t_hashed_seg2; +-- Test join CdbLocusType_Replicated with CdbLocusType_Replicated. +-- Join can be performed correctly only when CTE is shared. 
+set gp_cte_sharing = 1; +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte a join cte b using (i); + QUERY PLAN +------------------------------------------------------------------------------ + Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Hash Join + Hash Cond: (share0_ref2.i = share0_ref1.i) + -> Shared Scan (share slice:id 1:0) + -> Hash + -> Shared Scan (share slice:id 1:0) + -> Materialize + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte a join cte b using (i); + count +------- + 5 +(1 row) + +reset gp_cte_sharing; +-- Test prohibition of volatile functions applied to the +-- locus Replicated. The appropriate error should be thrown. +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +--end_ignore +create table t_repl (i int, j int) distributed replicated; +-- Prohibit volatile qualifications. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select * from cte where cte.j > random(); +ERROR: could not devise a plan (cdbpath.c:2139) +-- Prohibit volatile returning list +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j * random() +) select * from cte; +ERROR: could not devise a plan (planner.c:919) +-- Prohibit volatile targetlist. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select i, j * random() from cte; +ERROR: could not devise a plan (planner.c:960) +-- Prohibit volatile having qualifications. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select i, sum(j) from cte group by i having sum(j) > random(); +ERROR: could not devise a plan (planner.c:960) +-- Prohibit volatile join qualifications. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select * from cte join t_repl on cte.i = t_repl.j * random(); +ERROR: could not devise a plan (cdbpath.c:2139) +drop table t_repl; +-- Test that node with locus Replicated is not boradcasted inside +-- a correlated/uncorrlated SubPlan. In case of different number of +-- segments between replicated node inside the SubPlan and main plan +-- the proper error should be thrown. 
+--start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists with_dml_dr_seg2; +NOTICE: table "with_dml_dr_seg2" does not exist, skipping +--end_ignore +create table t1 (i int, j int) distributed by (i); +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table with_dml_dr_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +insert into t1 select i, i from generate_series(1, 6) i; +-- Case when number of segments is equal, no Broadcast at the top of CTE plan. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: t1.i + -> Sort + Sort Key: t1.i + -> Seq Scan on t1 + Filter: (hashed SubPlan 1) + SubPlan 1 (slice1; segments: 3) + -> Materialize + -> Subquery Scan on cte + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(12 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 +(6 rows) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: t1.i + -> Sort + Sort Key: t1.i + -> Seq Scan on t1 + Filter: (SubPlan 1) + SubPlan 1 (slice1; segments: 3) + -> Result + Filter: (cte.i = t1.j) + -> Materialize + -> Subquery Scan on cte + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(14 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 +(6 rows) + +-- Case with unequal number of segments between replicated node inside the +-- SubPlan and main plan, the error should be thrown. 
+explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; +ERROR: could not parallelize SubPlan (cdbllize.c:775) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; +ERROR: could not parallelize SubPlan (cdbllize.c:775) +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; +ERROR: could not parallelize SubPlan (cdbllize.c:598) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; +ERROR: could not parallelize SubPlan (cdbllize.c:598) +drop table t1; +-- Test UNION ALL command when combining SegmentGeneral locus and Replicated. +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +drop table if exists t_repl_seg2; +NOTICE: table "t_repl_seg2" does not exist, skipping +--end_ignore +create table t_repl (i int, j int) distributed replicated; +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table t_repl_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +insert into t_repl values (2, 2); +insert into t_repl_seg2 values (2, 2); +explain (costs off) +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + QUERY PLAN +--------------------------------------------------- + Explicit Gather Motion 3:1 (slice1; segments: 3) + Merge Key: with_dml_dr.i + -> Sort + Sort Key: with_dml_dr.i + -> Append + -> Insert on with_dml_dr + -> Result + -> Seq Scan on t_repl + Optimizer: Postgres query optimizer +(9 rows) + +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- Case when SegmentGeneral is originally propagated at less number +-- of segments. +explain (costs off) +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl_seg2 +order by 1; + QUERY PLAN +--------------------------------------------------------------- + Explicit Gather Motion 3:1 (slice2; segments: 3) + Merge Key: with_dml_dr.i + -> Sort + Sort Key: with_dml_dr.i + -> Append + -> Insert on with_dml_dr + -> Result + -> Broadcast Motion 1:3 (slice1; segments: 1) + -> Seq Scan on t_repl_seg2 + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl_seg2 +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- Case when final number of segments is aligned to Replicated subplan. 
+explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + QUERY PLAN +--------------------------------------------------- + Explicit Gather Motion 2:1 (slice1; segments: 2) + Merge Key: with_dml_dr_seg2.i + -> Sort + Sort Key: with_dml_dr_seg2.i + -> Append + -> Insert on with_dml_dr_seg2 + -> Result + -> Seq Scan on t_repl + Optimizer: Postgres query optimizer +(9 rows) + +with cte as ( + insert into with_dml_dr_seg2 + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +drop table t_repl_seg2; +drop table t_repl; +drop table with_dml_dr_seg2; +drop table with_dml_dr; diff --git a/src/test/regress/expected/with_clause_optimizer.out b/src/test/regress/expected/with_clause_optimizer.out index 396e57adb036..963c85316c14 100644 --- a/src/test/regress/expected/with_clause_optimizer.out +++ b/src/test/regress/expected/with_clause_optimizer.out @@ -3,6 +3,15 @@ -- m/ERROR: Too much references to non-SELECT CTE \(allpaths\.c:\d+\)/ -- s/\d+/XXX/g -- +-- m/ERROR: could not devise a plan \(planner\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- m/ERROR: could not devise a plan \(cdbpath\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- m/ERROR: could not parallelize SubPlan \(cdbllize\.c:\d+\)/ +-- s/\d+/XXX/g +-- -- end_matchsubs drop table if exists with_test1 cascade; NOTICE: table "with_test1" does not exist, skipping @@ -2381,3 +2390,909 @@ create table t_new as (with cte as select * from cte); ERROR: cannot create plan with several writing gangs drop table with_dml; +-- Test various SELECT statements from CTE with +-- modifying DML operations over replicated tables +--start_ignore +drop table if exists with_dml_dr; +NOTICE: table "with_dml_dr" does not exist, skipping +--end_ignore +create table with_dml_dr(i int, j int) distributed replicated; +-- Test plain SELECT from CTE with modifying DML queries on replicated table. +-- Explicit Gather Motion should present at the top of the plan. 
+explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte; + QUERY PLAN +------------------------------------------------------ + Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(5 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + update with_dml_dr + set j = j + 1 where i <= 5 + returning j +) select count(*) from cte; + QUERY PLAN +--------------------------------------------------- + Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Update on with_dml_dr + -> Seq Scan on with_dml_dr + Filter: (i <= 5) + Optimizer: Postgres query optimizer +(6 rows) + +with cte as ( + update with_dml_dr + set j = j + 1 where i <= 5 + returning j +) select count(*) from cte; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + delete from with_dml_dr where i > 0 + returning i +) select count(*) from cte; + QUERY PLAN +--------------------------------------------------- + Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Delete on with_dml_dr + -> Seq Scan on with_dml_dr + Filter: (i > 0) + Optimizer: Postgres query optimizer +(6 rows) + +with cte as ( + delete from with_dml_dr where i > 0 + returning i +) select count(*) from cte; + count +------- + 5 +(1 row) + +-- Test ORDER BY clause is applied correctly to the result of modifying +-- CTE over replicated table. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select * from cte order by i; + QUERY PLAN +------------------------------------------------------ + Explicit Gather Motion 3:1 (slice1; segments: 3) + Merge Key: with_dml_dr.i + -> Sort + Sort Key: with_dml_dr.i + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(7 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select * from cte order by i; + i +--- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +-- Test join operations between CTE conaining various modifying DML operations +-- over replicated table and other tables. Ensure that CdbLocusType_Replicated +-- is compatible with other type of locuses during joins. +-- Test join CdbLocusType_Replicated with CdbLocusType_SegmentGeneral. 
+--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +--end_ignore +create table t_repl (i int, j int) distributed replicated; +insert into t_repl values (1, 1), (2, 2), (3, 3); +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + QUERY PLAN +------------------------------------------------------------ + Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Hash Join + Hash Cond: (with_dml_dr.i = t_repl.i) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Seq Scan on t_repl + Optimizer: Postgres query optimizer +(9 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + count +------- + 3 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_SegmentGeneral +-- in case when relations are propagated on different number of segments. +--start_ignore +drop table if exists with_dml_dr_seg2; +NOTICE: table "with_dml_dr_seg2" does not exist, skipping +--end_ignore +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table with_dml_dr_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +-- SegmentGeneral's number of segments is larger than Replicated's, +-- the join is performed at number of segments of Replicated locus. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + QUERY PLAN +------------------------------------------------------------ + Explicit Gather Motion 2:1 (slice1; segments: 2) + -> Aggregate + -> Hash Join + Hash Cond: (with_dml_dr_seg2.i = t_repl.i) + -> Insert on with_dml_dr_seg2 + -> Function Scan on generate_series i + -> Hash + -> Seq Scan on t_repl + Optimizer: Postgres query optimizer +(9 rows) + +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + count +------- + 3 +(1 row) + +-- SegmentGeneral's number of segments is less than Replicated's, +-- the join is performed at SingleQE. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join with_dml_dr_seg2 using (i); + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (with_dml_dr_seg2.i = with_dml_dr.i) + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on with_dml_dr_seg2 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join with_dml_dr_seg2 using (i); + count +------- + 5 +(1 row) + +drop table with_dml_dr_seg2; +drop table t_repl; +-- Test join CdbLocusType_Replicated with CdbLocusType_SingleQE. 
+explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join + (select random() * 0 v from generate_series(1,5)) x on cte.i = x.v; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Aggregate + -> Hash Left Join + Hash Cond: ((with_dml_dr.i)::double precision = ((random() * '0'::double precision))) + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Function Scan on generate_series + Optimizer: Postgres query optimizer +(9 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join + (select random() * 0 v from generate_series(1,5)) x on cte.i = x.v; + count +------- + 5 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_Entry. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(-5,-1) i + returning i +) select count(*) from cte left join gp_segment_configuration on cte.i = port; + QUERY PLAN +-------------------------------------------------------------------- + Aggregate + -> Hash Left Join + Hash Cond: (with_dml_dr.i = gp_segment_configuration.port) + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Seq Scan on gp_segment_configuration + Optimizer: Postgres query optimizer +(9 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(-5,-1) i + returning i +) select count(*) from cte left join gp_segment_configuration on cte.i = port; + count +------- + 5 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_General. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i) +select count(*) from cte join +(select a from generate_series(1,5) a) x on cte.i = x.a; + QUERY PLAN +------------------------------------------------------------ + Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Hash Join + Hash Cond: (with_dml_dr.i = a.a) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Function Scan on generate_series a + Optimizer: Postgres query optimizer +(9 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i) +select count(*) from cte join +(select a from generate_series(1,5) a) x on cte.i = x.a; + count +------- + 5 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_Hashed +-- and CdbLocusType_Strewn. 
+--start_ignore +drop table if exists t_hashed; +NOTICE: table "t_hashed" does not exist, skipping +drop table if exists t_strewn; +NOTICE: table "t_strewn" does not exist, skipping +--end_ignore +create table t_hashed (i int, j int) distributed by (i); +create table t_strewn (i int, j int) distributed randomly; +insert into t_hashed select i, i * 2 from generate_series(1, 10) i; +insert into t_strewn select i, i * 2 from generate_series(1, 10) i; +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed on cte.i = t_hashed.i; + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Hash Join + Hash Cond: (with_dml_dr.i = t_hashed.i) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Seq Scan on t_hashed + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed on cte.i = t_hashed.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_hashed on cte.i = t_hashed.i; + QUERY PLAN +--------------------------------------------------------------- + Aggregate + -> Hash Left Join + Hash Cond: (with_dml_dr.i = t_hashed.i) + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on t_hashed + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_hashed on cte.i = t_hashed.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn on cte.i = t_strewn.i; + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Aggregate + -> Hash Join + Hash Cond: (with_dml_dr.i = t_strewn.i) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Seq Scan on t_strewn + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn on cte.i = t_strewn.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_strewn on cte.i = t_strewn.i; + QUERY PLAN +--------------------------------------------------------------- + Aggregate + -> Hash Left Join + Hash Cond: (with_dml_dr.i = t_strewn.i) + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on t_strewn + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_strewn on cte.i = t_strewn.i; + count +------- + 5 
+(1 row) + +drop table t_strewn; +drop table t_hashed; +-- Test join CdbLocusType_Replicated with CdbLocusType_Hashed and +-- CdbLocusType_Strewn in case when relations are propagated on +-- different number of segments. +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table t_hashed_seg2 (i int, j int) distributed by (i); +create table t_strewn_seg2 (i int, j int) distributed randomly; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +insert into t_hashed_seg2 select i, i * 2 from generate_series(1, 10) i; +insert into t_strewn_seg2 select i, i * 2 from generate_series(1, 10) i; +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed_seg2 on cte.i = t_hashed_seg2.i; + QUERY PLAN +--------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (with_dml_dr.i = t_hashed_seg2.i) + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Gather Motion 2:1 (slice2; segments: 2) + -> Seq Scan on t_hashed_seg2 + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed_seg2 on cte.i = t_hashed_seg2.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn_seg2 on cte.i = t_strewn_seg2.i; + QUERY PLAN +--------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (with_dml_dr.i = t_strewn_seg2.i) + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Gather Motion 2:1 (slice2; segments: 2) + -> Seq Scan on t_strewn_seg2 + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn_seg2 on cte.i = t_strewn_seg2.i; + count +------- + 5 +(1 row) + +drop table t_strewn_seg2; +drop table t_hashed_seg2; +-- Test join CdbLocusType_Replicated with CdbLocusType_Replicated. +-- Join can be performed correctly only when CTE is shared. 
+set gp_cte_sharing = 1;
+explain (costs off)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i
+) select count(*) from cte a join cte b using (i);
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Explicit Gather Motion 3:1 (slice1; segments: 3)
+ -> Aggregate
+ -> Hash Join
+ Hash Cond: (share0_ref2.i = share0_ref1.i)
+ -> Shared Scan (share slice:id 1:0)
+ -> Hash
+ -> Shared Scan (share slice:id 1:0)
+ -> Materialize
+ -> Insert on with_dml_dr
+ -> Function Scan on generate_series i
+ Optimizer: Postgres query optimizer
+(11 rows)
+
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i
+) select count(*) from cte a join cte b using (i);
+ count
+-------
+ 5
+(1 row)
+
+reset gp_cte_sharing;
+-- Test prohibition of volatile functions applied to the
+-- locus Replicated. The appropriate error should be thrown.
+--start_ignore
+drop table if exists t_repl;
+NOTICE: table "t_repl" does not exist, skipping
+--end_ignore
+create table t_repl (i int, j int) distributed replicated;
+-- Prohibit volatile qualifications.
+explain (costs off, verbose)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i, j
+) select * from cte where cte.j > random();
+ERROR: could not devise a plan (cdbpath.c:2139)
+-- Prohibit volatile returning list.
+explain (costs off, verbose)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i, j * random()
+) select * from cte;
+ERROR: could not devise a plan (planner.c:919)
+-- Prohibit volatile targetlist.
+explain (costs off, verbose)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i, j
+) select i, j * random() from cte;
+ERROR: could not devise a plan (planner.c:960)
+-- Prohibit volatile having qualifications.
+explain (costs off, verbose)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i, j
+) select i, sum(j) from cte group by i having sum(j) > random();
+ERROR: could not devise a plan (planner.c:960)
+-- Prohibit volatile join qualifications.
+explain (costs off, verbose)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i, j
+) select * from cte join t_repl on cte.i = t_repl.j * random();
+ERROR: could not devise a plan (cdbpath.c:2139)
+drop table t_repl;
+-- Test that a node with locus Replicated is not broadcast inside
+-- a correlated/uncorrelated SubPlan. If the number of segments differs
+-- between the replicated node inside the SubPlan and the main plan,
+-- the proper error should be thrown.
+--start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists with_dml_dr_seg2; +NOTICE: table "with_dml_dr_seg2" does not exist, skipping +--end_ignore +create table t1 (i int, j int) distributed by (i); +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table with_dml_dr_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +insert into t1 select i, i from generate_series(1, 6) i; +-- Case when number of segments is equal, no Broadcast at the top of CTE plan. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: t1.i + -> Sort + Sort Key: t1.i + -> Seq Scan on t1 + Filter: (hashed SubPlan 1) + SubPlan 1 (slice1; segments: 3) + -> Materialize + -> Subquery Scan on cte + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(12 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 +(6 rows) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: t1.i + -> Sort + Sort Key: t1.i + -> Seq Scan on t1 + Filter: (SubPlan 1) + SubPlan 1 (slice1; segments: 3) + -> Result + Filter: (cte.i = t1.j) + -> Materialize + -> Subquery Scan on cte + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres query optimizer +(14 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 +(6 rows) + +-- Case with unequal number of segments between replicated node inside the +-- SubPlan and main plan, the error should be thrown. 
+explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; +ERROR: could not parallelize SubPlan (cdbllize.c:775) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; +ERROR: could not parallelize SubPlan (cdbllize.c:775) +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; +ERROR: could not parallelize SubPlan (cdbllize.c:598) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; +ERROR: could not parallelize SubPlan (cdbllize.c:598) +drop table t1; +-- Test UNION ALL command when combining SegmentGeneral locus and Replicated. +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +drop table if exists t_repl_seg2; +NOTICE: table "t_repl_seg2" does not exist, skipping +--end_ignore +create table t_repl (i int, j int) distributed replicated; +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table t_repl_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +insert into t_repl values (2, 2); +insert into t_repl_seg2 values (2, 2); +explain (costs off) +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + QUERY PLAN +--------------------------------------------------- + Explicit Gather Motion 3:1 (slice1; segments: 3) + Merge Key: with_dml_dr.i + -> Sort + Sort Key: with_dml_dr.i + -> Append + -> Insert on with_dml_dr + -> Result + -> Seq Scan on t_repl + Optimizer: Postgres query optimizer +(9 rows) + +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- Case when SegmentGeneral is originally propagated at less number +-- of segments. +explain (costs off) +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl_seg2 +order by 1; + QUERY PLAN +--------------------------------------------------------------- + Explicit Gather Motion 3:1 (slice2; segments: 3) + Merge Key: with_dml_dr.i + -> Sort + Sort Key: with_dml_dr.i + -> Append + -> Insert on with_dml_dr + -> Result + -> Broadcast Motion 1:3 (slice1; segments: 1) + -> Seq Scan on t_repl_seg2 + Optimizer: Postgres query optimizer +(10 rows) + +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl_seg2 +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- Case when final number of segments is aligned to Replicated subplan. 
+explain (costs off)
+with cte as (
+ insert into with_dml_dr_seg2
+ values (1,1)
+ returning i, j
+) select * from cte union all select * from t_repl
+order by 1;
+ QUERY PLAN
+---------------------------------------------------
+ Explicit Gather Motion 2:1 (slice1; segments: 2)
+ Merge Key: with_dml_dr_seg2.i
+ -> Sort
+ Sort Key: with_dml_dr_seg2.i
+ -> Append
+ -> Insert on with_dml_dr_seg2
+ -> Result
+ -> Seq Scan on t_repl
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+with cte as (
+ insert into with_dml_dr_seg2
+ values (1,1)
+ returning i, j
+) select * from cte union all select * from t_repl
+order by 1;
+ i | j
+---+---
+ 1 | 1
+ 2 | 2
+(2 rows)
+
+drop table t_repl_seg2;
+drop table t_repl;
+drop table with_dml_dr_seg2;
+drop table with_dml_dr;
diff --git a/src/test/regress/sql/with_clause.sql b/src/test/regress/sql/with_clause.sql
index 7e383bcf4485..ba1eea4ec5d7 100644
--- a/src/test/regress/sql/with_clause.sql
+++ b/src/test/regress/sql/with_clause.sql
@@ -3,7 +3,19 @@
 -- m/ERROR: Too much references to non-SELECT CTE \(allpaths\.c:\d+\)/
 -- s/\d+/XXX/g
 --
+-- m/ERROR: could not devise a plan \(planner\.c:\d+\)/
+-- s/\d+/XXX/g
+--
+-- m/ERROR: could not devise a plan \(cdbpath\.c:\d+\)/
+-- s/\d+/XXX/g
+--
+-- m/ERROR: could not parallelize SubPlan \(cdbllize\.c:\d+\)/
+-- s/\d+/XXX/g
+--
 -- end_matchsubs
+-- start_ignore
+create extension if not exists gp_debug_numsegments;
+-- end_ignore
 drop table if exists with_test1 cascade;
 create table with_test1 (i int, t text, value int) distributed by (i);
@@ -481,3 +493,507 @@ create table t_new as (with cte as
 (delete from with_dml where i > 0 returning *)
 select * from cte);
 drop table with_dml;
+
+-- Test various SELECT statements from CTE with
+-- modifying DML operations over replicated tables
+--start_ignore
+drop table if exists with_dml_dr;
+--end_ignore
+create table with_dml_dr(i int, j int) distributed replicated;
+
+-- Test plain SELECT from CTE with modifying DML queries on replicated table.
+-- Explicit Gather Motion should be present at the top of the plan.
+explain (costs off)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i
+) select count(*) from cte;
+
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i
+) select count(*) from cte;
+
+explain (costs off)
+with cte as (
+ update with_dml_dr
+ set j = j + 1 where i <= 5
+ returning j
+) select count(*) from cte;
+
+with cte as (
+ update with_dml_dr
+ set j = j + 1 where i <= 5
+ returning j
+) select count(*) from cte;
+
+explain (costs off)
+with cte as (
+ delete from with_dml_dr where i > 0
+ returning i
+) select count(*) from cte;
+
+with cte as (
+ delete from with_dml_dr where i > 0
+ returning i
+) select count(*) from cte;
+
+-- Test ORDER BY clause is applied correctly to the result of modifying
+-- CTE over replicated table.
+explain (costs off)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i
+) select * from cte order by i;
+
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i
+) select * from cte order by i;
+
+-- Test join operations between a CTE containing various modifying DML operations
+-- over a replicated table and other tables. Ensure that CdbLocusType_Replicated
+-- is compatible with other locus types during joins.
+-- Test join CdbLocusType_Replicated with CdbLocusType_SegmentGeneral.
+--start_ignore +drop table if exists t_repl; +--end_ignore +create table t_repl (i int, j int) distributed replicated; + +insert into t_repl values (1, 1), (2, 2), (3, 3); + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + +-- Test join CdbLocusType_Replicated with CdbLocusType_SegmentGeneral +-- in case when relations are propagated on different number of segments. +--start_ignore +drop table if exists with_dml_dr_seg2; +--end_ignore +select gp_debug_set_create_table_default_numsegments(2); +create table with_dml_dr_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + + +-- SegmentGeneral's number of segments is larger than Replicated's, +-- the join is performed at number of segments of Replicated locus. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + +-- SegmentGeneral's number of segments is less than Replicated's, +-- the join is performed at SingleQE. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join with_dml_dr_seg2 using (i); + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join with_dml_dr_seg2 using (i); + +drop table with_dml_dr_seg2; +drop table t_repl; + +-- Test join CdbLocusType_Replicated with CdbLocusType_SingleQE. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join + (select random() * 0 v from generate_series(1,5)) x on cte.i = x.v; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join + (select random() * 0 v from generate_series(1,5)) x on cte.i = x.v; + +-- Test join CdbLocusType_Replicated with CdbLocusType_Entry. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(-5,-1) i + returning i +) select count(*) from cte left join gp_segment_configuration on cte.i = port; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(-5,-1) i + returning i +) select count(*) from cte left join gp_segment_configuration on cte.i = port; + +-- Test join CdbLocusType_Replicated with CdbLocusType_General. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i) +select count(*) from cte join +(select a from generate_series(1,5) a) x on cte.i = x.a; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i) +select count(*) from cte join +(select a from generate_series(1,5) a) x on cte.i = x.a; + +-- Test join CdbLocusType_Replicated with CdbLocusType_Hashed +-- and CdbLocusType_Strewn. 
+--start_ignore +drop table if exists t_hashed; +drop table if exists t_strewn; +--end_ignore +create table t_hashed (i int, j int) distributed by (i); +create table t_strewn (i int, j int) distributed randomly; +insert into t_hashed select i, i * 2 from generate_series(1, 10) i; +insert into t_strewn select i, i * 2 from generate_series(1, 10) i; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed on cte.i = t_hashed.i; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed on cte.i = t_hashed.i; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_hashed on cte.i = t_hashed.i; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_hashed on cte.i = t_hashed.i; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn on cte.i = t_strewn.i; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn on cte.i = t_strewn.i; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_strewn on cte.i = t_strewn.i; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_strewn on cte.i = t_strewn.i; + +drop table t_strewn; +drop table t_hashed; + +-- Test join CdbLocusType_Replicated with CdbLocusType_Hashed and +-- CdbLocusType_Strewn in case when relations are propagated on +-- different number of segments. +select gp_debug_set_create_table_default_numsegments(2); +create table t_hashed_seg2 (i int, j int) distributed by (i); +create table t_strewn_seg2 (i int, j int) distributed randomly; +select gp_debug_reset_create_table_default_numsegments(); + +insert into t_hashed_seg2 select i, i * 2 from generate_series(1, 10) i; +insert into t_strewn_seg2 select i, i * 2 from generate_series(1, 10) i; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed_seg2 on cte.i = t_hashed_seg2.i; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed_seg2 on cte.i = t_hashed_seg2.i; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn_seg2 on cte.i = t_strewn_seg2.i; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn_seg2 on cte.i = t_strewn_seg2.i; + +drop table t_strewn_seg2; +drop table t_hashed_seg2; + +-- Test join CdbLocusType_Replicated with CdbLocusType_Replicated. +-- Join can be performed correctly only when CTE is shared. 
+set gp_cte_sharing = 1;
+
+explain (costs off)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i
+) select count(*) from cte a join cte b using (i);
+
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i
+) select count(*) from cte a join cte b using (i);
+
+reset gp_cte_sharing;
+
+-- Test prohibition of volatile functions applied to the
+-- locus Replicated. The appropriate error should be thrown.
+--start_ignore
+drop table if exists t_repl;
+--end_ignore
+create table t_repl (i int, j int) distributed replicated;
+
+-- Prohibit volatile qualifications.
+explain (costs off, verbose)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i, j
+) select * from cte where cte.j > random();
+
+-- Prohibit volatile returning list.
+explain (costs off, verbose)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i, j * random()
+) select * from cte;
+
+-- Prohibit volatile targetlist.
+explain (costs off, verbose)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i, j
+) select i, j * random() from cte;
+
+-- Prohibit volatile having qualifications.
+explain (costs off, verbose)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i, j
+) select i, sum(j) from cte group by i having sum(j) > random();
+
+-- Prohibit volatile join qualifications.
+explain (costs off, verbose)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,5) i
+ returning i, j
+) select * from cte join t_repl on cte.i = t_repl.j * random();
+
+drop table t_repl;
+
+-- Test that a node with locus Replicated is not broadcast inside
+-- a correlated/uncorrelated SubPlan. If the number of segments differs
+-- between the replicated node inside the SubPlan and the main plan,
+-- the proper error should be thrown.
+--start_ignore
+drop table if exists t1;
+drop table if exists with_dml_dr_seg2;
+--end_ignore
+
+create table t1 (i int, j int) distributed by (i);
+select gp_debug_set_create_table_default_numsegments(2);
+create table with_dml_dr_seg2 (i int, j int) distributed replicated;
+select gp_debug_reset_create_table_default_numsegments();
+
+insert into t1 select i, i from generate_series(1, 6) i;
+
+-- Case when number of segments is equal, no Broadcast at the top of CTE plan.
+explain (costs off)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,6) i
+ returning i, j
+) select * from t1
+where t1.i in (select i from cte)
+order by 1;
+
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,6) i
+ returning i, j
+) select * from t1
+where t1.i in (select i from cte)
+order by 1;
+
+explain (costs off)
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,6) i
+ returning i, j
+) select * from t1
+where t1.i in (select i from cte where cte.i = t1.j)
+order by 1;
+
+with cte as (
+ insert into with_dml_dr
+ select i, i * 100 from generate_series(1,6) i
+ returning i, j
+) select * from t1
+where t1.i in (select i from cte where cte.i = t1.j)
+order by 1;
+
+-- Case with unequal number of segments between replicated node inside the
+-- SubPlan and main plan, the error should be thrown.
+explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + +drop table t1; + +-- Test UNION ALL command when combining SegmentGeneral locus and Replicated. +--start_ignore +drop table if exists t_repl; +drop table if exists t_repl_seg2; +--end_ignore +create table t_repl (i int, j int) distributed replicated; + +select gp_debug_set_create_table_default_numsegments(2); +create table t_repl_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + +insert into t_repl values (2, 2); +insert into t_repl_seg2 values (2, 2); + +explain (costs off) +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + +-- Case when SegmentGeneral is originally propagated at less number +-- of segments. +explain (costs off) +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl_seg2 +order by 1; + +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl_seg2 +order by 1; + +-- Case when final number of segments is aligned to Replicated subplan. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + +with cte as ( + insert into with_dml_dr_seg2 + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + +drop table t_repl_seg2; +drop table t_repl; +drop table with_dml_dr_seg2; +drop table with_dml_dr;