From 939ba2baca7cb742c4eac962da47d70b86aa19f7 Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Thu, 25 Jan 2024 19:33:50 +1000 Subject: [PATCH] Fix queryId calculation for queries with grouping clause (#665) Problem description: if 'pg_stat_statements' extension is enabled and a query containing GROUP BY clause with ROLLUP, CUBE or GROUPING SETS or with GROUPING function is executed, queryId calculation doesn't take into account these grouping clause parameters and GROUPING functions. So, semantically different queries are treated by pg_stat_statements as equivalent queries. Plus warning messages appear during jumbling. Also, jumbling logic doesn't handle properly queries with group_id() function call and calls of functions with 'anytable' parameter. First type of queries causes warnings. Second type of query causes error message "unrecognized RTE kind: 7". Expected correct behavior: queries with different grouping clauses or different usage of the GROUPING function are treated by pg_stat_statements as different queries, and no warning messages appear during the query execution. Cause: In JumbleExpr there is no handling for node tag and T_TableValueExpr and T_GroupingSet is handled incorrectly. Jumble hashing is used as queryId. Error message "unrecognized RTE kind: 7" caused by not handled range table type RTE_TABLEFUNCTION in JumbleRangeTable. Fix: Handling logic for missed node tags was added. According to comments in queryjumble.c, main guideline how to handle query tree is: "Rule of thumb for what to include is that we should ignore anything not semantically significant (such as alias names) as well as anything that can be deduced from child nodes (else we'd just be double- hashing that piece of information)." For T_GroupingSet we append to the jumble the grouping type and list of groupsets. Field 'location' is not appended to the jumble because it is the textual location from parser and is not semantically significant. For RTE_TABLEFUNCTION - 'functions' field of RangeTblEntry was added to the jumble. RangeTblEntry in case of RTE_TABLEFUNCTION uses 'functions' and 'subquery' fields (other significant fields are null). But 'subquery' is duplicated in TableValueExpr below, so do not jumble it. For T_TableValueExpr - 'subquery' field of TableValueExpr (the subquery that is inside "TABLE()" statement) was added to the jumble. Changes from original commit: 1. Cases for T_GroupId and T_GroupingFunc are already present, no fix needed. 2. Remove case for T_Integer because it is not necessary since GPDB 7 uses T_IntList instead. 3. T_GroupingClause is now named T_GroupingSet. Its case was not correct in GPDB 7, change to match implementation from GPDB 6. 4. Rename test to jumble since there were already tests present in GPDB 7. 5. The tests in pg_stat_statements were disabled before because they do not set up shared_preload_libraries and they don't disable optimizer. Add files enable_pg_stat_statements and disable_pg_stat_statements to adapt pg_stat_statements tests to current testing utilities. This allows to remove some initialization and cleanup from the new test. 6. Fix test output for GPDB 7. Notably, ROLLUP now outputs 1 row even if the table has no rows in it. (cherry picked from commit fa44e5054f1147881ff4bf2b064225d0a722ae02) --- GNUmakefile.in | 3 + contrib/pg_stat_statements/Makefile | 10 +- .../expected/disable_pg_stat_statements.out | 0 .../expected/enable_pg_stat_statements.out | 0 .../pg_stat_statements/expected/jumble.out | 142 ++++++++++++++++++ .../expected/pg_stat_statements.out | 4 + .../sql/disable_pg_stat_statements.sql | 4 + .../sql/enable_pg_stat_statements.sql | 4 + contrib/pg_stat_statements/sql/jumble.sql | 54 +++++++ .../sql/pg_stat_statements.sql | 6 + src/backend/utils/misc/queryjumble.c | 9 ++ 11 files changed, 232 insertions(+), 4 deletions(-) create mode 100644 contrib/pg_stat_statements/expected/disable_pg_stat_statements.out create mode 100644 contrib/pg_stat_statements/expected/enable_pg_stat_statements.out create mode 100644 contrib/pg_stat_statements/expected/jumble.out create mode 100644 contrib/pg_stat_statements/sql/disable_pg_stat_statements.sql create mode 100644 contrib/pg_stat_statements/sql/enable_pg_stat_statements.sql create mode 100644 contrib/pg_stat_statements/sql/jumble.sql diff --git a/GNUmakefile.in b/GNUmakefile.in index f63aa14e98e4..917773d6991b 100644 --- a/GNUmakefile.in +++ b/GNUmakefile.in @@ -12,6 +12,7 @@ $(call recurse,all install,src config) all: $(MAKE) -C contrib/auto_explain all + $(MAKE) -C contrib/pg_stat_statements all $(MAKE) -C contrib/citext all $(MAKE) -C contrib/file_fdw all $(MAKE) -C contrib/postgres_fdw all @@ -63,6 +64,7 @@ html man: install: $(MAKE) -C contrib/auto_explain $@ + $(MAKE) -C contrib/pg_stat_statements $@ $(MAKE) -C contrib/citext $@ $(MAKE) -C contrib/file_fdw $@ $(MAKE) -C contrib/postgres_fdw $@ @@ -182,6 +184,7 @@ $(call recurse,checkprep, src/test src/pl src/interfaces/ecpg contrib src/bin g # recipe body). ICW_TARGETS = src/test src/pl src/interfaces/gppc ICW_TARGETS += contrib/auto_explain contrib/citext contrib/btree_gin +ICW_TARGETS += contrib/pg_stat_statements ICW_TARGETS += contrib/file_fdw contrib/postgres_fdw contrib/formatter_fixedwidth ICW_TARGETS += contrib/extprotocol contrib/dblink contrib/pg_trgm ICW_TARGETS += contrib/indexscan contrib/hstore contrib/ltree contrib/pgcrypto contrib/isn diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile index d19eec69d0b4..940a0fb99c8a 100644 --- a/contrib/pg_stat_statements/Makefile +++ b/contrib/pg_stat_statements/Makefile @@ -14,10 +14,12 @@ PGFILEDESC = "pg_stat_statements - execution statistics of SQL statements" LDFLAGS_SL += $(filter -lm, $(LIBS)) REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf -REGRESS = pg_stat_statements olap_setup olap_group -# Disabled because these tests require "shared_preload_libraries=pg_stat_statements", -# which typical installcheck users do not have (e.g. buildfarm clients). -NO_INSTALLCHECK = 1 +REGRESS = enable_pg_stat_statements \ + pg_stat_statements \ + olap_setup \ + olap_group \ + jumble \ + disable_pg_stat_statements ifdef USE_PGXS PG_CONFIG = pg_config diff --git a/contrib/pg_stat_statements/expected/disable_pg_stat_statements.out b/contrib/pg_stat_statements/expected/disable_pg_stat_statements.out new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/contrib/pg_stat_statements/expected/enable_pg_stat_statements.out b/contrib/pg_stat_statements/expected/enable_pg_stat_statements.out new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/contrib/pg_stat_statements/expected/jumble.out b/contrib/pg_stat_statements/expected/jumble.out new file mode 100644 index 000000000000..9004af970e17 --- /dev/null +++ b/contrib/pg_stat_statements/expected/jumble.out @@ -0,0 +1,142 @@ +CREATE TABLE t(a int, b text) DISTRIBUTED BY (a); +-- Known issue: query is not added to pg_stat_statements statistics in +-- case it is planned by GPORCA. So disable GPORCA during tests. +SET optimizer=off; +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT GROUPING (a) FROM t GROUP BY ROLLUP(a, b); + grouping +---------- + 1 +(1 row) + +-- launch not equivalent query +SELECT GROUPING (b) FROM t GROUP BY ROLLUP(a, b); + grouping +---------- + 1 +(1 row) + +-- check group_id() in a query +SELECT group_id() FROM t GROUP BY ROLLUP(a, b); + group_id +---------- + 0 +(1 row) + +-- check that queries have separate entries +SELECT query, calls FROM pg_stat_statements ORDER BY query; + query | calls +--------------------------------------------------+------- + SELECT group_id() FROM t GROUP BY ROLLUP(a, b) | 1 + SELECT GROUPING (a) FROM t GROUP BY ROLLUP(a, b) | 1 + SELECT GROUPING (b) FROM t GROUP BY ROLLUP(a, b) | 1 + SELECT pg_stat_statements_reset() | 1 +(4 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +-- check that different grouping options result in separate entries +SELECT COUNT (*) FROM t GROUP BY ROLLUP(a, b); + count +------- + 0 +(1 row) + +SELECT COUNT (*) FROM t GROUP BY CUBE(a, b); + count +------- + 0 +(1 row) + +SELECT COUNT (*) FROM t GROUP BY GROUPING SETS(a, b); + count +------- +(0 rows) + +SELECT COUNT (*) FROM t GROUP BY GROUPING SETS((a), (a, b)); + count +------- +(0 rows) + +SELECT COUNT (*) FROM t GROUP BY a, b; + count +------- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query; + query | calls +-------------------------------------------------------------+------- + SELECT COUNT (*) FROM t GROUP BY a, b | 1 + SELECT COUNT (*) FROM t GROUP BY CUBE(a, b) | 1 + SELECT COUNT (*) FROM t GROUP BY GROUPING SETS((a), (a, b)) | 1 + SELECT COUNT (*) FROM t GROUP BY GROUPING SETS(a, b) | 1 + SELECT COUNT (*) FROM t GROUP BY ROLLUP(a, b) | 1 + SELECT pg_stat_statements_reset() | 1 +(6 rows) + +-- check several parameters options in ROLLUP +-- all should result in separate entries +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT COUNT (*) FROM t GROUP BY ROLLUP(a, b); + count +------- + 0 +(1 row) + +SELECT COUNT (*) FROM t GROUP BY ROLLUP(b); + count +------- + 0 +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query; + query | calls +-----------------------------------------------+------- + SELECT COUNT (*) FROM t GROUP BY ROLLUP(a, b) | 1 + SELECT COUNT (*) FROM t GROUP BY ROLLUP(b) | 1 + SELECT pg_stat_statements_reset() | 1 +(3 rows) + +--- check anytable parameter for a function +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +-- call of anytable_out will cause an error, +-- thus prevent actual call by adding FALSE condition +SELECT * FROM anytable_out(TABLE(SELECT * FROM t)) WHERE 1 = 0; + anytable_out +-------------- +(0 rows) + +SELECT * FROM anytable_out(TABLE(SELECT * FROM t WHERE a=0)) WHERE 1 = 0; + anytable_out +-------------- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query; + query | calls +-----------------------------------------------------------------------------+------- + SELECT * FROM anytable_out(TABLE(SELECT * FROM t)) WHERE $1 = $2 | 1 + SELECT * FROM anytable_out(TABLE(SELECT * FROM t WHERE a=$1)) WHERE $2 = $3 | 1 + SELECT pg_stat_statements_reset() | 1 +(3 rows) + +DROP TABLE t; +RESET optimizer; diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out index 1202a8a33a81..cae7d4992377 100644 --- a/contrib/pg_stat_statements/expected/pg_stat_statements.out +++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out @@ -1,4 +1,7 @@ CREATE EXTENSION pg_stat_statements; +-- Known issue: query is not added to pg_stat_statements statistics in +-- case it is planned by GPORCA. So disable GPORCA during tests. +SET optimizer=off; -- -- simple and compound statements -- @@ -596,3 +599,4 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; DROP ROLE regress_stats_user1; DROP ROLE regress_stats_user2; DROP EXTENSION pg_stat_statements; +RESET optimizer; diff --git a/contrib/pg_stat_statements/sql/disable_pg_stat_statements.sql b/contrib/pg_stat_statements/sql/disable_pg_stat_statements.sql new file mode 100644 index 000000000000..2416cbf7b920 --- /dev/null +++ b/contrib/pg_stat_statements/sql/disable_pg_stat_statements.sql @@ -0,0 +1,4 @@ +-- start_ignore +\! gpconfig -r shared_preload_libraries; +\! gpstop -raq -M fast; +-- end_ignore diff --git a/contrib/pg_stat_statements/sql/enable_pg_stat_statements.sql b/contrib/pg_stat_statements/sql/enable_pg_stat_statements.sql new file mode 100644 index 000000000000..1ca67da95530 --- /dev/null +++ b/contrib/pg_stat_statements/sql/enable_pg_stat_statements.sql @@ -0,0 +1,4 @@ +-- start_ignore +\! gpconfig -c shared_preload_libraries -v 'pg_stat_statements'; +\! gpstop -raq -M fast; +-- end_ignore diff --git a/contrib/pg_stat_statements/sql/jumble.sql b/contrib/pg_stat_statements/sql/jumble.sql new file mode 100644 index 000000000000..1e4d93c1605e --- /dev/null +++ b/contrib/pg_stat_statements/sql/jumble.sql @@ -0,0 +1,54 @@ +-- start_ignore +CREATE EXTENSION IF NOT EXISTS pg_stat_statements; +DROP TABLE IF EXISTS t; +-- end_ignore +CREATE TABLE t(a int, b text) DISTRIBUTED BY (a); + +-- Known issue: query is not added to pg_stat_statements statistics in +-- case it is planned by GPORCA. So disable GPORCA during tests. +SET optimizer=off; + +SELECT pg_stat_statements_reset(); + +SELECT GROUPING (a) FROM t GROUP BY ROLLUP(a, b); +-- launch not equivalent query +SELECT GROUPING (b) FROM t GROUP BY ROLLUP(a, b); +-- check group_id() in a query +SELECT group_id() FROM t GROUP BY ROLLUP(a, b); + +-- check that queries have separate entries +SELECT query, calls FROM pg_stat_statements ORDER BY query; + +SELECT pg_stat_statements_reset(); + +-- check that different grouping options result in separate entries +SELECT COUNT (*) FROM t GROUP BY ROLLUP(a, b); +SELECT COUNT (*) FROM t GROUP BY CUBE(a, b); +SELECT COUNT (*) FROM t GROUP BY GROUPING SETS(a, b); +SELECT COUNT (*) FROM t GROUP BY GROUPING SETS((a), (a, b)); +SELECT COUNT (*) FROM t GROUP BY a, b; + +SELECT query, calls FROM pg_stat_statements ORDER BY query; + +-- check several parameters options in ROLLUP +-- all should result in separate entries +SELECT pg_stat_statements_reset(); + +SELECT COUNT (*) FROM t GROUP BY ROLLUP(a, b); +SELECT COUNT (*) FROM t GROUP BY ROLLUP(b); + +SELECT query, calls FROM pg_stat_statements ORDER BY query; + +--- check anytable parameter for a function +SELECT pg_stat_statements_reset(); + +-- call of anytable_out will cause an error, +-- thus prevent actual call by adding FALSE condition +SELECT * FROM anytable_out(TABLE(SELECT * FROM t)) WHERE 1 = 0; +SELECT * FROM anytable_out(TABLE(SELECT * FROM t WHERE a=0)) WHERE 1 = 0; + +SELECT query, calls FROM pg_stat_statements ORDER BY query; + +DROP TABLE t; + +RESET optimizer; diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql index 4280ba98b433..98dded373206 100644 --- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql +++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql @@ -1,5 +1,9 @@ CREATE EXTENSION pg_stat_statements; +-- Known issue: query is not added to pg_stat_statements statistics in +-- case it is planned by GPORCA. So disable GPORCA during tests. +SET optimizer=off; + -- -- simple and compound statements -- @@ -260,3 +264,5 @@ DROP ROLE regress_stats_user1; DROP ROLE regress_stats_user2; DROP EXTENSION pg_stat_statements; + +RESET optimizer; diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c index 96ed1a2c825b..354d1efa5534 100644 --- a/src/backend/utils/misc/queryjumble.c +++ b/src/backend/utils/misc/queryjumble.c @@ -185,6 +185,7 @@ JumbleRangeTable(JumbleState *jstate, List *rtable) APP_JUMB(rte->jointype); break; case RTE_FUNCTION: + case RTE_TABLEFUNCTION: JumbleExpr(jstate, (Node *) rte->functions); break; case RTE_TABLEFUNC: @@ -646,6 +647,7 @@ JumbleExpr(JumbleState *jstate, Node *node) { GroupingSet *gsnode = (GroupingSet *) node; + APP_JUMB(gsnode->kind); JumbleExpr(jstate, (Node *) gsnode->content); } break; @@ -706,6 +708,13 @@ JumbleExpr(JumbleState *jstate, Node *node) JumbleExpr(jstate, (Node *) tsc->repeatable); } break; + case T_TableValueExpr: + { + TableValueExpr *tve = (TableValueExpr *) node; + + JumbleQueryInternal(jstate, (Query *) tve->subquery); + } + break; default: /* Only a warning, since we can stumble along anyway */ elog(WARNING, "unrecognized node type: %d",