Skip to content

Commit

Permalink
Fix approx_count_distinct for queries without a FROM (duckdb#524)
Browse files Browse the repository at this point in the history
I noticed two issues with the new `approx_count_distinct`
implementation:

1. If no FROM clause was used it was not possible to use it
2. It would not be detected correctly as duckdb-only without
   `duckdb.force_execution = true` (or some  other mechanism). This
   fixes both of those issues.

Related to duckdb#499
  • Loading branch information
JelteF authored and ritwizsinha committed Jan 11, 2025
1 parent 8743bb4 commit 1cb6e6d
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 10 deletions.
39 changes: 29 additions & 10 deletions src/pgduckdb_hooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ ContainsDuckdbItems(Node *node, void *context) {
}
}

if (IsA(node, Aggref)) {
Aggref *func = castNode(Aggref, node);
if (pgduckdb::IsDuckdbOnlyFunction(func->aggfnoid)) {
return true;
}
}

#if PG_VERSION_NUM >= 160000
return expression_tree_walker(node, ContainsDuckdbItems, context);
#else
Expand All @@ -118,6 +125,27 @@ NeedsDuckdbExecution(Query *query) {
return ContainsDuckdbItems((Node *)query, NULL);
}

/*
* We only check ContainsFromClause if duckdb.force_execution is set to true.
*
* If there's no FROM clause, we're only selecting constants. From a
* performance perspective there's not really a point in using DuckDB. If we
* forward all of such queries to DuckDB anyway, then many queries that are
* used to inspect postgres will throw a warning or return incorrect results.
* For example:
*
* SELECT current_setting('work_mem');
*
* So even if a user sets duckdb.force_execution = true, we still won't
* forward such queries to DuckDB. With one exception: If the query
* requires duckdb e.g. due to a duckdb-only function being used, we'll
* still executing this in DuckDB.
*/
static bool
ContainsFromClause(Query *query) {
return query->rtable;
}

static bool
IsAllowedStatement(Query *query, bool throw_error = false) {
int elevel = throw_error ? ERROR : DEBUG4;
Expand All @@ -142,15 +170,6 @@ IsAllowedStatement(Query *query, bool throw_error = false) {
}
}

/*
* If there's no rtable, we're only selecting constants. There's no point
* in using DuckDB for that.
*/
if (!query->rtable) {
elog(elevel, "DuckDB usage requires at least one table");
return false;
}

/*
* If any table is from pg_catalog, we don't want to use DuckDB. This is
* because DuckDB has its own pg_catalog tables that contain different data
Expand All @@ -172,7 +191,7 @@ DuckdbPlannerHook_Cpp(Query *parse, const char *query_string, int cursor_options
IsAllowedStatement(parse, true);

return DuckdbPlanNode(parse, query_string, cursor_options, bound_params, true);
} else if (duckdb_force_execution && IsAllowedStatement(parse)) {
} else if (duckdb_force_execution && IsAllowedStatement(parse) && ContainsFromClause(parse)) {
PlannedStmt *duckdbPlan = DuckdbPlanNode(parse, query_string, cursor_options, bound_params, false);
if (duckdbPlan) {
return duckdbPlan;
Expand Down
13 changes: 13 additions & 0 deletions test/regression/expected/approx_count_distinct.out
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,17 @@ SELECT a, approx_count_distinct(b) OVER (PARTITION BY a) FROM t ORDER BY a;
5 | 1
(8 rows)

SELECT approx_count_distinct(1);
approx_count_distinct
-----------------------
1
(1 row)

SET duckdb.force_execution = false;
SELECT approx_count_distinct(1);
approx_count_distinct
-----------------------
1
(1 row)

DROP TABLE t;
3 changes: 3 additions & 0 deletions test/regression/sql/approx_count_distinct.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@ INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h');
SELECT approx_count_distinct(a), approx_count_distinct(b) FROM t;
SELECT a, approx_count_distinct(b) FROM t GROUP BY a ORDER BY a;
SELECT a, approx_count_distinct(b) OVER (PARTITION BY a) FROM t ORDER BY a;
SELECT approx_count_distinct(1);
SET duckdb.force_execution = false;
SELECT approx_count_distinct(1);
DROP TABLE t;

0 comments on commit 1cb6e6d

Please sign in to comment.