digoal
2020-07-10
PostgreSQL , 分组 , 动态 , 过滤 , 子查询
如果每个分组都需要输出, 使用窗口查询, 本例说的是输出的分组由用户提供, 并不是输出所有分组。
分组: 标签ID
排序: 标签权重
limit: 每个标签返回多少条
自定义分组+限制记录数.
create type sf as (
c int, -- 分组条件
k int, -- 分组条件
l int -- limit多少条
);
SQL原理如下
select
-- 子查询
(select string_agg(vid::text, ',')
from
(select vid
from
test_video_box_a ,
-- 其他过滤条件的来源
( select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,
coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole
from video_user_read where user_id = 7619473860554752
) as t
where
-- 分组条件c,k
categray = sf.c and keyword = sf.k
-- 其他过滤条件
and (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)
-- 分组内的排序条件
order by weight desc
-- 每个分组limit多少条 l
limit sf.l)
t) as x
from
-- 分组条件, 每个分组取多少条,
-- 通过自定义类型的数组来构建, 从而可以很方便的得到分组以及每个分组需要limit多少条
unnest('{"(5,63,4)","(0,59,4)","(9,31,4)","(7,33,4)","(6,18,4)","(7,13,4)","(9,77,4)","(8,70,4)","(3,30,4)","(2,75,4)","(9,3,4)","(5,41,4)","(3,35,4)","(4,27,4)","(6,35,4)","(8,13,4)","(10,85,4)","(7,85,4)","(2,41,4)","(2,74,4)","(6,83,4)","(9,50,4)","(6,69,4)","(2,89,4)","(10,44,4)","(6,84,4)","(5,13,4)","(2,75,4)","(5,61,4)","(0,7,4)","(10,86,4)","(2,94,4)","(7,93,4)","(8,88,4)","(3,97,4)","(6,92,4)","(8,49,4)","(4,41,4)","(2,62,4)","(3,16,4)","(1,95,4)","(10,54,4)","(1,24,4)","(3,55,4)","(9,31,4)","(6,81,4)","(2,83,4)","(8,88,4)","(8,54,4)","(8,9,4)"}'::sf[]) sf
多个以上查询的聚合
select string_agg(x, ',')
from (
select (select string_agg(vid::text, ',')
from (select vid
from test_video_box_a , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,
coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole
from video_user_read where user_id = 7619473860554752) as t
where categray = sf.c and keyword = sf.k
and (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)
order by weight desc
limit sf.l) t) as x
from unnest('{"(5,63,4)","(0,59,4)","(9,31,4)","(7,33,4)","(6,18,4)","(7,13,4)","(9,77,4)","(8,70,4)","(3,30,4)","(2,75,4)","(9,3,4)","(5,41,4)","(3,35,4)","(4,27,4)","(6,35,4)","(8,13,4)","(10,85,4)","(7,85,4)","(2,41,4)","(2,74,4)","(6,83,4)","(9,50,4)","(6,69,4)","(2,89,4)","(10,44,4)","(6,84,4)","(5,13,4)","(2,75,4)","(5,61,4)","(0,7,4)","(10,86,4)","(2,94,4)","(7,93,4)","(8,88,4)","(3,97,4)","(6,92,4)","(8,49,4)","(4,41,4)","(2,62,4)","(3,16,4)","(1,95,4)","(10,54,4)","(1,24,4)","(3,55,4)","(9,31,4)","(6,81,4)","(2,83,4)","(8,88,4)","(8,54,4)","(8,9,4)"}'::sf[]) sf
union all
select (select string_agg(vid::text, ',')
from (select vid
from test_video_box_b , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,
coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole
from video_user_read where user_id = 7619473860554752) as t
where categray = sf.c and keyword = sf.k
and (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)
order by weight desc
limit sf.l) t) as x
from unnest('{"(5,63,4)","(0,59,4)","(9,31,4)","(7,33,4)","(6,18,4)","(7,13,4)","(9,77,4)","(8,70,4)","(3,30,4)","(2,75,4)","(9,3,4)","(5,41,4)","(3,35,4)","(4,27,4)","(6,35,4)","(8,13,4)","(10,85,4)","(7,85,4)","(2,41,4)","(2,74,4)","(6,83,4)","(9,50,4)","(6,69,4)","(2,89,4)","(10,44,4)","(6,84,4)","(5,13,4)","(2,75,4)","(5,61,4)","(0,7,4)","(10,86,4)","(2,94,4)","(7,93,4)","(8,88,4)","(3,97,4)","(6,92,4)","(8,49,4)","(4,41,4)","(2,62,4)","(3,16,4)","(1,95,4)","(10,54,4)","(1,24,4)","(3,55,4)","(9,31,4)","(6,81,4)","(2,83,4)","(8,88,4)","(8,54,4)","(8,9,4)"}'::sf[]) sf
union all
select (select string_agg(vid::text, ',')
from (select vid
from test_video_box_xxxx , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,
coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole
from video_user_read where user_id = 7619473860554752) as t
where categray = sf.c and keyword = sf.k
and (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)
order by weight desc
limit sf.l) t) as x
from unnest('{"(5,63,4)","(0,59,4)","(9,31,4)","(7,33,4)","(6,18,4)","(7,13,4)","(9,77,4)","(8,70,4)","(3,30,4)","(2,75,4)","(9,3,4)","(5,41,4)","(3,35,4)","(4,27,4)","(6,35,4)","(8,13,4)","(10,85,4)","(7,85,4)","(2,41,4)","(2,74,4)","(6,83,4)","(9,50,4)","(6,69,4)","(2,89,4)","(10,44,4)","(6,84,4)","(5,13,4)","(2,75,4)","(5,61,4)","(0,7,4)","(10,86,4)","(2,94,4)","(7,93,4)","(8,88,4)","(3,97,4)","(6,92,4)","(8,49,4)","(4,41,4)","(2,62,4)","(3,16,4)","(1,95,4)","(10,54,4)","(1,24,4)","(3,55,4)","(9,31,4)","(6,81,4)","(2,83,4)","(8,88,4)","(8,54,4)","(8,9,4)"}'::sf[]) sf
) t;
性能压测, 随机生成查询分组, limit条数的sf数组. 总共返回4*50=200条
CREATE OR REPLACE FUNCTION public.gen_array_sf()
RETURNS sf[]
LANGUAGE plpgsql
STRICT
AS $function$
declare
out_arr sf[];
begin
for i in 1..50 loop
out_arr[i] := row (random() * 10, random() * 100, 4)::sf;
end loop;
return out_arr;
end;
$function$;
压测函数
create or replace function test_union() returns text as $$
declare
sql text;
cond sf[] := gen_array_sf();
res text;
begin
sql := format($_$
select string_agg(x, ',')
from (
select (select string_agg(vid::text, ',')
from (select vid
from test_video_box_a , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,
coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole
from video_user_read where user_id = 26503284841971712) as t
where categray = sf.c and keyword = sf.k
and (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)
order by weight desc
limit sf.l) t) as x
from unnest(%L::sf[]) sf
union all
select (select string_agg(vid::text, ',')
from (select vid
from test_video_box_b , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,
coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole
from video_user_read where user_id = 26503284841971712) as t
where categray = sf.c and keyword = sf.k
and (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)
order by weight desc
limit sf.l) t) as x
from unnest(%L::sf[]) sf
union all
select (select string_agg(vid::text, ',')
from (select vid
from test_video_box_xxxx , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,
coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole
from video_user_read where user_id = 26503284841971712) as t
where categray = sf.c and keyword = sf.k
and (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)
order by weight desc
limit sf.l) t) as x
from unnest(%L::sf[]) sf
) t
$_$,
cond,cond,cond);
-- raise notice '%', sql;
execute sql into res;
return res;
end;
$$ language plpgsql strict;
压测方法
vi test.sql
select test_union();
pgbench -M prepared -n -r -P 1 -f ./test.sql -c 16 -j 16 -T 1200
您的愿望将传达给PG kernel hacker、数据库厂商等, 帮助提高数据库产品质量和功能, 说不定下一个PG版本就有您提出的功能点. 针对非常好的提议,奖励限量版PG文化衫、纪念品、贴纸、PG热门书籍等,奖品丰富,快来许愿。开不开森.