Skip to content

Files

Latest commit

Dec 15, 2023
f400df2 · Dec 15, 2023

History

History
260 lines (172 loc) · 11 KB

20200710_02.md

File metadata and controls

260 lines (172 loc) · 11 KB

PostgreSQL x分组, y排序, 每组各取(N动态)条 - 子查询+子查询聚合使用

作者

digoal

日期

2020-07-10

标签

PostgreSQL , 分组 , 动态 , 过滤 , 子查询


背景

如果每个分组都需要输出, 使用窗口查询, 本例说的是输出的分组由用户提供, 并不是输出所有分组。

分组: 标签ID
排序: 标签权重
limit: 每个标签返回多少条

自定义分组+限制记录数.

create type sf as (  
  c int, -- 分组条件  
  k int, -- 分组条件  
  l int  -- limit多少条  
);  

SQL原理如下

select   
  -- 子查询  
  (select string_agg(vid::text, ',')  
  from   
    (select vid  
    from   
      test_video_box_a ,   
      -- 其他过滤条件的来源  
      ( select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
               coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
        from video_user_read where user_id = 7619473860554752   
      ) as  t   
    where   
    -- 分组条件c,k  
    categray = sf.c and keyword = sf.k   
    -- 其他过滤条件   
    and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
    -- 分组内的排序条件   
    order by weight desc   
    -- 每个分组limit多少条 l   
    limit sf.l)   
  t) as x   
from   
-- 分组条件, 每个分组取多少条,   
-- 通过自定义类型的数组来构建, 从而可以很方便的得到分组以及每个分组需要limit多少条  
unnest('{"(5,63,4)","(0,59,4)","(9,31,4)","(7,33,4)","(6,18,4)","(7,13,4)","(9,77,4)","(8,70,4)","(3,30,4)","(2,75,4)","(9,3,4)","(5,41,4)","(3,35,4)","(4,27,4)","(6,35,4)","(8,13,4)","(10,85,4)","(7,85,4)","(2,41,4)","(2,74,4)","(6,83,4)","(9,50,4)","(6,69,4)","(2,89,4)","(10,44,4)","(6,84,4)","(5,13,4)","(2,75,4)","(5,61,4)","(0,7,4)","(10,86,4)","(2,94,4)","(7,93,4)","(8,88,4)","(3,97,4)","(6,92,4)","(8,49,4)","(4,41,4)","(2,62,4)","(3,16,4)","(1,95,4)","(10,54,4)","(1,24,4)","(3,55,4)","(9,31,4)","(6,81,4)","(2,83,4)","(8,88,4)","(8,54,4)","(8,9,4)"}'::sf[]) sf   

多个以上查询的聚合

select string_agg(x, ',')  
from (  
         select (select string_agg(vid::text, ',')  
                 from (select vid  
                       from test_video_box_a , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
    coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
    from video_user_read where user_id = 7619473860554752) as  t   
                       where categray = sf.c and keyword = sf.k  
                       and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
                       order by weight desc   
                       limit sf.l) t) as x   
         from unnest('{"(5,63,4)","(0,59,4)","(9,31,4)","(7,33,4)","(6,18,4)","(7,13,4)","(9,77,4)","(8,70,4)","(3,30,4)","(2,75,4)","(9,3,4)","(5,41,4)","(3,35,4)","(4,27,4)","(6,35,4)","(8,13,4)","(10,85,4)","(7,85,4)","(2,41,4)","(2,74,4)","(6,83,4)","(9,50,4)","(6,69,4)","(2,89,4)","(10,44,4)","(6,84,4)","(5,13,4)","(2,75,4)","(5,61,4)","(0,7,4)","(10,86,4)","(2,94,4)","(7,93,4)","(8,88,4)","(3,97,4)","(6,92,4)","(8,49,4)","(4,41,4)","(2,62,4)","(3,16,4)","(1,95,4)","(10,54,4)","(1,24,4)","(3,55,4)","(9,31,4)","(6,81,4)","(2,83,4)","(8,88,4)","(8,54,4)","(8,9,4)"}'::sf[]) sf   
union all  
         select (select string_agg(vid::text, ',')  
                 from (select vid  
                       from test_video_box_b , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
    coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
    from video_user_read where user_id = 7619473860554752) as  t   
                       where categray = sf.c and keyword = sf.k  
                       and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
                       order by weight desc   
                       limit sf.l) t) as x   
         from unnest('{"(5,63,4)","(0,59,4)","(9,31,4)","(7,33,4)","(6,18,4)","(7,13,4)","(9,77,4)","(8,70,4)","(3,30,4)","(2,75,4)","(9,3,4)","(5,41,4)","(3,35,4)","(4,27,4)","(6,35,4)","(8,13,4)","(10,85,4)","(7,85,4)","(2,41,4)","(2,74,4)","(6,83,4)","(9,50,4)","(6,69,4)","(2,89,4)","(10,44,4)","(6,84,4)","(5,13,4)","(2,75,4)","(5,61,4)","(0,7,4)","(10,86,4)","(2,94,4)","(7,93,4)","(8,88,4)","(3,97,4)","(6,92,4)","(8,49,4)","(4,41,4)","(2,62,4)","(3,16,4)","(1,95,4)","(10,54,4)","(1,24,4)","(3,55,4)","(9,31,4)","(6,81,4)","(2,83,4)","(8,88,4)","(8,54,4)","(8,9,4)"}'::sf[]) sf   
union all  
         select (select string_agg(vid::text, ',')  
                 from (select vid  
                       from test_video_box_xxxx , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
    coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
    from video_user_read where user_id = 7619473860554752) as  t   
                       where categray = sf.c and keyword = sf.k  
                       and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
                       order by weight desc   
                       limit sf.l) t) as x   
         from unnest('{"(5,63,4)","(0,59,4)","(9,31,4)","(7,33,4)","(6,18,4)","(7,13,4)","(9,77,4)","(8,70,4)","(3,30,4)","(2,75,4)","(9,3,4)","(5,41,4)","(3,35,4)","(4,27,4)","(6,35,4)","(8,13,4)","(10,85,4)","(7,85,4)","(2,41,4)","(2,74,4)","(6,83,4)","(9,50,4)","(6,69,4)","(2,89,4)","(10,44,4)","(6,84,4)","(5,13,4)","(2,75,4)","(5,61,4)","(0,7,4)","(10,86,4)","(2,94,4)","(7,93,4)","(8,88,4)","(3,97,4)","(6,92,4)","(8,49,4)","(4,41,4)","(2,62,4)","(3,16,4)","(1,95,4)","(10,54,4)","(1,24,4)","(3,55,4)","(9,31,4)","(6,81,4)","(2,83,4)","(8,88,4)","(8,54,4)","(8,9,4)"}'::sf[]) sf   
) t;  

性能压测, 随机生成查询分组, limit条数的sf数组. 总共返回4*50=200条

CREATE OR REPLACE FUNCTION public.gen_array_sf()  
 RETURNS sf[]  
 LANGUAGE plpgsql  
 STRICT  
AS $function$  
declare  
  out_arr sf[];  
begin  
  for i in 1..50 loop  
    out_arr[i] := row (random() * 10, random() * 100,  4)::sf;   
  end loop;  
  return out_arr;  
end;  
$function$;  

压测函数

create or replace function test_union() returns text as $$  
declare  
  sql text;   
  cond sf[] := gen_array_sf();  
  res text;  
begin  
  sql := format($_$  
select string_agg(x, ',')  
from (  
         select (select string_agg(vid::text, ',')  
                 from (select vid  
                       from test_video_box_a , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
    coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
    from video_user_read where user_id = 26503284841971712) as  t   
                       where categray = sf.c and keyword = sf.k  
                       and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
                       order by weight desc   
                       limit sf.l) t) as x   
         from unnest(%L::sf[]) sf   
union all  
         select (select string_agg(vid::text, ',')  
                 from (select vid  
                       from test_video_box_b , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
    coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
    from video_user_read where user_id = 26503284841971712) as  t   
                       where categray = sf.c and keyword = sf.k  
                       and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
                       order by weight desc   
                       limit sf.l) t) as x   
         from unnest(%L::sf[]) sf   
union all  
         select (select string_agg(vid::text, ',')  
                 from (select vid  
                       from test_video_box_xxxx , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
    coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
    from video_user_read where user_id = 26503284841971712) as  t   
                       where categray = sf.c and keyword = sf.k  
                       and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
                       order by weight desc   
                       limit sf.l) t) as x   
         from unnest(%L::sf[]) sf   
) t  
$_$,  
cond,cond,cond);  
-- raise notice '%', sql;  
execute sql into res;  
return res;  
end;  
$$ language plpgsql strict;  

压测方法

vi test.sql  
select test_union();  
  
pgbench -M prepared -n -r -P 1 -f ./test.sql -c 16 -j 16 -T 1200  

您的愿望将传达给PG kernel hacker、数据库厂商等, 帮助提高数据库产品质量和功能, 说不定下一个PG版本就有您提出的功能点. 针对非常好的提议,奖励限量版PG文化衫、纪念品、贴纸、PG热门书籍等,奖品丰富,快来许愿。开不开森.

digoal's wechat