Skip to content

Commit c235a11

Browse files
committed
improve
1 parent 48fa37c commit c235a11

File tree

2 files changed

+105
-17
lines changed

2 files changed

+105
-17
lines changed

201711/20171126_01.md

Lines changed: 104 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,10 @@ create table tbl2 (
7979

8080
```
8181
create table tbl3 (
82-
imei text primary key, -- 手机唯一标识
83-
tagids int[] -- 标签数组
82+
imei text primary key, -- 手机唯一标识
83+
tagids int[], -- 标签数组
84+
ins_tags int[], -- 合并操作需要的中间字段
85+
del_tags int[] -- 合并操作需要的中间字段
8486
);
8587
8688
create index idx_tbl3_tagids on tbl3 using gin (tagids);
@@ -107,18 +109,20 @@ select imei from tbl3 where tagids && array[标签1, 标签2] and tagid @> array
107109
select v8,count(*) from tbl1 where
108110
imei = any (array(
109111
select imei from tbl3 where tagids @> array[标签1, 标签2]
110-
) )
112+
) )
111113
group by v8;
112114
```
113115

114116
3、新增或追加标签
115117

118+
使用intarray插件,简化数组交、并、差操作。
119+
116120
```
117121
create extension intarray;
118122
```
119123

120124
```
121-
insert into tbl3 (imei, tagids) values (?, ?[], 'insert') on conflict (imei) do update set tagids=tbl3.tagids|excluded.tagids;
125+
insert into tbl3 (imei, tagids) values (?, ?[]) on conflict (imei) do update set tagids=tbl3.tagids|excluded.tagids;
122126
```
123127

124128
4、删标签
@@ -133,38 +137,122 @@ update tbl3 set tagids = tagids - ?[] where imei=?;
133137
update tbl3 set tagids = ?[] where imei=?;
134138
```
135139

136-
6、批量并行新增、追加、删除、更新标签注意
140+
6、批量并行新增、追加、删除、更新标签优化
137141

138142
如果要一次性操作很多条记录(例如1000万条记录),并且有并行的贴标签操作(同一条用户被多个SQL更新)。需要注意两个问题:
139143

140144
6\.1 大事务导致膨胀的问题,建议分段操作。
141145

142146
6\.2 行锁冲突问题,建议新增(插入),然后合并到标签表。
143147

144-
优化方法,将直接写标签表,改成写行为日志,任务调度,批量合并:
148+
优化方法,
149+
150+
实现标签最终一致性。
151+
152+
将直接增、删、改标签表,改成写行为日志tag_log,采用任务调度,批量合并到标签表:
145153

146154
```
147155
create table tag_log (
148156
imei text, -- 手机唯一标识
149-
action text, -- insert, update, delete
157+
action text, -- insert, delete 表示增加、删除标签 (更新需求应该没有,如有,直接到标签表操作)
150158
tagids int[], -- 标签IDs
151159
crt_time timestamp default clock_timestamp() -- 时间
152160
);
153161
154162
create index idx_tag_log_1 on tag_log (crt_time);
155-
163+
```
164+
165+
串行任务,阅后即焚(假设-99999999是一个永远不存在的TAGID)
166+
167+
```
156168
-- CTE语法,支持阅后即焚的批量合并方法
157169
with tmp as (delete from tag_log where ctid = any ( array (
158170
select ctid from tag_log order by crt_time limit 10000 -- 按时序,批量取1万条
159171
)) returning * )
160-
, tmp1 as (... 按时序生成合并数据 ...)
161-
, ... 写入
162-
, ... 更新
163-
, ... 删除
164-
```
165-
166-
阅后即焚的处理速度,每秒百万行。
167-
172+
, tmp1 as (select imei,
173+
uniq(sort(array_agg(case when action='insert' then tagids else -99999999 end))) - (-99999999) AS ins_tags,
174+
uniq(sort(array_agg(case when action='delete' then tagids else -99999999 end))) - (-99999999) AS del_tags
175+
from (select imei, action, unnest(tagids) as tagids from tmp) t group by imei)
176+
insert into tbl3 (imei, tagids, ins_tags, del_tags)
177+
select imei, ins_tags-del_tags, ins_tags, del_tags from tmp1
178+
on conflict (imei) do update set tagids=((tbl3.tagids | excluded.ins_tags) - excluded.del_tags), ins_tags=excluded.ins_tags, del_tags=excluded.del_tags;
179+
```
180+
181+
并行任务,阅后即焚
182+
183+
```
184+
例如开启16个并行
185+
186+
abs(mod(hashtext(imei), 16))=?
187+
```
188+
189+
```
190+
-- CTE语法,支持阅后即焚的批量合并方法
191+
with tmp as (delete from tag_log where ctid = any ( array (
192+
select ctid from tag_log where abs(mod(hashtext(imei), 16))=0 order by crt_time limit 10000 -- 按时序,批量取1万条,按HASH并行
193+
)) returning * )
194+
, tmp1 as (select imei,
195+
uniq(sort(array_agg(case when action='insert' then tagids else -99999999 end))) - (-99999999) AS ins_tags,
196+
uniq(sort(array_agg(case when action='delete' then tagids else -99999999 end))) - (-99999999) AS del_tags
197+
from (select imei, action, unnest(tagids) as tagids from tmp) t group by imei)
198+
insert into tbl3 (imei, tagids, ins_tags, del_tags)
199+
select imei, ins_tags-del_tags, ins_tags, del_tags from tmp1
200+
on conflict (imei) do update set tagids=((tbl3.tagids | excluded.ins_tags) - excluded.del_tags), ins_tags=excluded.ins_tags, del_tags=excluded.del_tags;
201+
```
202+
203+
写成函数,方便调用
204+
205+
```
206+
create or replace function consume_tag_log(mo int, mov int, lim int) returns void as $$
207+
declare
208+
begin
209+
execute format($_$with tmp as (delete from tag_log where ctid = any ( array (
210+
select ctid from tag_log where abs(mod(hashtext(imei), %s))=%s order by crt_time limit %s
211+
)) returning * )
212+
, tmp1 as (select imei,
213+
uniq(sort(array_agg(case when action='insert' then tagids else -99999999 end))) - (-99999999) AS ins_tags,
214+
uniq(sort(array_agg(case when action='delete' then tagids else -99999999 end))) - (-99999999) AS del_tags
215+
from (select imei, action, unnest(tagids) as tagids from tmp) t group by imei)
216+
insert into tbl3 (imei, tagids, ins_tags, del_tags)
217+
select imei, ins_tags-del_tags, ins_tags, del_tags from tmp1
218+
on conflict (imei) do update set tagids=((tbl3.tagids | excluded.ins_tags) - excluded.del_tags), ins_tags=excluded.ins_tags, del_tags=excluded.del_tags$_$,
219+
mo, mov, lim);
220+
end;
221+
$$ language plpgsql strict;
222+
223+
224+
select consume_tag_log(16,0,10000); -- 并行处理
225+
select consume_tag_log(16,1,10000);
226+
.....
227+
select consume_tag_log(16,15,10000);
228+
```
229+
230+
```
231+
create or replace function consume_tag_log(lim int) returns void as $$
232+
declare
233+
begin
234+
execute format($_$with tmp as (delete from tag_log where ctid = any ( array (
235+
select ctid from tag_log order by crt_time limit %s
236+
)) returning * )
237+
, tmp1 as (select imei,
238+
uniq(sort(array_agg(case when action='insert' then tagids else -99999999 end))) - (-99999999) AS ins_tags,
239+
uniq(sort(array_agg(case when action='delete' then tagids else -99999999 end))) - (-99999999) AS del_tags
240+
from (select imei, action, unnest(tagids) as tagids from tmp) t group by imei)
241+
insert into tbl3 (imei, tagids, ins_tags, del_tags)
242+
select imei, ins_tags-del_tags, ins_tags, del_tags from tmp1
243+
on conflict (imei) do update set tagids=((tbl3.tagids | excluded.ins_tags) - excluded.del_tags), ins_tags=excluded.ins_tags, del_tags=excluded.del_tags$_$,
244+
lim);
245+
end;
246+
$$ language plpgsql strict;
247+
248+
249+
select consume_tag_log(10000); -- 每次处理1万条
250+
```
251+
252+
创建调度任务,执行消费函数调度即可。
253+
254+
**阅后即焚的处理速度,每秒 百万行。**
255+
168256
[《(OLTP) 高吞吐数据进出(堆存、行扫、无需索引) - 阅后即焚(读写大吞吐并测)》](../201711/20171107_32.md)
169257

170258
除了以上基于数组、GIN索引的设计,PostgreSQL还有一些技术,可以用在经营分析系统。

201711/20171126_02.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## [未完待续] 数据库生命周期管理
1+
## [未完待续] 数据库sharding
22

33
### 作者
44
digoal

0 commit comments

Comments
 (0)