|
| 1 | +## PostgreSQL 按需切片的实现(TimescaleDB插件自动切片功能的plpgsql schemaless实现) |
| 2 | + |
| 3 | +### 作者 |
| 4 | +digoal |
| 5 | + |
| 6 | +### 日期 |
| 7 | +2017-11-02 |
| 8 | + |
| 9 | +### 标签 |
| 10 | +PostgreSQL , schemaless , 自动切片 , track_count , 计数器 , udf , plpgsql , timescaledb |
| 11 | + |
| 12 | +---- |
| 13 | + |
| 14 | +## 背景 |
| 15 | +TimescaleDB是PostgreSQL的一款时序数据库插件,其中自动切片是一个非常深入人心的功能。 |
| 16 | + |
| 17 | +http://www.timescale.com/ |
| 18 | + |
| 19 | + |
| 20 | + |
| 21 | +实际上PostgreSQL plpgsql也可以实现类似的功能,当然,前提是需要使用schemaless的模式。 |
| 22 | + |
| 23 | +schemaless的设计思路和应用举例: |
| 24 | + |
| 25 | +[《PostgreSQL 在铁老大订单系统中的schemaless设计和性能压测》](201709/20170927_03.md) |
| 26 | + |
| 27 | +[《PostgreSQL schemaless 的实现(类mongodb collection)》](201705/20170511_01.md) |
| 28 | + |
| 29 | +[《PostgreSQL 时序最佳实践 - 证券交易系统数据库设计 - 阿里云RDS PostgreSQL最佳实践》](201704/20170417_01.md) |
| 30 | + |
| 31 | +下面,就自动切片这个功能,我们看看schemaless的实现例子。 |
| 32 | + |
| 33 | +1、首先要监测写入量,通过track_counts参数,可以给数据的写入计数(默认track_counts参数是开启的)。 |
| 34 | + |
| 35 | +[《PostgreSQL pg_stat_reset清除track_counts的隐患》](../201711/20171101_01.md) |
| 36 | + |
| 37 | +``` |
| 38 | +postgres=# select * from pg_stat_all_tables where relname='test1'; |
| 39 | +-[ RECORD 1 ]-------+------- |
| 40 | +relid | 31129 |
| 41 | +schemaname | public |
| 42 | +relname | test1 |
| 43 | +seq_scan | 0 |
| 44 | +seq_tup_read | 0 |
| 45 | +idx_scan | |
| 46 | +idx_tup_fetch | |
| 47 | +n_tup_ins | 1000 |
| 48 | +n_tup_upd | 0 |
| 49 | +n_tup_del | 0 |
| 50 | +n_tup_hot_upd | 0 |
| 51 | +n_live_tup | 1000 |
| 52 | +n_dead_tup | 0 |
| 53 | +n_mod_since_analyze | 1000 |
| 54 | +last_vacuum | |
| 55 | +last_autovacuum | |
| 56 | +last_analyze | |
| 57 | +last_autoanalyze | |
| 58 | +vacuum_count | 0 |
| 59 | +autovacuum_count | 0 |
| 60 | +analyze_count | 0 |
| 61 | +autoanalyze_count | 0 |
| 62 | +``` |
| 63 | + |
| 64 | +2、当数据写入到一定量时,自动写下一张表。 |
| 65 | + |
| 66 | +## schemaless自动切片例子 |
| 67 | +1、设计: |
| 68 | + |
| 69 | +写入时,通过UDF写入,并且实时监控每种数据流的写入速度,并动态做数据分片。 |
| 70 | + |
| 71 | + |
| 72 | +2、测试表: |
| 73 | + |
| 74 | +``` |
| 75 | +create table log(id int, info text, crt_time timestamp default now()); |
| 76 | +create index idx_log_crt_time on log(crt_time); |
| 77 | +create table log_tmp (like log including all) inherits(log); |
| 78 | +``` |
| 79 | + |
| 80 | +3、分片规则: |
| 81 | + |
| 82 | +当记录数超过100000时,自动切换分区。 |
| 83 | + |
| 84 | + |
| 85 | +4、UDF定义: |
| 86 | + |
| 87 | +``` |
| 88 | +create or replace function f(v_id int, v_info text) returns void as $$ |
| 89 | +declare |
| 90 | + suffix int; |
| 91 | + v_rows int8; |
| 92 | + min_time timestamp; |
| 93 | + max_time timestamp; |
| 94 | +begin |
| 95 | + -- 插入 |
| 96 | + insert into log_tmp(id,info) values (v_id, v_info); |
| 97 | + |
| 98 | + -- 判断记录数是否达到切换阈值 |
| 99 | + select n_live_tup into v_rows from pg_stat_all_tables where relname='log_tmp' and schemaname='public'; |
| 100 | + |
| 101 | + -- 达到阈值,切换表 |
| 102 | + if v_rows >=100000 then |
| 103 | + select count(*) into suffix from pg_inherits where inhparent='log'::regclass; |
| 104 | + select min(crt_time), max(crt_time) into min_time, max_time from log_tmp ; |
| 105 | + execute 'alter table log_tmp add constraint ck_log_'||suffix||'_1 check (crt_time>='''||min_time||''' and crt_time<='''||max_time||''')'; |
| 106 | + execute 'alter table log_tmp rename to log_'||suffix; |
| 107 | + create table log_tmp (like log including all) inherits(log); |
| 108 | + end if; |
| 109 | + |
| 110 | + return; |
| 111 | + exception when others then |
| 112 | + return; |
| 113 | +end; |
| 114 | +$$ language plpgsql strict; |
| 115 | +``` |
| 116 | + |
| 117 | +5、压测 |
| 118 | + |
| 119 | +``` |
| 120 | +vi test.sql |
| 121 | + |
| 122 | +select f(1, 'test'); |
| 123 | +``` |
| 124 | + |
| 125 | +``` |
| 126 | +pgbench -M prepared -n -r -P 1 -f ./test.sql -c 64 -j 64 -T 120 |
| 127 | + |
| 128 | + |
| 129 | +NOTICE: merging column "id" with inherited definition |
| 130 | +NOTICE: merging column "info" with inherited definition |
| 131 | +NOTICE: merging column "crt_time" with inherited definition |
| 132 | +progress: 1.0 s, 25350.5 tps, lat 2.487 ms stddev 0.986 |
| 133 | +progress: 2.0 s, 26309.0 tps, lat 2.432 ms stddev 0.688 |
| 134 | +progress: 3.0 s, 26251.9 tps, lat 2.438 ms stddev 0.741 |
| 135 | +progress: 4.0 s, 26451.0 tps, lat 2.420 ms stddev 0.737 |
| 136 | +NOTICE: merging column "id" with inherited definition |
| 137 | +NOTICE: merging column "info" with inherited definition |
| 138 | +NOTICE: merging column "crt_time" with inherited definition |
| 139 | +progress: 5.0 s, 29471.0 tps, lat 2.172 ms stddev 0.844 |
| 140 | +progress: 6.0 s, 32971.0 tps, lat 1.941 ms stddev 0.670 |
| 141 | +progress: 7.0 s, 33028.0 tps, lat 1.938 ms stddev 0.661 |
| 142 | +progress: 8.0 s, 33101.0 tps, lat 1.933 ms stddev 0.656 |
| 143 | +NOTICE: merging column "id" with inherited definition |
| 144 | +NOTICE: merging column "info" with inherited definition |
| 145 | +NOTICE: merging column "crt_time" with inherited definition |
| 146 | +progress: 9.0 s, 32805.0 tps, lat 1.951 ms stddev 0.752 |
| 147 | + |
| 148 | +...... |
| 149 | +``` |
| 150 | + |
| 151 | +自动切片成功: |
| 152 | + |
| 153 | +``` |
| 154 | +postgres=# \d log_1 |
| 155 | + Table "public.log_1" |
| 156 | + Column | Type | Collation | Nullable | Default |
| 157 | +----------+-----------------------------+-----------+----------+--------- |
| 158 | + id | integer | | | |
| 159 | + info | text | | | |
| 160 | + crt_time | timestamp without time zone | | | now() |
| 161 | +Indexes: |
| 162 | + "log_tmp_crt_time_idx" btree (crt_time) |
| 163 | +Check constraints: |
| 164 | + "ck_log_1_1" CHECK (crt_time >= '2017-11-02 23:52:34.264264'::timestamp without time zone AND crt_time <= '2017-11-02 23:54:05.939958'::timestamp without time zone) |
| 165 | +Inherits: log |
| 166 | + |
| 167 | +postgres=# \d log_2 |
| 168 | + Table "public.log_2" |
| 169 | + Column | Type | Collation | Nullable | Default |
| 170 | +----------+-----------------------------+-----------+----------+--------- |
| 171 | + id | integer | | | |
| 172 | + info | text | | | |
| 173 | + crt_time | timestamp without time zone | | | now() |
| 174 | +Indexes: |
| 175 | + "log_tmp_crt_time_idx1" btree (crt_time) |
| 176 | +Check constraints: |
| 177 | + "ck_log_2_1" CHECK (crt_time >= '2017-11-02 23:54:05.948796'::timestamp without time zone AND crt_time <= '2017-11-02 23:54:10.946987'::timestamp without time zone) |
| 178 | +Inherits: log |
| 179 | +``` |
| 180 | + |
| 181 | +## 小结 |
| 182 | +timescaleDB插件还有很多其他的功能,在使用方便也更加的边界,待TimescaleDB插件成熟,还是推荐使用TimescaleDB。 |
0 commit comments