digoal
2021-09-08
PostgreSQL , hash , partition table
如何根据分区字段的value得到它应该属于哪个分区?
使用分区键值计算hash分片ID:
- 分区字段: id
- id类型: int4
- 分区个数: 256
- 计算分片ID, 返回结果: 0..255
计算方法:
(
(
(hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563
) % 256
)::int + 256
) % 256
或
(case
when
(hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563) % 256 < 0
then
256 + (hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563) % 256
else
(hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563) % 256
end)::int
或
执行计划会告诉你是哪个分区
postgres=# explain select * from p where id=2;
QUERY PLAN
----------------------------------------------------------
Seq Scan on p122 p (cost=0.00..195.19 rows=1 width=263)
Filter: (id = 2)
(2 rows)
https://doxygen.postgresql.org/partbounds_8c_source.html#l04740
https://blog.csdn.net/kmblack1/article/details/112639278
https://www.modb.pro/db/105816
/*
* Combine two 64-bit hash values, resulting in another hash value, using the
* same kind of technique as hash_combine(). Testing shows that this also
* produces good bit mixing.
*/
static inline uint64
hash_combine64(uint64 a, uint64 b)
{
/* 0x49a0f4dd15e5a8e3 is 64bit random data */
a ^= b + UINT64CONST(0x49a0f4dd15e5a8e3) + (a << 54) + (a >> 7);
return a;
}
src/include/catalog/partition.h
#define HASH_PARTITION_SEED UINT64CONST(0x7A5B22367996DCFD)
select
cast(cast('x'||'7A5B22367996DCFD' as bit(64))as bigint) as "HASH_PARTITION_SEED",
cast(cast('x'||'49a0f4dd15e5a8e3' as bit(64))as bigint) as "hash_combine64"
HASH_PARTITION_SEED | hash_combine64
---------------------+---------------------
8816678312871386365 | 5305509591434766563
(1 row)
postgres=# \df *.*extended
List of functions
Schema | Name | Result data type | Argument data types | Type
------------+--------------------------+------------------+-------------------------------------+------
pg_catalog | hash_aclitem_extended | bigint | aclitem, bigint | func
pg_catalog | hash_array_extended | bigint | anyarray, bigint | func
pg_catalog | hash_multirange_extended | bigint | anymultirange, bigint | func
pg_catalog | hash_numeric_extended | bigint | numeric, bigint | func
pg_catalog | hash_range_extended | bigint | anyrange, bigint | func
pg_catalog | hash_record_extended | bigint | record, bigint | func
pg_catalog | hashbpcharextended | bigint | character, bigint | func
pg_catalog | hashcharextended | bigint | "char", bigint | func
pg_catalog | hashenumextended | bigint | anyenum, bigint | func
pg_catalog | hashfloat4extended | bigint | real, bigint | func
pg_catalog | hashfloat8extended | bigint | double precision, bigint | func
pg_catalog | hashinetextended | bigint | inet, bigint | func
pg_catalog | hashint2extended | bigint | smallint, bigint | func
pg_catalog | hashint4extended | bigint | integer, bigint | func
pg_catalog | hashint8extended | bigint | bigint, bigint | func
pg_catalog | hashmacaddr8extended | bigint | macaddr8, bigint | func
pg_catalog | hashmacaddrextended | bigint | macaddr, bigint | func
pg_catalog | hashnameextended | bigint | name, bigint | func
pg_catalog | hashoidextended | bigint | oid, bigint | func
pg_catalog | hashoidvectorextended | bigint | oidvector, bigint | func
pg_catalog | hashtextextended | bigint | text, bigint | func
pg_catalog | hashtidextended | bigint | tid, bigint | func
pg_catalog | hashvarlenaextended | bigint | internal, bigint | func
pg_catalog | interval_hash_extended | bigint | interval, bigint | func
pg_catalog | jsonb_hash_extended | bigint | jsonb, bigint | func
pg_catalog | pg_lsn_hash_extended | bigint | pg_lsn, bigint | func
pg_catalog | time_hash_extended | bigint | time without time zone, bigint | func
pg_catalog | timestamp_hash_extended | bigint | timestamp without time zone, bigint | func
pg_catalog | timetz_hash_extended | bigint | time with time zone, bigint | func
pg_catalog | uuid_hash_extended | bigint | uuid, bigint | func
(30 rows)
do language plpgsql $$
declare
begin
create unlogged table p (id int, info text, crt_time timestamp, x uuid, n name, i int8, c1 varchar(500), c2 char(100), c3 int2, ts timestamptz(3))
partition by hash (id);
for i in 0..255 loop
execute format ( 'create unlogged table p%s partition of p for values with (modulus %s, REMAINDER %s)', i, 256, i);
end loop;
end;
$$;
insert into p
select i, random()::text, clock_timestamp(),
gen_random_uuid(), md5(random()::text), i+1,
md5(random()::text), md5(random()::text),
random()*32767, clock_timestamp()
from generate_series(1,1000000) i;
SQL 获得hash分片, 从0开始计数
select tableoid::regclass, id,
(case when
(hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563) % 256 < 0 then
256 + (hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563) % 256
else (hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563) % 256
end)::int as hash
from p order by random() limit 10;
tableoid | id | hash
----------+--------+------
p242 | 280431 | 242
p83 | 19278 | 83
p224 | 222672 | 224
p6 | 970558 | 6
p140 | 170988 | 140
p41 | 693193 | 41
p73 | 120319 | 73
p148 | 979893 | 148
p65 | 151618 | 65
p199 | 365620 | 199
(10 rows)
或
select tableoid::regclass, id,
(
(
(hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563
) % 256
)::int + 256
) % 256
from p order by random() limit 10;
验证正确性
select satisfies_hash_partition('p'::regclass, 256,
(case when
(hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563) % 256 < 0 then
256 + (hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563) % 256
else (hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563) % 256
end)::int,
id)
from p
where not
satisfies_hash_partition('p'::regclass, 256,
(case when
(hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563) % 256 < 0 then
256 + (hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563) % 256
else (hashint4extended(id, 8816678312871386365)::numeric + 5305509591434766563) % 256
end)::int,
id) ;
返回0条, 说明全部正确.
satisfies_hash_partition
--------------------------
(0 rows)
drop table p;
do language plpgsql $$
declare
begin
create unlogged table p (id int, info text, crt_time timestamp, x uuid, n name, i int8, c1 varchar(500), c2 char(100), c3 int2, ts timestamptz(3))
partition by hash (info);
for i in 0..255 loop
execute format ( 'create unlogged table p%s partition of p for values with (modulus %s, REMAINDER %s)', i, 256, i);
end loop;
end;
$$;
insert into p
select i, random()::text, clock_timestamp(),
gen_random_uuid(), md5(random()::text), i+1,
md5(random()::text), md5(random()::text),
random()*32767, clock_timestamp()
from generate_series(1,1000000) i;
SQL 获得hash分片, 从0开始计数
select tableoid::regclass, info,
(case when
(hashtextextended(info, 8816678312871386365)::numeric + 5305509591434766563) % 256 < 0 then
256 + (hashtextextended(info, 8816678312871386365)::numeric + 5305509591434766563) % 256
else (hashtextextended(info, 8816678312871386365)::numeric + 5305509591434766563) % 256
end)::int as hash
from p order by random() limit 10;
tableoid | info | hash
----------+---------------------+------
p103 | 0.8017627229598645 | 103
p3 | 0.7708656162212222 | 3
p250 | 0.5574657855237355 | 250
p84 | 0.5828628389821198 | 84
p69 | 0.05682110760098524 | 69
p250 | 0.09112121076390522 | 250
p51 | 0.09714514059177048 | 51
p82 | 0.15281675218450275 | 82
p209 | 0.7884218172648474 | 209
p76 | 0.5867557439132653 | 76
(10 rows)
验证正确性
select satisfies_hash_partition('p'::regclass, 256,
(
(
(hashtextextended(info, 8816678312871386365)::numeric + 5305509591434766563
) % 256
)::int + 256
) % 256,
info)
from p
where not
satisfies_hash_partition('p'::regclass, 256,
(
(
(hashtextextended(info, 8816678312871386365)::numeric + 5305509591434766563
) % 256
)::int + 256
) % 256,
info) ;
返回0条, 说明全部正确.
satisfies_hash_partition
--------------------------
(0 rows)
drop table p;
do language plpgsql $$
declare
begin
create unlogged table p (id int, info text, crt_time timestamp, x uuid, n name, i int8, c1 varchar(500), c2 char(100), c3 int2, ts timestamptz(3))
partition by hash (x);
for i in 0..255 loop
execute format ( 'create unlogged table p%s partition of p for values with (modulus %s, REMAINDER %s)', i, 256, i);
end loop;
end;
$$;
insert into p
select i, random()::text, clock_timestamp(),
gen_random_uuid(), md5(random()::text), i+1,
md5(random()::text), md5(random()::text),
random()*32767, clock_timestamp()
from generate_series(1,1000000) i;
SQL 获得hash分片, 从0开始计数
select tableoid::regclass, x,
(case when
(uuid_hash_extended(x, 8816678312871386365)::numeric + 5305509591434766563) % 256 < 0 then
256 + (uuid_hash_extended(x, 8816678312871386365)::numeric + 5305509591434766563) % 256
else (uuid_hash_extended(x, 8816678312871386365)::numeric + 5305509591434766563) % 256
end)::int as hash
from p order by random() limit 10;
tableoid | x | hash
----------+--------------------------------------+------
p46 | adb6a830-6d1d-42b4-b870-ebda0f6d763c | 46
p96 | 9ab848a8-d115-4b8b-926b-1c32264e88b3 | 96
p214 | d7ab3e96-c2f6-409b-94e6-e844d1fe7949 | 214
p236 | b663ba77-07d0-4cec-b1ce-3ab42d182507 | 236
p236 | a2d42573-b08d-4e9d-b0f3-fa4f7ae04f18 | 236
p107 | 9d0b647f-584d-4c20-a7e8-c0f3bac9e057 | 107
p143 | 574ef574-8256-4583-b9b6-63bd89d78c19 | 143
p224 | 528f9177-a0fc-4889-949a-6648ba7d7fde | 224
p141 | 05d739d1-bb44-452e-8ef7-3b153af991ac | 141
p58 | 90ee0320-6f72-430b-8c79-dcbcc9e32f88 | 58
(10 rows)
验证正确性
select satisfies_hash_partition('p'::regclass, 256,
(
(
(uuid_hash_extended(x, 8816678312871386365)::numeric + 5305509591434766563
) % 256
)::int + 256
) % 256,
x)
from p
where not
satisfies_hash_partition('p'::regclass, 256,
(
(
(uuid_hash_extended(x, 8816678312871386365)::numeric + 5305509591434766563
) % 256
)::int + 256
) % 256,
x) ;
返回0条, 说明全部正确.
satisfies_hash_partition
--------------------------
(0 rows)
计算哈希
/*
* Combine two 64-bit hash values, resulting in another hash value, using the
* same kind of technique as hash_combine(). Testing shows that this also
* produces good bit mixing.
*/
static inline uint64
hash_combine64(uint64 a, uint64 b)
{
/* 0x49a0f4dd15e5a8e3 is 64bit random data */
a ^= b + UINT64CONST(0x49a0f4dd15e5a8e3) + (a << 54) + (a >> 7);
return a;
}
/*
* compute_partition_hash_value
*
* Compute the hash value for given partition key values.
*/
检验分片是否正确
/*
* satisfies_hash_partition
*
* This is an SQL-callable function for use in hash partition constraints.
* The first three arguments are the parent table OID, modulus, and remainder.
* The remaining arguments are the value of the partitioning columns (or
* expressions); these are hashed and the results are combined into a single
* hash value by calling hash_combine64.
*
* Returns true if remainder produced when this computed single hash value is
* divided by the given modulus is equal to given remainder, otherwise false.
* NB: it's important that this never return null, as the constraint machinery
* would consider that to be a "pass".
*
* See get_qual_for_hash() for usage.
*/
您的愿望将传达给PG kernel hacker、数据库厂商等, 帮助提高数据库产品质量和功能, 说不定下一个PG版本就有您提出的功能点. 针对非常好的提议,奖励限量版PG文化衫、纪念品、贴纸、PG热门书籍等,奖品丰富,快来许愿。开不开森.