diff --git a/_posts/2023-10-30-PERCONA_RRE_COLUMN_COMPRESSION.md b/_posts/2023-10-30-PERCONA_RRE_COLUMN_COMPRESSION.md
index b0b38ca..378f0fb 100644
--- a/_posts/2023-10-30-PERCONA_RRE_COLUMN_COMPRESSION.md
+++ b/_posts/2023-10-30-PERCONA_RRE_COLUMN_COMPRESSION.md
@@ -66,17 +66,14 @@ mysql> CREATE TABLE t1(
### Code Analysis
Column compression in Percona Server evolved from 5.6 through 8.0; the main commits are listed below. The code analysis in this post is based on the 8.0 implementation only.
- [Implemented "Column Compression with optional Predefined Dictionary"](https://github.com/percona/percona-server/commit/35d5d3faf00db7e32f48dcb39f776e43b83f1cb2)
- [Compressed columns functionality merged from 5.6 to 5.7](https://github.com/percona/percona-server/commit/d142eb0ffc7301f638060181748a9ff1b9910761)
- [Re-implement compression dictionaries in 8.0](https://github.com/percona/percona-server/commit/14e2a96c6954433970ce15b613dbbe85fb7239f0)
-#### Physical Format
+### Physical Format
The final storage format of a compressed column is
@@ -85,14 +82,10 @@
ZIP_COLUMN_HEADER_LENGTH + Len + Compressed_data
```
ZIP_COLUMN_HEADER_LENGTH: the column header written for the compressed value, occupying 2 bytes
- Len: the length of the data before compression. The number of bytes it occupies is variable (1-4 bytes); how many bytes this particular column uses is recorded in the zip_column_data_length field of the column header. It is mainly used to validate the length after decompression.
- Compressed_data: the data after compression
-
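To make that layout concrete, here is a minimal C++ sketch of writing and parsing a value in the `header + Len + Compressed_data` shape described above. It is illustrative only: the 2-byte header contents (a compressed flag plus the width of the Len field), the little-endian Len encoding, and the `encode_column`/`parse_column` helpers are assumptions for this sketch, not Percona's actual layout or functions.

```c++
// Illustrative only: a made-up 2-byte header (flag byte + width of Len),
// a little-endian Len of 1-4 bytes, then the compressed payload.
#include <cstdint>
#include <stdexcept>
#include <vector>

static constexpr size_t kZipColumnHeaderLen = 2;  // plays the role of ZIP_COLUMN_HEADER_LENGTH

struct ParsedColumn {
  bool compressed;        // was the value stored compressed?
  uint32_t original_len;  // length before compression, for validation
  const uint8_t *payload; // compressed bytes
  size_t payload_len;
};

std::vector<uint8_t> encode_column(const std::vector<uint8_t> &compressed,
                                   uint32_t original_len) {
  uint8_t len_bytes = original_len <= 0xFF ? 1
                    : original_len <= 0xFFFF ? 2
                    : original_len <= 0xFFFFFF ? 3 : 4;
  std::vector<uint8_t> out;
  out.push_back(0x01);       // flag byte: bit 0 = "compressed"
  out.push_back(len_bytes);  // how many bytes the Len field occupies
  for (uint8_t i = 0; i < len_bytes; ++i)
    out.push_back((original_len >> (8 * i)) & 0xFF);
  out.insert(out.end(), compressed.begin(), compressed.end());
  return out;
}

ParsedColumn parse_column(const uint8_t *data, size_t size) {
  if (size < kZipColumnHeaderLen) throw std::runtime_error("truncated header");
  ParsedColumn col{};
  col.compressed = data[0] & 0x01;
  uint8_t len_bytes = data[1];
  if (len_bytes < 1 || len_bytes > 4 || size < kZipColumnHeaderLen + len_bytes)
    throw std::runtime_error("bad Len field");
  for (uint8_t i = 0; i < len_bytes; ++i)
    col.original_len |= static_cast<uint32_t>(data[kZipColumnHeaderLen + i]) << (8 * i);
  col.payload = data + kZipColumnHeaderLen + len_bytes;
  col.payload_len = size - kZipColumnHeaderLen - len_bytes;
  return col;
}
```

A decompression path can then compare `parse_column(...).original_len` with the byte count the decompressor actually produces, which is exactly the validation described above.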
- ZIP_COLUMN_HEADER_LENGTH is as follows:
```c++
static constexpr uint zip_column_compressed = 10;
@@ -111,12 +104,10 @@
-#### Row_compress_column && row_decompress_column
+### Row_compress_column && row_decompress_column
The main compression and decompression logic lives in the two functions row_compress_column and row_decompress_column.
- row_compress_column is relatively simple: the compression path calls zlib's deflateSetDictionary. If compression succeeds, the corresponding column header is written; if it fails, 2 bytes are still spent on the column header, because at decompression time the zip_column_compressed field of the column header must be read to decide whether this column was actually compressed.
- row_decompress_column first parses the column header to decide whether the data is compressed. If not, it simply returns the data without the column header. If it is compressed, it calls inflateSetDictionary, decompresses, and returns the decompressed data. The uncompressed length stored in the column header is checked against the length actually produced by decompression (a standalone zlib sketch of this dictionary handshake follows the code excerpt below).
```c++
@@ -147,15 +138,15 @@
const byte *row_decompress_column(
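For reference, the zlib dictionary mechanics that these two functions rely on can be shown in isolation. The sketch below is not the Percona code: it is a self-contained round trip that compresses a buffer after deflateSetDictionary and decompresses it again, supplying the dictionary only when inflate() reports Z_NEED_DICT. Buffer handling and error checking are deliberately simplified, and dict_compress/dict_decompress are made-up helper names.

```c++
#include <string>
#include <vector>
#include <zlib.h>

// Compress `src` with a preset dictionary; returns the raw zlib stream.
static std::vector<unsigned char> dict_compress(const std::string &src,
                                                const std::string &dict) {
  z_stream zs{};
  deflateInit(&zs, Z_DEFAULT_COMPRESSION);
  deflateSetDictionary(&zs, reinterpret_cast<const Bytef *>(dict.data()),
                       dict.size());
  std::vector<unsigned char> out(deflateBound(&zs, src.size()));
  zs.next_in = reinterpret_cast<Bytef *>(const_cast<char *>(src.data()));
  zs.avail_in = src.size();
  zs.next_out = out.data();
  zs.avail_out = out.size();
  deflate(&zs, Z_FINISH);  // single-shot compress into a bound-sized buffer
  out.resize(zs.total_out);
  deflateEnd(&zs);
  return out;
}

// Decompress, supplying the dictionary only when inflate() asks for it.
static std::string dict_decompress(const std::vector<unsigned char> &in,
                                   const std::string &dict,
                                   size_t original_len) {
  z_stream zs{};
  inflateInit(&zs);
  std::string out(original_len, '\0');  // length taken from the column header
  zs.next_in = const_cast<Bytef *>(in.data());
  zs.avail_in = in.size();
  zs.next_out = reinterpret_cast<Bytef *>(&out[0]);
  zs.avail_out = out.size();
  int rc = inflate(&zs, Z_FINISH);
  if (rc == Z_NEED_DICT) {  // stream was produced with a preset dictionary
    inflateSetDictionary(&zs, reinterpret_cast<const Bytef *>(dict.data()),
                         dict.size());
    rc = inflate(&zs, Z_FINISH);
  }
  // Validate against the uncompressed length recorded in the header.
  if (rc != Z_STREAM_END || zs.total_out != original_len) out.clear();
  inflateEnd(&zs);
  return out;
}
```

The final length check against original_len mirrors the validation against the uncompressed length stored in the column header.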
-#### Reading the Dictionary Information
+### Reading the Dictionary Information
How does the dictionary information get passed into the row_compress_column function?
-```c++
mysql -h127.0.0.1 -uroot -P7788 -A (with the -A option, `use sbtest` does not trigger an open table)
use sbtest;
INSERT INTO t1 VALUES (1, REPEAT('a', 200));
+```c++
#0 fill_column_from_dd () at sql/dd_table_share.cc
#1 fill_columns_from_dd () sql/dd_table_share.cc
#2 open_table_def () sql/dd_table_share.cc
@@ -175,36 +166,41 @@ INSERT INTO t1 VALUES (1, REPEAT('a', 200));
The first time the table is opened, fill_column_from_dd (sql/dd_table_share.cc:) calls compression_dict::get_name_for_id(zip_dict_id) (zip_dict_id is the id carried in column_options at CREATE time), reads the mysql.compression_dictionary table, and stores the retrieved zip_dict_name and zip_dict_data into the TABLE_SHARE.
+
From the TABLE_SHARE it is then copied into m_prebuilt->mysql_template:
build_template_field (storage/innobase/handler/ha_innodb.cc)
- => m_prebuilt->mysql_template = share->field->zip_dict_data
-Then, in row_mysql_convert_row_to_innobase (storage/innobase/row/row0mysql.cc), the row_prebuilt_t is passed as an argument down to row_compress_column.
+=> m_prebuilt->mysql_template = share->field->zip_dict_data
+
+Then, in row_mysql_convert_row_to_innobase (storage/innobase/row/row0mysql.cc), the row_prebuilt_t is passed as an argument down to row_compress_column.
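The flow above can be summarized with a small structural sketch. None of the types or functions below are the real server types (TABLE_SHARE, row_prebuilt_t, and so on); they only model the three hops: dictionary blob cached in the share, pointer copied into the per-statement template, and the template handed to the compress call during row conversion.

```c++
#include <string>
#include <vector>

// Simplified stand-ins for the share, the prebuilt template and the compress
// call; the real structures carry far more state.
struct FieldShare {                  // one column in the cached share
  std::string zip_dict_name;         // loaded once from mysql.compression_dictionary
  std::string zip_dict_data;         // the dictionary blob itself
};

struct MysqlTemplateField {          // filled per statement from the share
  const std::string *zip_dict_data;  // points at the share's copy
};

// Stand-in for row_compress_column(): only shows where the dictionary arrives.
std::vector<unsigned char> compress_column(const std::string &value,
                                            const std::string *dict) {
  // ... would call zlib deflate, using the dictionary if dict is non-null ...
  (void)dict;
  return {value.begin(), value.end()};
}

// Stand-in for row_mysql_convert_row_to_innobase(): the per-field template is
// what hands the dictionary down to the compress call.
void convert_row(const std::vector<std::string> &row,
                 const std::vector<MysqlTemplateField> &templ) {
  for (size_t i = 0; i < row.size(); ++i)
    compress_column(row[i], templ[i].zip_dict_data);
}
```

The point of the indirection is that the dictionary is loaded once when the share is built and then travels by reference, rather than being re-read from mysql.compression_dictionary for every row.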
-#### Newly Added Tables (Bootstrap and Invocation)
+### Newly Added Tables (Bootstrap and Invocation)
INFORMATION_SCHEMA.COMPRESSION_DICTIONARY_TABLES and INFORMATION_SCHEMA.COMPRESSION_DICTIONARY are both views; their data actually comes from the tables mysql.compression_dictionary and mysql.compression_dictionary_cols.
-
-sql/sql_zip_dict.cc // access interfaces for mysql.compression_dictionary and mysql.compression_dictionary_cols, plus the bootstrap of these two tables
+// access interfaces for mysql.compression_dictionary and mysql.compression_dictionary_cols, plus the bootstrap of these two tables
+sql/sql_zip_dict.cc
-sql/dd/impl/system_views/compression_dictionary.cc // defines INFORMATION_SCHEMA.COMPRESSION_DICTIONARY
+// defines INFORMATION_SCHEMA.COMPRESSION_DICTIONARY
+sql/dd/impl/system_views/compression_dictionary.cc
-sql/dd/impl/system_views/compression_dictionary_tables.cc // defines INFORMATION_SCHEMA.COMPRESSION_DICTIONARY_TABLES
+// defines INFORMATION_SCHEMA.COMPRESSION_DICTIONARY_TABLES
+sql/dd/impl/system_views/compression_dictionary_tables.cc
-sql/dd/info_schema/metadata.cc // create_system_views creates the information_schema views
+// create_system_views creates the information_schema views
+sql/dd/info_schema/metadata.cc
-#### Create Dict
+### Create Dict
-```
SET @dictionary_data = 'one' 'two' 'three' 'four';
CREATE COMPRESSION_DICTIONARY numbers (@dictionary_data);
+```
sql/sql_parse.cc // SQLCOM_CREATE_COMPRESSION_DICTIONARY
create_zip_dict
  => open_dictionary_table_write
-#### Combine Dict and Column
+### Combine Dict and Column
-```
create table user_data (a int(10), b int(10), c varchar(100) COLUMN_FORMAT COMPRESSED WITH COMPRESSION_DICTIONARY numbers);
+```
+
sql/sql_table.cc
rea_create_base_table
  =>compression_dict::cols_table_insert
-  =>open_dictionary_cols_table_write(open mysql.compression_dictionary_cols and write table columns)
+  =>open_dictionary_cols_table_write
+  (open mysql.compression_dictionary_cols and write table columns)
```
-#### Insert & Select From Compressed Column
+### Insert & Select From Compressed Column
-```
CREATE TABLE t1 (id INT PRIMARY KEY, b1 varchar(200) COLUMN_FORMAT COMPRESSED);
INSERT INTO t1 VALUES (1, REPEAT('a', 200));
+```
+
#0 row_compress_column() at storage/innobase/row/row0mysql.cc:392
#1 row_mysql_store_col_in_innobase_format () at storage/innobase/row/row0mysql.cc:911
#2 row_mysql_convert_row_to_innobase () at storage/innobase/row/row0mysql.cc:1088
@@ -274,9 +273,8 @@ select * from t1;
-#### Alter Column To Compressed Column (Copy DDL)
+### Alter Column To Compressed Column (Copy DDL)
-```
CREATE TABLE t1 (id INT PRIMARY KEY, b1 varchar(200) COLUMN_FORMAT COMPRESSED, b2 varchar(200));
INSERT INTO t1 VALUES (1, REPEAT('a', 200), REPEAT('a', 200));
ALTER TABLE t1 MODIFY COLUMN b2 varchar(200) COLUMN_FORMAT COMPRESSED;
@@ -284,6 +282,8 @@
This goes through the copy-DDL path: the existing records are first read from the original table, the columns that need to change are compressed, and the rows are then written into a new table. Since some columns of the original table are already stored compressed, reading the original records also goes through row_decompress_column.
The complete copy logic lives inside the copy_data_between_tables function (a simplified sketch follows the stack trace below).
+
+```
Read:
#0 row_decompress_column() at storage/innobase/row/row0mysql.cc:500
#1 row_sel_field_store_in_mysql_format_func() at storage/innobase/row/row0sel.cc:2557
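A hedged sketch of that read-transform-write loop, assuming simplified Table/Row placeholders and stub compress/decompress helpers rather than the real copy_data_between_tables signature:

```c++
#include <string>
#include <vector>

// Placeholder row/table model: each field is a string plus a "stored
// compressed" flag; compress()/decompress() stand in for the zlib calls.
struct Field { std::string data; bool compressed; };
using Row = std::vector<Field>;
struct Table { std::vector<Row> rows; std::vector<bool> compress_col; };

std::string compress(const std::string &s)   { return s; }  // stub
std::string decompress(const std::string &s) { return s; }  // stub

// Sketch of the copy-DDL data movement: old table -> transform -> new table.
void copy_data_between_tables_sketch(const Table &src, Table &dst) {
  for (const Row &old_row : src.rows) {
    Row new_row;
    for (size_t c = 0; c < old_row.size(); ++c) {
      // Reading the old row may hit row_decompress_column for columns that
      // are already stored compressed.
      std::string plain = old_row[c].compressed ? decompress(old_row[c].data)
                                                : old_row[c].data;
      // Writing into the new table compresses columns whose new definition
      // is COLUMN_FORMAT COMPRESSED (row_compress_column on the write path).
      bool want = dst.compress_col[c];
      new_row.push_back({want ? compress(plain) : plain, want});
    }
    dst.rows.push_back(new_row);
  }
}
```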