New series of ao/co-related cherry-picks #897

Open · wants to merge 5 commits into base: main
19 changes: 19 additions & 0 deletions src/backend/access/appendonly/README.md
@@ -248,3 +248,22 @@ index entries will still point to the segment being compacted. This will be the
case up until the index entries are bulk deleted, but by then the new index
entries along with new block directory rows would already have been written and
would be able to answer uniqueness checks.

Transaction isolation: Since uniqueness checks utilize the special dirty
snapshot, these checks can cross transaction isolation boundaries. For instance,
consider what happens if we are in a repeatable read transaction and we insert a
key that was already inserted by a concurrent transaction. Further, let's say
that the repeatable read transaction's snapshot was taken before the concurrent
transaction started. This means that the repeatable read transaction won't be
able to see the conflicting key (e.g. with a SELECT). In spite of that, the
conflict will still be detected. Depending on whether the concurrent transaction
has committed or is still in progress, the repeatable read transaction will
raise a conflict or enter xwait, respectively. This behavior is table AM
agnostic.
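
Below is a minimal sketch of this scenario in the style of the isolation2 tests
touched by this PR (the table name, index, and session numbering are
illustrative only, not part of the patch):

```sql
-- two-session sketch; assumes unique indexes on AO tables are enabled
CREATE TABLE ao_unique_demo (a int UNIQUE) USING ao_row DISTRIBUTED BY (a);

1: BEGIN ISOLATION LEVEL REPEATABLE READ;
1: SELECT * FROM ao_unique_demo;             -- snapshot taken, sees no rows
2: BEGIN;
2: INSERT INTO ao_unique_demo VALUES (1);    -- concurrent, uncommitted insert
1: SELECT * FROM ao_unique_demo;             -- still sees no rows
1&: INSERT INTO ao_unique_demo VALUES (1);   -- conflict detected: enters xwait
2: COMMIT;
1<:                                          -- unblocks with a unique violation
1: ROLLBACK;
```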

Partial unique indexes: We don't have to do anything special for partial
indexes. Keys that don't satisfy the partial index predicate are never inserted
into the index, so no uniqueness checks are triggered for them (see
ExecInsertIndexTuples()). Likewise, during partial unique index builds, keys
that don't satisfy the predicate are never inserted into the index (see
*_index_build_range_scan()).
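
As a minimal illustration (the table and index names below are made up for this
sketch, not taken from the patch):

```sql
CREATE TABLE ao_partial_demo (a int, b int) USING ao_row DISTRIBUTED BY (a);
CREATE UNIQUE INDEX ao_partial_idx ON ao_partial_demo (a) WHERE b > 0;

INSERT INTO ao_partial_demo VALUES (1, 0);  -- predicate fails, key not indexed
INSERT INTO ao_partial_demo VALUES (1, 0);  -- no uniqueness check, succeeds
INSERT INTO ao_partial_demo VALUES (1, 1);  -- predicate holds, key (1) indexed
INSERT INTO ao_partial_demo VALUES (1, 2);  -- ERROR: duplicate key value
```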
7 changes: 7 additions & 0 deletions src/backend/access/appendonly/appendonlyam.c
@@ -105,6 +105,8 @@ static void appendonly_insert_finish_guts(AppendOnlyInsertDesc aoInsertDesc);

/* Hook for plugins to get control in appendonly_delete() */
appendonly_delete_hook_type appendonly_delete_hook = NULL;
static void AppendOnlyScanDesc_UpdateTotalBytesRead(
AppendOnlyScanDesc scan);

/* ----------------
* initscan - scan code common to appendonly_beginscan and appendonly_rescan
@@ -1251,6 +1253,8 @@ getNextBlock(AppendOnlyScanDesc scan)
AppendOnlyExecutorReadBlock_GetContents(
&scan->executorReadBlock);

AppendOnlyScanDesc_UpdateTotalBytesRead(scan);

return true;
}

@@ -1583,6 +1587,9 @@ appendonly_beginrangescan_internal(Relation relation,
AccessShareLock,
appendOnlyMetaDataSnapshot);
}

scan->totalBytesRead = 0;

return scan;
}

20 changes: 16 additions & 4 deletions src/backend/access/appendonly/appendonlyam_handler.c
@@ -36,6 +36,7 @@
#include "catalog/storage_xlog.h"
#include "cdb/cdbappendonlyam.h"
#include "cdb/cdbvars.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "commands/progress.h"
#include "executor/executor.h"
@@ -1572,7 +1573,7 @@ appendonly_index_build_range_scan(Relation heapRelation,
FileSegInfo **seginfo = NULL;
int segfile_count;
int64 total_blockcount = 0;
AppendOnlyScanDesc hscan;
int64 previous_blkno = -1;

/*
* sanity checks
@@ -1720,10 +1721,22 @@
(numblocks != InvalidBlockNumber && ItemPointerGetBlockNumber(&slot->tts_tid) >= numblocks))
continue;

/* Report scan progress, if asked to. */
if (progress)
{
hscan = (AppendOnlyScanDesc) &aoscan->rs_base;
pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE, hscan->rs_nblocks);
int64 current_blkno =
RelationGuessNumberOfBlocksFromSize(aoscan->totalBytesRead);

/* XXX: How can we report for builds with parallel scans? */
Assert(!aoscan->rs_base.rs_parallel);

/* As soon as a new block starts, report it as scanned */
if (current_blkno != previous_blkno)
{
pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
current_blkno);
previous_blkno = current_blkno;
}
}

aoTupleId = (AOTupleId *) &slot->tts_tid;
@@ -1820,7 +1833,6 @@ appendonly_index_validate_scan(Relation heapRelation,
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("index validate scan - feature not supported on appendoptimized relations")));

}

/* ------------------------------------------------------------------------
3 changes: 3 additions & 0 deletions src/backend/cdb/cdbappendonlystorageread.c
@@ -27,6 +27,7 @@
#include "cdb/cdbappendonlystorageformat.h"
#include "cdb/cdbappendonlystorageread.h"
#include "storage/gp_compress.h"
#include "utils/faultinjector.h"
#include "utils/guc.h"


@@ -1011,6 +1012,8 @@ AppendOnlyStorageRead_ReadNextBlock(AppendOnlyStorageRead *storageRead)
/* UNDONE: Finish the read for the information only header. */
}

SIMPLE_FAULT_INJECTOR("AppendOnlyStorageRead_ReadNextBlock_success");

return true;
}

13 changes: 0 additions & 13 deletions src/backend/commands/tablecmds.c
@@ -16053,19 +16053,6 @@ ATPrepSetAccessMethod(AlteredTableInfo *tab, Relation rel, const char *amname)
if (rel->rd_rel->relam == amoid)
return;

/*
* Since we don't support unique indexes for AO/AOCO tables, ban
* heap->AO/AOCO in case the heap table has a unique index.
*/
if (rel->rd_rel->relam == HEAP_TABLE_AM_OID &&
(amoid == AO_ROW_TABLE_AM_OID || amoid == AO_COLUMN_TABLE_AM_OID))
{
if (relationHasUniqueIndex(rel))
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("append-only tables do not support unique indexes"),
errdetail("heap table \"%s\" being altered contains unique index", RelationGetRelationName(rel))));
}

/* Save info for Phase 3 to do the real work */
tab->rewrite |= AT_REWRITE_ACCESS_METHOD;
tab->newAccessMethod = amoid;
13 changes: 0 additions & 13 deletions src/backend/storage/buffer/bufmgr.c
@@ -3168,19 +3168,6 @@ RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum)
return 0; /* keep compiler quiet */
}

/*
* GPDB: it is possible to need to calculate the number of blocks from the table
* size. An example use case is when we are the dispatcher and we need to
* acquire the number of blocks from all segments.
*
* Use the same calculation that RelationGetNumberOfBlocksInFork is using.
*/
BlockNumber
RelationGuessNumberOfBlocksFromSize(uint64 szbytes)
{
return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
}

/*
* BufferIsPermanent
* Determines whether a buffer will potentially still be around after
20 changes: 20 additions & 0 deletions src/include/cdb/cdbappendonlyam.h
@@ -259,6 +259,11 @@ typedef struct AppendOnlyScanDescData
/* used in predicate pushdown */
ExprContext *aos_pushdown_econtext;
ExprState *aos_pushdown_qual;
/*
* The total number of bytes read so far across all segment files (the
* compressed size, for compressed blocks). This is used for scan progress
* reporting.
*/
int64 totalBytesRead;

} AppendOnlyScanDescData;

@@ -494,5 +499,20 @@ extern bool AppendOnlyExecutorReadBlock_ScanNextTuple(AppendOnlyExecutorReadBloc
TupleTableSlot *slot);

extern void AppendOnlyExecutorReadBlock_GetContents(AppendOnlyExecutorReadBlock *executorReadBlock);
/*
* Update total bytes read for the entire scan. If the block was compressed,
* update it with the compressed length. If the block was not compressed, update
* it with the uncompressed length.
*/
static inline void
AppendOnlyScanDesc_UpdateTotalBytesRead(AppendOnlyScanDesc scan)
{
Assert(scan->storageRead.isActive);

if (scan->storageRead.current.isCompressed)
scan->totalBytesRead += scan->storageRead.current.compressedLen;
else
scan->totalBytesRead += scan->storageRead.current.uncompressedLen;
}

#endif /* CDBAPPENDONLYAM_H */
13 changes: 12 additions & 1 deletion src/include/storage/bufmgr.h
@@ -217,7 +217,18 @@ extern void DropRelFileNodeBuffers(struct SMgrRelationData *smgr_reln, ForkNumbe
extern void DropRelFileNodesAllBuffers(struct SMgrRelationData **smgr_reln, int nnodes);
extern void DropDatabaseBuffers(Oid dbid);

extern BlockNumber RelationGuessNumberOfBlocksFromSize(uint64 szbytes);
/*
* GPDB: it is possible to need to calculate the number of blocks from the table
* size. An example use case is when we are the dispatcher and we need to
* acquire the number of blocks from all segments.
*
* Use the same calculation that RelationGetNumberOfBlocksInFork is using.
*/
static inline BlockNumber
RelationGuessNumberOfBlocksFromSize(uint64 szbytes)
{
return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
}
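
(For illustration: this is a ceiling division. Assuming the Greenplum default
BLCKSZ of 32768, a 32769-byte relation is estimated as
(32769 + 32767) / 32768 = 2 blocks, while exactly 32768 bytes yields 1 block.)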

#define RelationGetNumberOfBlocks(reln) \
RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM)
1 change: 1 addition & 0 deletions src/test/isolation2/expected/ao_unique_index.out
@@ -571,3 +571,4 @@ DETAIL: Key (a, b)=(1, 1) already exists.
ERROR: duplicate key value violates unique constraint "a_b_unique"
DROP TABLE unique_index_ao_row;
DROP
RESET
1 change: 1 addition & 0 deletions src/test/isolation2/expected/aocs_unique_index.out
@@ -571,3 +571,4 @@ DETAIL: Key (a, b)=(1, 1) already exists.
ERROR: duplicate key value violates unique constraint "a_b_unique"
DROP TABLE unique_index_ao_column;
DROP
RESET
2 changes: 2 additions & 0 deletions src/test/isolation2/isolation2_schedule
@@ -226,6 +226,8 @@ test: idle_gang_cleaner
# test idle_in_transaction_session_timeout
test: write_gang_idle_in_transaction_session_timeout

test: ao_index_build_progress

# Tests for FTS
test: fts_errors
test: segwalrep/replication_keeps_crash
2 changes: 1 addition & 1 deletion src/test/isolation2/sql/ao_unique_index.sql
@@ -376,4 +376,4 @@ CREATE UNIQUE INDEX a_b_unique ON unique_index_ao_row(a,b);
INSERT INTO unique_index_ao_row VALUES (1,2);
-- should conflict
INSERT INTO unique_index_ao_row VALUES (1,1);
DROP TABLE unique_index_ao_row;
DROP TABLE unique_index_ao_row;
2 changes: 1 addition & 1 deletion src/test/isolation2/sql/aocs_unique_index.sql
@@ -376,4 +376,4 @@ CREATE UNIQUE INDEX a_b_unique ON unique_index_ao_column(a,b);
INSERT INTO unique_index_ao_column VALUES (1,2);
-- should conflict
INSERT INTO unique_index_ao_column VALUES (1,1);
DROP TABLE unique_index_ao_column;
DROP TABLE unique_index_ao_column;
64 changes: 35 additions & 29 deletions src/test/regress/expected/alter_table_set_am.out
@@ -1,4 +1,5 @@
-- Check changing table access method
\set HIDE_TABLEAM off
-- Scenario 1: Changing to the same AM: it should have no effect but
-- make sure it doesn't rewrite table or blow up existing reloptions:
CREATE TABLE sameam_heap(a int, b int) WITH (fillfactor=70) DISTRIBUTED BY (a);
@@ -68,41 +69,46 @@ CREATE TEMP TABLE relfilebeforeao AS
WHERE relname in ('heap2ao', 'heap2ao2', 'heapi') ORDER BY segid;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'segid' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- Altering a heap table with a unique index to AO should error out
-- as unique indexes aren't supported on AO tables
ALTER TABLE heap2ao2 SET ACCESS METHOD ao_row;
ERROR: append-only tables do not support unique indexes
DETAIL: heap table "heap2ao2" being altered contains unique index
ALTER TABLE heap2ao2 DROP CONSTRAINT unique_constraint;
-- Set default storage options for the table to inherit from
SET gp_default_storage_options = 'blocksize=65536, compresstype=zlib, compresslevel=5, checksum=true';
-- Alter table heap to AO should work
-- Alter table heap to AO should work, even if heap table has unique indexes.
ALTER TABLE heap2ao SET ACCESS METHOD ao_row;
ALTER TABLE heap2ao2 SET WITH (appendoptimized=true);
-- The altered tables should have AO AM
SELECT c.relname, a.amname FROM pg_class c JOIN pg_am a ON c.relam = a.oid WHERE c.relname LIKE 'heap2ao%';
relname | amname
----------+--------
heap2ao | ao_row
heap2ao2 | ao_row
(2 rows)

-- The altered tables should inherit storage options from gp_default_storage_options
-- And, the original heap reloptions are gone (in this case, 'fillfactor').
SELECT blocksize,compresslevel,checksum,compresstype,columnstore
FROM pg_appendonly WHERE relid in ('heap2ao'::regclass::oid, 'heap2ao2'::regclass::oid);
blocksize | compresslevel | checksum | compresstype | columnstore
-----------+---------------+----------+--------------+-------------
65536 | 5 | t | zlib | f
65536 | 5 | t | zlib | f
(2 rows)

SELECT reloptions from pg_class where relname in ('heap2ao', 'heap2ao2');
reloptions
-----------------------------------
{blocksize=65536,compresslevel=5}
{blocksize=65536,compresslevel=5}
(2 rows)
-- And, the original heap reloptions are gone (in this case, 'fillfactor').
-- Also, equivalent indexes have been created
\d+ heap2ao
Table "public.heap2ao"
Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
--------+---------+-----------+----------+---------+---------+--------------+-------------
a | integer | | | | plain | |
b | integer | | | | plain | |
Compression Type: zlib
Compression Level: 5
Block Size: 65536
Checksum: t
Indexes:
"heapi" btree (b)
Distributed by: (a)
Access method: ao_row
Options: blocksize=65536, compresslevel=5

\d+ heap2ao2
Table "public.heap2ao2"
Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
--------+---------+-----------+----------+---------+---------+--------------+-------------
a | integer | | | | plain | |
b | integer | | | | plain | |
Compression Type: zlib
Compression Level: 5
Block Size: 65536
Checksum: t
Indexes:
"unique_constraint" UNIQUE CONSTRAINT, btree (a)
Distributed by: (a)
Access method: ao_row
Options: blocksize=65536, compresslevel=5

-- Check data is intact
SELECT * FROM heap2ao;