Re-think predicate locking on GIN indexes.
The principle behind the locking was not very well thought-out, and not documented. Add a section in the README to explain how it's supposed to work, and change the code so that it actually works that way. This fixes two bugs: 1. If fast update was turned on concurrently, subsequent inserts to the pending list would not conflict with predicate locks that were acquired earlier, on entry pages. The included 'predicate-gin-fastupdate' test demonstrates that. To fix, make all scans acquire a predicate lock on the metapage. That lock represents a scan of the pending list, whether or not there is a pending list at the moment. Forget about the optimization to skip locking/checking for locks, when fastupdate=off. 2. If a scan finds no match, it still needs to lock the entry page. The point of predicate locks is to lock the gabs between values, whether or not there is a match. The included 'predicate-gin-nomatch' test tests that case. In addition to those two bug fixes, this removes some unnecessary locking, following the principle laid out in the README. Because all items in a posting tree have the same key value, a lock on the posting tree root is enough to cover all the items. (With a very large posting tree, it would possibly be better to lock the posting tree leaf pages instead, so that a "skip scan" with a query like "A & B", you could avoid unnecessary conflict if a new tuple is inserted with A but !B. But let's keep this simple.) Also, some spelling fixes. Author: Heikki Linnakangas with some editorization by me Review: Andrey Borodin, Alexander Korotkov Discussion: https://www.postgresql.org/message-id/0b3ad2c2-2692-62a9-3a04-5724f2af9114@iki.fi
This commit is contained in:
parent
7d8679975f
commit
0bef1c0678
@ -331,6 +331,40 @@ page-deletions safe; it stamps the deleted pages with an XID and keeps the
|
||||
deleted pages around with the right-link intact until all concurrent scans
|
||||
have finished.)
|
||||
|
||||
Predicate Locking
|
||||
-----------------
|
||||
|
||||
GIN supports predicate locking, for serializable snapshot isolation.
|
||||
A predicate locks represent that a scan has scanned a range of values. They
|
||||
are not concerned with physical pages as such, but the logical key values.
|
||||
A predicate lock on a page covers the key range that would belong on that
|
||||
page, whether or not there are any matching tuples there currently. In other
|
||||
words, a predicate lock on an index page covers the "gaps" between the index
|
||||
tuples. To minimize false positives, predicate locks are acquired at the
|
||||
finest level possible.
|
||||
|
||||
* Like in the B-tree index, it is enough to lock only leaf pages, because all
|
||||
insertions happen at the leaf level.
|
||||
|
||||
* In an equality search (i.e. not a partial match search), if a key entry has
|
||||
a posting tree, we lock the posting tree root page, to represent a lock on
|
||||
just that key entry. Otherwise, we lock the entry tree page. We also lock
|
||||
the entry tree page if no match is found, to lock the "gap" where the entry
|
||||
would've been, had there been one.
|
||||
|
||||
* In a partial match search, we lock all the entry leaf pages that we scan,
|
||||
in addition to locks on posting tree roots, to represent the "gaps" between
|
||||
values.
|
||||
|
||||
* In addition to the locks on entry leaf pages and posting tree roots, all
|
||||
scans grab a lock the metapage. This is to interlock with insertions to
|
||||
the fast update pending list. An insertion to the pending list can really
|
||||
belong anywhere in the tree, and the lock on the metapage represents that.
|
||||
|
||||
The interlock for fastupdate pending lists means that with fastupdate=on,
|
||||
we effectively always grab a full-index lock, so you could get a lot of false
|
||||
positives.
|
||||
|
||||
Compatibility
|
||||
-------------
|
||||
|
||||
|
@ -84,6 +84,9 @@ ginFindLeafPage(GinBtree btree, bool searchMode, Snapshot snapshot)
|
||||
stack->parent = NULL;
|
||||
stack->predictNumber = 1;
|
||||
|
||||
if (!searchMode)
|
||||
CheckForSerializableConflictIn(btree->index, NULL, stack->buffer);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
Page page;
|
||||
|
@ -1812,8 +1812,8 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
|
||||
blkno = BufferGetBlockNumber(buffer);
|
||||
|
||||
/*
|
||||
* Copy a predicate lock from entry tree leaf (containing posting list) to
|
||||
* posting tree.
|
||||
* Copy any predicate locks from the entry tree leaf (containing posting
|
||||
* list) to the posting tree.
|
||||
*/
|
||||
PredicateLockPageSplit(index, BufferGetBlockNumber(entrybuffer), blkno);
|
||||
|
||||
@ -1864,7 +1864,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
|
||||
return blkno;
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno)
|
||||
{
|
||||
memset(btree, 0, sizeof(GinBtreeData));
|
||||
@ -1911,7 +1911,6 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
|
||||
btree.itemptr = insertdata.items[insertdata.curitem];
|
||||
stack = ginFindLeafPage(&btree, false, NULL);
|
||||
|
||||
GinCheckForSerializableConflictIn(btree.index, NULL, stack->buffer);
|
||||
ginInsertValue(&btree, stack, &insertdata, buildStats);
|
||||
}
|
||||
}
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "postmaster/autovacuum.h"
|
||||
#include "storage/indexfsm.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "storage/predicate.h"
|
||||
#include "utils/builtins.h"
|
||||
|
||||
/* GUC parameter */
|
||||
@ -245,6 +246,13 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
|
||||
metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
|
||||
metapage = BufferGetPage(metabuffer);
|
||||
|
||||
/*
|
||||
* An insertion to the pending list could logically belong anywhere in
|
||||
* the tree, so it conflicts with all serializable scans. All scans
|
||||
* acquire a predicate lock on the metabuffer to represent that.
|
||||
*/
|
||||
CheckForSerializableConflictIn(index, NULL, metabuffer);
|
||||
|
||||
if (collector->sumsize + collector->ntuples * sizeof(ItemIdData) > GinListPageSize)
|
||||
{
|
||||
/*
|
||||
|
@ -35,20 +35,6 @@ typedef struct pendingPosition
|
||||
} pendingPosition;
|
||||
|
||||
|
||||
/*
|
||||
* Place predicate lock on GIN page if needed.
|
||||
*/
|
||||
static void
|
||||
GinPredicateLockPage(Relation index, BlockNumber blkno, Snapshot snapshot)
|
||||
{
|
||||
/*
|
||||
* When fast update is on then no need in locking pages, because we anyway
|
||||
* need to lock the whole index.
|
||||
*/
|
||||
if (!GinGetUseFastUpdate(index))
|
||||
PredicateLockPage(index, blkno, snapshot);
|
||||
}
|
||||
|
||||
/*
|
||||
* Goes to the next page if current offset is outside of bounds
|
||||
*/
|
||||
@ -68,7 +54,7 @@ moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot
|
||||
stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
|
||||
stack->blkno = BufferGetBlockNumber(stack->buffer);
|
||||
stack->off = FirstOffsetNumber;
|
||||
GinPredicateLockPage(btree->index, stack->blkno, snapshot);
|
||||
PredicateLockPage(btree->index, stack->blkno, snapshot);
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -100,11 +86,6 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
|
||||
*/
|
||||
for (;;)
|
||||
{
|
||||
/*
|
||||
* Predicate lock each leaf page in posting tree
|
||||
*/
|
||||
GinPredicateLockPage(index, BufferGetBlockNumber(buffer), snapshot);
|
||||
|
||||
page = BufferGetPage(buffer);
|
||||
if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
|
||||
{
|
||||
@ -158,7 +139,7 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
|
||||
* Predicate lock entry leaf page, following pages will be locked by
|
||||
* moveRightIfItNeeded()
|
||||
*/
|
||||
GinPredicateLockPage(btree->index, stack->buffer, snapshot);
|
||||
PredicateLockPage(btree->index, stack->buffer, snapshot);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
@ -253,6 +234,13 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
|
||||
|
||||
LockBuffer(stack->buffer, GIN_UNLOCK);
|
||||
|
||||
/*
|
||||
* Acquire predicate lock on the posting tree. We already hold
|
||||
* a lock on the entry page, but insertions to the posting tree
|
||||
* don't check for conflicts on that level.
|
||||
*/
|
||||
PredicateLockPage(btree->index, rootPostingTree, snapshot);
|
||||
|
||||
/* Collect all the TIDs in this entry's posting tree */
|
||||
scanPostingTree(btree->index, scanEntry, rootPostingTree,
|
||||
snapshot);
|
||||
@ -400,10 +388,6 @@ restartScanEntry:
|
||||
{
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
|
||||
|
||||
/* Predicate lock visited entry leaf page */
|
||||
GinPredicateLockPage(ginstate->index,
|
||||
BufferGetBlockNumber(stackEntry->buffer), snapshot);
|
||||
|
||||
if (GinIsPostingTree(itup))
|
||||
{
|
||||
BlockNumber rootPostingTree = GinGetPostingTree(itup);
|
||||
@ -411,6 +395,13 @@ restartScanEntry:
|
||||
Page page;
|
||||
ItemPointerData minItem;
|
||||
|
||||
/*
|
||||
* This is an equality scan, so lock the root of the posting tree.
|
||||
* It represents a lock on the exact key value, and covers all the
|
||||
* items in the posting tree.
|
||||
*/
|
||||
PredicateLockPage(ginstate->index, rootPostingTree, snapshot);
|
||||
|
||||
/*
|
||||
* We should unlock entry page before touching posting tree to
|
||||
* prevent deadlocks with vacuum processes. Because entry is never
|
||||
@ -425,12 +416,6 @@ restartScanEntry:
|
||||
rootPostingTree, snapshot);
|
||||
entry->buffer = stack->buffer;
|
||||
|
||||
/*
|
||||
* Predicate lock visited posting tree page, following pages will
|
||||
* be locked by moveRightIfItNeeded or entryLoadMoreItems
|
||||
*/
|
||||
GinPredicateLockPage(ginstate->index, BufferGetBlockNumber(entry->buffer), snapshot);
|
||||
|
||||
/*
|
||||
* We keep buffer pinned because we need to prevent deletion of
|
||||
* page during scan. See GIN's vacuum implementation. RefCount is
|
||||
@ -452,15 +437,38 @@ restartScanEntry:
|
||||
freeGinBtreeStack(stack);
|
||||
entry->isFinished = false;
|
||||
}
|
||||
else if (GinGetNPosting(itup) > 0)
|
||||
else
|
||||
{
|
||||
entry->list = ginReadTuple(ginstate, entry->attnum, itup,
|
||||
&entry->nlist);
|
||||
entry->predictNumberResult = entry->nlist;
|
||||
/*
|
||||
* Lock the entry leaf page. This is more coarse-grained than
|
||||
* necessary, because it will conflict with any insertions that
|
||||
* land on the same leaf page, not only the exacty key we searched
|
||||
* for. But locking an individual tuple would require updating
|
||||
* that lock whenever it moves because of insertions or vacuums,
|
||||
* which seems too complicated.
|
||||
*/
|
||||
PredicateLockPage(ginstate->index,
|
||||
BufferGetBlockNumber(stackEntry->buffer),
|
||||
snapshot);
|
||||
if (GinGetNPosting(itup) > 0)
|
||||
{
|
||||
entry->list = ginReadTuple(ginstate, entry->attnum, itup,
|
||||
&entry->nlist);
|
||||
entry->predictNumberResult = entry->nlist;
|
||||
|
||||
entry->isFinished = false;
|
||||
entry->isFinished = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* No entry found. Predicate lock the leaf page, to lock the place
|
||||
* where the entry would've been, had there been one.
|
||||
*/
|
||||
PredicateLockPage(ginstate->index,
|
||||
BufferGetBlockNumber(stackEntry->buffer), snapshot);
|
||||
}
|
||||
|
||||
if (needUnlock)
|
||||
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
||||
@ -533,7 +541,7 @@ startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
|
||||
|
||||
for (i = 0; i < key->nentries - 1; i++)
|
||||
{
|
||||
/* Pass all entries <= i as false, and the rest as MAYBE */
|
||||
/* Pass all entries <= i as FALSE, and the rest as MAYBE */
|
||||
for (j = 0; j <= i; j++)
|
||||
key->entryRes[entryIndexes[j]] = GIN_FALSE;
|
||||
for (j = i + 1; j < key->nentries; j++)
|
||||
@ -673,8 +681,6 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
|
||||
entry->btree.fullScan = false;
|
||||
stack = ginFindLeafPage(&entry->btree, true, snapshot);
|
||||
|
||||
GinPredicateLockPage(ginstate->index, BufferGetBlockNumber(stack->buffer), snapshot);
|
||||
|
||||
/* we don't need the stack, just the buffer. */
|
||||
entry->buffer = stack->buffer;
|
||||
IncrBufferRefCount(entry->buffer);
|
||||
@ -719,10 +725,6 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
|
||||
entry->buffer = ginStepRight(entry->buffer,
|
||||
ginstate->index,
|
||||
GIN_SHARE);
|
||||
|
||||
GinPredicateLockPage(ginstate->index, BufferGetBlockNumber(entry->buffer), snapshot);
|
||||
|
||||
|
||||
page = BufferGetPage(entry->buffer);
|
||||
}
|
||||
stepright = true;
|
||||
@ -1084,8 +1086,8 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
|
||||
* lossy page even when none of the other entries match.
|
||||
*
|
||||
* Our strategy is to call the tri-state consistent function, with the
|
||||
* lossy-page entries set to MAYBE, and all the other entries false. If it
|
||||
* returns false, none of the lossy items alone are enough for a match, so
|
||||
* lossy-page entries set to MAYBE, and all the other entries FALSE. If it
|
||||
* returns FALSE, none of the lossy items alone are enough for a match, so
|
||||
* we don't need to return a lossy-page pointer. Otherwise, return a
|
||||
* lossy-page pointer to indicate that the whole heap page must be
|
||||
* checked. (On subsequent calls, we'll do nothing until minItem is past
|
||||
@ -1746,8 +1748,7 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
|
||||
}
|
||||
|
||||
/*
|
||||
* Collect all matched rows from pending list into bitmap. Also function
|
||||
* takes PendingLockRelation if it's needed.
|
||||
* Collect all matched rows from pending list into bitmap.
|
||||
*/
|
||||
static void
|
||||
scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
|
||||
@ -1764,6 +1765,12 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
|
||||
|
||||
*ntids = 0;
|
||||
|
||||
/*
|
||||
* Acquire predicate lock on the metapage, to conflict with any
|
||||
* fastupdate insertions.
|
||||
*/
|
||||
PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot);
|
||||
|
||||
LockBuffer(metabuffer, GIN_SHARE);
|
||||
page = BufferGetPage(metabuffer);
|
||||
TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
|
||||
@ -1777,24 +1784,9 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
|
||||
{
|
||||
/* No pending list, so proceed with normal scan */
|
||||
UnlockReleaseBuffer(metabuffer);
|
||||
|
||||
/*
|
||||
* If fast update is enabled, we acquire a predicate lock on the
|
||||
* entire relation as fast update postpones the insertion of tuples
|
||||
* into index structure due to which we can't detect rw conflicts.
|
||||
*/
|
||||
if (GinGetUseFastUpdate(scan->indexRelation))
|
||||
PredicateLockRelation(scan->indexRelation, scan->xs_snapshot);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Pending list is not empty, we need to lock the index doesn't despite on
|
||||
* fastupdate state
|
||||
*/
|
||||
PredicateLockRelation(scan->indexRelation, scan->xs_snapshot);
|
||||
|
||||
pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
|
||||
LockBuffer(pos.pendingBuffer, GIN_SHARE);
|
||||
pos.firstOffset = FirstOffsetNumber;
|
||||
|
@ -219,7 +219,7 @@ ginEntryInsert(GinState *ginstate,
|
||||
return;
|
||||
}
|
||||
|
||||
GinCheckForSerializableConflictIn(btree.index, NULL, stack->buffer);
|
||||
CheckForSerializableConflictIn(ginstate->index, NULL, stack->buffer);
|
||||
/* modify an existing leaf entry */
|
||||
itup = addItemPointersToLeafTuple(ginstate, itup,
|
||||
items, nitem, buildStats, stack->buffer);
|
||||
@ -228,7 +228,7 @@ ginEntryInsert(GinState *ginstate,
|
||||
}
|
||||
else
|
||||
{
|
||||
GinCheckForSerializableConflictIn(btree.index, NULL, stack->buffer);
|
||||
CheckForSerializableConflictIn(ginstate->index, NULL, stack->buffer);
|
||||
/* no match, so construct a new leaf entry */
|
||||
itup = buildFreshLeafTuple(ginstate, attnum, key, category,
|
||||
items, nitem, buildStats, stack->buffer);
|
||||
@ -517,18 +517,6 @@ gininsert(Relation index, Datum *values, bool *isnull,
|
||||
|
||||
memset(&collector, 0, sizeof(GinTupleCollector));
|
||||
|
||||
/*
|
||||
* With fastupdate on each scan and each insert begin with access to
|
||||
* pending list, so it effectively lock entire index. In this case we
|
||||
* aquire predicate lock and check for conflicts over index relation,
|
||||
* and hope that it will reduce locking overhead.
|
||||
*
|
||||
* Do not use GinCheckForSerializableConflictIn() here, because it
|
||||
* will do nothing (it does actual work only with fastupdate off).
|
||||
* Check for conflicts for entire index.
|
||||
*/
|
||||
CheckForSerializableConflictIn(index, NULL, InvalidBuffer);
|
||||
|
||||
for (i = 0; i < ginstate->origTupdesc->natts; i++)
|
||||
ginHeapTupleFastCollect(ginstate, &collector,
|
||||
(OffsetNumber) (i + 1),
|
||||
@ -539,16 +527,6 @@ gininsert(Relation index, Datum *values, bool *isnull,
|
||||
}
|
||||
else
|
||||
{
|
||||
GinStatsData stats;
|
||||
|
||||
/*
|
||||
* Fastupdate is off but if pending list isn't empty then we need to
|
||||
* check conflicts with PredicateLockRelation in scanPendingInsert().
|
||||
*/
|
||||
ginGetStats(index, &stats);
|
||||
if (stats.nPendingPages > 0)
|
||||
CheckForSerializableConflictIn(index, NULL, InvalidBuffer);
|
||||
|
||||
for (i = 0; i < ginstate->origTupdesc->natts; i++)
|
||||
ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
|
||||
values[i], isnull[i],
|
||||
|
@ -718,10 +718,3 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
|
||||
|
||||
END_CRIT_SECTION();
|
||||
}
|
||||
|
||||
void
|
||||
GinCheckForSerializableConflictIn(Relation relation, HeapTuple tuple, Buffer buffer)
|
||||
{
|
||||
if (!GinGetUseFastUpdate(relation))
|
||||
CheckForSerializableConflictIn(relation, tuple, buffer);
|
||||
}
|
||||
|
@ -166,7 +166,6 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
||||
START_CRIT_SECTION();
|
||||
|
||||
/* Unlink the page by changing left sibling's rightlink */
|
||||
|
||||
page = BufferGetPage(lBuffer);
|
||||
GinPageGetOpaque(page)->rightlink = rightlink;
|
||||
|
||||
|
@ -1220,7 +1220,7 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
|
||||
bool is_split;
|
||||
|
||||
/*
|
||||
* Check for any rw conflicts (in serialisation isolation level) just
|
||||
* Check for any rw conflicts (in serializable isolation level) just
|
||||
* before we intend to modify the page
|
||||
*/
|
||||
CheckForSerializableConflictIn(state->r, NULL, stack->buffer);
|
||||
|
@ -373,21 +373,22 @@ index *leaf* pages needed to lock the appropriate index range. If,
|
||||
however, a search discovers that no root page has yet been created, a
|
||||
predicate lock on the index relation is required.
|
||||
|
||||
* Like a B-tree, GIN searches acquire predicate locks only on the
|
||||
leaf pages of entry tree. When performing an equality scan, and an
|
||||
entry has a posting tree, the posting tree root is locked instead, to
|
||||
lock only that key value. However, fastupdate=on postpones the
|
||||
insertion of tuples into index structure by temporarily storing them
|
||||
into pending list. That makes us unable to detect r-w conflicts using
|
||||
page-level locks. To cope with that, insertions to the pending list
|
||||
conflict with all scans.
|
||||
|
||||
* GiST searches can determine that there are no matches at any
|
||||
level of the index, so we acquire predicate lock at each index
|
||||
level during a GiST search. An index insert at the leaf level can
|
||||
then be trusted to ripple up to all levels and locations where
|
||||
conflicting predicate locks may exist. In case there is a page split,
|
||||
we need to copy predicate lock from an original page to all new pages.
|
||||
|
||||
* GIN searches acquire predicate locks only on the leaf pages
|
||||
of entry tree and posting tree. During a page split, a predicate locks are
|
||||
copied from the original page to the new page. In the same way predicate locks
|
||||
are copied from entry tree leaf page to freshly created posting tree root.
|
||||
However, when fast update is enabled, a predicate lock on the whole index
|
||||
relation is required. Fast update postpones the insertion of tuples into index
|
||||
structure by temporarily storing them into pending list. That makes us unable
|
||||
to detect r-w conflicts using page-level locks.
|
||||
we need to copy predicate lock from the original page to all the new
|
||||
pages.
|
||||
|
||||
* Hash index searches acquire predicate locks on the primary
|
||||
page of a bucket. It acquires a lock on both the old and new buckets
|
||||
@ -395,7 +396,6 @@ for scans that happen concurrently with page splits. During a bucket
|
||||
split, a predicate lock is copied from the primary page of an old
|
||||
bucket to the primary page of a new bucket.
|
||||
|
||||
|
||||
* The effects of page splits, overflows, consolidations, and
|
||||
removals must be carefully reviewed to ensure that predicate locks
|
||||
aren't "lost" during those operations, or kept with pages which could
|
||||
|
@ -103,8 +103,6 @@ extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
|
||||
extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
|
||||
extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
|
||||
GinNullCategory *category);
|
||||
extern void GinCheckForSerializableConflictIn(Relation relation,
|
||||
HeapTuple tuple, Buffer buffer);
|
||||
|
||||
/* gininsert.c */
|
||||
extern IndexBuildResult *ginbuild(Relation heap, Relation index,
|
||||
@ -227,7 +225,6 @@ extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
|
||||
GinStatsData *buildStats);
|
||||
extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno, Snapshot snapshot);
|
||||
extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
|
||||
extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
|
||||
|
||||
/*
|
||||
* This is declared in ginvacuum.c, but is passed between ginVacuumItemPointers
|
||||
|
30
src/test/isolation/expected/predicate-gin-fastupdate.out
Normal file
30
src/test/isolation/expected/predicate-gin-fastupdate.out
Normal file
@ -0,0 +1,30 @@
|
||||
Parsed test spec with 3 sessions
|
||||
|
||||
starting permutation: r1 r2 w1 c1 w2 c2
|
||||
step r1: SELECT count(*) FROM gin_tbl WHERE p @> array[1000];
|
||||
count
|
||||
|
||||
2
|
||||
step r2: SELECT * FROM other_tbl;
|
||||
id
|
||||
|
||||
step w1: INSERT INTO other_tbl VALUES (42);
|
||||
step c1: COMMIT;
|
||||
step w2: INSERT INTO gin_tbl SELECT array[1000,19001];
|
||||
ERROR: could not serialize access due to read/write dependencies among transactions
|
||||
step c2: COMMIT;
|
||||
|
||||
starting permutation: r1 r2 w1 c1 fastupdate_on w2 c2
|
||||
step r1: SELECT count(*) FROM gin_tbl WHERE p @> array[1000];
|
||||
count
|
||||
|
||||
2
|
||||
step r2: SELECT * FROM other_tbl;
|
||||
id
|
||||
|
||||
step w1: INSERT INTO other_tbl VALUES (42);
|
||||
step c1: COMMIT;
|
||||
step fastupdate_on: ALTER INDEX ginidx SET (fastupdate = on);
|
||||
step w2: INSERT INTO gin_tbl SELECT array[1000,19001];
|
||||
ERROR: could not serialize access due to read/write dependencies among transactions
|
||||
step c2: COMMIT;
|
15
src/test/isolation/expected/predicate-gin-nomatch.out
Normal file
15
src/test/isolation/expected/predicate-gin-nomatch.out
Normal file
@ -0,0 +1,15 @@
|
||||
Parsed test spec with 2 sessions
|
||||
|
||||
starting permutation: r1 r2 w1 c1 w2 c2
|
||||
step r1: SELECT count(*) FROM gin_tbl WHERE p @> array[-1];
|
||||
count
|
||||
|
||||
0
|
||||
step r2: SELECT * FROM other_tbl;
|
||||
id
|
||||
|
||||
step w1: INSERT INTO other_tbl VALUES (42);
|
||||
step c1: COMMIT;
|
||||
step w2: INSERT INTO gin_tbl SELECT array[-1];
|
||||
ERROR: could not serialize access due to read/write dependencies among transactions
|
||||
step c2: COMMIT;
|
@ -737,8 +737,8 @@ step c2: commit;
|
||||
starting permutation: fu1 rxy1 rxy2fu wx1 c1 wy2fu c2
|
||||
step fu1: alter index ginidx set (fastupdate = on);
|
||||
commit;
|
||||
begin isolation level serializable;
|
||||
set enable_seqscan=off;
|
||||
begin isolation level serializable;
|
||||
set enable_seqscan=off;
|
||||
step rxy1: select count(*) from gin_tbl where p @> array[4,5];
|
||||
count
|
||||
|
||||
|
@ -69,6 +69,8 @@ test: vacuum-concurrent-drop
|
||||
test: predicate-hash
|
||||
test: predicate-gist
|
||||
test: predicate-gin
|
||||
test: predicate-gin-fastupdate
|
||||
test: predicate-gin-nomatch
|
||||
test: partition-key-update-1
|
||||
test: partition-key-update-2
|
||||
test: partition-key-update-3
|
||||
|
49
src/test/isolation/specs/predicate-gin-fastupdate.spec
Normal file
49
src/test/isolation/specs/predicate-gin-fastupdate.spec
Normal file
@ -0,0 +1,49 @@
|
||||
#
|
||||
# Test that predicate locking on a GIN index works correctly, even if
|
||||
# fastupdate is turned on concurrently.
|
||||
#
|
||||
# 0. fastupdate is off
|
||||
# 1. Session 's1' acquires predicate lock on page X
|
||||
# 2. fastupdate is turned on
|
||||
# 3. Session 's2' inserts a new tuple to the pending list
|
||||
#
|
||||
# This test tests that if the lock acquired in step 1 would conflict with
|
||||
# the scan in step 1, we detect that conflict correctly, even if fastupdate
|
||||
# was turned on in-between.
|
||||
#
|
||||
setup
|
||||
{
|
||||
create table gin_tbl(p int4[]);
|
||||
insert into gin_tbl select array[g, g*2,g*3] from generate_series(1, 10000) g;
|
||||
insert into gin_tbl select array[4,5,6] from generate_series(10001, 20000) g;
|
||||
create index ginidx on gin_tbl using gin(p) with (fastupdate = off);
|
||||
|
||||
create table other_tbl (id int4);
|
||||
}
|
||||
|
||||
teardown
|
||||
{
|
||||
drop table gin_tbl;
|
||||
drop table other_tbl;
|
||||
}
|
||||
|
||||
session "s1"
|
||||
setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan=off; }
|
||||
step "r1" { SELECT count(*) FROM gin_tbl WHERE p @> array[1000]; }
|
||||
step "w1" { INSERT INTO other_tbl VALUES (42); }
|
||||
step "c1" { COMMIT; }
|
||||
|
||||
session "s2"
|
||||
setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan=off; }
|
||||
step "r2" { SELECT * FROM other_tbl; }
|
||||
step "w2" { INSERT INTO gin_tbl SELECT array[1000,19001]; }
|
||||
step "c2" { COMMIT; }
|
||||
|
||||
session "s3"
|
||||
step "fastupdate_on" { ALTER INDEX ginidx SET (fastupdate = on); }
|
||||
|
||||
# This correctly throws serialization failure.
|
||||
permutation "r1" "r2" "w1" "c1" "w2" "c2"
|
||||
|
||||
# But if fastupdate is turned on in the middle, we miss it.
|
||||
permutation "r1" "r2" "w1" "c1" "fastupdate_on" "w2" "c2"
|
35
src/test/isolation/specs/predicate-gin-nomatch.spec
Normal file
35
src/test/isolation/specs/predicate-gin-nomatch.spec
Normal file
@ -0,0 +1,35 @@
|
||||
#
|
||||
# Check that GIN index grabs an appropriate lock, even if there is no match.
|
||||
#
|
||||
setup
|
||||
{
|
||||
create table gin_tbl(p int4[]);
|
||||
insert into gin_tbl select array[g, g*2,g*3] from generate_series(1, 10000) g;
|
||||
insert into gin_tbl select array[4,5,6] from generate_series(10001, 20000) g;
|
||||
create index ginidx on gin_tbl using gin(p) with (fastupdate = off);
|
||||
|
||||
create table other_tbl (id int4);
|
||||
}
|
||||
|
||||
teardown
|
||||
{
|
||||
drop table gin_tbl;
|
||||
drop table other_tbl;
|
||||
}
|
||||
|
||||
session "s1"
|
||||
setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan=off; }
|
||||
# Scan with no match.
|
||||
step "r1" { SELECT count(*) FROM gin_tbl WHERE p @> array[-1]; }
|
||||
step "w1" { INSERT INTO other_tbl VALUES (42); }
|
||||
step "c1" { COMMIT; }
|
||||
|
||||
session "s2"
|
||||
setup { BEGIN ISOLATION LEVEL SERIALIZABLE; SET enable_seqscan=off; }
|
||||
step "r2" { SELECT * FROM other_tbl; }
|
||||
# Insert row that would've matched in step "r1"
|
||||
step "w2" { INSERT INTO gin_tbl SELECT array[-1]; }
|
||||
step "c2" { COMMIT; }
|
||||
|
||||
# This should throw serialization failure.
|
||||
permutation "r1" "r2" "w1" "c1" "w2" "c2"
|
@ -32,8 +32,8 @@ setup
|
||||
# enable pending list for a small subset of tests
|
||||
step "fu1" { alter index ginidx set (fastupdate = on);
|
||||
commit;
|
||||
begin isolation level serializable;
|
||||
set enable_seqscan=off; }
|
||||
begin isolation level serializable;
|
||||
set enable_seqscan=off; }
|
||||
|
||||
step "rxy1" { select count(*) from gin_tbl where p @> array[4,5]; }
|
||||
step "wx1" { insert into gin_tbl select g, array[5,6] from generate_series
|
||||
|
Loading…
x
Reference in New Issue
Block a user