Fix duplicates handling.
This commit is contained in:
parent
43b6f1e678
commit
3f5834fb8c
@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.12 1997/04/16 01:48:11 vadim Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.13 1997/05/30 18:35:31 vadim Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -28,13 +28,14 @@
|
||||
#endif
|
||||
|
||||
static InsertIndexResult _bt_insertonpg(Relation rel, Buffer buf, BTStack stack, int keysz, ScanKey scankey, BTItem btitem, BTItem afteritem);
|
||||
static Buffer _bt_split(Relation rel, Buffer buf);
|
||||
static Buffer _bt_split(Relation rel, Buffer buf, BTItem hiRightItem);
|
||||
static OffsetNumber _bt_findsplitloc(Relation rel, Page page, OffsetNumber start, OffsetNumber maxoff, Size llimit);
|
||||
static void _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf);
|
||||
static OffsetNumber _bt_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, BTItem btitem, BTItem afteritem);
|
||||
static bool _bt_goesonpg(Relation rel, Buffer buf, Size keysz, ScanKey scankey, BTItem afteritem);
|
||||
static void _bt_updateitem(Relation rel, Size keysz, Buffer buf, BTItem oldItem, BTItem newItem);
|
||||
static bool _bt_isequal (TupleDesc itupdesc, Page page, OffsetNumber offnum, int keysz, ScanKey scankey);
|
||||
static InsertIndexResult _bt_shift (Relation rel, Buffer buf, BTStack stack, int keysz, ScanKey scankey, BTItem btitem, BTItem hikey);
|
||||
|
||||
/*
|
||||
* _bt_doinsert() -- Handle insertion of a single btitem in the tree.
|
||||
@ -225,31 +226,152 @@ _bt_insertonpg(Relation rel,
|
||||
Buffer rbuf;
|
||||
Buffer pbuf;
|
||||
Page rpage;
|
||||
ScanKey newskey;
|
||||
BTItem ritem;
|
||||
BTPageOpaque lpageop;
|
||||
BTPageOpaque rpageop;
|
||||
BlockNumber rbknum, itup_blkno;
|
||||
OffsetNumber itup_off;
|
||||
int itemsz;
|
||||
InsertIndexResult newres;
|
||||
BTItem new_item = (BTItem) NULL;
|
||||
BTItem lowLeftItem;
|
||||
OffsetNumber leftmost_offset;
|
||||
Page ppage;
|
||||
BTPageOpaque ppageop;
|
||||
BlockNumber bknum;
|
||||
|
||||
page = BufferGetPage(buf);
|
||||
lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
itemsz = IndexTupleDSize(btitem->bti_itup)
|
||||
+ (sizeof(BTItemData) - sizeof(IndexTupleData));
|
||||
|
||||
itemsz = DOUBLEALIGN(itemsz); /* be safe, PageAddItem will do this
|
||||
but we need to be consistent */
|
||||
/*
|
||||
* If we have to insert item on the leftmost page which is the first
|
||||
* page in the chain of duplicates then:
|
||||
* 1. if scankey == hikey (i.e. - new duplicate item) then
|
||||
* insert it here;
|
||||
* 2. if scankey < hikey then we grab new page, copy current page
|
||||
* content there and insert new item on the current page.
|
||||
*/
|
||||
if ( lpageop->btpo_flags & BTP_CHAIN )
|
||||
{
|
||||
OffsetNumber maxoff = PageGetMaxOffsetNumber (page);
|
||||
ItemId hitemid;
|
||||
BTItem hitem;
|
||||
|
||||
Assert ( !P_RIGHTMOST(lpageop) );
|
||||
hitemid = PageGetItemId(page, P_HIKEY);
|
||||
hitem = (BTItem) PageGetItem(page, hitemid);
|
||||
if ( maxoff > P_HIKEY &&
|
||||
!_bt_itemcmp (rel, keysz, hitem,
|
||||
(BTItem) PageGetItem(page, PageGetItemId(page, P_FIRSTKEY)),
|
||||
BTEqualStrategyNumber) )
|
||||
elog (FATAL, "btree: bad key on the page in the chain of duplicates");
|
||||
|
||||
if ( !_bt_skeycmp (rel, keysz, scankey, page, hitemid,
|
||||
BTEqualStrategyNumber) )
|
||||
{
|
||||
if ( !P_LEFTMOST(lpageop) )
|
||||
elog (FATAL, "btree: attempt to insert bad key on the non-leftmost page in the chain of duplicates");
|
||||
if ( !_bt_skeycmp (rel, keysz, scankey, page, hitemid,
|
||||
BTLessStrategyNumber) )
|
||||
elog (FATAL, "btree: attempt to insert higher key on the leftmost page in the chain of duplicates");
|
||||
return (_bt_shift(rel, buf, stack, keysz, scankey, btitem, hitem));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (PageGetFreeSpace(page) < itemsz) {
|
||||
if (PageGetFreeSpace(page) < itemsz)
|
||||
{
|
||||
BlockNumber bknum = BufferGetBlockNumber(buf);
|
||||
BTItem lowLeftItem;
|
||||
BTItem hiRightItem = NULL;
|
||||
|
||||
/*
|
||||
* If we have to split leaf page in the chain of duplicates
|
||||
* then we try to move righter to avoid splitting.
|
||||
*/
|
||||
if ( ( lpageop->btpo_flags & BTP_CHAIN ) &&
|
||||
( lpageop->btpo_flags & BTP_LEAF ) )
|
||||
{
|
||||
bool use_left = true;
|
||||
|
||||
for ( ; ; )
|
||||
{
|
||||
bool keys_equal = false;
|
||||
|
||||
rbuf = _bt_getbuf(rel, lpageop->btpo_next, BT_WRITE);
|
||||
rpage = BufferGetPage(rbuf);
|
||||
rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage);
|
||||
if ( P_RIGHTMOST (rpageop) )
|
||||
{
|
||||
Assert ( !( rpageop->btpo_flags & BTP_CHAIN ) );
|
||||
use_left = false;
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* If we have the same hikey here then it's
|
||||
* yet another page in chain and we may move
|
||||
* even righter.
|
||||
*/
|
||||
if ( _bt_skeycmp (rel, keysz, scankey, rpage,
|
||||
PageGetItemId(rpage, P_HIKEY),
|
||||
BTEqualStrategyNumber) )
|
||||
{
|
||||
if ( !( rpageop->btpo_flags & BTP_CHAIN ) )
|
||||
elog (FATAL, "btree: lost page in the chain of duplicates");
|
||||
keys_equal = true;
|
||||
}
|
||||
else if ( _bt_skeycmp (rel, keysz, scankey, rpage,
|
||||
PageGetItemId(rpage, P_HIKEY),
|
||||
BTGreaterStrategyNumber) )
|
||||
elog (FATAL, "btree: hikey is out of order");
|
||||
/*
|
||||
* If hikey > scankey and BTP_CHAIN is ON
|
||||
* then it's first page of the chain of higher keys:
|
||||
* our left sibling hikey was lying! We can't add new
|
||||
* item here, but we can turn BTP_CHAIN off on our
|
||||
* left page and overwrite its hikey.
|
||||
*/
|
||||
if ( !keys_equal && ( rpageop->btpo_flags & BTP_CHAIN ) )
|
||||
{
|
||||
BTItem tmp;
|
||||
|
||||
lpageop->btpo_flags &= ~BTP_CHAIN;
|
||||
tmp = (BTItem) PageGetItem(rpage,
|
||||
PageGetItemId(rpage, P_HIKEY));
|
||||
hiRightItem = _bt_formitem(&(tmp->bti_itup));
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* if there is room here or hikey > scankey (so it's our
|
||||
* last page in the chain and we can't move righter)
|
||||
* we have to use this page .
|
||||
*/
|
||||
if ( PageGetFreeSpace (rpage) > itemsz || !keys_equal )
|
||||
{
|
||||
use_left = false;
|
||||
break;
|
||||
}
|
||||
/* try to move righter */
|
||||
_bt_relbuf(rel, buf, BT_WRITE);
|
||||
buf = rbuf;
|
||||
page = rpage;
|
||||
lpageop = rpageop;
|
||||
}
|
||||
if ( !use_left ) /* insert on the right page */
|
||||
{
|
||||
_bt_relbuf(rel, buf, BT_WRITE);
|
||||
return ( _bt_insertonpg(rel, rbuf, stack, keysz,
|
||||
scankey, btitem, afteritem) );
|
||||
}
|
||||
_bt_relbuf(rel, rbuf, BT_WRITE);
|
||||
bknum = BufferGetBlockNumber(buf);
|
||||
}
|
||||
|
||||
/* split the buffer into left and right halves */
|
||||
rbuf = _bt_split(rel, buf);
|
||||
rbuf = _bt_split(rel, buf, hiRightItem);
|
||||
|
||||
if ( hiRightItem != (BTItem) NULL )
|
||||
pfree (hiRightItem);
|
||||
|
||||
/* which new page (left half or right half) gets the tuple? */
|
||||
if (_bt_goesonpg(rel, buf, keysz, scankey, afteritem)) {
|
||||
@ -264,6 +386,14 @@ _bt_insertonpg(Relation rel,
|
||||
itup_blkno = BufferGetBlockNumber(rbuf);
|
||||
}
|
||||
|
||||
lowLeftItem = (BTItem) PageGetItem(page,
|
||||
PageGetItemId(page, P_FIRSTKEY));
|
||||
|
||||
if ( _bt_itemcmp (rel, keysz, lowLeftItem,
|
||||
(BTItem) PageGetItem(page, PageGetItemId(page, P_HIKEY)),
|
||||
BTEqualStrategyNumber) )
|
||||
lpageop->btpo_flags |= BTP_CHAIN;
|
||||
|
||||
/*
|
||||
* By here,
|
||||
*
|
||||
@ -287,6 +417,11 @@ _bt_insertonpg(Relation rel,
|
||||
_bt_relbuf(rel, rbuf, BT_WRITE);
|
||||
|
||||
} else {
|
||||
ScanKey newskey;
|
||||
InsertIndexResult newres;
|
||||
BTItem new_item;
|
||||
OffsetNumber upditem_offset = P_HIKEY;
|
||||
bool do_update = false;
|
||||
|
||||
/* form a index tuple that points at the new right page */
|
||||
rbknum = BufferGetBlockNumber(rbuf);
|
||||
@ -294,27 +429,43 @@ _bt_insertonpg(Relation rel,
|
||||
rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage);
|
||||
|
||||
/*
|
||||
* By convention, the first entry (0) on every
|
||||
* By convention, the first entry (1) on every
|
||||
* non-rightmost page is the high key for that page. In
|
||||
* order to get the lowest key on the new right page, we
|
||||
* actually look at its second (1) entry.
|
||||
* actually look at its second (2) entry.
|
||||
*/
|
||||
|
||||
if (! P_RIGHTMOST(rpageop)) {
|
||||
if (! P_RIGHTMOST(rpageop))
|
||||
{
|
||||
ritem = (BTItem) PageGetItem(rpage,
|
||||
PageGetItemId(rpage, P_FIRSTKEY));
|
||||
} else {
|
||||
if ( _bt_itemcmp (rel, keysz, ritem,
|
||||
(BTItem) PageGetItem(rpage,
|
||||
PageGetItemId(rpage, P_HIKEY)),
|
||||
BTEqualStrategyNumber) )
|
||||
rpageop->btpo_flags |= BTP_CHAIN;
|
||||
}
|
||||
else
|
||||
ritem = (BTItem) PageGetItem(rpage,
|
||||
PageGetItemId(rpage, P_HIKEY));
|
||||
}
|
||||
|
||||
/* get a unique btitem for this key */
|
||||
new_item = _bt_formitem(&(ritem->bti_itup));
|
||||
|
||||
ItemPointerSet(&(new_item->bti_itup.t_tid), rbknum, P_HIKEY);
|
||||
|
||||
/* find the parent buffer */
|
||||
/*
|
||||
* Find the parent buffer and get the parent page.
|
||||
*
|
||||
* Oops - if we were moved right then we need to
|
||||
* change stack item! We want to find parent pointing to
|
||||
* where we are, right ? - vadim 05/27/97
|
||||
*/
|
||||
ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid),
|
||||
bknum, P_HIKEY);
|
||||
pbuf = _bt_getstackbuf(rel, stack, BT_WRITE);
|
||||
ppage = BufferGetPage(pbuf);
|
||||
ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage);
|
||||
|
||||
/*
|
||||
* If the key of new_item is < than the key of the item
|
||||
@ -330,29 +481,59 @@ _bt_insertonpg(Relation rel,
|
||||
* key spills over to our new right page, we get an
|
||||
* inconsistency if we don't update the left key in the
|
||||
* parent page.
|
||||
*
|
||||
* Also, new duplicates handling code require us to update
|
||||
* parent item if some smaller items left on the left page
|
||||
* (which is possible in splitting leftmost page) and
|
||||
* current parent item == new_item. - vadim 05/27/97
|
||||
*/
|
||||
|
||||
if (_bt_itemcmp(rel, keysz, stack->bts_btitem, new_item,
|
||||
BTGreaterStrategyNumber)) {
|
||||
ppageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
Assert (P_LEFTMOST(ppageop));
|
||||
lowLeftItem =
|
||||
(BTItem) PageGetItem(page,
|
||||
PageGetItemId(page, P_FIRSTKEY));
|
||||
|
||||
/* this method does not work--_bt_updateitem tries to */
|
||||
/* overwrite an entry with another entry that might be */
|
||||
/* bigger. if lowLeftItem is bigger, it corrupts the */
|
||||
/* parent page. instead, we have to delete the original */
|
||||
/* leftmost item from the parent, and insert the new one */
|
||||
/* with a regular _bt_insertonpg (it could cause a split */
|
||||
/* because it's bigger than what was there before). */
|
||||
/* --djm 8/21/96 */
|
||||
|
||||
if ( _bt_itemcmp (rel, keysz, stack->bts_btitem, new_item,
|
||||
BTGreaterStrategyNumber) ||
|
||||
( _bt_itemcmp(rel, keysz, stack->bts_btitem,
|
||||
new_item, BTEqualStrategyNumber) &&
|
||||
_bt_itemcmp(rel, keysz, lowLeftItem,
|
||||
new_item, BTLessStrategyNumber) ) )
|
||||
{
|
||||
do_update = true;
|
||||
/*
|
||||
* but it works for items with the same size and so why don't
|
||||
* use it for them ? - vadim 12/05/96
|
||||
* figure out which key is leftmost (if the parent page
|
||||
* is rightmost, too, it must be the root)
|
||||
*/
|
||||
if(P_RIGHTMOST(ppageop))
|
||||
upditem_offset = P_HIKEY;
|
||||
else
|
||||
upditem_offset = P_FIRSTKEY;
|
||||
if ( !P_LEFTMOST(lpageop) ||
|
||||
stack->bts_offset != upditem_offset )
|
||||
elog (FATAL, "btree: items are out of order");
|
||||
}
|
||||
/*
|
||||
* There was bug caused by deletion all minimum keys (K1) from
|
||||
* an index page and insertion there (up to page splitting)
|
||||
* higher duplicate keys (K2): after it parent item for left
|
||||
* page contained K1 and the next item (for new right page) - K2,
|
||||
* - and scan for the key = K2 lost items on the left page.
|
||||
* So, we have to update parent item if its key < minimum
|
||||
* key on the left and minimum keys on the left and on the right
|
||||
* are equal. It would be nice to update hikey on the previous
|
||||
* page of the left one too, but we may get deadlock here
|
||||
* (read comments in _bt_split), so we leave previous page
|
||||
* hikey _inconsistent_, but there should be BTP_CHAIN flag
|
||||
* on it, which prevents _bt_moveright from dangerous movings
|
||||
* from there. - vadim 05/27/97
|
||||
*/
|
||||
else if ( _bt_itemcmp (rel, keysz, stack->bts_btitem,
|
||||
lowLeftItem, BTLessStrategyNumber) &&
|
||||
_bt_itemcmp (rel, keysz, new_item,
|
||||
lowLeftItem, BTEqualStrategyNumber) )
|
||||
{
|
||||
do_update = true;
|
||||
upditem_offset = stack->bts_offset;
|
||||
}
|
||||
|
||||
if ( do_update )
|
||||
{
|
||||
/* Try to update in place. */
|
||||
if ( DOUBLEALIGN (IndexTupleDSize (lowLeftItem->bti_itup)) ==
|
||||
DOUBLEALIGN (IndexTupleDSize (stack->bts_btitem->bti_itup)) )
|
||||
{
|
||||
@ -363,33 +544,16 @@ _bt_insertonpg(Relation rel,
|
||||
}
|
||||
else
|
||||
{
|
||||
/* get the parent page */
|
||||
ppage = BufferGetPage(pbuf);
|
||||
ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage);
|
||||
|
||||
/*
|
||||
* figure out which key is leftmost (if the parent page
|
||||
* is rightmost, too, it must be the root)
|
||||
*/
|
||||
if(P_RIGHTMOST(ppageop)) {
|
||||
leftmost_offset = P_HIKEY;
|
||||
} else {
|
||||
leftmost_offset = P_FIRSTKEY;
|
||||
}
|
||||
PageIndexTupleDelete(ppage, leftmost_offset);
|
||||
PageIndexTupleDelete(ppage, upditem_offset);
|
||||
|
||||
/*
|
||||
* don't write anything out yet--we still have the write
|
||||
* lock, and now we call another _bt_insertonpg to
|
||||
* insert the correct leftmost key
|
||||
* insert the correct key.
|
||||
* First, make a new item, using the tuple data from
|
||||
* lowLeftItem. Point it to the left child.
|
||||
* Update it on the stack at the same time.
|
||||
*/
|
||||
|
||||
/*
|
||||
* make a new leftmost item, using the tuple data from
|
||||
* lowLeftItem. point it to the left child.
|
||||
* update it on the stack at the same time.
|
||||
*/
|
||||
bknum = BufferGetBlockNumber(buf);
|
||||
pfree(stack->bts_btitem);
|
||||
stack->bts_btitem = _bt_formitem(&(lowLeftItem->bti_itup));
|
||||
ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid),
|
||||
@ -400,9 +564,10 @@ _bt_insertonpg(Relation rel,
|
||||
_bt_relbuf(rel, rbuf, BT_WRITE);
|
||||
|
||||
/*
|
||||
* a regular _bt_binsrch should find the right place to
|
||||
* put the new entry, since it should be lower than any
|
||||
* other key on the page, therefore set afteritem to NULL
|
||||
* A regular _bt_binsrch should find the right place to
|
||||
* put the new entry, since it should be either lower
|
||||
* than any other key on the page or unique.
|
||||
* Therefore set afteritem to NULL.
|
||||
*/
|
||||
newskey = _bt_mkscankey(rel, &(stack->bts_btitem->bti_itup));
|
||||
newres = _bt_insertonpg(rel, pbuf, stack->bts_parent,
|
||||
@ -458,7 +623,7 @@ _bt_insertonpg(Relation rel,
|
||||
* pin and lock on buf are maintained.
|
||||
*/
|
||||
static Buffer
|
||||
_bt_split(Relation rel, Buffer buf)
|
||||
_bt_split(Relation rel, Buffer buf, BTItem hiRightItem)
|
||||
{
|
||||
Buffer rbuf;
|
||||
Page origpage;
|
||||
@ -492,6 +657,7 @@ _bt_split(Relation rel, Buffer buf)
|
||||
|
||||
/* if we're splitting this page, it won't be the root when we're done */
|
||||
oopaque->btpo_flags &= ~BTP_ROOT;
|
||||
oopaque->btpo_flags &= ~BTP_CHAIN;
|
||||
lopaque->btpo_flags = ropaque->btpo_flags = oopaque->btpo_flags;
|
||||
lopaque->btpo_prev = oopaque->btpo_prev;
|
||||
ropaque->btpo_prev = BufferGetBlockNumber(buf);
|
||||
@ -516,10 +682,23 @@ _bt_split(Relation rel, Buffer buf)
|
||||
/* splitting a non-rightmost page, start at the first data item */
|
||||
start = P_FIRSTKEY;
|
||||
|
||||
/* copy the original high key to the new page */
|
||||
itemid = PageGetItemId(origpage, P_HIKEY);
|
||||
itemsz = ItemIdGetLength(itemid);
|
||||
item = (BTItem) PageGetItem(origpage, itemid);
|
||||
/*
|
||||
* Copy the original high key to the new page if high key
|
||||
* was not passed by caller.
|
||||
*/
|
||||
if ( hiRightItem == NULL )
|
||||
{
|
||||
itemid = PageGetItemId(origpage, P_HIKEY);
|
||||
itemsz = ItemIdGetLength(itemid);
|
||||
item = (BTItem) PageGetItem(origpage, itemid);
|
||||
}
|
||||
else
|
||||
{
|
||||
item = hiRightItem;
|
||||
itemsz = IndexTupleDSize(hiRightItem->bti_itup)
|
||||
+ (sizeof(BTItemData) - sizeof(IndexTupleData));
|
||||
itemsz = DOUBLEALIGN(itemsz);
|
||||
}
|
||||
(void) PageAddItem(rightpage, (Item) item, itemsz, P_HIKEY, LP_USED);
|
||||
rightoff = P_FIRSTKEY;
|
||||
} else {
|
||||
@ -744,7 +923,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
|
||||
itemsz = ItemIdGetLength(itemid);
|
||||
item = (BTItem) PageGetItem(lpage, itemid);
|
||||
new_item = _bt_formitem(&(item->bti_itup));
|
||||
ItemPointerSet(&(new_item->bti_itup.t_tid), lbkno, P_FIRSTKEY);
|
||||
ItemPointerSet(&(new_item->bti_itup.t_tid), lbkno, P_HIKEY);
|
||||
|
||||
/*
|
||||
* insert the left page pointer into the new root page. the root
|
||||
@ -1098,3 +1277,137 @@ _bt_isequal (TupleDesc itupdesc, Page page, OffsetNumber offnum,
|
||||
/* by here, the keys are equal */
|
||||
return (true);
|
||||
}
|
||||
|
||||
/*
|
||||
* _bt_shift - insert btitem on the passed page after shifting page
|
||||
* to the right in the tree.
|
||||
*
|
||||
* NOTE: tested for shifting leftmost page only, having btitem < hikey.
|
||||
*/
|
||||
static InsertIndexResult
|
||||
_bt_shift (Relation rel, Buffer buf, BTStack stack, int keysz,
|
||||
ScanKey scankey, BTItem btitem, BTItem hikey)
|
||||
{
|
||||
InsertIndexResult res;
|
||||
int itemsz;
|
||||
Page page;
|
||||
BlockNumber bknum;
|
||||
BTPageOpaque pageop;
|
||||
Buffer rbuf;
|
||||
Page rpage;
|
||||
BTPageOpaque rpageop;
|
||||
Buffer pbuf;
|
||||
Page ppage;
|
||||
BTPageOpaque ppageop;
|
||||
Buffer nbuf;
|
||||
Page npage;
|
||||
BTPageOpaque npageop;
|
||||
BlockNumber nbknum;
|
||||
BTItem nitem;
|
||||
OffsetNumber afteroff;
|
||||
|
||||
btitem = _bt_formitem(&(btitem->bti_itup));
|
||||
hikey = _bt_formitem(&(hikey->bti_itup));
|
||||
|
||||
page = BufferGetPage(buf);
|
||||
|
||||
/* grab new page */
|
||||
nbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
|
||||
nbknum = BufferGetBlockNumber(nbuf);
|
||||
npage = BufferGetPage(nbuf);
|
||||
_bt_pageinit(npage, BufferGetPageSize(nbuf));
|
||||
npageop = (BTPageOpaque) PageGetSpecialPointer(npage);
|
||||
|
||||
/* copy content of the passed page */
|
||||
memmove ((char *) npage, (char *) page, BufferGetPageSize(buf));
|
||||
|
||||
/* re-init old (passed) page */
|
||||
_bt_pageinit(page, BufferGetPageSize(buf));
|
||||
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/* init old page opaque */
|
||||
pageop->btpo_flags = npageop->btpo_flags; /* restore flags */
|
||||
pageop->btpo_flags &= ~BTP_CHAIN;
|
||||
if ( _bt_itemcmp (rel, keysz, hikey, btitem, BTEqualStrategyNumber) )
|
||||
pageop->btpo_flags |= BTP_CHAIN;
|
||||
pageop->btpo_prev = npageop->btpo_prev; /* restore prev */
|
||||
pageop->btpo_next = nbknum; /* next points to the new page */
|
||||
|
||||
/* init shifted page opaque */
|
||||
npageop->btpo_prev = bknum = BufferGetBlockNumber(buf);
|
||||
|
||||
/* shifted page is ok, populate old page */
|
||||
|
||||
/* add passed hikey */
|
||||
itemsz = IndexTupleDSize(hikey->bti_itup)
|
||||
+ (sizeof(BTItemData) - sizeof(IndexTupleData));
|
||||
itemsz = DOUBLEALIGN(itemsz);
|
||||
(void) PageAddItem(page, (Item) hikey, itemsz, P_HIKEY, LP_USED);
|
||||
pfree (hikey);
|
||||
|
||||
/* add btitem */
|
||||
itemsz = IndexTupleDSize(btitem->bti_itup)
|
||||
+ (sizeof(BTItemData) - sizeof(IndexTupleData));
|
||||
itemsz = DOUBLEALIGN(itemsz);
|
||||
(void) PageAddItem(page, (Item) btitem, itemsz, P_FIRSTKEY, LP_USED);
|
||||
pfree (btitem);
|
||||
nitem = (BTItem) PageGetItem(page, PageGetItemId(page, P_FIRSTKEY));
|
||||
btitem = _bt_formitem(&(nitem->bti_itup));
|
||||
ItemPointerSet(&(btitem->bti_itup.t_tid), bknum, P_HIKEY);
|
||||
|
||||
/* ok, write them out */
|
||||
_bt_wrtnorelbuf(rel, nbuf);
|
||||
_bt_wrtnorelbuf(rel, buf);
|
||||
|
||||
/* fix btpo_prev on right sibling of old page */
|
||||
if ( !P_RIGHTMOST (npageop) )
|
||||
{
|
||||
rbuf = _bt_getbuf(rel, npageop->btpo_next, BT_WRITE);
|
||||
rpage = BufferGetPage(rbuf);
|
||||
rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage);
|
||||
rpageop->btpo_prev = nbknum;
|
||||
_bt_wrtbuf(rel, rbuf);
|
||||
}
|
||||
|
||||
/* get parent pointing to the old page */
|
||||
ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid),
|
||||
bknum, P_HIKEY);
|
||||
pbuf = _bt_getstackbuf(rel, stack, BT_WRITE);
|
||||
ppage = BufferGetPage(pbuf);
|
||||
ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage);
|
||||
|
||||
_bt_relbuf(rel, nbuf, BT_WRITE);
|
||||
_bt_relbuf(rel, buf, BT_WRITE);
|
||||
|
||||
/* re-set parent' pointer - we shifted our page to the right ! */
|
||||
nitem = (BTItem) PageGetItem (ppage,
|
||||
PageGetItemId (ppage, stack->bts_offset));
|
||||
ItemPointerSet(&(nitem->bti_itup.t_tid), nbknum, P_HIKEY);
|
||||
ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), nbknum, P_HIKEY);
|
||||
_bt_wrtnorelbuf(rel, pbuf);
|
||||
|
||||
/*
|
||||
* Now we want insert into the parent pointer to our old page. It has to
|
||||
* be inserted before the pointer to new page. You may get problems here
|
||||
* (in the _bt_goesonpg and/or _bt_pgaddtup), but may be not - I don't
|
||||
* know. It works if old page is leftmost (nitem is NULL) and
|
||||
* btitem < hikey and it's all what we need currently. - vadim 05/30/97
|
||||
*/
|
||||
nitem = NULL;
|
||||
afteroff = P_FIRSTKEY;
|
||||
if ( !P_RIGHTMOST (ppageop) )
|
||||
afteroff = OffsetNumberNext (afteroff);
|
||||
if ( stack->bts_offset >= afteroff )
|
||||
{
|
||||
afteroff = OffsetNumberPrev (stack->bts_offset);
|
||||
nitem = (BTItem) PageGetItem (ppage, PageGetItemId (ppage, afteroff));
|
||||
nitem = _bt_formitem(&(nitem->bti_itup));
|
||||
}
|
||||
res = _bt_insertonpg(rel, pbuf, stack->bts_parent,
|
||||
keysz, scankey, btitem, nitem);
|
||||
pfree (btitem);
|
||||
|
||||
ItemPointerSet(&(res->pointerData), nbknum, P_HIKEY);
|
||||
|
||||
return (res);
|
||||
}
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.7 1997/04/16 01:48:15 vadim Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.8 1997/05/30 18:35:33 vadim Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Postgres btree pages look like ordinary relation pages. The opaque
|
||||
@ -441,6 +441,9 @@ _bt_metaproot(Relation rel, BlockNumber rootbknum, int level)
|
||||
* This is possible because we save a bit image of the last item
|
||||
* we looked at in the parent, and the update algorithm guarantees
|
||||
* that if items above us in the tree move, they only move right.
|
||||
*
|
||||
* Also, re-set bts_blkno & bts_offset if changed and
|
||||
* bts_btitem (it may be changed - see _bt_insertonpg).
|
||||
*/
|
||||
Buffer
|
||||
_bt_getstackbuf(Relation rel, BTStack stack, int access)
|
||||
@ -453,6 +456,8 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
|
||||
ItemId itemid;
|
||||
BTItem item;
|
||||
BTPageOpaque opaque;
|
||||
BTItem item_save;
|
||||
int item_nbytes;
|
||||
|
||||
blkno = stack->bts_blkno;
|
||||
buf = _bt_getbuf(rel, blkno, access);
|
||||
@ -466,7 +471,14 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
|
||||
|
||||
/* if the item is where we left it, we're done */
|
||||
if ( BTItemSame (item, stack->bts_btitem) )
|
||||
{
|
||||
pfree(stack->bts_btitem);
|
||||
item_nbytes = ItemIdGetLength(itemid);
|
||||
item_save = (BTItem) palloc(item_nbytes);
|
||||
memmove((char *) item_save, (char *) item, item_nbytes);
|
||||
stack->bts_btitem = item_save;
|
||||
return (buf);
|
||||
}
|
||||
|
||||
/* if the item has just moved right on this page, we're done */
|
||||
for (i = OffsetNumberNext(stack->bts_offset);
|
||||
@ -477,7 +489,15 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
|
||||
|
||||
/* if the item is where we left it, we're done */
|
||||
if ( BTItemSame (item, stack->bts_btitem) )
|
||||
{
|
||||
stack->bts_offset = i;
|
||||
pfree(stack->bts_btitem);
|
||||
item_nbytes = ItemIdGetLength(itemid);
|
||||
item_save = (BTItem) palloc(item_nbytes);
|
||||
memmove((char *) item_save, (char *) item, item_nbytes);
|
||||
stack->bts_btitem = item_save;
|
||||
return (buf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -503,7 +523,16 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
|
||||
itemid = PageGetItemId(page, offnum);
|
||||
item = (BTItem) PageGetItem(page, itemid);
|
||||
if ( BTItemSame (item, stack->bts_btitem) )
|
||||
{
|
||||
stack->bts_offset = offnum;
|
||||
stack->bts_blkno = blkno;
|
||||
pfree(stack->bts_btitem);
|
||||
item_nbytes = ItemIdGetLength(itemid);
|
||||
item_save = (BTItem) palloc(item_nbytes);
|
||||
memmove((char *) item_save, (char *) item, item_nbytes);
|
||||
stack->bts_btitem = item_save;
|
||||
return (buf);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.19 1997/05/05 03:41:19 vadim Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.20 1997/05/30 18:35:37 vadim Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -158,9 +158,7 @@ _bt_moveright(Relation rel,
|
||||
Page page;
|
||||
BTPageOpaque opaque;
|
||||
ItemId hikey;
|
||||
ItemId itemid;
|
||||
BlockNumber rblkno;
|
||||
int natts = rel->rd_rel->relnatts;
|
||||
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
@ -184,7 +182,7 @@ _bt_moveright(Relation rel,
|
||||
/* move right as long as we need to */
|
||||
do
|
||||
{
|
||||
OffsetNumber offmax;
|
||||
OffsetNumber offmax = PageGetMaxOffsetNumber(page);
|
||||
/*
|
||||
* If this page consists of all duplicate keys (hikey and first
|
||||
* key on the page have the same value), then we don't need to
|
||||
@ -200,22 +198,43 @@ _bt_moveright(Relation rel,
|
||||
* our scankey is x = 2. Scankey >= (2,1) because of
|
||||
* we compare first attrs only, but we shouldn't to move
|
||||
* right of here. - vadim 04/15/97
|
||||
*
|
||||
* XXX
|
||||
* This code changed again! Actually, we break our
|
||||
* duplicates handling in single case: if we insert
|
||||
* new minimum key into leftmost page with duplicates
|
||||
* and splitting doesn't occur then _bt_insertonpg doesn't
|
||||
* worry about duplicates-rule. Fix _bt_insertonpg ?
|
||||
* But I don't see why don't compare scankey with _last_
|
||||
* item on the page instead of first one, in any cases.
|
||||
* So - we do it in that way now. - vadim 05/26/97
|
||||
*
|
||||
* Also, if we are on a "pseudo-empty" leaf page (i.e. there is
|
||||
* only hikey here) and scankey == hikey then we don't move
|
||||
* right! It's fix for bug described in _bt_insertonpg(). It's
|
||||
* right - at least till index cleanups are performed by vacuum
|
||||
* in exclusive mode: so, though this page may be just split,
|
||||
* it may not be "emptied" before we got here. - vadim 05/27/97
|
||||
*/
|
||||
if ( (offmax = PageGetMaxOffsetNumber(page)) > P_HIKEY)
|
||||
|
||||
if ( _bt_skeycmp (rel, keysz, scankey, page, hikey,
|
||||
BTEqualStrategyNumber) )
|
||||
{
|
||||
itemid = PageGetItemId(page, P_FIRSTKEY);
|
||||
if (_bt_skeycmp(rel, keysz, scankey, page, itemid,
|
||||
BTEqualStrategyNumber)) {
|
||||
/* break is for the "move right" while loop */
|
||||
break;
|
||||
}
|
||||
else if ( natts > keysz )
|
||||
{
|
||||
itemid = PageGetItemId(page, offmax);
|
||||
if (_bt_skeycmp(rel, keysz, scankey, page, itemid,
|
||||
BTLessEqualStrategyNumber))
|
||||
if ( opaque->btpo_flags & BTP_CHAIN )
|
||||
{
|
||||
Assert ( ( opaque->btpo_flags & BTP_LEAF ) || offmax > P_HIKEY );
|
||||
break;
|
||||
}
|
||||
if ( offmax > P_HIKEY )
|
||||
{
|
||||
if ( _bt_skeycmp (rel, keysz, scankey, page,
|
||||
PageGetItemId (page, offmax),
|
||||
BTLessEqualStrategyNumber) )
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if ( offmax == P_HIKEY &&
|
||||
( opaque->btpo_flags & BTP_LEAF ) )
|
||||
break;
|
||||
}
|
||||
|
||||
/* step right one page */
|
||||
@ -371,27 +390,37 @@ _bt_binsrch(Relation rel,
|
||||
int natts = rel->rd_rel->relnatts;
|
||||
int result;
|
||||
|
||||
itupdesc = RelationGetTupleDescriptor(rel);
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/* by convention, item 0 on any non-rightmost page is the high key */
|
||||
/* by convention, item 1 on any non-rightmost page is the high key */
|
||||
low = mid = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
|
||||
|
||||
high = PageGetMaxOffsetNumber(page);
|
||||
|
||||
/*
|
||||
* Since for non-rightmost pages, the zeroeth item on the page is the
|
||||
* Since for non-rightmost pages, the first item on the page is the
|
||||
* high key, there are two notions of emptiness. One is if nothing
|
||||
* appears on the page. The other is if nothing but the high key does.
|
||||
* The reason we test high <= low, rather than high == low, is that
|
||||
* after vacuuming there may be nothing *but* the high key on a page.
|
||||
* In that case, given the scheme above, low = 1 and high = 0.
|
||||
* In that case, given the scheme above, low = 2 and high = 1.
|
||||
*/
|
||||
|
||||
if (PageIsEmpty(page) || (! P_RIGHTMOST(opaque) && high <= low))
|
||||
if ( PageIsEmpty (page) )
|
||||
return (low);
|
||||
|
||||
itupdesc = RelationGetTupleDescriptor(rel);
|
||||
if ( (! P_RIGHTMOST(opaque) && high <= low))
|
||||
{
|
||||
if ( high < low ||
|
||||
(srchtype == BT_DESCENT && !(opaque->btpo_flags & BTP_LEAF)) )
|
||||
return (low);
|
||||
/* It's insertion and high == low == 2 */
|
||||
result = _bt_compare(rel, itupdesc, page, keysz, scankey, low);
|
||||
if ( result > 0 )
|
||||
return ( OffsetNumberNext (low) );
|
||||
return (low);
|
||||
}
|
||||
|
||||
while ((high - low) > 1) {
|
||||
mid = low + ((high - low) / 2);
|
||||
@ -736,6 +765,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
TupleDesc itupdesc;
|
||||
Buffer buf;
|
||||
Page page;
|
||||
BTPageOpaque pop;
|
||||
BTStack stack;
|
||||
OffsetNumber offnum, maxoff;
|
||||
bool offGmax = false;
|
||||
@ -803,11 +833,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
|
||||
stack = _bt_search(rel, 1, &skdata, &buf);
|
||||
_bt_freestack(stack);
|
||||
|
||||
/* find the nearest match to the manufactured scan key on the page */
|
||||
offnum = _bt_binsrch(rel, buf, 1, &skdata, BT_DESCENT);
|
||||
|
||||
blkno = BufferGetBlockNumber(buf);
|
||||
page = BufferGetPage(buf);
|
||||
|
||||
|
||||
/*
|
||||
* This will happen if the tree we're searching is entirely empty,
|
||||
* or if we're doing a search for a key that would appear on an
|
||||
@ -821,8 +850,39 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
_bt_relbuf(rel, buf, BT_READ);
|
||||
return ((RetrieveIndexResult) NULL);
|
||||
}
|
||||
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
pop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/*
|
||||
* Now _bt_moveright doesn't move from non-rightmost leaf page
|
||||
* if scankey == hikey and there is only hikey there. It's
|
||||
* good for insertion, but we need to do work for scan here.
|
||||
* - vadim 05/27/97
|
||||
*/
|
||||
|
||||
while ( maxoff == P_HIKEY && !P_RIGHTMOST(pop) &&
|
||||
_bt_skeycmp(rel, 1, &skdata, page,
|
||||
PageGetItemId(page, P_HIKEY),
|
||||
BTGreaterEqualStrategyNumber) )
|
||||
{
|
||||
/* step right one page */
|
||||
blkno = pop->btpo_next;
|
||||
_bt_relbuf(rel, buf, BT_READ);
|
||||
buf = _bt_getbuf(rel, blkno, BT_READ);
|
||||
page = BufferGetPage(buf);
|
||||
if (PageIsEmpty(page)) {
|
||||
ItemPointerSetInvalid(current);
|
||||
so->btso_curbuf = InvalidBuffer;
|
||||
_bt_relbuf(rel, buf, BT_READ);
|
||||
return ((RetrieveIndexResult) NULL);
|
||||
}
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
pop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
}
|
||||
|
||||
|
||||
/* find the nearest match to the manufactured scan key on the page */
|
||||
offnum = _bt_binsrch(rel, buf, 1, &skdata, BT_DESCENT);
|
||||
|
||||
if (offnum > maxoff)
|
||||
{
|
||||
@ -830,7 +890,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
offGmax = true;
|
||||
}
|
||||
|
||||
blkno = BufferGetBlockNumber(buf);
|
||||
ItemPointerSet(current, blkno, offnum);
|
||||
|
||||
/*
|
||||
@ -889,7 +948,32 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
break;
|
||||
|
||||
case BTGreaterEqualStrategyNumber:
|
||||
if (result < 0) {
|
||||
if ( offGmax )
|
||||
{
|
||||
if (result < 0)
|
||||
{
|
||||
Assert ( !P_RIGHTMOST(pop) && maxoff == P_HIKEY );
|
||||
if ( !_bt_step(scan, &buf, ForwardScanDirection) )
|
||||
{
|
||||
_bt_relbuf(scan->relation, buf, BT_READ);
|
||||
so->btso_curbuf = InvalidBuffer;
|
||||
ItemPointerSetInvalid(&(scan->currentItemData));
|
||||
return ((RetrieveIndexResult) NULL);
|
||||
}
|
||||
}
|
||||
else if (result > 0)
|
||||
{ /*
|
||||
* Just remember: _bt_binsrch() returns the OffsetNumber of
|
||||
* the first matching key on the page, or the OffsetNumber at
|
||||
* which the matching key WOULD APPEAR IF IT WERE on this page.
|
||||
* No key on this page, but offnum from _bt_binsrch() greater
|
||||
* maxoff - have to move right. - vadim 12/06/96
|
||||
*/
|
||||
(void) _bt_twostep(scan, &buf, ForwardScanDirection);
|
||||
}
|
||||
}
|
||||
else if (result < 0)
|
||||
{
|
||||
do {
|
||||
if (!_bt_twostep(scan, &buf, BackwardScanDirection))
|
||||
break;
|
||||
@ -902,16 +986,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
if (result > 0)
|
||||
(void) _bt_twostep(scan, &buf, ForwardScanDirection);
|
||||
}
|
||||
else if ( offGmax && result > 0 )
|
||||
{ /*
|
||||
* Just remember: _bt_binsrch() returns the OffsetNumber of
|
||||
* the first matching key on the page, or the OffsetNumber at
|
||||
* which the matching key WOULD APPEAR IF IT WERE on this page.
|
||||
* No key on this page, but offnum from _bt_binsrch() greater
|
||||
* maxoff - have to move right. - vadim 12/06/96
|
||||
*/
|
||||
(void) _bt_twostep(scan, &buf, ForwardScanDirection);
|
||||
}
|
||||
break;
|
||||
|
||||
case BTGreaterStrategyNumber:
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Id: nbtsort.c,v 1.15 1997/04/18 03:37:57 vadim Exp $
|
||||
* $Id: nbtsort.c,v 1.16 1997/05/30 18:35:40 vadim Exp $
|
||||
*
|
||||
* NOTES
|
||||
*
|
||||
@ -983,6 +983,12 @@ _bt_buildadd(Relation index, void *pstate, BTItem bti, int flags)
|
||||
oopaque->btpo_next = BufferGetBlockNumber(nbuf);
|
||||
nopaque->btpo_prev = BufferGetBlockNumber(obuf);
|
||||
nopaque->btpo_next = P_NONE;
|
||||
|
||||
if ( _bt_itemcmp(index, _bt_nattr,
|
||||
(BTItem) PageGetItem(opage, PageGetItemId(opage, P_HIKEY)),
|
||||
(BTItem) PageGetItem(opage, PageGetItemId(opage, P_FIRSTKEY)),
|
||||
BTEqualStrategyNumber) )
|
||||
oopaque->btpo_flags |= BTP_CHAIN;
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
x
Reference in New Issue
Block a user