Fix sloppiness about alignment requirements in findsplitloc() space

calculation, also make it stop when it has a 'good enough' split instead of exhaustively trying all split points.
2000-07-21 19:21:00 +00:00 · 2000-07-21 19:21:00 +00:00 · 1ea912e16d
commit 1ea912e16d
parent 2f011a9c72
2 changed files with 101 additions and 38 deletions
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.60 2000/07/21 06:42:32 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.61 2000/07/21 19:21:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -377,6 +377,10 @@ _bt_insertonpg(Relation rel,

 	/*
 	 * Do we need to split the page to fit the item on it?
+	 *
+	 * Note: PageGetFreeSpace() subtracts sizeof(ItemIdData) from its
+	 * result, so this comparison is correct even though we appear to
+	 * be accounting only for the item and not for its line pointer.
 	 */
 	if (PageGetFreeSpace(page) < itemsz)
 	{
@ -743,11 +747,14 @@ _bt_findsplitloc(Relation rel,
 	FindSplitData state;
 	int			leftspace,
 				rightspace,
+				goodenough,
 				dataitemtotal,
 				dataitemstoleft;

 	opaque = (BTPageOpaque) PageGetSpecialPointer(page);

+	/* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
+	newitemsz += sizeof(ItemIdData);
 	state.newitemsz = newitemsz;
 	state.non_leaf = ! P_ISLEAF(opaque);
 	state.have_split = false;
@ -758,11 +765,23 @@ _bt_findsplitloc(Relation rel,
 		MAXALIGN(sizeof(BTPageOpaqueData))
 		+ sizeof(ItemIdData);

+	/*
+	 * Finding the best possible split would require checking all the possible
+	 * split points, because of the high-key and left-key special cases.
+	 * That's probably more work than it's worth; instead, stop as soon as
+	 * we find a "good-enough" split, where good-enough is defined as an
+	 * imbalance in free space of no more than pagesize/16 (arbitrary...)
+	 * This should let us stop near the middle on most pages, instead of
+	 * plowing to the end.
+	 */
+	goodenough = leftspace / 16;
+
 	/* The right page will have the same high key as the old page */
 	if (!P_RIGHTMOST(opaque))
 	{
 		itemid = PageGetItemId(page, P_HIKEY);
-		rightspace -= (int) (ItemIdGetLength(itemid) + sizeof(ItemIdData));
+		rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
+							 sizeof(ItemIdData));
 	}

 	/* Count up total space in data items without actually scanning 'em */
@ -770,8 +789,7 @@ _bt_findsplitloc(Relation rel,

 	/*
 	 * Scan through the data items and calculate space usage for a split
-	 * at each possible position.  XXX we could probably stop somewhere
-	 * near the middle...
+	 * at each possible position.
 	 */
 	dataitemstoleft = 0;
 	maxoff = PageGetMaxOffsetNumber(page);
@ -785,44 +803,65 @@ _bt_findsplitloc(Relation rel,
 					rightfree;

 		itemid = PageGetItemId(page, offnum);
-		itemsz = ItemIdGetLength(itemid) + sizeof(ItemIdData);
+		itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);

 		/*
 		 * We have to allow for the current item becoming the high key of
-		 * the left page; therefore it counts against left space.
+		 * the left page; therefore it counts against left space as well
+		 * as right space.
 		 */
 		leftfree = leftspace - dataitemstoleft - (int) itemsz;
 		rightfree = rightspace - (dataitemtotal - dataitemstoleft);
-		if (offnum < newitemoff)
+		/*
+		 * Will the new item go to left or right of split?
+		 */
+		if (offnum > newitemoff)
+			_bt_checksplitloc(&state, offnum, leftfree, rightfree,
+							  true, itemsz);
+		else if (offnum < newitemoff)
 			_bt_checksplitloc(&state, offnum, leftfree, rightfree,
 							  false, itemsz);
-		else if (offnum > newitemoff)
-			_bt_checksplitloc(&state, offnum, leftfree, rightfree,
-							  true, itemsz);
 		else
 		{
-			/* need to try it both ways!! */
-			_bt_checksplitloc(&state, offnum, leftfree, rightfree,
-							  false, newitemsz);
+			/* need to try it both ways! */
 			_bt_checksplitloc(&state, offnum, leftfree, rightfree,
 							  true, itemsz);
+			/* here we are contemplating newitem as first on right */
+			_bt_checksplitloc(&state, offnum, leftfree, rightfree,
+							  false, newitemsz);
 		}

+		/* Abort scan once we find a good-enough choice */
+		if (state.have_split && state.best_delta <= goodenough)
+			break;
+
 		dataitemstoleft += itemsz;
 	}

+	/*
+	 * I believe it is not possible to fail to find a feasible split,
+	 * but just in case ...
+	 */
 	if (! state.have_split)
 		elog(FATAL, "_bt_findsplitloc: can't find a feasible split point for %s",
 			 RelationGetRelationName(rel));
+
 	*newitemonleft = state.newitemonleft;
 	return state.firstright;
 }

+/*
+ * Subroutine to analyze a particular possible split choice (ie, firstright
+ * and newitemonleft settings), and record the best split so far in *state.
+ */
 static void
 _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
 				  int leftfree, int rightfree,
 				  bool newitemonleft, Size firstrightitemsz)
 {
+	/*
+	 * Account for the new item on whichever side it is to be put.
+	 */
 	if (newitemonleft)
 		leftfree -= (int) state->newitemsz;
 	else
@ -833,7 +872,7 @@ _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
 	 */
 	if (state->non_leaf)
 		rightfree += (int) firstrightitemsz -
-			(int) (sizeof(BTItemData) + sizeof(ItemIdData));
+			(int) (MAXALIGN(sizeof(BTItemData)) + sizeof(ItemIdData));
 	/*
 	 * If feasible split point, remember best delta.
 	 */
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.38 2000/07/21 06:42:33 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.39 2000/07/21 19:21:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -103,6 +103,9 @@ _bt_freeskey(ScanKey skey)
 	pfree(skey);
 }

+/*
+ * free a retracement stack made by _bt_search.
+ */
 void
 _bt_freestack(BTStack stack)
 {
@ -116,12 +119,38 @@ _bt_freestack(BTStack stack)
 	}
 }

+/*
+ * Construct a BTItem from a plain IndexTuple.
+ *
+ * This is now useless code, since a BTItem *is* an index tuple with
+ * no extra stuff.  We hang onto it for the moment to preserve the
+ * notational distinction, in case we want to add some extra stuff
+ * again someday.
+ */
+BTItem
+_bt_formitem(IndexTuple itup)
+{
+	int			nbytes_btitem;
+	BTItem		btitem;
+	Size		tuplen;
+	extern Oid	newoid();
+
+	/* make a copy of the index tuple with room for extra stuff */
+	tuplen = IndexTupleSize(itup);
+	nbytes_btitem = tuplen + (sizeof(BTItemData) - sizeof(IndexTupleData));
+
+	btitem = (BTItem) palloc(nbytes_btitem);
+	memcpy((char *) &(btitem->bti_itup), (char *) itup, tuplen);
+
+	return btitem;
+}
+
 /*
 *	_bt_orderkeys() -- Put keys in a sensible order for conjunctive quals.
 *
 *		The order of the keys in the qual match the ordering imposed by
- *		the index.	This routine only needs to be called if there are
- *		more than one qual clauses using this index.
+ *		the index.	This routine only needs to be called if there is
+ *		more than one qual clause using this index.
 */
 void
 _bt_orderkeys(Relation relation, BTScanOpaque so)
@ -189,7 +218,8 @@ _bt_orderkeys(Relation relation, BTScanOpaque so)
 		if (i == numberOfKeys || cur->sk_attno != attno)
 		{
 			if (cur->sk_attno != attno + 1 && i < numberOfKeys)
-				elog(ERROR, "_bt_orderkeys: key(s) for attribute %d missed", attno + 1);
+				elog(ERROR, "_bt_orderkeys: key(s) for attribute %d missed",
+					 attno + 1);

 			underEqualStrategy = (!equalStrategyEnd);

@ -320,24 +350,18 @@ _bt_orderkeys(Relation relation, BTScanOpaque so)
 	pfree(xform);
 }

-BTItem
-_bt_formitem(IndexTuple itup)
-{
-	int			nbytes_btitem;
-	BTItem		btitem;
-	Size		tuplen;
-	extern Oid	newoid();
-
-	/* make a copy of the index tuple with room for the sequence number */
-	tuplen = IndexTupleSize(itup);
-	nbytes_btitem = tuplen + (sizeof(BTItemData) - sizeof(IndexTupleData));
-
-	btitem = (BTItem) palloc(nbytes_btitem);
-	memcpy((char *) &(btitem->bti_itup), (char *) itup, tuplen);
-
-	return btitem;
-}
-
+/*
+ * Test whether an indextuple satisfies all the scankey conditions
+ *
+ * If not ("false" return), the number of conditions satisfied is
+ * returned in *keysok.  Given proper ordering of the scankey conditions,
+ * we can use this to determine whether it's worth continuing the scan.
+ * See _bt_orderkeys().
+ *
+ * HACK: *keysok == (Size) -1 means we stopped evaluating because we found
+ * a NULL value in the index tuple.  It's not quite clear to me why this
+ * case has to be treated specially --- tgl 7/00.
+ */
 bool
 _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok)
 {
@ -389,9 +413,9 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok)
 		if (DatumGetBool(test) == !!(key[0].sk_flags & SK_NEGATE))
 			return false;

-		keysz -= 1;
-		key++;
 		(*keysok)++;
+		key++;
+		keysz--;
 	}

 	return true;