mirror of https://github.com/postgres/postgres
Fix logic bug in gistchoose and gistRelocateBuildBuffersOnSplit.
Every time the best-tuple-found-so-far changes, we need to reset all the penalty values in which_grow[] to the penalties for the new best tuple. The old code failed to do this, resulting in inferior index quality. The original patch from Alexander Korotkov was just two lines; I took the liberty of fleshing that out by adding a bunch of comments that I hope will make this logic easier for others to understand than it was for me.
This commit is contained in:
parent
d1a4db8d25
commit
c8ba697a4b
|
@ -625,8 +625,13 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
|
|||
}
|
||||
|
||||
/*
|
||||
* Loop through all index tuples on the buffer on the splitted page,
|
||||
* moving them to buffers on the new pages.
|
||||
* Loop through all index tuples on the buffer on the page being split,
|
||||
* moving them to buffers on the new pages. We try to move each tuple
|
||||
* the page that will result in the lowest penalty for the leading column
|
||||
* or, in the case of a tie, the lowest penalty for the earliest column
|
||||
* that is not tied.
|
||||
*
|
||||
* The guts of this loop are very similar to gistchoose().
|
||||
*/
|
||||
while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
|
||||
{
|
||||
|
@ -637,14 +642,18 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
|
|||
IndexTuple newtup;
|
||||
RelocationBufferInfo *targetBufferInfo;
|
||||
|
||||
/*
|
||||
* Choose which page this tuple should go to.
|
||||
*/
|
||||
gistDeCompressAtt(giststate, r,
|
||||
itup, NULL, (OffsetNumber) 0, entry, isnull);
|
||||
|
||||
which = -1;
|
||||
*which_grow = -1.0f;
|
||||
|
||||
/*
|
||||
* Loop over possible target pages. We'll exit early if we find an index key that
|
||||
* can accommodate the new key with no penalty on any column. sum_grow is used to
|
||||
* track this condition. It doesn't need to be exactly accurate, just >0 whenever
|
||||
* we want the loop to continue and equal to 0 when we want it to terminate.
|
||||
*/
|
||||
sum_grow = 1.0f;
|
||||
|
||||
for (i = 0; i < splitPagesCount && sum_grow; i++)
|
||||
|
@ -653,6 +662,8 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
|
|||
RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
|
||||
|
||||
sum_grow = 0.0f;
|
||||
|
||||
/* Loop over index attributes. */
|
||||
for (j = 0; j < r->rd_att->natts; j++)
|
||||
{
|
||||
float usize;
|
||||
|
@ -664,16 +675,36 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
|
|||
|
||||
if (which_grow[j] < 0 || usize < which_grow[j])
|
||||
{
|
||||
/*
|
||||
* We get here in two cases. First, we may have just discovered that the
|
||||
* current tuple is the best one we've seen so far; that is, for the first
|
||||
* column for which the penalty is not equal to the best tuple seen so far,
|
||||
* this one has a lower penalty than the previously-seen one. But, when
|
||||
* a new best tuple is found, we must record the best penalty value for
|
||||
* all the remaining columns. We'll end up here for each remaining index
|
||||
* column in that case, too.
|
||||
*/
|
||||
which = i;
|
||||
which_grow[j] = usize;
|
||||
if (j < r->rd_att->natts - 1 && i == 0)
|
||||
if (j < r->rd_att->natts - 1)
|
||||
which_grow[j + 1] = -1;
|
||||
sum_grow += which_grow[j];
|
||||
}
|
||||
else if (which_grow[j] == usize)
|
||||
{
|
||||
/*
|
||||
* The current tuple is exactly as good for this column as the best tuple
|
||||
* seen so far. The next iteration of this loop will compare the next
|
||||
* column.
|
||||
*/
|
||||
sum_grow += usize;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* The current tuple is worse for this column than the best tuple seen so
|
||||
* far. Skip the remaining columns and move on to the next tuple, if any.
|
||||
*/
|
||||
sum_grow = 1;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -363,7 +363,12 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
|
|||
}
|
||||
|
||||
/*
|
||||
* find entry with lowest penalty
|
||||
* Search a page for the entry with lowest penalty.
|
||||
*
|
||||
* The index may have multiple columns, and there's a penalty value for each column.
|
||||
* The penalty associated with a column which appears earlier in the index definition is
|
||||
* strictly more important than the penalty of column which appears later in the index
|
||||
* definition.
|
||||
*/
|
||||
OffsetNumber
|
||||
gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
|
||||
|
@ -389,12 +394,28 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
|
|||
Assert(maxoff >= FirstOffsetNumber);
|
||||
Assert(!GistPageIsLeaf(p));
|
||||
|
||||
/*
|
||||
* Loop over tuples on page.
|
||||
*
|
||||
* We'll exit early if we find an index key that can accommodate the new key with no
|
||||
* penalty on any column. sum_grow is used to track this condition. Normally, it is the
|
||||
* sum of the penalties we've seen for this column so far, which is not a very useful
|
||||
* quantity in general because the penalties for each column are only considered
|
||||
* independently, but all we really care about is whether or not it's greater than zero.
|
||||
* Since penalties can't be negative, the sum of the penalties will be greater than
|
||||
* zero if and only if at least one penalty was greater than zero. To make things just
|
||||
* a bit more complicated, we arbitrarily set sum_grow to 1.0 whenever we want to force
|
||||
* the at least one more iteration of this outer loop. Any non-zero value would serve
|
||||
* just as well.
|
||||
*/
|
||||
for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
|
||||
{
|
||||
int j;
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
|
||||
|
||||
sum_grow = 0;
|
||||
|
||||
/* Loop over indexed attribtues. */
|
||||
for (j = 0; j < r->rd_att->natts; j++)
|
||||
{
|
||||
Datum datum;
|
||||
|
@ -409,16 +430,36 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
|
|||
|
||||
if (which_grow[j] < 0 || usize < which_grow[j])
|
||||
{
|
||||
/*
|
||||
* We get here in two cases. First, we may have just discovered that the
|
||||
* current tuple is the best one we've seen so far; that is, for the first
|
||||
* column for which the penalty is not equal to the best tuple seen so far,
|
||||
* this one has a lower penalty than the previously-seen one. But, when
|
||||
* a new best tuple is found, we must record the best penalty value for
|
||||
* all the remaining columns. We'll end up here for each remaining index
|
||||
* column in that case, too.
|
||||
*/
|
||||
which = i;
|
||||
which_grow[j] = usize;
|
||||
if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber)
|
||||
if (j < r->rd_att->natts - 1)
|
||||
which_grow[j + 1] = -1;
|
||||
sum_grow += which_grow[j];
|
||||
}
|
||||
else if (which_grow[j] == usize)
|
||||
{
|
||||
/*
|
||||
* The current tuple is exactly as good for this column as the best tuple
|
||||
* seen so far. The next iteration of this loop will compare the next
|
||||
* column.
|
||||
*/
|
||||
sum_grow += usize;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* The current tuple is worse for this column than the best tuple seen so
|
||||
* far. Skip the remaining columns and move on to the next tuple, if any.
|
||||
*/
|
||||
sum_grow = 1;
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue