From de1f586f093ffba732d7918beffac6242713516b Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 28 May 2002 15:24:53 +0000 Subject: [PATCH] Fix a bug with building rtree_gist indexes. Patch from Teodor Sigaev. --- contrib/rtree_gist/rtree_gist.c | 85 +++++++++++++++++++++++++-------- 1 file changed, 65 insertions(+), 20 deletions(-) diff --git a/contrib/rtree_gist/rtree_gist.c b/contrib/rtree_gist/rtree_gist.c index cbbe024c03..580ca6427f 100644 --- a/contrib/rtree_gist/rtree_gist.c +++ b/contrib/rtree_gist/rtree_gist.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/contrib/rtree_gist/Attic/rtree_gist.c,v 1.4 2001/10/25 05:49:20 momjian Exp $ + * $Header: /cvsroot/pgsql/contrib/rtree_gist/Attic/rtree_gist.c,v 1.5 2002/05/28 15:24:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -161,6 +161,22 @@ gbox_penalty(PG_FUNCTION_ARGS) PG_RETURN_POINTER(result); } +typedef struct { /* one qsort element: a box key plus the index of the entry it came from */ + BOX *key; /* box whose extent compare_KB reads */ + int pos; /* entry index carried through the sort and later stored into the split lists */ +} KBsort; + +static int +compare_KB(const void* a, const void* b) { /* qsort comparator over KBsort items: ascending by (high.x - low.x) * (high.y - low.y) of each key box; returns 0 when the products are equal, 1 when the product for a is larger, otherwise -1 */ + BOX *abox = ((KBsort*)a)->key; /* the casts drop const; the boxes are only read below */ + BOX *bbox = ((KBsort*)b)->key; + float sa = (abox->high.x - abox->low.x) * (abox->high.y - abox->low.y); /* NOTE(review): the product is held in a float; if BOX coordinates are a wider type (not visible in this hunk), boxes of different size can compare equal -- confirm */ + float sb = (bbox->high.x - bbox->low.x) * (bbox->high.y - bbox->low.y); + + if ( sa==sb ) return 0; + return ( sa>sb ) ? 
1 : -1; /* NOTE(review): when either product is NaN both tests above are false, so -1 is returned for either argument order -- confirm NaN keys cannot reach this comparator */ +} + /* ** The GiST PickSplit method ** New linear algorithm, see 'New Linear Node Splitting Algorithm for R-tree', @@ -201,26 +217,22 @@ gbox_picksplit(PG_FUNCTION_ARGS) for (i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i)) { cur = DatumGetBoxP(((GISTENTRY *) VARDATA(entryvec))[i].key); + if ( allisequal == true && ( + pageunion.high.x != cur->high.x || + pageunion.high.y != cur->high.y || + pageunion.low.x != cur->low.x || + pageunion.low.y != cur->low.y + ) ) + allisequal = false; + if (pageunion.high.x < cur->high.x) - { - allisequal = false; pageunion.high.x = cur->high.x; - } if (pageunion.low.x > cur->low.x) - { - allisequal = false; pageunion.low.x = cur->low.x; - } if (pageunion.high.y < cur->high.y) - { - allisequal = false; pageunion.high.y = cur->high.y; - } if (pageunion.low.y > cur->low.y) - { - allisequal = false; pageunion.low.y = cur->low.y; - } } nbytes = (maxoff + 2) * sizeof(OffsetNumber); @@ -264,7 +276,7 @@ gbox_picksplit(PG_FUNCTION_ARGS) unionB = (BOX *) palloc(sizeof(BOX)); unionT = (BOX *) palloc(sizeof(BOX)); -#define ADDLIST( list, unionD, pos ) do { \ +#define ADDLIST( list, unionD, pos, num ) do { \ if ( pos ) { \ if ( unionD->high.x < cur->high.x ) unionD->high.x = cur->high.x; \ if ( unionD->low.x > cur->low.x ) unionD->low.x = cur->low.x; \ @@ -273,7 +285,7 @@ gbox_picksplit(PG_FUNCTION_ARGS) } else { \ memcpy( (void*)unionD, (void*) cur, sizeof( BOX ) ); \ } \ - list[pos] = i; \ + list[pos] = num; \ (pos)++; \ } while(0) @@ -281,17 +293,50 @@ gbox_picksplit(PG_FUNCTION_ARGS) { cur = DatumGetBoxP(((GISTENTRY *) VARDATA(entryvec))[i].key); if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x) - ADDLIST(listL, unionL, posL); + ADDLIST(listL, unionL, posL,i); else - ADDLIST(listR, unionR, posR); + ADDLIST(listR, unionR, posR,i); if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y) - ADDLIST(listB, unionB, posB); + ADDLIST(listB, unionB, posB,i); else - ADDLIST(listT, 
unionT, posT); + ADDLIST(listT, unionT, posT,i); + } + + /* bad disposition, sort by ascending and resplit */ + if ( (posR==0 || posL==0) && (posT==0 || posB==0) ) { + KBsort *arr = (KBsort*)palloc( sizeof(KBsort) * maxoff ); + posL = posR = posB = posT = 0; + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { + arr[i-1].key = DatumGetBoxP(((GISTENTRY *) VARDATA(entryvec))[i].key); + arr[i-1].pos = i; + } + qsort( arr, maxoff, sizeof(KBsort), compare_KB ); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { + cur = arr[i-1].key; + if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x) + ADDLIST(listL, unionL, posL,arr[i-1].pos); + else if ( cur->low.x - pageunion.low.x == pageunion.high.x - cur->high.x ) { + if ( posL>posR ) + ADDLIST(listR, unionR, posR,arr[i-1].pos); + else + ADDLIST(listL, unionL, posL,arr[i-1].pos); + } else + ADDLIST(listR, unionR, posR,arr[i-1].pos); + + if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y) + ADDLIST(listB, unionB, posB,arr[i-1].pos); + else if ( cur->low.y - pageunion.low.y == pageunion.high.y - cur->high.y ) { + if ( posB>posT ) + ADDLIST(listT, unionT, posT,arr[i-1].pos); + else + ADDLIST(listB, unionB, posB,arr[i-1].pos); + } else + ADDLIST(listT, unionT, posT,arr[i-1].pos); + } + pfree(arr); } /* which split more optimal? */ - if (Max(posL, posR) < Max(posB, posT)) direction = 'x'; else if (Max(posL, posR) > Max(posB, posT))