Performance optimization on balance_nonroot() and related routines.
2.6% faster overall with a size increase of less than 750 bytes. FossilOrigin-Name: 25131e7062125e91c2d60ed2cadf134dd7609124
This commit is contained in:
commit
9c739e26da
13
manifest
13
manifest
@ -1,5 +1,5 @@
|
||||
C Mark\ssome\sbranches\sas\sunreachable\safter\sthe\srecent\schange\sthat\srecognizes\nmismatch\sresult\sset\ssizes\son\scompound\sSELECT\sstatements\ssooner.
|
||||
D 2015-06-23T23:31:52.120
|
||||
C Performance\soptimization\son\sbalance_nonroot()\sand\srelated\sroutines.\n2.6%\sfaster\soverall\swith\sa\ssize\sincrease\sof\sless\sthan\s750\sbytes.
|
||||
D 2015-06-24T00:05:45.705
|
||||
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
||||
F Makefile.in 1063c58075b7400d93326b0eb332b48a54f53025
|
||||
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
||||
@ -192,7 +192,7 @@ F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240
|
||||
F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3
|
||||
F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d
|
||||
F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79
|
||||
F src/btree.c 173c2ba1b8cf941971683f584965369791125f12
|
||||
F src/btree.c 587dd00c675e99d61614c9537feb73bfc2e9683d
|
||||
F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1
|
||||
F src/btreeInt.h 6ece2dd9c8e2eac05f0a8ded8772a44e96486c65
|
||||
F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70
|
||||
@ -1286,7 +1286,8 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
|
||||
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
|
||||
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
|
||||
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
|
||||
P 4df852ce26c95d5d23c83dbe9c59d2c3435acddf
|
||||
R f33f66149dfc88305222eaacd7fef5a8
|
||||
P c8d1f305b6e9dfc36b8e3f4ab92de4457884d903 eed6a3314518cdab809264284d7680385f0d5d2a
|
||||
R 89850ed2148a00ac2d9831c087418020
|
||||
T +closed eed6a3314518cdab809264284d7680385f0d5d2a
|
||||
U drh
|
||||
Z fe73de6bdd6d9128d20e7fa5150f1b6d
|
||||
Z 115864ffd982466ffd849126735907ac
|
||||
|
@ -1 +1 @@
|
||||
c8d1f305b6e9dfc36b8e3f4ab92de4457884d903
|
||||
25131e7062125e91c2d60ed2cadf134dd7609124
|
384
src/btree.c
384
src/btree.c
@ -956,28 +956,6 @@ static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){
|
||||
*/
|
||||
#define findCell(P,I) \
|
||||
((P)->aData + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)])))
|
||||
#define findCellv2(D,M,O,I) (D+(M&get2byte(D+(O+2*(I)))))
|
||||
|
||||
|
||||
/*
|
||||
** This a more complex version of findCell() that works for
|
||||
** pages that do contain overflow cells.
|
||||
*/
|
||||
static u8 *findOverflowCell(MemPage *pPage, int iCell){
|
||||
int i;
|
||||
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
|
||||
for(i=pPage->nOverflow-1; i>=0; i--){
|
||||
int k;
|
||||
k = pPage->aiOvfl[i];
|
||||
if( k<=iCell ){
|
||||
if( k==iCell ){
|
||||
return pPage->apOvfl[i];
|
||||
}
|
||||
iCell--;
|
||||
}
|
||||
}
|
||||
return findCell(pPage, iCell);
|
||||
}
|
||||
|
||||
/*
|
||||
** This is common tail processing for btreeParseCellPtr() and
|
||||
@ -1575,7 +1553,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
|
||||
nFrag = iFreeBlk - iEnd;
|
||||
if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_BKPT;
|
||||
iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]);
|
||||
if( iEnd > pPage->pBt->usableSize ) return SQLITE_CORRUPT_BKPT;
|
||||
if( NEVER(iEnd > pPage->pBt->usableSize) ) return SQLITE_CORRUPT_BKPT;
|
||||
iSize = iEnd - iStart;
|
||||
iFreeBlk = get2byte(&data[iFreeBlk]);
|
||||
}
|
||||
@ -6245,6 +6223,14 @@ static void insertCell(
|
||||
assert( j<(int)(sizeof(pPage->apOvfl)/sizeof(pPage->apOvfl[0])) );
|
||||
pPage->apOvfl[j] = pCell;
|
||||
pPage->aiOvfl[j] = (u16)i;
|
||||
|
||||
/* When multiple overflows occur, they are always sequential and in
|
||||
** sorted order. This invariants arise because multiple overflows can
|
||||
** only occur when inserting divider cells into the parent page during
|
||||
** balancing, and the dividers are adjacent and sorted.
|
||||
*/
|
||||
assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */
|
||||
assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */
|
||||
}else{
|
||||
int rc = sqlite3PagerWrite(pPage->pDbPage);
|
||||
if( rc!=SQLITE_OK ){
|
||||
@ -6282,6 +6268,52 @@ static void insertCell(
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** A CellArray object contains a cache of pointers and sizes for a
|
||||
** consecutive sequence of cells that might be held multiple pages.
|
||||
*/
|
||||
typedef struct CellArray CellArray;
|
||||
struct CellArray {
|
||||
int nCell; /* Number of cells in apCell[] */
|
||||
MemPage *pRef; /* Reference page */
|
||||
u8 **apCell; /* All cells begin balanced */
|
||||
u16 *szCell; /* Local size of all cells in apCell[] */
|
||||
};
|
||||
|
||||
/*
|
||||
** Make sure the cell sizes at idx, idx+1, ..., idx+N-1 have been
|
||||
** computed.
|
||||
*/
|
||||
static void populateCellCache(CellArray *p, int idx, int N){
|
||||
assert( idx>=0 && idx+N<=p->nCell );
|
||||
while( N>0 ){
|
||||
assert( p->apCell[idx]!=0 );
|
||||
if( p->szCell[idx]==0 ){
|
||||
p->szCell[idx] = p->pRef->xCellSize(p->pRef, p->apCell[idx]);
|
||||
}else{
|
||||
assert( CORRUPT_DB ||
|
||||
p->szCell[idx]==p->pRef->xCellSize(p->pRef, p->apCell[idx]) );
|
||||
}
|
||||
idx++;
|
||||
N--;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Return the size of the Nth element of the cell array
|
||||
*/
|
||||
static SQLITE_NOINLINE u16 computeCellSize(CellArray *p, int N){
|
||||
assert( N>=0 && N<p->nCell );
|
||||
assert( p->szCell[N]==0 );
|
||||
p->szCell[N] = p->pRef->xCellSize(p->pRef, p->apCell[N]);
|
||||
return p->szCell[N];
|
||||
}
|
||||
static u16 cachedCellSize(CellArray *p, int N){
|
||||
assert( N>=0 && N<p->nCell );
|
||||
if( p->szCell[N] ) return p->szCell[N];
|
||||
return computeCellSize(p, N);
|
||||
}
|
||||
|
||||
/*
|
||||
** Array apCell[] contains pointers to nCell b-tree page cells. The
|
||||
** szCell[] array contains the size in bytes of each cell. This function
|
||||
@ -6295,7 +6327,7 @@ static void insertCell(
|
||||
** The MemPage.nFree field is invalidated by this function. It is the
|
||||
** responsibility of the caller to set it correctly.
|
||||
*/
|
||||
static void rebuildPage(
|
||||
static int rebuildPage(
|
||||
MemPage *pPg, /* Edit this page */
|
||||
int nCell, /* Final number of cells on page */
|
||||
u8 **apCell, /* Array of cells */
|
||||
@ -6320,11 +6352,12 @@ static void rebuildPage(
|
||||
pCell = &pTmp[pCell - aData];
|
||||
}
|
||||
pData -= szCell[i];
|
||||
memcpy(pData, pCell, szCell[i]);
|
||||
put2byte(pCellptr, (pData - aData));
|
||||
pCellptr += 2;
|
||||
if( pData < pCellptr ) return SQLITE_CORRUPT_BKPT;
|
||||
memcpy(pData, pCell, szCell[i]);
|
||||
assert( szCell[i]==pPg->xCellSize(pPg, pCell) || CORRUPT_DB );
|
||||
testcase( szCell[i]==pPg->xCellSize(pPg,pCell) );
|
||||
testcase( szCell[i]!=pPg->xCellSize(pPg,pCell) );
|
||||
}
|
||||
|
||||
/* The pPg->nFree field is now set incorrectly. The caller will fix it. */
|
||||
@ -6335,6 +6368,7 @@ static void rebuildPage(
|
||||
put2byte(&aData[hdr+3], pPg->nCell);
|
||||
put2byte(&aData[hdr+5], pData - aData);
|
||||
aData[hdr+7] = 0x00;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -6367,25 +6401,26 @@ static int pageInsertArray(
|
||||
u8 *pBegin, /* End of cell-pointer array */
|
||||
u8 **ppData, /* IN/OUT: Page content -area pointer */
|
||||
u8 *pCellptr, /* Pointer to cell-pointer area */
|
||||
int iFirst, /* Index of first cell to add */
|
||||
int nCell, /* Number of cells to add to pPg */
|
||||
u8 **apCell, /* Array of cells */
|
||||
u16 *szCell /* Array of cell sizes */
|
||||
CellArray *pCArray /* Array of cells */
|
||||
){
|
||||
int i;
|
||||
u8 *aData = pPg->aData;
|
||||
u8 *pData = *ppData;
|
||||
const int bFreelist = aData[1] || aData[2];
|
||||
int iEnd = iFirst + nCell;
|
||||
assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */
|
||||
for(i=0; i<nCell; i++){
|
||||
int sz = szCell[i];
|
||||
int rc;
|
||||
for(i=iFirst; i<iEnd; i++){
|
||||
int sz, rc;
|
||||
u8 *pSlot;
|
||||
sz = cachedCellSize(pCArray, i);
|
||||
if( bFreelist==0 || (pSlot = pageFindSlot(pPg, sz, &rc, 0))==0 ){
|
||||
pData -= sz;
|
||||
if( pData<pBegin ) return 1;
|
||||
pSlot = pData;
|
||||
}
|
||||
memcpy(pSlot, apCell[i], sz);
|
||||
memcpy(pSlot, pCArray->apCell[i], sz);
|
||||
put2byte(pCellptr, (pSlot - aData));
|
||||
pCellptr += 2;
|
||||
}
|
||||
@ -6404,22 +6439,27 @@ static int pageInsertArray(
|
||||
*/
|
||||
static int pageFreeArray(
|
||||
MemPage *pPg, /* Page to edit */
|
||||
int iFirst, /* First cell to delete */
|
||||
int nCell, /* Cells to delete */
|
||||
u8 **apCell, /* Array of cells */
|
||||
u16 *szCell /* Array of cell sizes */
|
||||
CellArray *pCArray /* Array of cells */
|
||||
){
|
||||
u8 * const aData = pPg->aData;
|
||||
u8 * const pEnd = &aData[pPg->pBt->usableSize];
|
||||
u8 * const pStart = &aData[pPg->hdrOffset + 8 + pPg->childPtrSize];
|
||||
int nRet = 0;
|
||||
int i;
|
||||
int iEnd = iFirst + nCell;
|
||||
u8 *pFree = 0;
|
||||
int szFree = 0;
|
||||
|
||||
for(i=0; i<nCell; i++){
|
||||
u8 *pCell = apCell[i];
|
||||
for(i=iFirst; i<iEnd; i++){
|
||||
u8 *pCell = pCArray->apCell[i];
|
||||
if( pCell>=pStart && pCell<pEnd ){
|
||||
int sz = szCell[i];
|
||||
int sz;
|
||||
/* No need to use cachedCellSize() here. The sizes of all cells that
|
||||
** are to be freed have already been computing while deciding which
|
||||
** cells need freeing */
|
||||
sz = pCArray->szCell[i]; assert( sz>0 );
|
||||
if( pFree!=(pCell + sz) ){
|
||||
if( pFree ){
|
||||
assert( pFree>aData && (pFree - aData)<65536 );
|
||||
@ -6454,13 +6494,12 @@ static int pageFreeArray(
|
||||
** The pPg->nFree field is invalid when this function returns. It is the
|
||||
** responsibility of the caller to set it correctly.
|
||||
*/
|
||||
static void editPage(
|
||||
static int editPage(
|
||||
MemPage *pPg, /* Edit this page */
|
||||
int iOld, /* Index of first cell currently on page */
|
||||
int iNew, /* Index of new first cell on page */
|
||||
int nNew, /* Final number of cells on page */
|
||||
u8 **apCell, /* Array of cells */
|
||||
u16 *szCell /* Array of cell sizes */
|
||||
CellArray *pCArray /* Array of cells and sizes */
|
||||
){
|
||||
u8 * const aData = pPg->aData;
|
||||
const int hdr = pPg->hdrOffset;
|
||||
@ -6479,16 +6518,12 @@ static void editPage(
|
||||
|
||||
/* Remove cells from the start and end of the page */
|
||||
if( iOld<iNew ){
|
||||
int nShift = pageFreeArray(
|
||||
pPg, iNew-iOld, &apCell[iOld], &szCell[iOld]
|
||||
);
|
||||
int nShift = pageFreeArray(pPg, iOld, iNew-iOld, pCArray);
|
||||
memmove(pPg->aCellIdx, &pPg->aCellIdx[nShift*2], nCell*2);
|
||||
nCell -= nShift;
|
||||
}
|
||||
if( iNewEnd < iOldEnd ){
|
||||
nCell -= pageFreeArray(
|
||||
pPg, iOldEnd-iNewEnd, &apCell[iNewEnd], &szCell[iNewEnd]
|
||||
);
|
||||
nCell -= pageFreeArray(pPg, iNewEnd, iOldEnd - iNewEnd, pCArray);
|
||||
}
|
||||
|
||||
pData = &aData[get2byteNotZero(&aData[hdr+5])];
|
||||
@ -6502,7 +6537,7 @@ static void editPage(
|
||||
memmove(&pCellptr[nAdd*2], pCellptr, nCell*2);
|
||||
if( pageInsertArray(
|
||||
pPg, pBegin, &pData, pCellptr,
|
||||
nAdd, &apCell[iNew], &szCell[iNew]
|
||||
iNew, nAdd, pCArray
|
||||
) ) goto editpage_fail;
|
||||
nCell += nAdd;
|
||||
}
|
||||
@ -6516,7 +6551,7 @@ static void editPage(
|
||||
nCell++;
|
||||
if( pageInsertArray(
|
||||
pPg, pBegin, &pData, pCellptr,
|
||||
1, &apCell[iCell + iNew], &szCell[iCell + iNew]
|
||||
iCell+iNew, 1, pCArray
|
||||
) ) goto editpage_fail;
|
||||
}
|
||||
}
|
||||
@ -6525,7 +6560,7 @@ static void editPage(
|
||||
pCellptr = &pPg->aCellIdx[nCell*2];
|
||||
if( pageInsertArray(
|
||||
pPg, pBegin, &pData, pCellptr,
|
||||
nNew-nCell, &apCell[iNew+nCell], &szCell[iNew+nCell]
|
||||
iNew+nCell, nNew-nCell, pCArray
|
||||
) ) goto editpage_fail;
|
||||
|
||||
pPg->nCell = nNew;
|
||||
@ -6536,19 +6571,21 @@ static void editPage(
|
||||
|
||||
#ifdef SQLITE_DEBUG
|
||||
for(i=0; i<nNew && !CORRUPT_DB; i++){
|
||||
u8 *pCell = apCell[i+iNew];
|
||||
u8 *pCell = pCArray->apCell[i+iNew];
|
||||
int iOff = get2byte(&pPg->aCellIdx[i*2]);
|
||||
if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){
|
||||
pCell = &pTmp[pCell - aData];
|
||||
}
|
||||
assert( 0==memcmp(pCell, &aData[iOff], szCell[i+iNew]) );
|
||||
assert( 0==memcmp(pCell, &aData[iOff],
|
||||
pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) );
|
||||
}
|
||||
#endif
|
||||
|
||||
return;
|
||||
return SQLITE_OK;
|
||||
editpage_fail:
|
||||
/* Unable to edit this page. Rebuild it from scratch instead. */
|
||||
rebuildPage(pPg, nNew, &apCell[iNew], &szCell[iNew]);
|
||||
populateCellCache(pCArray, iNew, nNew);
|
||||
return rebuildPage(pPg, nNew, &pCArray->apCell[iNew], &pCArray->szCell[iNew]);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -6620,7 +6657,8 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){
|
||||
assert( sqlite3PagerIswriteable(pNew->pDbPage) );
|
||||
assert( pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) );
|
||||
zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF);
|
||||
rebuildPage(pNew, 1, &pCell, &szCell);
|
||||
rc = rebuildPage(pNew, 1, &pCell, &szCell);
|
||||
if( NEVER(rc) ) return rc;
|
||||
pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell;
|
||||
|
||||
/* If this is an auto-vacuum database, update the pointer map
|
||||
@ -6824,7 +6862,6 @@ static int balance_nonroot(
|
||||
int bBulk /* True if this call is part of a bulk load */
|
||||
){
|
||||
BtShared *pBt; /* The whole database */
|
||||
int nCell = 0; /* Number of cells in apCell[] */
|
||||
int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */
|
||||
int nNew = 0; /* Number of pages in apNew[] */
|
||||
int nOld; /* Number of pages in apOld[] */
|
||||
@ -6835,7 +6872,6 @@ static int balance_nonroot(
|
||||
int leafData; /* True if pPage is a leaf of a LEAFDATA tree */
|
||||
int usableSpace; /* Bytes in pPage beyond the header */
|
||||
int pageFlags; /* Value of pPage->aData[0] */
|
||||
int subtotal; /* Subtotal of bytes in cells on one page */
|
||||
int iSpace1 = 0; /* First unused byte of aSpace1[] */
|
||||
int iOvflSpace = 0; /* First unused byte of aOvflSpace[] */
|
||||
int szScratch; /* Size of scratch memory requested */
|
||||
@ -6843,19 +6879,20 @@ static int balance_nonroot(
|
||||
MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */
|
||||
u8 *pRight; /* Location in parent of right-sibling pointer */
|
||||
u8 *apDiv[NB-1]; /* Divider cells in pParent */
|
||||
int cntNew[NB+2]; /* Index in aCell[] of cell after i-th page */
|
||||
int cntOld[NB+2]; /* Old index in aCell[] after i-th page */
|
||||
int cntNew[NB+2]; /* Index in b.paCell[] of cell after i-th page */
|
||||
int cntOld[NB+2]; /* Old index in b.apCell[] */
|
||||
int szNew[NB+2]; /* Combined size of cells placed on i-th page */
|
||||
u8 **apCell = 0; /* All cells begin balanced */
|
||||
u16 *szCell; /* Local size of all cells in apCell[] */
|
||||
u8 *aSpace1; /* Space for copies of dividers cells */
|
||||
Pgno pgno; /* Temp var to store a page number in */
|
||||
u8 abDone[NB+2]; /* True after i'th new page is populated */
|
||||
Pgno aPgno[NB+2]; /* Page numbers of new pages before shuffling */
|
||||
Pgno aPgOrder[NB+2]; /* Copy of aPgno[] used for sorting pages */
|
||||
u16 aPgFlags[NB+2]; /* flags field of new pages before shuffling */
|
||||
CellArray b; /* Parsed information on cells being balanced */
|
||||
|
||||
memset(abDone, 0, sizeof(abDone));
|
||||
b.nCell = 0;
|
||||
b.apCell = 0;
|
||||
pBt = pParent->pBt;
|
||||
assert( sqlite3_mutex_held(pBt->mutex) );
|
||||
assert( sqlite3PagerIswriteable(pParent->pDbPage) );
|
||||
@ -6964,43 +7001,48 @@ static int balance_nonroot(
|
||||
** Allocate space for memory structures
|
||||
*/
|
||||
szScratch =
|
||||
nMaxCells*sizeof(u8*) /* apCell */
|
||||
+ nMaxCells*sizeof(u16) /* szCell */
|
||||
nMaxCells*sizeof(u8*) /* b.apCell */
|
||||
+ nMaxCells*sizeof(u16) /* b.szCell */
|
||||
+ pBt->pageSize; /* aSpace1 */
|
||||
|
||||
/* EVIDENCE-OF: R-28375-38319 SQLite will never request a scratch buffer
|
||||
** that is more than 6 times the database page size. */
|
||||
assert( szScratch<=6*(int)pBt->pageSize );
|
||||
apCell = sqlite3ScratchMalloc( szScratch );
|
||||
if( apCell==0 ){
|
||||
b.apCell = sqlite3ScratchMalloc( szScratch );
|
||||
if( b.apCell==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
goto balance_cleanup;
|
||||
}
|
||||
szCell = (u16*)&apCell[nMaxCells];
|
||||
aSpace1 = (u8*)&szCell[nMaxCells];
|
||||
b.szCell = (u16*)&b.apCell[nMaxCells];
|
||||
aSpace1 = (u8*)&b.szCell[nMaxCells];
|
||||
assert( EIGHT_BYTE_ALIGNMENT(aSpace1) );
|
||||
|
||||
/*
|
||||
** Load pointers to all cells on sibling pages and the divider cells
|
||||
** into the local apCell[] array. Make copies of the divider cells
|
||||
** into the local b.apCell[] array. Make copies of the divider cells
|
||||
** into space obtained from aSpace1[]. The divider cells have already
|
||||
** been removed from pParent.
|
||||
**
|
||||
** If the siblings are on leaf pages, then the child pointers of the
|
||||
** divider cells are stripped from the cells before they are copied
|
||||
** into aSpace1[]. In this way, all cells in apCell[] are without
|
||||
** into aSpace1[]. In this way, all cells in b.apCell[] are without
|
||||
** child pointers. If siblings are not leaves, then all cell in
|
||||
** apCell[] include child pointers. Either way, all cells in apCell[]
|
||||
** b.apCell[] include child pointers. Either way, all cells in b.apCell[]
|
||||
** are alike.
|
||||
**
|
||||
** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf.
|
||||
** leafData: 1 if pPage holds key+data and pParent holds only keys.
|
||||
*/
|
||||
leafCorrection = apOld[0]->leaf*4;
|
||||
leafData = apOld[0]->intKeyLeaf;
|
||||
b.pRef = apOld[0];
|
||||
leafCorrection = b.pRef->leaf*4;
|
||||
leafData = b.pRef->intKeyLeaf;
|
||||
for(i=0; i<nOld; i++){
|
||||
int limit;
|
||||
MemPage *pOld = apOld[i];
|
||||
int limit = pOld->nCell;
|
||||
u8 *aData = pOld->aData;
|
||||
u16 maskPage = pOld->maskPage;
|
||||
u8 *piCell = aData + pOld->cellOffset;
|
||||
u8 *piEnd;
|
||||
|
||||
/* Verify that all sibling pages are of the same "type" (table-leaf,
|
||||
** table-interior, index-leaf, or index-interior).
|
||||
@ -7010,92 +7052,151 @@ static int balance_nonroot(
|
||||
goto balance_cleanup;
|
||||
}
|
||||
|
||||
limit = pOld->nCell+pOld->nOverflow;
|
||||
/* Load b.apCell[] with pointers to all cells in pOld. If pOld
|
||||
** constains overflow cells, include them in the b.apCell[] array
|
||||
** in the correct spot.
|
||||
**
|
||||
** Note that when there are multiple overflow cells, it is always the
|
||||
** case that they are sequential and adjacent. This invariant arises
|
||||
** because multiple overflows can only occurs when inserting divider
|
||||
** cells into a parent on a prior balance, and divider cells are always
|
||||
** adjacent and are inserted in order. There is an assert() tagged
|
||||
** with "NOTE 1" in the overflow cell insertion loop to prove this
|
||||
** invariant.
|
||||
**
|
||||
** This must be done in advance. Once the balance starts, the cell
|
||||
** offset section of the btree page will be overwritten and we will no
|
||||
** long be able to find the cells if a pointer to each cell is not saved
|
||||
** first.
|
||||
*/
|
||||
memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*limit);
|
||||
if( pOld->nOverflow>0 ){
|
||||
memset(&b.szCell[b.nCell+limit], 0, sizeof(b.szCell[0])*pOld->nOverflow);
|
||||
limit = pOld->aiOvfl[0];
|
||||
for(j=0; j<limit; j++){
|
||||
assert( nCell<nMaxCells );
|
||||
apCell[nCell] = findOverflowCell(pOld, j);
|
||||
szCell[nCell] = pOld->xCellSize(pOld, apCell[nCell]);
|
||||
nCell++;
|
||||
b.apCell[b.nCell] = aData + (maskPage & get2byte(piCell));
|
||||
piCell += 2;
|
||||
b.nCell++;
|
||||
}
|
||||
}else{
|
||||
u8 *aData = pOld->aData;
|
||||
u16 maskPage = pOld->maskPage;
|
||||
u16 cellOffset = pOld->cellOffset;
|
||||
for(j=0; j<limit; j++){
|
||||
assert( nCell<nMaxCells );
|
||||
apCell[nCell] = findCellv2(aData, maskPage, cellOffset, j);
|
||||
szCell[nCell] = pOld->xCellSize(pOld, apCell[nCell]);
|
||||
nCell++;
|
||||
for(k=0; k<pOld->nOverflow; k++){
|
||||
assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */
|
||||
b.apCell[b.nCell] = pOld->apOvfl[k];
|
||||
b.nCell++;
|
||||
}
|
||||
limit = pOld->nCell - pOld->aiOvfl[0];
|
||||
}
|
||||
cntOld[i] = nCell;
|
||||
piEnd = aData + pOld->cellOffset + 2*pOld->nCell;
|
||||
while( piCell<piEnd ){
|
||||
assert( b.nCell<nMaxCells );
|
||||
b.apCell[b.nCell] = aData + (maskPage & get2byte(piCell));
|
||||
piCell += 2;
|
||||
b.nCell++;
|
||||
}
|
||||
|
||||
cntOld[i] = b.nCell;
|
||||
if( i<nOld-1 && !leafData){
|
||||
u16 sz = (u16)szNew[i];
|
||||
u8 *pTemp;
|
||||
assert( nCell<nMaxCells );
|
||||
szCell[nCell] = sz;
|
||||
assert( b.nCell<nMaxCells );
|
||||
b.szCell[b.nCell] = sz;
|
||||
pTemp = &aSpace1[iSpace1];
|
||||
iSpace1 += sz;
|
||||
assert( sz<=pBt->maxLocal+23 );
|
||||
assert( iSpace1 <= (int)pBt->pageSize );
|
||||
memcpy(pTemp, apDiv[i], sz);
|
||||
apCell[nCell] = pTemp+leafCorrection;
|
||||
b.apCell[b.nCell] = pTemp+leafCorrection;
|
||||
assert( leafCorrection==0 || leafCorrection==4 );
|
||||
szCell[nCell] = szCell[nCell] - leafCorrection;
|
||||
b.szCell[b.nCell] = b.szCell[b.nCell] - leafCorrection;
|
||||
if( !pOld->leaf ){
|
||||
assert( leafCorrection==0 );
|
||||
assert( pOld->hdrOffset==0 );
|
||||
/* The right pointer of the child page pOld becomes the left
|
||||
** pointer of the divider cell */
|
||||
memcpy(apCell[nCell], &pOld->aData[8], 4);
|
||||
memcpy(b.apCell[b.nCell], &pOld->aData[8], 4);
|
||||
}else{
|
||||
assert( leafCorrection==4 );
|
||||
while( szCell[nCell]<4 ){
|
||||
while( b.szCell[b.nCell]<4 ){
|
||||
/* Do not allow any cells smaller than 4 bytes. If a smaller cell
|
||||
** does exist, pad it with 0x00 bytes. */
|
||||
assert( szCell[nCell]==3 || CORRUPT_DB );
|
||||
assert( apCell[nCell]==&aSpace1[iSpace1-3] || CORRUPT_DB );
|
||||
assert( b.szCell[b.nCell]==3 || CORRUPT_DB );
|
||||
assert( b.apCell[b.nCell]==&aSpace1[iSpace1-3] || CORRUPT_DB );
|
||||
aSpace1[iSpace1++] = 0x00;
|
||||
szCell[nCell]++;
|
||||
b.szCell[b.nCell]++;
|
||||
}
|
||||
}
|
||||
nCell++;
|
||||
b.nCell++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Figure out the number of pages needed to hold all nCell cells.
|
||||
** Figure out the number of pages needed to hold all b.nCell cells.
|
||||
** Store this number in "k". Also compute szNew[] which is the total
|
||||
** size of all cells on the i-th page and cntNew[] which is the index
|
||||
** in apCell[] of the cell that divides page i from page i+1.
|
||||
** cntNew[k] should equal nCell.
|
||||
** in b.apCell[] of the cell that divides page i from page i+1.
|
||||
** cntNew[k] should equal b.nCell.
|
||||
**
|
||||
** Values computed by this block:
|
||||
**
|
||||
** k: The total number of sibling pages
|
||||
** szNew[i]: Spaced used on the i-th sibling page.
|
||||
** cntNew[i]: Index in apCell[] and szCell[] for the first cell to
|
||||
** cntNew[i]: Index in b.apCell[] and b.szCell[] for the first cell to
|
||||
** the right of the i-th sibling page.
|
||||
** usableSpace: Number of bytes of space available on each sibling.
|
||||
**
|
||||
*/
|
||||
usableSpace = pBt->usableSize - 12 + leafCorrection;
|
||||
for(subtotal=k=i=0; i<nCell; i++){
|
||||
assert( i<nMaxCells );
|
||||
subtotal += szCell[i] + 2;
|
||||
if( subtotal > usableSpace ){
|
||||
szNew[k] = subtotal - szCell[i] - 2;
|
||||
cntNew[k] = i;
|
||||
if( leafData ){ i--; }
|
||||
subtotal = 0;
|
||||
k++;
|
||||
if( k>NB+1 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; }
|
||||
for(i=0; i<nOld; i++){
|
||||
MemPage *p = apOld[i];
|
||||
szNew[i] = usableSpace - p->nFree;
|
||||
if( szNew[i]<0 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; }
|
||||
for(j=0; j<p->nOverflow; j++){
|
||||
szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]);
|
||||
}
|
||||
cntNew[i] = cntOld[i];
|
||||
}
|
||||
k = nOld;
|
||||
for(i=0; i<k; i++){
|
||||
int sz;
|
||||
while( szNew[i]>usableSpace ){
|
||||
if( i+1>=k ){
|
||||
k = i+2;
|
||||
if( k>NB+2 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; }
|
||||
szNew[k-1] = 0;
|
||||
cntNew[k-1] = b.nCell;
|
||||
}
|
||||
sz = 2 + cachedCellSize(&b, cntNew[i]-1);
|
||||
szNew[i] -= sz;
|
||||
if( !leafData ){
|
||||
if( cntNew[i]<b.nCell ){
|
||||
sz = 2 + cachedCellSize(&b, cntNew[i]);
|
||||
}else{
|
||||
sz = 0;
|
||||
}
|
||||
}
|
||||
szNew[i+1] += sz;
|
||||
cntNew[i]--;
|
||||
}
|
||||
while( cntNew[i]<b.nCell ){
|
||||
sz = 2 + cachedCellSize(&b, cntNew[i]);
|
||||
if( szNew[i]+sz>usableSpace ) break;
|
||||
szNew[i] += sz;
|
||||
cntNew[i]++;
|
||||
if( !leafData ){
|
||||
if( cntNew[i]<b.nCell ){
|
||||
sz = 2 + cachedCellSize(&b, cntNew[i]);
|
||||
}else{
|
||||
sz = 0;
|
||||
}
|
||||
}
|
||||
szNew[i+1] -= sz;
|
||||
}
|
||||
if( cntNew[i]>=b.nCell ){
|
||||
k = i+1;
|
||||
}else if( cntNew[i] - (i>0 ? cntNew[i-1] : 0) <= 0 ){
|
||||
rc = SQLITE_CORRUPT_BKPT;
|
||||
goto balance_cleanup;
|
||||
}
|
||||
}
|
||||
szNew[k] = subtotal;
|
||||
cntNew[k] = nCell;
|
||||
k++;
|
||||
|
||||
/*
|
||||
** The packing computed by the previous block is biased toward the siblings
|
||||
@ -7116,16 +7217,24 @@ static int balance_nonroot(
|
||||
|
||||
r = cntNew[i-1] - 1;
|
||||
d = r + 1 - leafData;
|
||||
(void)cachedCellSize(&b, d);
|
||||
while(1){
|
||||
assert( d<nMaxCells );
|
||||
assert( r<nMaxCells );
|
||||
while( szRight==0
|
||||
|| (!bBulk && szRight+szCell[d]+2<=szLeft-(szCell[r]+2))
|
||||
){
|
||||
szRight += szCell[d] + 2;
|
||||
szLeft -= szCell[r] + 2;
|
||||
cntNew[i-1]--;
|
||||
r = cntNew[i-1] - 1;
|
||||
d = r + 1 - leafData;
|
||||
(void)cachedCellSize(&b, r);
|
||||
if( szRight!=0
|
||||
&& (bBulk || szRight+b.szCell[d]+2 > szLeft-(b.szCell[r]+2)) ){
|
||||
break;
|
||||
}
|
||||
szRight += b.szCell[d] + 2;
|
||||
szLeft -= b.szCell[r] + 2;
|
||||
cntNew[i-1] = r;
|
||||
if( cntNew[i-1] <= 0 ){
|
||||
rc = SQLITE_CORRUPT_BKPT;
|
||||
goto balance_cleanup;
|
||||
}
|
||||
r--;
|
||||
d--;
|
||||
}
|
||||
szNew[i] = szRight;
|
||||
szNew[i-1] = szLeft;
|
||||
@ -7164,7 +7273,7 @@ static int balance_nonroot(
|
||||
zeroPage(pNew, pageFlags);
|
||||
apNew[i] = pNew;
|
||||
nNew++;
|
||||
cntOld[i] = nCell;
|
||||
cntOld[i] = b.nCell;
|
||||
|
||||
/* Set the pointer-map entry for the new sibling page. */
|
||||
if( ISAUTOVACUUM ){
|
||||
@ -7269,8 +7378,8 @@ static int balance_nonroot(
|
||||
int iNew = 0;
|
||||
int iOld = 0;
|
||||
|
||||
for(i=0; i<nCell; i++){
|
||||
u8 *pCell = apCell[i];
|
||||
for(i=0; i<b.nCell; i++){
|
||||
u8 *pCell = b.apCell[i];
|
||||
if( i==cntOldNext ){
|
||||
MemPage *pOld = (++iOld)<nNew ? apNew[iOld] : apOld[iOld];
|
||||
cntOldNext += pOld->nCell + pOld->nOverflow + !leafData;
|
||||
@ -7295,9 +7404,10 @@ static int balance_nonroot(
|
||||
if( !leafCorrection ){
|
||||
ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc);
|
||||
}
|
||||
if( szCell[i]>pNew->minLocal ){
|
||||
if( cachedCellSize(&b,i)>pNew->minLocal ){
|
||||
ptrmapPutOvflPtr(pNew, pCell, &rc);
|
||||
}
|
||||
if( rc ) goto balance_cleanup;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -7311,20 +7421,21 @@ static int balance_nonroot(
|
||||
j = cntNew[i];
|
||||
|
||||
assert( j<nMaxCells );
|
||||
pCell = apCell[j];
|
||||
sz = szCell[j] + leafCorrection;
|
||||
assert( b.apCell[j]!=0 );
|
||||
pCell = b.apCell[j];
|
||||
sz = b.szCell[j] + leafCorrection;
|
||||
pTemp = &aOvflSpace[iOvflSpace];
|
||||
if( !pNew->leaf ){
|
||||
memcpy(&pNew->aData[8], pCell, 4);
|
||||
}else if( leafData ){
|
||||
/* If the tree is a leaf-data tree, and the siblings are leaves,
|
||||
** then there is no divider cell in apCell[]. Instead, the divider
|
||||
** then there is no divider cell in b.apCell[]. Instead, the divider
|
||||
** cell consists of the integer key for the right-most cell of
|
||||
** the sibling-page assembled above only.
|
||||
*/
|
||||
CellInfo info;
|
||||
j--;
|
||||
pNew->xParseCell(pNew, apCell[j], &info);
|
||||
pNew->xParseCell(pNew, b.apCell[j], &info);
|
||||
pCell = pTemp;
|
||||
sz = 4 + putVarint(&pCell[4], info.nKey);
|
||||
pTemp = 0;
|
||||
@ -7341,7 +7452,7 @@ static int balance_nonroot(
|
||||
** cells are at least 4 bytes. It only happens in b-trees used
|
||||
** to evaluate "IN (SELECT ...)" and similar clauses.
|
||||
*/
|
||||
if( szCell[j]==4 ){
|
||||
if( b.szCell[j]==4 ){
|
||||
assert(leafCorrection==4);
|
||||
sz = pParent->xCellSize(pParent, pCell);
|
||||
}
|
||||
@ -7399,12 +7510,13 @@ static int balance_nonroot(
|
||||
iNew = iOld = 0;
|
||||
nNewCell = cntNew[0];
|
||||
}else{
|
||||
iOld = iPg<nOld ? (cntOld[iPg-1] + !leafData) : nCell;
|
||||
iOld = iPg<nOld ? (cntOld[iPg-1] + !leafData) : b.nCell;
|
||||
iNew = cntNew[iPg-1] + !leafData;
|
||||
nNewCell = cntNew[iPg] - iNew;
|
||||
}
|
||||
|
||||
editPage(apNew[iPg], iOld, iNew, nNewCell, apCell, szCell);
|
||||
rc = editPage(apNew[iPg], iOld, iNew, nNewCell, &b);
|
||||
if( rc ) goto balance_cleanup;
|
||||
abDone[iPg]++;
|
||||
apNew[iPg]->nFree = usableSpace-szNew[iPg];
|
||||
assert( apNew[iPg]->nOverflow==0 );
|
||||
@ -7455,7 +7567,7 @@ static int balance_nonroot(
|
||||
|
||||
assert( pParent->isInit );
|
||||
TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n",
|
||||
nOld, nNew, nCell));
|
||||
nOld, nNew, b.nCell));
|
||||
|
||||
/* Free any old pages that were not reused as new pages.
|
||||
*/
|
||||
@ -7478,7 +7590,7 @@ static int balance_nonroot(
|
||||
** Cleanup before returning.
|
||||
*/
|
||||
balance_cleanup:
|
||||
sqlite3ScratchFree(apCell);
|
||||
sqlite3ScratchFree(b.apCell);
|
||||
for(i=0; i<nOld; i++){
|
||||
releasePage(apOld[i]);
|
||||
}
|
||||
@ -7929,7 +8041,7 @@ int sqlite3BtreeDelete(BtCursor *pCur){
|
||||
unsigned char *pTmp;
|
||||
|
||||
pCell = findCell(pLeaf, pLeaf->nCell-1);
|
||||
if( pCell<&pLeaf->aData[4] ) return SQLITE_CORRUPT_BKPT;
|
||||
if( NEVER(pCell<&pLeaf->aData[4]) ) return SQLITE_CORRUPT_BKPT;
|
||||
nCell = pLeaf->xCellSize(pLeaf, pCell);
|
||||
assert( MX_CELL_SIZE(pBt) >= nCell );
|
||||
pTmp = pBt->pTmpSpace;
|
||||
|
Loading…
Reference in New Issue
Block a user