Optimize copying data from fts5 in-memory hash tables to top level segments.

FossilOrigin-Name: 8e3ca6323a2beab5f04250e24ae15b159d2aa0ac
This commit is contained in:
dan 2015-02-26 20:49:09 +00:00
parent 4a7e11c629
commit dfdc4b4613
4 changed files with 150 additions and 112 deletions

@ -393,55 +393,6 @@ static int fts5HashEntrySort(
return SQLITE_OK;
}
/*
** Visit every entry of hash table pHash in the order produced by
** fts5HashEntrySort(), reporting its contents through three callbacks
** that each receive pCtx as their first argument:
**
**   xTerm(pCtx, zKey, nKey)              once per entry, before its doclist
**   xEntry(pCtx, iRowid, aPoslist, n)    once per rowid in the doclist; the
**                                        buffer starts with the poslist-size
**                                        varint and n covers that varint
**                                        plus the poslist itself
**   xTermDone(pCtx)                      once per entry, after its doclist
**
** The hash slot array is zeroed and every entry is released with
** sqlite3_free() as the list is walked, including entries that follow
** a failed callback. The first non-SQLITE_OK value (from the sort or
** from a callback) suppresses all further callbacks and is returned.
*/
int sqlite3Fts5HashIterate(
  Fts5Hash *pHash,
  void *pCtx,
  int (*xTerm)(void*, const char*, int),
  int (*xEntry)(void*, i64, const u8*, int),
  int (*xTermDone)(void*)
){
  Fts5HashEntry *pEntry;          /* Entry currently being visited */
  Fts5HashEntry *pFollowing;      /* Entry to visit after pEntry */
  int rc = fts5HashEntrySort(pHash, 0, 0, &pEntry);

  if( rc!=SQLITE_OK ) return rc;

  /* The entries are now owned by the sorted list only. */
  memset(pHash->aSlot, 0, sizeof(Fts5HashEntry*) * pHash->nSlot);

  for(; pEntry; pEntry=pFollowing){
    /* Read the link before the entry is freed below. */
    pFollowing = pEntry->pScanNext;

    if( rc==SQLITE_OK ){
      const int nKey = strlen(pEntry->zKey);
      u8 *aData = (u8*)pEntry;    /* Entry viewed as a byte array */
      int iOff = sizeof(Fts5HashEntry) + nKey + 1;  /* Start of doclist */
      i64 iRowid = 0;             /* Running sum of rowid deltas */

      /* The size field of the last poslist must be written before the
      ** doclist can be decoded. */
      fts5HashAddPoslistSize(pEntry);

      /* Announce the term. */
      rc = xTerm(pCtx, pEntry->zKey, nKey);

      /* Decode the doclist, one (rowid-delta, poslist) pair at a time. */
      while( rc==SQLITE_OK && iOff<pEntry->nData ){
        i64 iDelta;               /* Rowid delta value */
        int nPoslist;             /* Size of position list in bytes */
        int nSize;                /* Bytes used by the poslist-size varint */

        iOff += getVarint(&aData[iOff], (u64*)&iDelta);
        iRowid += iDelta;

        nSize = fts5GetVarint32(&aData[iOff], nPoslist);
        rc = xEntry(pCtx, iRowid, &aData[iOff], nPoslist+nSize);
        iOff += nSize+nPoslist;
      }

      /* Close the term, unless something above failed. */
      if( rc==SQLITE_OK ){
        rc = xTermDone(pCtx);
      }
    }

    sqlite3_free(pEntry);
  }

  return rc;
}
/*
** Query the hash table for a doclist associated with term pTerm/nTerm.
*/
@ -478,9 +429,8 @@ void sqlite3Fts5HashScanInit(
}
/*
** Advance the scan cursor p->pScan to the entry reached through its
** pScanNext link. A NULL cursor is left unchanged.
**
** NOTE(review): the statements below look like two forms of the same
** step (an if-block form and a local-variable form) captured from a
** change listing. Read literally they would advance the cursor twice;
** confirm which form belongs to the live function.
*/
void sqlite3Fts5HashScanNext(Fts5Hash *p){
/* Form 1: test and advance through p->pScan directly. */
if( p->pScan ){
p->pScan = p->pScan->pScanNext;
}
/* Form 2: cache the cursor in a local, then advance if non-NULL. */
Fts5HashEntry *pScan = p->pScan;
if( pScan ) p->pScan = pScan->pScanNext;
}
int sqlite3Fts5HashScanEof(Fts5Hash *p){

@ -113,7 +113,7 @@
** poslist: first poslist
** zero-or-more {
** varint: rowid delta (always > 0)
** poslist: first poslist
** poslist: next poslist
** }
** 0x00 byte
**
@ -2677,7 +2677,7 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
int bFlag = 0;
Fts5PageWriter *pPg;
pPg = &pWriter->aWriter[1];
if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE && pWriter->cdlidx.n ){
i64 iKey = FTS5_DOCLIST_IDX_ROWID(
pWriter->iIdx, pWriter->iSegid,
pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
@ -3004,12 +3004,15 @@ static void fts5WriteFinish(
){
int i;
if( p->rc==SQLITE_OK ){
*pnLeaf = pWriter->aWriter[0].pgno;
if( *pnLeaf==1 && pWriter->aWriter[0].buf.n==0 ){
Fts5PageWriter *pLeaf = &pWriter->aWriter[0];
if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){
*pnLeaf = 0;
*pnHeight = 0;
}else{
fts5WriteFlushLeaf(p, pWriter);
if( pLeaf->buf.n>4 ){
fts5WriteFlushLeaf(p, pWriter);
}
*pnLeaf = pLeaf->pgno-1;
if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
fts5WriteBtreeGrow(p, pWriter);
}
@ -3381,44 +3384,20 @@ struct Fts5FlushCtx {
Fts5SegWriter writer;
};
static int fts5FlushNewTerm(void *pCtx, const char *zTerm, int nTerm){
Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
int rc = SQLITE_OK;
fts5WriteAppendTerm(p->pIdx, &p->writer, nTerm, (const u8*)zTerm);
return rc;
}
static int fts5FlushTermDone(void *pCtx){
Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
int rc = SQLITE_OK;
/* Write the doclist terminator */
fts5WriteAppendZerobyte(p->pIdx, &p->writer);
return rc;
}
/*
** Per-rowid callback passed to sqlite3Fts5HashIterate() when a hash
** table is flushed. pCtx points to the Fts5FlushCtx for the flush.
**
** iRowid is the rowid of the entry. aPoslist/nPoslist is its position
** list, including the leading poslist-size varint. The rowid and then
** the position-list bytes are appended to the context's segment writer.
**
** Returns the rc field of the Fts5Index held by the context.
*/
static int fts5FlushNewEntry(
void *pCtx,
i64 iRowid,
const u8 *aPoslist,
int nPoslist
){
Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
Fts5Index *pIdx = p->pIdx;
#ifdef SQLITE_DEBUG
/* The poslist-size varint should already be at the start of the
** aPoslist/nPoslist buffer. This assert verifies that. */
int n, i;
i = fts5GetVarint32(aPoslist, n);
assert( nPoslist==(n+i) );
#endif
/* Append the rowid itself */
fts5WriteAppendRowid(pIdx, &p->writer, iRowid);
/* And the poslist data */
fts5WriteAppendPoslistData(pIdx, &p->writer, aPoslist, nPoslist);
/* Report the index error state to the iterator. */
return pIdx->rc;
/*
** aBuf[] holds a sequence of varints, each of which fits in a 32-bit
** integer. Return the number of bytes occupied by the longest run of
** whole varints, starting at aBuf[0], that is no larger than nMax
** bytes. A varint is never split: if the next one would end past
** offset nMax it is excluded. Returns 0 if even the first varint does
** not fit.
**
** The varint that first crosses nMax is decoded as well, so the buffer
** must remain readable up to the end of that varint.
*/
static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
  u32 unused;                     /* Decoded value; only the size matters */
  int nPrefix = 0;                /* Bytes of whole varints accepted so far */
  int iEnd;                       /* Offset just past the next varint */

  iEnd = fts5GetVarint32(&aBuf[0], unused);
  while( iEnd<=nMax ){
    nPrefix = iEnd;
    iEnd += fts5GetVarint32(&aBuf[nPrefix], unused);
  }
  return nPrefix;
}
/*
@ -3429,6 +3408,7 @@ static int fts5FlushNewEntry(
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){
Fts5Hash *pHash = p->apHash[iHash];
Fts5Structure *pStruct;
int iSegid;
int pgnoLast = 0; /* Last leaf page number in segment */
@ -3439,19 +3419,127 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){
iSegid = fts5AllocateSegid(p, pStruct);
if( iSegid ){
const int pgsz = p->pConfig->pgsz;
Fts5StructureSegment *pSeg; /* New segment within pStruct */
int nHeight; /* Height of new segment b-tree */
int rc;
Fts5FlushCtx ctx;
Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
fts5WriteInit(p, &ctx.writer, iHash, iSegid);
ctx.pIdx = p;
Fts5SegWriter writer;
fts5WriteInit(p, &writer, iHash, iSegid);
rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx,
fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone
);
if( p->rc==SQLITE_OK ) p->rc = rc;
fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast);
/* Pre-allocate the buffer used to assemble leaf pages to the target
** page size. */
assert( pgsz>0 );
pBuf = &writer.aWriter[0].buf;
fts5BufferGrow(&p->rc, pBuf, pgsz + 20);
/* Begin scanning through hash table entries. */
if( p->rc==SQLITE_OK ){
memset(pBuf->p, 0, 4);
pBuf->n = 4;
sqlite3Fts5HashScanInit(pHash, 0, 0);
}
while( 0==sqlite3Fts5HashScanEof(pHash) ){
const char *zTerm;
int nTerm;
const u8 *pDoclist;
int nDoclist;
sqlite3Fts5HashScanEntry(pHash, &zTerm,(const char**)&pDoclist,&nDoclist);
nTerm = strlen(zTerm);
/* Decide if the term fits on the current leaf. If not, flush it
** to disk. */
if( (pBuf->n + nTerm + 2) > pgsz ){
fts5WriteFlushLeaf(p, &writer);
pBuf = &writer.aWriter[0].buf;
if( (nTerm + 32) > pBuf->nSpace ){
fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
}
}
/* Write the term to the leaf. And push it up into the b-tree hierarchy */
if( writer.bFirstTermInPage==0 ){
pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], 0);
}else{
fts5PutU16(&pBuf->p[2], pBuf->n);
writer.bFirstTermInPage = 0;
if( writer.aWriter[0].pgno!=1 ){
fts5WriteBtreeTerm(p, &writer, nTerm, (const u8*)zTerm);
pBuf = &writer.aWriter[0].buf;
}
}
pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nTerm);
fts5BufferAppendBlob(&p->rc, pBuf, nTerm, (const u8*)zTerm);
if( pgsz>=(pBuf->n + nDoclist + 1) ){
/* The entire doclist will fit on the current leaf. */
fts5BufferAppendBlob(&p->rc, pBuf, nDoclist, pDoclist);
}else{
i64 iRowid = 0;
i64 iDelta = 0;
int iOff = 0;
int bFirstDocid = 0;
/* The entire doclist will not fit on this leaf. The following
** loop iterates through the poslists that make up the current
** doclist. */
while( iOff<nDoclist ){
u32 nPos;
int nCopy;
iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta);
nCopy = fts5GetVarint32(&pDoclist[iOff], nPos);
nCopy += nPos;
iRowid += iDelta;
if( bFirstDocid ){
fts5PutU16(&pBuf->p[0], pBuf->n); /* first docid on page */
pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid);
bFirstDocid = 0;
}else{
pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta);
}
assert( pBuf->n<=pBuf->nSpace );
if( (pBuf->n + nCopy) <= pgsz ){
/* The entire poslist will fit on the current leaf. So copy
** it in one go. */
fts5BufferAppendBlob(&p->rc, pBuf, nCopy, &pDoclist[iOff]);
}else{
/* The entire poslist will not fit on this leaf. So it needs
** to be broken into sections. The only qualification being
** that each varint must be stored contiguously. */
const u8 *pPoslist = &pDoclist[iOff];
int iPos = 0;
while( 1 ){
int nSpace = pgsz - pBuf->n;
int n;
if( (nCopy - iPos)<=nSpace ){
n = nCopy - iPos;
}else{
n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
}
fts5BufferAppendBlob(&p->rc, pBuf, n, &pPoslist[iPos]);
iPos += n;
if( iPos>=nCopy ) break;
fts5WriteFlushLeaf(p, &writer);
pBuf = &writer.aWriter[0].buf;
}
bFirstDocid = 1;
}
assert( pBuf->n<=pgsz );
iOff += nCopy;
}
}
pBuf->p[pBuf->n++] = '\0';
assert( pBuf->n<=pBuf->nSpace );
sqlite3Fts5HashScanNext(pHash);
}
sqlite3Fts5HashClear(pHash);
fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);
/* Update the Fts5Structure. It is written back to the database by the
** fts5StructureRelease() call below. */

@ -1,5 +1,5 @@
C Fix\san\sfts5\sbug\sin\slarge\sincremental\smerges.
D 2015-02-26T14:54:03.688
C Optimize\scopying\sdata\sfrom\sfts5\sin-memory\shash\stables\sto\stop\slevel\ssegments.
D 2015-02-26T20:49:09.566
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -111,8 +111,8 @@ F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f
F ext/fts5/fts5_buffer.c b92ba0eb67532d174934087f93716caf9a2168c7
F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700
F ext/fts5/fts5_expr.c eee52c9df84eade48eaa3f50c8876f44b552ff9b
F ext/fts5/fts5_hash.c 9032dd35bf8da6f9d4fc8c955c348dd6d229d8e4
F ext/fts5/fts5_index.c 97ce5c919be5a70b623f89c66c60bda15408d577
F ext/fts5/fts5_hash.c 323099a445bf8f608af069e2d8ff4bb93db9904c
F ext/fts5/fts5_index.c 7a9de0c033a8f702f8e3659a23c2ea31bbbb789b
F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548
F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5
F ext/fts5/fts5_tokenize.c 0d108148c26132448487926fe683425002aee369
@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 7eb022d7e5fdb180af823c82c47c938e4a7a355f
R a31df37a1e652372bc458cec184c4145
P 208e3cb6b6dc8c7d824b64dec2034004c9fcbba5
R 2ae35fa8ad1fd2b74f86acf33cc74d9f
U dan
Z ba5b725699c58a1a7e341156a5c76451
Z 84061e5477aeb4ed2552f0a9aa275eda

@ -1 +1 @@
208e3cb6b6dc8c7d824b64dec2034004c9fcbba5
8e3ca6323a2beab5f04250e24ae15b159d2aa0ac