Optimize copying data from fts5 in-memory hash tables to top level segments.
FossilOrigin-Name: 8e3ca6323a2beab5f04250e24ae15b159d2aa0ac
This commit is contained in:
parent
4a7e11c629
commit
dfdc4b4613
@ -393,55 +393,6 @@ static int fts5HashEntrySort(
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
int sqlite3Fts5HashIterate(
|
||||
Fts5Hash *pHash,
|
||||
void *pCtx,
|
||||
int (*xTerm)(void*, const char*, int),
|
||||
int (*xEntry)(void*, i64, const u8*, int),
|
||||
int (*xTermDone)(void*)
|
||||
){
|
||||
Fts5HashEntry *pList;
|
||||
int rc;
|
||||
|
||||
rc = fts5HashEntrySort(pHash, 0, 0, &pList);
|
||||
if( rc==SQLITE_OK ){
|
||||
memset(pHash->aSlot, 0, sizeof(Fts5HashEntry*) * pHash->nSlot);
|
||||
while( pList ){
|
||||
Fts5HashEntry *pNext = pList->pScanNext;
|
||||
if( rc==SQLITE_OK ){
|
||||
const int nKey = strlen(pList->zKey);
|
||||
i64 iRowid = 0;
|
||||
u8 *pPtr = (u8*)pList;
|
||||
int iOff = sizeof(Fts5HashEntry) + nKey + 1;
|
||||
|
||||
/* Fill in the final poslist size field */
|
||||
fts5HashAddPoslistSize(pList);
|
||||
|
||||
/* Issue the new-term callback */
|
||||
rc = xTerm(pCtx, pList->zKey, nKey);
|
||||
|
||||
/* Issue the xEntry callbacks */
|
||||
while( rc==SQLITE_OK && iOff<pList->nData ){
|
||||
i64 iDelta; /* Rowid delta value */
|
||||
int nPoslist; /* Size of position list in bytes */
|
||||
int nVarint;
|
||||
iOff += getVarint(&pPtr[iOff], (u64*)&iDelta);
|
||||
iRowid += iDelta;
|
||||
nVarint = fts5GetVarint32(&pPtr[iOff], nPoslist);
|
||||
rc = xEntry(pCtx, iRowid, &pPtr[iOff], nPoslist+nVarint);
|
||||
iOff += nVarint+nPoslist;
|
||||
}
|
||||
|
||||
/* Issue the term-done callback */
|
||||
if( rc==SQLITE_OK ) rc = xTermDone(pCtx);
|
||||
}
|
||||
sqlite3_free(pList);
|
||||
pList = pNext;
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Query the hash table for a doclist associated with term pTerm/nTerm.
|
||||
*/
|
||||
@ -478,9 +429,8 @@ void sqlite3Fts5HashScanInit(
|
||||
}
|
||||
|
||||
/*
** Advance the scan cursor of hash table p (p->pScan) by one entry along
** the pScanNext chain. This is a no-op if there is no current entry.
**
** The cursor must move exactly one entry per call: scan loops step with
** this function once per iteration, so advancing twice here would cause
** every second entry to be skipped.
*/
void sqlite3Fts5HashScanNext(Fts5Hash *p){
  Fts5HashEntry *pScan = p->pScan;
  if( pScan ) p->pScan = pScan->pScanNext;
}
|
||||
|
||||
int sqlite3Fts5HashScanEof(Fts5Hash *p){
|
||||
|
@ -113,7 +113,7 @@
|
||||
** poslist: first poslist
|
||||
** zero-or-more {
|
||||
** varint: rowid delta (always > 0)
|
||||
** poslist: next poslist
|
||||
** }
|
||||
** 0x00 byte
|
||||
**
|
||||
@ -2677,7 +2677,7 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
|
||||
int bFlag = 0;
|
||||
Fts5PageWriter *pPg;
|
||||
pPg = &pWriter->aWriter[1];
|
||||
if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
|
||||
if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE && pWriter->cdlidx.n ){
|
||||
i64 iKey = FTS5_DOCLIST_IDX_ROWID(
|
||||
pWriter->iIdx, pWriter->iSegid,
|
||||
pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
|
||||
@ -3004,12 +3004,15 @@ static void fts5WriteFinish(
|
||||
){
|
||||
int i;
|
||||
if( p->rc==SQLITE_OK ){
|
||||
*pnLeaf = pWriter->aWriter[0].pgno;
|
||||
if( *pnLeaf==1 && pWriter->aWriter[0].buf.n==0 ){
|
||||
Fts5PageWriter *pLeaf = &pWriter->aWriter[0];
|
||||
if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){
|
||||
*pnLeaf = 0;
|
||||
*pnHeight = 0;
|
||||
}else{
|
||||
fts5WriteFlushLeaf(p, pWriter);
|
||||
if( pLeaf->buf.n>4 ){
|
||||
fts5WriteFlushLeaf(p, pWriter);
|
||||
}
|
||||
*pnLeaf = pLeaf->pgno-1;
|
||||
if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
|
||||
fts5WriteBtreeGrow(p, pWriter);
|
||||
}
|
||||
@ -3381,44 +3384,20 @@ struct Fts5FlushCtx {
|
||||
Fts5SegWriter writer;
|
||||
};
|
||||
|
||||
static int fts5FlushNewTerm(void *pCtx, const char *zTerm, int nTerm){
|
||||
Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
|
||||
int rc = SQLITE_OK;
|
||||
fts5WriteAppendTerm(p->pIdx, &p->writer, nTerm, (const u8*)zTerm);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int fts5FlushTermDone(void *pCtx){
|
||||
Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
|
||||
int rc = SQLITE_OK;
|
||||
/* Write the doclist terminator */
|
||||
fts5WriteAppendZerobyte(p->pIdx, &p->writer);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int fts5FlushNewEntry(
|
||||
void *pCtx,
|
||||
i64 iRowid,
|
||||
const u8 *aPoslist,
|
||||
int nPoslist
|
||||
){
|
||||
Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
|
||||
Fts5Index *pIdx = p->pIdx;
|
||||
|
||||
#ifdef SQLITE_DEBUG
|
||||
/* The poslist-size varint should already be at the start of the
|
||||
** aPoslist/nPoslist buffer. This assert verifies that. */
|
||||
int n, i;
|
||||
i = fts5GetVarint32(aPoslist, n);
|
||||
assert( nPoslist==(n+i) );
|
||||
#endif
|
||||
|
||||
/* Append the rowid itself */
|
||||
fts5WriteAppendRowid(pIdx, &p->writer, iRowid);
|
||||
|
||||
/* And the poslist data */
|
||||
fts5WriteAppendPoslistData(pIdx, &p->writer, aPoslist, nPoslist);
|
||||
return pIdx->rc;
|
||||
/*
|
||||
** Buffer aBuf[] contains a list of varints, all small enough to fit
|
||||
** in a 32-bit integer. Return the size of the largest prefix of this
|
||||
** list nMax bytes or less in size.
|
||||
*/
|
||||
static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
|
||||
int ret = 0;
|
||||
while( 1 ){
|
||||
u32 dummy;
|
||||
int i = fts5GetVarint32(&aBuf[ret], dummy);
|
||||
if( (ret + i) > nMax ) break;
|
||||
ret += i;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3429,6 +3408,7 @@ static int fts5FlushNewEntry(
|
||||
** already occurred, this function is a no-op.
|
||||
*/
|
||||
static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){
|
||||
Fts5Hash *pHash = p->apHash[iHash];
|
||||
Fts5Structure *pStruct;
|
||||
int iSegid;
|
||||
int pgnoLast = 0; /* Last leaf page number in segment */
|
||||
@ -3439,19 +3419,127 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){
|
||||
iSegid = fts5AllocateSegid(p, pStruct);
|
||||
|
||||
if( iSegid ){
|
||||
const int pgsz = p->pConfig->pgsz;
|
||||
|
||||
Fts5StructureSegment *pSeg; /* New segment within pStruct */
|
||||
int nHeight; /* Height of new segment b-tree */
|
||||
int rc;
|
||||
Fts5FlushCtx ctx;
|
||||
Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
|
||||
|
||||
fts5WriteInit(p, &ctx.writer, iHash, iSegid);
|
||||
ctx.pIdx = p;
|
||||
Fts5SegWriter writer;
|
||||
fts5WriteInit(p, &writer, iHash, iSegid);
|
||||
|
||||
rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx,
|
||||
fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone
|
||||
);
|
||||
if( p->rc==SQLITE_OK ) p->rc = rc;
|
||||
fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast);
|
||||
/* Pre-allocate the buffer used to assemble leaf pages to the target
|
||||
** page size. */
|
||||
assert( pgsz>0 );
|
||||
pBuf = &writer.aWriter[0].buf;
|
||||
fts5BufferGrow(&p->rc, pBuf, pgsz + 20);
|
||||
|
||||
/* Begin scanning through hash table entries. */
|
||||
if( p->rc==SQLITE_OK ){
|
||||
memset(pBuf->p, 0, 4);
|
||||
pBuf->n = 4;
|
||||
sqlite3Fts5HashScanInit(pHash, 0, 0);
|
||||
}
|
||||
|
||||
while( 0==sqlite3Fts5HashScanEof(pHash) ){
|
||||
const char *zTerm;
|
||||
int nTerm;
|
||||
const u8 *pDoclist;
|
||||
int nDoclist;
|
||||
|
||||
sqlite3Fts5HashScanEntry(pHash, &zTerm,(const char**)&pDoclist,&nDoclist);
|
||||
nTerm = strlen(zTerm);
|
||||
|
||||
/* Decide if the term fits on the current leaf. If not, flush it
|
||||
** to disk. */
|
||||
if( (pBuf->n + nTerm + 2) > pgsz ){
|
||||
fts5WriteFlushLeaf(p, &writer);
|
||||
pBuf = &writer.aWriter[0].buf;
|
||||
if( (nTerm + 32) > pBuf->nSpace ){
|
||||
fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
|
||||
}
|
||||
}
|
||||
|
||||
/* Write the term to the leaf. And push it up into the b-tree hierarchy */
|
||||
if( writer.bFirstTermInPage==0 ){
|
||||
pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], 0);
|
||||
}else{
|
||||
fts5PutU16(&pBuf->p[2], pBuf->n);
|
||||
writer.bFirstTermInPage = 0;
|
||||
if( writer.aWriter[0].pgno!=1 ){
|
||||
fts5WriteBtreeTerm(p, &writer, nTerm, (const u8*)zTerm);
|
||||
pBuf = &writer.aWriter[0].buf;
|
||||
}
|
||||
}
|
||||
pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nTerm);
|
||||
fts5BufferAppendBlob(&p->rc, pBuf, nTerm, (const u8*)zTerm);
|
||||
|
||||
if( pgsz>=(pBuf->n + nDoclist + 1) ){
|
||||
/* The entire doclist will fit on the current leaf. */
|
||||
fts5BufferAppendBlob(&p->rc, pBuf, nDoclist, pDoclist);
|
||||
}else{
|
||||
i64 iRowid = 0;
|
||||
i64 iDelta = 0;
|
||||
int iOff = 0;
|
||||
int bFirstDocid = 0;
|
||||
|
||||
/* The entire doclist will not fit on this leaf. The following
|
||||
** loop iterates through the poslists that make up the current
|
||||
** doclist. */
|
||||
while( iOff<nDoclist ){
|
||||
u32 nPos;
|
||||
int nCopy;
|
||||
iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta);
|
||||
nCopy = fts5GetVarint32(&pDoclist[iOff], nPos);
|
||||
nCopy += nPos;
|
||||
iRowid += iDelta;
|
||||
|
||||
if( bFirstDocid ){
|
||||
fts5PutU16(&pBuf->p[0], pBuf->n); /* first docid on page */
|
||||
pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid);
|
||||
bFirstDocid = 0;
|
||||
}else{
|
||||
pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta);
|
||||
}
|
||||
assert( pBuf->n<=pBuf->nSpace );
|
||||
|
||||
if( (pBuf->n + nCopy) <= pgsz ){
|
||||
/* The entire poslist will fit on the current leaf. So copy
|
||||
** it in one go. */
|
||||
fts5BufferAppendBlob(&p->rc, pBuf, nCopy, &pDoclist[iOff]);
|
||||
}else{
|
||||
/* The entire poslist will not fit on this leaf. So it needs
|
||||
** to be broken into sections. The only qualification being
|
||||
** that each varint must be stored contiguously. */
|
||||
const u8 *pPoslist = &pDoclist[iOff];
|
||||
int iPos = 0;
|
||||
while( 1 ){
|
||||
int nSpace = pgsz - pBuf->n;
|
||||
int n;
|
||||
if( (nCopy - iPos)<=nSpace ){
|
||||
n = nCopy - iPos;
|
||||
}else{
|
||||
n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
|
||||
}
|
||||
fts5BufferAppendBlob(&p->rc, pBuf, n, &pPoslist[iPos]);
|
||||
iPos += n;
|
||||
if( iPos>=nCopy ) break;
|
||||
fts5WriteFlushLeaf(p, &writer);
|
||||
pBuf = &writer.aWriter[0].buf;
|
||||
}
|
||||
bFirstDocid = 1;
|
||||
}
|
||||
assert( pBuf->n<=pgsz );
|
||||
iOff += nCopy;
|
||||
}
|
||||
}
|
||||
|
||||
pBuf->p[pBuf->n++] = '\0';
|
||||
assert( pBuf->n<=pBuf->nSpace );
|
||||
sqlite3Fts5HashScanNext(pHash);
|
||||
}
|
||||
sqlite3Fts5HashClear(pHash);
|
||||
fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);
|
||||
|
||||
/* Update the Fts5Structure. It is written back to the database by the
|
||||
** fts5StructureRelease() call below. */
|
||||
|
14
manifest
14
manifest
@ -1,5 +1,5 @@
|
||||
C Fix\san\sfts5\sbug\sin\slarge\sincremental\smerges.
|
||||
D 2015-02-26T14:54:03.688
|
||||
C Optimize\scopying\sdata\sfrom\sfts5\sin-memory\shash\stables\sto\stop\slevel\ssegments.
|
||||
D 2015-02-26T20:49:09.566
|
||||
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
||||
F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610
|
||||
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
||||
@ -111,8 +111,8 @@ F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f
|
||||
F ext/fts5/fts5_buffer.c b92ba0eb67532d174934087f93716caf9a2168c7
|
||||
F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700
|
||||
F ext/fts5/fts5_expr.c eee52c9df84eade48eaa3f50c8876f44b552ff9b
|
||||
F ext/fts5/fts5_hash.c 9032dd35bf8da6f9d4fc8c955c348dd6d229d8e4
|
||||
F ext/fts5/fts5_index.c 97ce5c919be5a70b623f89c66c60bda15408d577
|
||||
F ext/fts5/fts5_hash.c 323099a445bf8f608af069e2d8ff4bb93db9904c
|
||||
F ext/fts5/fts5_index.c 7a9de0c033a8f702f8e3659a23c2ea31bbbb789b
|
||||
F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548
|
||||
F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5
|
||||
F ext/fts5/fts5_tokenize.c 0d108148c26132448487926fe683425002aee369
|
||||
@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
|
||||
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
|
||||
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
|
||||
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
|
||||
P 7eb022d7e5fdb180af823c82c47c938e4a7a355f
|
||||
R a31df37a1e652372bc458cec184c4145
|
||||
P 208e3cb6b6dc8c7d824b64dec2034004c9fcbba5
|
||||
R 2ae35fa8ad1fd2b74f86acf33cc74d9f
|
||||
U dan
|
||||
Z ba5b725699c58a1a7e341156a5c76451
|
||||
Z 84061e5477aeb4ed2552f0a9aa275eda
|
||||
|
@ -1 +1 @@
|
||||
208e3cb6b6dc8c7d824b64dec2034004c9fcbba5
|
||||
8e3ca6323a2beab5f04250e24ae15b159d2aa0ac
|
Loading…
x
Reference in New Issue
Block a user