Implement optimize() function.
Backports check-in (5417) from fts3. (CVS 5458) FossilOrigin-Name: c16900dc7603cab30f8729b25361bc88bb37ae43
This commit is contained in:
parent
08904673c8
commit
db94e39b07
331
ext/fts2/fts2.c
331
ext/fts2/fts2.c
@ -1789,6 +1789,7 @@ typedef enum fulltext_statement {
|
|||||||
SEGDIR_SELECT_SEGMENT_STMT,
|
SEGDIR_SELECT_SEGMENT_STMT,
|
||||||
SEGDIR_SELECT_ALL_STMT,
|
SEGDIR_SELECT_ALL_STMT,
|
||||||
SEGDIR_DELETE_ALL_STMT,
|
SEGDIR_DELETE_ALL_STMT,
|
||||||
|
SEGDIR_COUNT_STMT,
|
||||||
|
|
||||||
MAX_STMT /* Always at end! */
|
MAX_STMT /* Always at end! */
|
||||||
} fulltext_statement;
|
} fulltext_statement;
|
||||||
@ -1830,6 +1831,7 @@ static const char *const fulltext_zStatement[MAX_STMT] = {
|
|||||||
"select start_block, leaves_end_block, root from %_segdir "
|
"select start_block, leaves_end_block, root from %_segdir "
|
||||||
" order by level desc, idx asc",
|
" order by level desc, idx asc",
|
||||||
/* SEGDIR_DELETE_ALL */ "delete from %_segdir",
|
/* SEGDIR_DELETE_ALL */ "delete from %_segdir",
|
||||||
|
/* SEGDIR_COUNT */ "select count(*), ifnull(max(level),0) from %_segdir",
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1980,15 +1982,18 @@ static int sql_single_step(sqlite3_stmt *s){
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Like sql_get_statement(), but for special replicated LEAF_SELECT
|
/* Like sql_get_statement(), but for special replicated LEAF_SELECT
|
||||||
** statements.
|
** statements. idx -1 is a special case for an uncached version of
|
||||||
|
** the statement (used in the optimize implementation).
|
||||||
*/
|
*/
|
||||||
/* TODO(shess) Write version for generic statements and then share
|
/* TODO(shess) Write version for generic statements and then share
|
||||||
** that between the cached-statement functions.
|
** that between the cached-statement functions.
|
||||||
*/
|
*/
|
||||||
static int sql_get_leaf_statement(fulltext_vtab *v, int idx,
|
static int sql_get_leaf_statement(fulltext_vtab *v, int idx,
|
||||||
sqlite3_stmt **ppStmt){
|
sqlite3_stmt **ppStmt){
|
||||||
assert( idx>=0 && idx<MERGE_COUNT );
|
assert( idx>=-1 && idx<MERGE_COUNT );
|
||||||
if( v->pLeafSelectStmts[idx]==NULL ){
|
if( idx==-1 ){
|
||||||
|
return sql_prepare(v->db, v->zDb, v->zName, ppStmt, LEAF_SELECT);
|
||||||
|
}else if( v->pLeafSelectStmts[idx]==NULL ){
|
||||||
int rc = sql_prepare(v->db, v->zDb, v->zName, &v->pLeafSelectStmts[idx],
|
int rc = sql_prepare(v->db, v->zDb, v->zName, &v->pLeafSelectStmts[idx],
|
||||||
LEAF_SELECT);
|
LEAF_SELECT);
|
||||||
if( rc!=SQLITE_OK ) return rc;
|
if( rc!=SQLITE_OK ) return rc;
|
||||||
@ -2315,6 +2320,37 @@ static int segdir_delete_all(fulltext_vtab *v){
|
|||||||
return sql_single_step(s);
|
return sql_single_step(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Returns SQLITE_OK with *pnSegments set to the number of entries in
|
||||||
|
** %_segdir and *piMaxLevel set to the highest level which has a
|
||||||
|
** segment. Otherwise returns the SQLite error which caused failure.
|
||||||
|
*/
|
||||||
|
static int segdir_count(fulltext_vtab *v, int *pnSegments, int *piMaxLevel){
|
||||||
|
sqlite3_stmt *s;
|
||||||
|
int rc = sql_get_statement(v, SEGDIR_COUNT_STMT, &s);
|
||||||
|
if( rc!=SQLITE_OK ) return rc;
|
||||||
|
|
||||||
|
rc = sqlite3_step(s);
|
||||||
|
/* TODO(shess): This case should not be possible? Should stronger
|
||||||
|
** measures be taken if it happens?
|
||||||
|
*/
|
||||||
|
if( rc==SQLITE_DONE ){
|
||||||
|
*pnSegments = 0;
|
||||||
|
*piMaxLevel = 0;
|
||||||
|
return SQLITE_OK;
|
||||||
|
}
|
||||||
|
if( rc!=SQLITE_ROW ) return rc;
|
||||||
|
|
||||||
|
*pnSegments = sqlite3_column_int(s, 0);
|
||||||
|
*piMaxLevel = sqlite3_column_int(s, 1);
|
||||||
|
|
||||||
|
/* We expect only one row. We must execute another sqlite3_step()
|
||||||
|
* to complete the iteration; otherwise the table will remain locked. */
|
||||||
|
rc = sqlite3_step(s);
|
||||||
|
if( rc==SQLITE_DONE ) return SQLITE_OK;
|
||||||
|
if( rc==SQLITE_ROW ) return SQLITE_ERROR;
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
/* TODO(shess) clearPendingTerms() is far down the file because
|
/* TODO(shess) clearPendingTerms() is far down the file because
|
||||||
** writeZeroSegment() is far down the file because LeafWriter is far
|
** writeZeroSegment() is far down the file because LeafWriter is far
|
||||||
** down the file. Consider refactoring the code to move the non-vtab
|
** down the file. Consider refactoring the code to move the non-vtab
|
||||||
@ -5004,6 +5040,12 @@ static int leavesReaderReset(LeavesReader *pReader){
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void leavesReaderDestroy(LeavesReader *pReader){
|
static void leavesReaderDestroy(LeavesReader *pReader){
|
||||||
|
/* If idx is -1, that means we're using a non-cached statement
|
||||||
|
** handle in the optimize() case, so we need to release it.
|
||||||
|
*/
|
||||||
|
if( pReader->pStmt!=NULL && pReader->idx==-1 ){
|
||||||
|
sqlite3_finalize(pReader->pStmt);
|
||||||
|
}
|
||||||
leafReaderDestroy(&pReader->leafReader);
|
leafReaderDestroy(&pReader->leafReader);
|
||||||
dataBufferDestroy(&pReader->rootData);
|
dataBufferDestroy(&pReader->rootData);
|
||||||
SCRAMBLE(pReader);
|
SCRAMBLE(pReader);
|
||||||
@ -5949,6 +5991,285 @@ static void snippetOffsetsFunc(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* OptLeavesReader is nearly identical to LeavesReader, except that
|
||||||
|
** where LeavesReader is geared towards the merging of complete
|
||||||
|
** segment levels (with exactly MERGE_COUNT segments), OptLeavesReader
|
||||||
|
** is geared towards implementation of the optimize() function, and
|
||||||
|
** can merge all segments simultaneously. This version may be
|
||||||
|
** somewhat less efficient than LeavesReader because it merges into an
|
||||||
|
** accumulator rather than doing an N-way merge, but since segment
|
||||||
|
** size grows exponentially (so segment count logarithmically) this is
|
||||||
|
** probably not an immediate problem.
|
||||||
|
*/
|
||||||
|
/* TODO(shess): Prove that assertion, or extend the merge code to
|
||||||
|
** merge tree fashion (like the prefix-searching code does).
|
||||||
|
*/
|
||||||
|
/* TODO(shess): OptLeavesReader and LeavesReader could probably be
|
||||||
|
** merged with little or no loss of performance for LeavesReader. The
|
||||||
|
** merged code would need to handle >MERGE_COUNT segments, and would
|
||||||
|
** also need to be able to optionally optimize away deletes.
|
||||||
|
*/
|
||||||
|
typedef struct OptLeavesReader {
|
||||||
|
/* Segment number, to order readers by age. */
|
||||||
|
int segment;
|
||||||
|
LeavesReader reader;
|
||||||
|
} OptLeavesReader;
|
||||||
|
|
||||||
|
/* Forwards to leavesReaderAtEnd() on the embedded LeavesReader. */
static int optLeavesReaderAtEnd(OptLeavesReader *pReader){
  LeavesReader *pInner = &pReader->reader;
  return leavesReaderAtEnd(pInner);
}
|
||||||
|
/* Forwards to leavesReaderTermBytes() on the embedded LeavesReader. */
static int optLeavesReaderTermBytes(OptLeavesReader *pReader){
  LeavesReader *pInner = &pReader->reader;
  return leavesReaderTermBytes(pInner);
}
|
||||||
|
/* Forwards to leavesReaderData() on the embedded LeavesReader. */
static const char *optLeavesReaderData(OptLeavesReader *pReader){
  LeavesReader *pInner = &pReader->reader;
  return leavesReaderData(pInner);
}
|
||||||
|
/* Forwards to leavesReaderDataBytes() on the embedded LeavesReader. */
static int optLeavesReaderDataBytes(OptLeavesReader *pReader){
  LeavesReader *pInner = &pReader->reader;
  return leavesReaderDataBytes(pInner);
}
|
||||||
|
/* Forwards to leavesReaderTerm() on the embedded LeavesReader. */
static const char *optLeavesReaderTerm(OptLeavesReader *pReader){
  LeavesReader *pInner = &pReader->reader;
  return leavesReaderTerm(pInner);
}
|
||||||
|
/* Forwards to leavesReaderStep() on the embedded LeavesReader and
** returns its result code unchanged.
*/
static int optLeavesReaderStep(fulltext_vtab *v, OptLeavesReader *pReader){
  LeavesReader *pInner = &pReader->reader;
  return leavesReaderStep(v, pInner);
}
|
||||||
|
/* Compares two readers by forwarding their embedded LeavesReaders to
** leavesReaderTermCmp().
*/
static int optLeavesReaderTermCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){
  LeavesReader *pLeft = &lr1->reader;
  LeavesReader *pRight = &lr2->reader;
  return leavesReaderTermCmp(pLeft, pRight);
}
|
||||||
|
/* Order by term ascending, segment ascending (oldest to newest), with
|
||||||
|
** exhausted readers to the end.
|
||||||
|
*/
|
||||||
|
static int optLeavesReaderCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){
|
||||||
|
int c = optLeavesReaderTermCmp(lr1, lr2);
|
||||||
|
if( c!=0 ) return c;
|
||||||
|
return lr1->segment-lr2->segment;
|
||||||
|
}
|
||||||
|
/* Bubble pLr[0] to appropriate place in pLr[1..nLr-1]. Assumes that
|
||||||
|
** pLr[1..nLr-1] is already sorted.
|
||||||
|
*/
|
||||||
|
static void optLeavesReaderReorder(OptLeavesReader *pLr, int nLr){
|
||||||
|
while( nLr>1 && optLeavesReaderCmp(pLr, pLr+1)>0 ){
|
||||||
|
OptLeavesReader tmp = pLr[0];
|
||||||
|
pLr[0] = pLr[1];
|
||||||
|
pLr[1] = tmp;
|
||||||
|
nLr--;
|
||||||
|
pLr++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* optimize() helper function. Put the readers in order and iterate
|
||||||
|
** through them, merging doclists for matching terms into pWriter.
|
||||||
|
** Returns SQLITE_OK on success, or the SQLite error code which
|
||||||
|
** prevented success.
|
||||||
|
*/
|
||||||
|
static int optimizeInternal(fulltext_vtab *v,
|
||||||
|
OptLeavesReader *readers, int nReaders,
|
||||||
|
LeafWriter *pWriter){
|
||||||
|
int i, rc = SQLITE_OK;
|
||||||
|
DataBuffer doclist, merged, tmp;
|
||||||
|
|
||||||
|
/* Order the readers. */
|
||||||
|
i = nReaders;
|
||||||
|
while( i-- > 0 ){
|
||||||
|
optLeavesReaderReorder(&readers[i], nReaders-i);
|
||||||
|
}
|
||||||
|
|
||||||
|
dataBufferInit(&doclist, LEAF_MAX);
|
||||||
|
dataBufferInit(&merged, LEAF_MAX);
|
||||||
|
|
||||||
|
/* Exhausted readers bubble to the end, so when the first reader is
|
||||||
|
** at eof, all are at eof.
|
||||||
|
*/
|
||||||
|
while( !optLeavesReaderAtEnd(&readers[0]) ){
|
||||||
|
|
||||||
|
/* Figure out how many readers share the next term. */
|
||||||
|
for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){
|
||||||
|
if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Special-case for no merge. */
|
||||||
|
if( i==1 ){
|
||||||
|
/* Trim deletions from the doclist. */
|
||||||
|
dataBufferReset(&merged);
|
||||||
|
docListTrim(DL_DEFAULT,
|
||||||
|
optLeavesReaderData(&readers[0]),
|
||||||
|
optLeavesReaderDataBytes(&readers[0]),
|
||||||
|
-1, DL_DEFAULT, &merged);
|
||||||
|
}else{
|
||||||
|
DLReader dlReaders[MERGE_COUNT];
|
||||||
|
int iReader, nReaders;
|
||||||
|
|
||||||
|
/* Prime the pipeline with the first reader's doclist. After
|
||||||
|
** one pass index 0 will reference the accumulated doclist.
|
||||||
|
*/
|
||||||
|
dlrInit(&dlReaders[0], DL_DEFAULT,
|
||||||
|
optLeavesReaderData(&readers[0]),
|
||||||
|
optLeavesReaderDataBytes(&readers[0]));
|
||||||
|
iReader = 1;
|
||||||
|
|
||||||
|
assert( iReader<i ); /* Must execute the loop at least once. */
|
||||||
|
while( iReader<i ){
|
||||||
|
/* Merge 16 inputs per pass. */
|
||||||
|
for( nReaders=1; iReader<i && nReaders<MERGE_COUNT;
|
||||||
|
iReader++, nReaders++ ){
|
||||||
|
dlrInit(&dlReaders[nReaders], DL_DEFAULT,
|
||||||
|
optLeavesReaderData(&readers[iReader]),
|
||||||
|
optLeavesReaderDataBytes(&readers[iReader]));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Merge doclists and swap result into accumulator. */
|
||||||
|
dataBufferReset(&merged);
|
||||||
|
docListMerge(&merged, dlReaders, nReaders);
|
||||||
|
tmp = merged;
|
||||||
|
merged = doclist;
|
||||||
|
doclist = tmp;
|
||||||
|
|
||||||
|
while( nReaders-- > 0 ){
|
||||||
|
dlrDestroy(&dlReaders[nReaders]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Accumulated doclist to reader 0 for next pass. */
|
||||||
|
dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Destroy reader that was left in the pipeline. */
|
||||||
|
dlrDestroy(&dlReaders[0]);
|
||||||
|
|
||||||
|
/* Trim deletions from the doclist. */
|
||||||
|
dataBufferReset(&merged);
|
||||||
|
docListTrim(DL_DEFAULT, doclist.pData, doclist.nData,
|
||||||
|
-1, DL_DEFAULT, &merged);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Only pass doclists with hits (skip if all hits deleted). */
|
||||||
|
if( merged.nData>0 ){
|
||||||
|
rc = leafWriterStep(v, pWriter,
|
||||||
|
optLeavesReaderTerm(&readers[0]),
|
||||||
|
optLeavesReaderTermBytes(&readers[0]),
|
||||||
|
merged.pData, merged.nData);
|
||||||
|
if( rc!=SQLITE_OK ) goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Step merged readers to next term and reorder. */
|
||||||
|
while( i-- > 0 ){
|
||||||
|
rc = optLeavesReaderStep(v, &readers[i]);
|
||||||
|
if( rc!=SQLITE_OK ) goto err;
|
||||||
|
|
||||||
|
optLeavesReaderReorder(&readers[i], nReaders-i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err:
|
||||||
|
dataBufferDestroy(&doclist);
|
||||||
|
dataBufferDestroy(&merged);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Implement optimize() function for FTS3. optimize(t) merges all
|
||||||
|
** segments in the fts index into a single segment. 't' is the magic
|
||||||
|
** table-named column.
|
||||||
|
*/
|
||||||
|
static void optimizeFunc(sqlite3_context *pContext,
|
||||||
|
int argc, sqlite3_value **argv){
|
||||||
|
fulltext_cursor *pCursor;
|
||||||
|
if( argc>1 ){
|
||||||
|
sqlite3_result_error(pContext, "excess arguments to optimize()",-1);
|
||||||
|
}else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
|
||||||
|
sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
|
||||||
|
sqlite3_result_error(pContext, "illegal first argument to optimize",-1);
|
||||||
|
}else{
|
||||||
|
fulltext_vtab *v;
|
||||||
|
int i, rc, iMaxLevel;
|
||||||
|
OptLeavesReader *readers;
|
||||||
|
int nReaders;
|
||||||
|
LeafWriter writer;
|
||||||
|
sqlite3_stmt *s;
|
||||||
|
|
||||||
|
memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
|
||||||
|
v = cursor_vtab(pCursor);
|
||||||
|
|
||||||
|
/* Flush any buffered updates before optimizing. */
|
||||||
|
rc = flushPendingTerms(v);
|
||||||
|
if( rc!=SQLITE_OK ) goto err;
|
||||||
|
|
||||||
|
rc = segdir_count(v, &nReaders, &iMaxLevel);
|
||||||
|
if( rc!=SQLITE_OK ) goto err;
|
||||||
|
if( nReaders==0 || nReaders==1 ){
|
||||||
|
sqlite3_result_text(pContext, "Index already optimal", -1,
|
||||||
|
SQLITE_STATIC);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
|
||||||
|
if( rc!=SQLITE_OK ) goto err;
|
||||||
|
|
||||||
|
readers = sqlite3_malloc(nReaders*sizeof(readers[0]));
|
||||||
|
if( readers==NULL ) goto err;
|
||||||
|
|
||||||
|
/* Note that there will already be a segment at this position
|
||||||
|
** until we call segdir_delete() on iMaxLevel.
|
||||||
|
*/
|
||||||
|
leafWriterInit(iMaxLevel, 0, &writer);
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
while( (rc = sqlite3_step(s))==SQLITE_ROW ){
|
||||||
|
sqlite_int64 iStart = sqlite3_column_int64(s, 0);
|
||||||
|
sqlite_int64 iEnd = sqlite3_column_int64(s, 1);
|
||||||
|
const char *pRootData = sqlite3_column_blob(s, 2);
|
||||||
|
int nRootData = sqlite3_column_bytes(s, 2);
|
||||||
|
|
||||||
|
assert( i<nReaders );
|
||||||
|
rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData,
|
||||||
|
&readers[i].reader);
|
||||||
|
if( rc!=SQLITE_OK ) break;
|
||||||
|
|
||||||
|
readers[i].segment = i;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we managed to succesfully read them all, optimize them. */
|
||||||
|
if( rc==SQLITE_DONE ){
|
||||||
|
assert( i==nReaders );
|
||||||
|
rc = optimizeInternal(v, readers, nReaders, &writer);
|
||||||
|
}
|
||||||
|
|
||||||
|
while( i-- > 0 ){
|
||||||
|
leavesReaderDestroy(&readers[i].reader);
|
||||||
|
}
|
||||||
|
sqlite3_free(readers);
|
||||||
|
|
||||||
|
/* If we've successfully gotten to here, delete the old segments
|
||||||
|
** and flush the interior structure of the new segment.
|
||||||
|
*/
|
||||||
|
if( rc==SQLITE_OK ){
|
||||||
|
for( i=0; i<=iMaxLevel; i++ ){
|
||||||
|
rc = segdir_delete(v, i);
|
||||||
|
if( rc!=SQLITE_OK ) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( rc==SQLITE_OK ) rc = leafWriterFinalize(v, &writer);
|
||||||
|
}
|
||||||
|
|
||||||
|
leafWriterDestroy(&writer);
|
||||||
|
|
||||||
|
if( rc!=SQLITE_OK ) goto err;
|
||||||
|
|
||||||
|
sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC);
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* TODO(shess): Error-handling needs to be improved along the
|
||||||
|
** lines of the dump_ functions.
|
||||||
|
*/
|
||||||
|
err:
|
||||||
|
{
|
||||||
|
char buf[512];
|
||||||
|
sqlite3_snprintf(sizeof(buf), buf, "Error in optimize: %s",
|
||||||
|
sqlite3_errmsg(sqlite3_context_db_handle(pContext)));
|
||||||
|
sqlite3_result_error(pContext, buf, -1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef SQLITE_TEST
|
#ifdef SQLITE_TEST
|
||||||
/* Generate an error of the form "<prefix>: <msg>". If msg is NULL,
|
/* Generate an error of the form "<prefix>: <msg>". If msg is NULL,
|
||||||
** pull the error from the context's db handle.
|
** pull the error from the context's db handle.
|
||||||
@ -6346,6 +6667,9 @@ static int fulltextFindFunction(
|
|||||||
}else if( strcmp(zName,"offsets")==0 ){
|
}else if( strcmp(zName,"offsets")==0 ){
|
||||||
*pxFunc = snippetOffsetsFunc;
|
*pxFunc = snippetOffsetsFunc;
|
||||||
return 1;
|
return 1;
|
||||||
|
}else if( strcmp(zName,"optimize")==0 ){
|
||||||
|
*pxFunc = optimizeFunc;
|
||||||
|
return 1;
|
||||||
#ifdef SQLITE_TEST
|
#ifdef SQLITE_TEST
|
||||||
/* NOTE(shess): These functions are present only for testing
|
/* NOTE(shess): These functions are present only for testing
|
||||||
** purposes. No particular effort is made to optimize their
|
** purposes. No particular effort is made to optimize their
|
||||||
@ -6479,6 +6803,7 @@ int sqlite3Fts2Init(sqlite3 *db){
|
|||||||
&& SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer"))
|
&& SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer"))
|
||||||
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
|
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
|
||||||
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))
|
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))
|
||||||
|
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1))
|
||||||
#ifdef SQLITE_TEST
|
#ifdef SQLITE_TEST
|
||||||
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1))
|
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1))
|
||||||
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1))
|
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1))
|
||||||
|
14
manifest
14
manifest
@ -1,5 +1,5 @@
|
|||||||
C Delete\sall\sfts2\sindex\sdata\sthe\stable\sbecomes\sempty.\r\nBackports\scheck-in\s(5413)\sfrom\sfts3.\s(CVS\s5457)
|
C Implement\soptimize()\sfunction.\r\nBackports\scheck-in\s(5417)\sfrom\sfts3.\s(CVS\s5458)
|
||||||
D 2008-07-22T23:41:26
|
D 2008-07-22T23:49:44
|
||||||
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
|
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
|
||||||
F Makefile.in 77ff156061bb870aa0a8b3d545c670d08070f7e6
|
F Makefile.in 77ff156061bb870aa0a8b3d545c670d08070f7e6
|
||||||
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
|
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
|
||||||
@ -39,7 +39,7 @@ F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
|
|||||||
F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
|
F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
|
||||||
F ext/fts2/README.tokenizers 21e3684ea5a095b55d70f6878b4ce6af5932dfb7
|
F ext/fts2/README.tokenizers 21e3684ea5a095b55d70f6878b4ce6af5932dfb7
|
||||||
F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
|
F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
|
||||||
F ext/fts2/fts2.c c0d287669e35e3f0998b42fb06ebd2f89e5b2593
|
F ext/fts2/fts2.c ff1d7646d48810d08b5a081be3495b16c3a603f1
|
||||||
F ext/fts2/fts2.h da5f76c65163301d1068a971fd32f4119e3c95fa
|
F ext/fts2/fts2.h da5f76c65163301d1068a971fd32f4119e3c95fa
|
||||||
F ext/fts2/fts2_hash.c 2689e42e1107ea67207f725cf69cf8972d00cf93
|
F ext/fts2/fts2_hash.c 2689e42e1107ea67207f725cf69cf8972d00cf93
|
||||||
F ext/fts2/fts2_hash.h 9a5b1be94664139f93217a0770d7144425cffb3a
|
F ext/fts2/fts2_hash.h 9a5b1be94664139f93217a0770d7144425cffb3a
|
||||||
@ -316,7 +316,7 @@ F test/fts2m.test 4b30142ead6f3ed076e880a2a464064c5ad58c51
|
|||||||
F test/fts2n.test 12b9c5352128cebd1c6b8395e43788d4b09087c2
|
F test/fts2n.test 12b9c5352128cebd1c6b8395e43788d4b09087c2
|
||||||
F test/fts2o.test c6a79567d85403dc4d15b89f3f9799a0a0aef065
|
F test/fts2o.test c6a79567d85403dc4d15b89f3f9799a0a0aef065
|
||||||
F test/fts2p.test 4b48c35c91e6a7dbf5ac8d1e5691823cc999aafb
|
F test/fts2p.test 4b48c35c91e6a7dbf5ac8d1e5691823cc999aafb
|
||||||
F test/fts2q.test ee2cb138a599a07de3a03cd7a71a4e9e40120f93
|
F test/fts2q.test b2fbbe038b7a31a52a6079b215e71226d8c6a682
|
||||||
F test/fts2token.test d8070b241a15ff13592a9ae4a8b7c171af6f445a
|
F test/fts2token.test d8070b241a15ff13592a9ae4a8b7c171af6f445a
|
||||||
F test/fts3.test 6ee4c38b0864583c80e82a2d4372f63aae8b10c7
|
F test/fts3.test 6ee4c38b0864583c80e82a2d4372f63aae8b10c7
|
||||||
F test/fts3aa.test 432d1d5c41939bb5405d4d6c80a9ec759b363393
|
F test/fts3aa.test 432d1d5c41939bb5405d4d6c80a9ec759b363393
|
||||||
@ -611,7 +611,7 @@ F tool/speedtest16.c c8a9c793df96db7e4933f0852abb7a03d48f2e81
|
|||||||
F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
|
F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
|
||||||
F tool/speedtest8.c 1dbced29de5f59ba2ebf877edcadf171540374d1
|
F tool/speedtest8.c 1dbced29de5f59ba2ebf877edcadf171540374d1
|
||||||
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
|
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
|
||||||
P 4e47394be9dfbf0f9309e55eb6c6a3a517ea2006
|
P 4c98179be258319f441ae4e123cf59af77e96409
|
||||||
R 47eed6cf50908f452be50506a6925d0d
|
R 7d4ed449896aa36fee287e4efa2f50e0
|
||||||
U shess
|
U shess
|
||||||
Z f9e0e9386065f5f55b31594485ae0dd8
|
Z 5157ad4962dd96eaaf179dc694e76db5
|
||||||
|
@ -1 +1 @@
|
|||||||
4c98179be258319f441ae4e123cf59af77e96409
|
c16900dc7603cab30f8729b25361bc88bb37ae43
|
222
test/fts2q.test
222
test/fts2q.test
@ -11,7 +11,7 @@
|
|||||||
# This file implements regression tests for SQLite library. The focus
|
# This file implements regression tests for SQLite library. The focus
|
||||||
# of this script is testing the FTS2 module's optimize() function.
|
# of this script is testing the FTS2 module's optimize() function.
|
||||||
#
|
#
|
||||||
# $Id: fts2q.test,v 1.1 2008/07/22 23:41:26 shess Exp $
|
# $Id: fts2q.test,v 1.2 2008/07/22 23:49:44 shess Exp $
|
||||||
#
|
#
|
||||||
|
|
||||||
set testdir [file dirname $argv0]
|
set testdir [file dirname $argv0]
|
||||||
@ -123,6 +123,224 @@ check_doclist fts2q-1.2.2 0 0 is {[1 0[1]]}
|
|||||||
check_doclist fts2q-1.2.3 0 0 test {[1 0[3]]}
|
check_doclist fts2q-1.2.3 0 0 test {[1 0[3]]}
|
||||||
check_doclist fts2q-1.2.4 0 0 this {[1 0[0]]}
|
check_doclist fts2q-1.2.4 0 0 this {[1 0[0]]}
|
||||||
|
|
||||||
# TODO(shess): optimize() tests here.
|
#*************************************************************************
|
||||||
|
# Test results when everything is optimized manually.
|
||||||
|
# NOTE(shess): This is a copy of fts2c-1.3. I've pulled a copy here
|
||||||
|
# because fts2q-2 and fts2q-3 should have identical results.
|
||||||
|
db eval {
|
||||||
|
DROP TABLE IF EXISTS t1;
|
||||||
|
CREATE VIRTUAL TABLE t1 USING fts2(c);
|
||||||
|
INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test');
|
||||||
|
INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test');
|
||||||
|
INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test');
|
||||||
|
DELETE FROM t1 WHERE rowid IN (1,3);
|
||||||
|
DROP TABLE IF EXISTS t1old;
|
||||||
|
ALTER TABLE t1 RENAME TO t1old;
|
||||||
|
CREATE VIRTUAL TABLE t1 USING fts2(c);
|
||||||
|
INSERT INTO t1 (rowid, c) SELECT rowid, c FROM t1old;
|
||||||
|
DROP TABLE t1old;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Should be a single optimal segment with the same logical results.
|
||||||
|
do_test fts2q-2.segments {
|
||||||
|
execsql {
|
||||||
|
SELECT level, idx FROM t1_segdir ORDER BY level, idx;
|
||||||
|
}
|
||||||
|
} {0 0}
|
||||||
|
do_test fts2q-2.matches {
|
||||||
|
execsql {
|
||||||
|
SELECT OFFSETS(t1) FROM t1
|
||||||
|
WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid;
|
||||||
|
}
|
||||||
|
} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}}
|
||||||
|
|
||||||
|
check_terms_all fts2q-2.1 {a test that was}
|
||||||
|
check_doclist_all fts2q-2.1.1 a {[2 0[2]]}
|
||||||
|
check_doclist_all fts2q-2.1.2 test {[2 0[3]]}
|
||||||
|
check_doclist_all fts2q-2.1.3 that {[2 0[0]]}
|
||||||
|
check_doclist_all fts2q-2.1.4 was {[2 0[1]]}
|
||||||
|
|
||||||
|
check_terms fts2q-2.2 0 0 {a test that was}
|
||||||
|
check_doclist fts2q-2.2.1 0 0 a {[2 0[2]]}
|
||||||
|
check_doclist fts2q-2.2.2 0 0 test {[2 0[3]]}
|
||||||
|
check_doclist fts2q-2.2.3 0 0 that {[2 0[0]]}
|
||||||
|
check_doclist fts2q-2.2.4 0 0 was {[2 0[1]]}
|
||||||
|
|
||||||
|
#*************************************************************************
|
||||||
|
# Test results when everything is optimized via optimize().
|
||||||
|
db eval {
|
||||||
|
DROP TABLE IF EXISTS t1;
|
||||||
|
CREATE VIRTUAL TABLE t1 USING fts2(c);
|
||||||
|
INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test');
|
||||||
|
INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test');
|
||||||
|
INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test');
|
||||||
|
DELETE FROM t1 WHERE rowid IN (1,3);
|
||||||
|
SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Should be a single optimal segment with the same logical results.
|
||||||
|
do_test fts2q-3.segments {
|
||||||
|
execsql {
|
||||||
|
SELECT level, idx FROM t1_segdir ORDER BY level, idx;
|
||||||
|
}
|
||||||
|
} {0 0}
|
||||||
|
do_test fts2q-3.matches {
|
||||||
|
execsql {
|
||||||
|
SELECT OFFSETS(t1) FROM t1
|
||||||
|
WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid;
|
||||||
|
}
|
||||||
|
} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}}
|
||||||
|
|
||||||
|
check_terms_all fts2q-3.1 {a test that was}
|
||||||
|
check_doclist_all fts2q-3.1.1 a {[2 0[2]]}
|
||||||
|
check_doclist_all fts2q-3.1.2 test {[2 0[3]]}
|
||||||
|
check_doclist_all fts2q-3.1.3 that {[2 0[0]]}
|
||||||
|
check_doclist_all fts2q-3.1.4 was {[2 0[1]]}
|
||||||
|
|
||||||
|
check_terms fts2q-3.2 0 0 {a test that was}
|
||||||
|
check_doclist fts2q-3.2.1 0 0 a {[2 0[2]]}
|
||||||
|
check_doclist fts2q-3.2.2 0 0 test {[2 0[3]]}
|
||||||
|
check_doclist fts2q-3.2.3 0 0 that {[2 0[0]]}
|
||||||
|
check_doclist fts2q-3.2.4 0 0 was {[2 0[1]]}
|
||||||
|
|
||||||
|
#*************************************************************************
|
||||||
|
# Test optimize() against a table involving segment merges.
|
||||||
|
# NOTE(shess): Since there's no transaction, each of the INSERT/UPDATE
|
||||||
|
# statements generates a segment.
|
||||||
|
db eval {
|
||||||
|
DROP TABLE IF EXISTS t1;
|
||||||
|
CREATE VIRTUAL TABLE t1 USING fts2(c);
|
||||||
|
|
||||||
|
INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test');
|
||||||
|
INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test');
|
||||||
|
INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test');
|
||||||
|
|
||||||
|
UPDATE t1 SET c = 'This is a test one' WHERE rowid = 1;
|
||||||
|
UPDATE t1 SET c = 'That was a test one' WHERE rowid = 2;
|
||||||
|
UPDATE t1 SET c = 'This is a test one' WHERE rowid = 3;
|
||||||
|
|
||||||
|
UPDATE t1 SET c = 'This is a test two' WHERE rowid = 1;
|
||||||
|
UPDATE t1 SET c = 'That was a test two' WHERE rowid = 2;
|
||||||
|
UPDATE t1 SET c = 'This is a test two' WHERE rowid = 3;
|
||||||
|
|
||||||
|
UPDATE t1 SET c = 'This is a test three' WHERE rowid = 1;
|
||||||
|
UPDATE t1 SET c = 'That was a test three' WHERE rowid = 2;
|
||||||
|
UPDATE t1 SET c = 'This is a test three' WHERE rowid = 3;
|
||||||
|
|
||||||
|
UPDATE t1 SET c = 'This is a test four' WHERE rowid = 1;
|
||||||
|
UPDATE t1 SET c = 'That was a test four' WHERE rowid = 2;
|
||||||
|
UPDATE t1 SET c = 'This is a test four' WHERE rowid = 3;
|
||||||
|
|
||||||
|
UPDATE t1 SET c = 'This is a test' WHERE rowid = 1;
|
||||||
|
UPDATE t1 SET c = 'That was a test' WHERE rowid = 2;
|
||||||
|
UPDATE t1 SET c = 'This is a test' WHERE rowid = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
# 2 segments in level 0, 1 in level 1 (18 segments created, 16
|
||||||
|
# merged).
|
||||||
|
do_test fts2q-4.segments {
|
||||||
|
execsql {
|
||||||
|
SELECT level, idx FROM t1_segdir ORDER BY level, idx;
|
||||||
|
}
|
||||||
|
} {0 0 0 1 1 0}
|
||||||
|
|
||||||
|
do_test fts2q-4.matches {
|
||||||
|
execsql {
|
||||||
|
SELECT OFFSETS(t1) FROM t1
|
||||||
|
WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid;
|
||||||
|
}
|
||||||
|
} [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \
|
||||||
|
{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \
|
||||||
|
{0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}]
|
||||||
|
|
||||||
|
check_terms_all fts2q-4.1 {a four is one test that this three two was}
|
||||||
|
check_doclist_all fts2q-4.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]}
|
||||||
|
check_doclist_all fts2q-4.1.2 four {}
|
||||||
|
check_doclist_all fts2q-4.1.3 is {[1 0[1]] [3 0[1]]}
|
||||||
|
check_doclist_all fts2q-4.1.4 one {}
|
||||||
|
check_doclist_all fts2q-4.1.5 test {[1 0[3]] [2 0[3]] [3 0[3]]}
|
||||||
|
check_doclist_all fts2q-4.1.6 that {[2 0[0]]}
|
||||||
|
check_doclist_all fts2q-4.1.7 this {[1 0[0]] [3 0[0]]}
|
||||||
|
check_doclist_all fts2q-4.1.8 three {}
|
||||||
|
check_doclist_all fts2q-4.1.9 two {}
|
||||||
|
check_doclist_all fts2q-4.1.10 was {[2 0[1]]}
|
||||||
|
|
||||||
|
check_terms fts2q-4.2 0 0 {a four test that was}
|
||||||
|
check_doclist fts2q-4.2.1 0 0 a {[2 0[2]]}
|
||||||
|
check_doclist fts2q-4.2.2 0 0 four {[2]}
|
||||||
|
check_doclist fts2q-4.2.3 0 0 test {[2 0[3]]}
|
||||||
|
check_doclist fts2q-4.2.4 0 0 that {[2 0[0]]}
|
||||||
|
check_doclist fts2q-4.2.5 0 0 was {[2 0[1]]}
|
||||||
|
|
||||||
|
check_terms fts2q-4.3 0 1 {a four is test this}
|
||||||
|
check_doclist fts2q-4.3.1 0 1 a {[3 0[2]]}
|
||||||
|
check_doclist fts2q-4.3.2 0 1 four {[3]}
|
||||||
|
check_doclist fts2q-4.3.3 0 1 is {[3 0[1]]}
|
||||||
|
check_doclist fts2q-4.3.4 0 1 test {[3 0[3]]}
|
||||||
|
check_doclist fts2q-4.3.5 0 1 this {[3 0[0]]}
|
||||||
|
|
||||||
|
check_terms fts2q-4.4 1 0 {a four is one test that this three two was}
|
||||||
|
check_doclist fts2q-4.4.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]}
|
||||||
|
check_doclist fts2q-4.4.2 1 0 four {[1] [2 0[4]] [3 0[4]]}
|
||||||
|
check_doclist fts2q-4.4.3 1 0 is {[1 0[1]] [3 0[1]]}
|
||||||
|
check_doclist fts2q-4.4.4 1 0 one {[1] [2] [3]}
|
||||||
|
check_doclist fts2q-4.4.5 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]}
|
||||||
|
check_doclist fts2q-4.4.6 1 0 that {[2 0[0]]}
|
||||||
|
check_doclist fts2q-4.4.7 1 0 this {[1 0[0]] [3 0[0]]}
|
||||||
|
check_doclist fts2q-4.4.8 1 0 three {[1] [2] [3]}
|
||||||
|
check_doclist fts2q-4.4.9 1 0 two {[1] [2] [3]}
|
||||||
|
check_doclist fts2q-4.4.10 1 0 was {[2 0[1]]}
|
||||||
|
|
||||||
|
# Optimize should leave the result in the level of the highest-level
|
||||||
|
# prior segment.
|
||||||
|
do_test fts2q-4.5 {
|
||||||
|
execsql {
|
||||||
|
SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
|
||||||
|
SELECT level, idx FROM t1_segdir ORDER BY level, idx;
|
||||||
|
}
|
||||||
|
} {{Index optimized} 1 0}
|
||||||
|
|
||||||
|
# Identical to fts2q-4.matches.
|
||||||
|
do_test fts2q-4.5.matches {
|
||||||
|
execsql {
|
||||||
|
SELECT OFFSETS(t1) FROM t1
|
||||||
|
WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid;
|
||||||
|
}
|
||||||
|
} [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \
|
||||||
|
{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \
|
||||||
|
{0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}]
|
||||||
|
|
||||||
|
check_terms_all fts2q-4.5.1 {a is test that this was}
|
||||||
|
check_doclist_all fts2q-4.5.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]}
|
||||||
|
check_doclist_all fts2q-4.5.1.2 is {[1 0[1]] [3 0[1]]}
|
||||||
|
check_doclist_all fts2q-4.5.1.3 test {[1 0[3]] [2 0[3]] [3 0[3]]}
|
||||||
|
check_doclist_all fts2q-4.5.1.4 that {[2 0[0]]}
|
||||||
|
check_doclist_all fts2q-4.5.1.5 this {[1 0[0]] [3 0[0]]}
|
||||||
|
check_doclist_all fts2q-4.5.1.6 was {[2 0[1]]}
|
||||||
|
|
||||||
|
check_terms fts2q-4.5.2 1 0 {a is test that this was}
|
||||||
|
check_doclist fts2q-4.5.2.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]}
|
||||||
|
check_doclist fts2q-4.5.2.2 1 0 is {[1 0[1]] [3 0[1]]}
|
||||||
|
check_doclist fts2q-4.5.2.3 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]}
|
||||||
|
check_doclist fts2q-4.5.2.4 1 0 that {[2 0[0]]}
|
||||||
|
check_doclist fts2q-4.5.2.5 1 0 this {[1 0[0]] [3 0[0]]}
|
||||||
|
check_doclist fts2q-4.5.2.6 1 0 was {[2 0[1]]}
|
||||||
|
|
||||||
|
# Re-optimizing does nothing.
|
||||||
|
do_test fts2q-5.0 {
|
||||||
|
execsql {
|
||||||
|
SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
|
||||||
|
SELECT level, idx FROM t1_segdir ORDER BY level, idx;
|
||||||
|
}
|
||||||
|
} {{Index already optimal} 1 0}
|
||||||
|
|
||||||
|
# Even if we move things around, still does nothing.
|
||||||
|
do_test fts2q-5.1 {
|
||||||
|
execsql {
|
||||||
|
UPDATE t1_segdir SET level = 2 WHERE level = 1 AND idx = 0;
|
||||||
|
SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
|
||||||
|
SELECT level, idx FROM t1_segdir ORDER BY level, idx;
|
||||||
|
}
|
||||||
|
} {{Index already optimal} 2 0}
|
||||||
|
|
||||||
finish_test
|
finish_test
|
||||||
|
Loading…
x
Reference in New Issue
Block a user