Propagate prefix flag through implementation of doclist query code.

Also implement correct prefix-handling for traversal of interior nodes
of segment tree.  A given prefix can span multiple children of an
interior node, and from there the branches need to be followed in
parallel. (CVS 3889)

FossilOrigin-Name: cae844a01a1d87ffb00bba8b4e7b62a92e633aa9
This commit is contained in:
shess 2007-04-30 22:09:36 +00:00
parent 276fdbfd61
commit 0b6212090f
3 changed files with 93 additions and 45 deletions

View File

@ -1520,6 +1520,7 @@ typedef struct QueryTerm {
short int iColumn; /* Column of the index that must match this term */
signed char isOr; /* this term is preceded by "OR" */
signed char isNot; /* this term is preceded by "-" */
signed char isPrefix; /* this term is followed by "*" */
char *pTerm; /* text of the term. '\000' terminated. malloced */
int nTerm; /* Number of bytes in pTerm[] */
} QueryTerm;
@ -3232,7 +3233,7 @@ static int fulltextNext(sqlite3_vtab_cursor *pCursor){
** docListOfTerm().
*/
static int termSelect(fulltext_vtab *v, int iColumn,
const char *pTerm, int nTerm,
const char *pTerm, int nTerm, int isPrefix,
DocListType iType, DataBuffer *out);
/* Return a DocList corresponding to the query term *pTerm. If *pTerm
@ -3258,13 +3259,13 @@ static int docListOfTerm(
assert( v->nPendingData<0 );
dataBufferInit(&left, 0);
rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm,
rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pQTerm->isPrefix,
0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &left);
if( rc ) return rc;
for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){
dataBufferInit(&right, 0);
rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm,
DL_POSITIONS, &right);
pQTerm[i].isPrefix, DL_POSITIONS, &right);
if( rc ){
dataBufferDestroy(&left);
return rc;
@ -3297,6 +3298,7 @@ static void queryAdd(Query *q, const char *pTerm, int nTerm){
t->pTerm[nTerm] = 0;
t->nTerm = nTerm;
t->isOr = q->nextIsOr;
t->isPrefix = 0;
q->nextIsOr = 0;
t->iColumn = q->nextColumn;
q->nextColumn = q->dfltColumn;
@ -4182,10 +4184,10 @@ static void interiorReaderStep(InteriorReader *pReader){
}
/* Compare the current term to pTerm[nTerm], returning strcmp-style
** results.
** results. If isPrefix, equality means equal through nTerm bytes.
*/
static int interiorReaderTermCmp(InteriorReader *pReader,
const char *pTerm, int nTerm){
const char *pTerm, int nTerm, int isPrefix){
const char *pReaderTerm = interiorReaderTerm(pReader);
int nReaderTerm = interiorReaderTermBytes(pReader);
int c, n = nReaderTerm<nTerm ? nReaderTerm : nTerm;
@ -4198,6 +4200,7 @@ static int interiorReaderTermCmp(InteriorReader *pReader,
c = memcmp(pReaderTerm, pTerm, n);
if( c!=0 ) return c;
if( isPrefix && n==nTerm ) return 0;
return nReaderTerm - nTerm;
}
@ -5150,10 +5153,12 @@ static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData,
*/
static int loadSegmentLeaves(fulltext_vtab *v,
sqlite_int64 iStartLeaf, sqlite_int64 iEndLeaf,
const char *pTerm, int nTerm,
DataBuffer *out){
const char *pTerm, int nTerm, DataBuffer *out){
int rc;
LeavesReader reader;
int rc = leavesReaderInit(v, 0, iStartLeaf, iEndLeaf, NULL, 0, &reader);
assert( iStartLeaf<=iEndLeaf );
rc = leavesReaderInit(v, 0, iStartLeaf, iEndLeaf, NULL, 0, &reader);
if( rc!=SQLITE_OK ) return rc;
rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, out);
@ -5162,43 +5167,63 @@ static int loadSegmentLeaves(fulltext_vtab *v,
return rc;
}
/* Taking pData/nData as an interior node, find the child node which
** could include pTerm/nTerm. Note that the interior node terms
** logically come between the blocks, so there is one more blockid
** than there are terms (that block contains terms >= the last
** interior-node term).
/* Taking pData/nData as an interior node, find the sequence of child
** nodes which could include pTerm/nTerm/isPrefix. Note that the
** interior node terms logically come between the blocks, so there is
** one more blockid than there are terms (that block contains terms >=
** the last interior-node term).
*/
static void getChildContaining(const char *pData, int nData,
const char *pTerm, int nTerm,
sqlite_int64 *piBlockid){
static void getChildrenContaining(const char *pData, int nData,
const char *pTerm, int nTerm, int isPrefix,
sqlite_int64 *piStartChild,
sqlite_int64 *piEndChild){
InteriorReader reader;
assert( nData>1 );
assert( *pData!='\0' );
interiorReaderInit(pData, nData, &reader);
/* Scan for the first child which could contain pTerm/nTerm. */
while( !interiorReaderAtEnd(&reader) ){
if( interiorReaderTermCmp(&reader, pTerm, nTerm)>0 ) break;
if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break;
interiorReaderStep(&reader);
}
*piBlockid = interiorReaderCurrentBlockid(&reader);
*piStartChild = interiorReaderCurrentBlockid(&reader);
/* Keep scanning to find a term greater than our term, using prefix
** comparison if indicated. If isPrefix is false, this will be the
** same blockid as the starting block.
*/
while( !interiorReaderAtEnd(&reader) ){
if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break;
interiorReaderStep(&reader);
}
*piEndChild = interiorReaderCurrentBlockid(&reader);
interiorReaderDestroy(&reader);
/* Children must ascend, and if !prefix, both must be the same. */
assert( *piEndChild>=*piStartChild );
assert( isPrefix || *piStartChild==*piEndChild );
}
/* Read block at iBlockid and pass it with other params to
** getChildContaining().
** getChildrenContaining().
*/
static int loadAndGetChildContaining(fulltext_vtab *v, sqlite_int64 iBlockid,
const char *pTerm, int nTerm,
sqlite_int64 *piBlockid){
static int loadAndGetChildrenContaining(
fulltext_vtab *v,
sqlite_int64 iBlockid,
const char *pTerm, int nTerm, int isPrefix,
sqlite_int64 *piStartChild, sqlite_int64 *piEndChild
){
sqlite3_stmt *s = NULL;
int rc;
assert( iBlockid!=0 );
assert( pTerm!=NULL );
assert( nTerm!=0 ); /* TODO(shess) Why not allow this? */
assert( piBlockid!=NULL );
assert( piStartChild!=NULL );
assert( piEndChild!=NULL );
rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s);
if( rc!=SQLITE_OK ) return rc;
@ -5210,8 +5235,8 @@ static int loadAndGetChildContaining(fulltext_vtab *v, sqlite_int64 iBlockid,
if( rc==SQLITE_DONE ) return SQLITE_ERROR;
if( rc!=SQLITE_ROW ) return rc;
getChildContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0),
pTerm, nTerm, piBlockid);
getChildrenContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0),
pTerm, nTerm, isPrefix, piStartChild, piEndChild);
/* We expect only one row. We must execute another sqlite3_step()
* to complete the iteration; otherwise the table will remain
@ -5229,24 +5254,44 @@ static int loadAndGetChildContaining(fulltext_vtab *v, sqlite_int64 iBlockid,
*/
static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData,
sqlite_int64 iLeavesEnd,
const char *pTerm, int nTerm, DataBuffer *out){
const char *pTerm, int nTerm, int isPrefix,
DataBuffer *out){
/* Special case where root is a leaf. */
if( *pData=='\0' ){
assert( !isPrefix ); /* TODO(shess) Add prefix support. */
return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, out);
}else{
int rc;
sqlite_int64 iBlockid;
sqlite_int64 iStartChild, iEndChild;
/* Process pData as an interior node, then loop down the tree
** until we find a leaf node to scan for the term.
** until we find the set of leaf nodes to scan for the term.
*/
getChildContaining(pData, nData, pTerm, nTerm, &iBlockid);
while( iBlockid>iLeavesEnd ){
rc = loadAndGetChildContaining(v, iBlockid, pTerm, nTerm, &iBlockid);
getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix,
&iStartChild, &iEndChild);
while( iStartChild>iLeavesEnd ){
sqlite_int64 iNextStart, iNextEnd;
rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix,
&iNextStart, &iNextEnd);
if( rc!=SQLITE_OK ) return rc;
}
return loadSegmentLeaves(v, iBlockid, iBlockid, pTerm, nTerm, out);
/* If we've branched, follow the end branch, too. */
if( iStartChild!=iEndChild ){
sqlite_int64 iDummy;
rc = loadAndGetChildrenContaining(v, iEndChild, pTerm, nTerm, isPrefix,
&iDummy, &iNextEnd);
if( rc!=SQLITE_OK ) return rc;
}
assert( iNextStart<=iNextEnd );
iStartChild = iNextStart;
iEndChild = iNextEnd;
}
assert( iStartChild<=iLeavesEnd );
assert( iEndChild<=iLeavesEnd );
assert( !isPrefix ); /* TODO(shess) Add prefix support. */
return loadSegmentLeaves(v, iStartChild, iEndChild, pTerm, nTerm, out);
}
}
@ -5260,7 +5305,8 @@ static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData,
*/
static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
sqlite_int64 iLeavesEnd,
const char *pTerm, int nTerm, DataBuffer *out){
const char *pTerm, int nTerm, int isPrefix,
DataBuffer *out){
DataBuffer result;
int rc;
@ -5270,7 +5316,8 @@ static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
assert( v->nPendingData<0 );
dataBufferInit(&result, 0);
rc = loadSegmentInt(v, pData, nData, iLeavesEnd, pTerm, nTerm, &result);
rc = loadSegmentInt(v, pData, nData, iLeavesEnd,
pTerm, nTerm, isPrefix, &result);
if( rc==SQLITE_OK && result.nData>0 ){
if( out->nData==0 ){
DataBuffer tmp = *out;
@ -5298,7 +5345,7 @@ static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
** into *out.
*/
static int termSelect(fulltext_vtab *v, int iColumn,
const char *pTerm, int nTerm,
const char *pTerm, int nTerm, int isPrefix,
DocListType iType, DataBuffer *out){
DataBuffer doclist;
sqlite3_stmt *s;
@ -5317,7 +5364,8 @@ static int termSelect(fulltext_vtab *v, int iColumn,
const char *pData = sqlite3_column_blob(s, 0);
const int nData = sqlite3_column_bytes(s, 0);
const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1);
rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, &doclist);
rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix,
&doclist);
if( rc!=SQLITE_OK ) goto err;
}
if( rc==SQLITE_DONE ){

View File

@ -1,5 +1,5 @@
C Fix\sa\spotential\ssegfault\sfollowing\sa\smalloc()\sfailure\sduring\sa\scall\nto\ssqlite3_prepare()\swhere\sthe\snBytes\sparameter\sis\spositive\sbut\sless\sthan\nthe\slength\sof\sthe\sinput\sSQL\sstring.\s(CVS\s3888)
D 2007-04-30T21:39:16
C Propagate\sprefix\sflag\sthrough\simplementation\sof\sdoclist\squery\scode.\nAlso\simplement\scorrect\sprefix-handling\sfor\straversal\sof\sinterior\snodes\nof\ssegment\stree.\s\sA\sgiven\sprefix\scan\sspan\smultiple\schildren\sof\san\ninterior\snode,\sand\sfrom\sthere\sthe\sbranches\sneed\sto\sbe\sfollowed\sin\nparallel.\s(CVS\s3889)
D 2007-04-30T22:09:36
F Makefile.in 8cab54f7c9f5af8f22fd97ddf1ecfd1e1860de62
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@ -34,7 +34,7 @@ F ext/fts1/fulltext.h 08525a47852d1d62a0be81d3fc3fe2d23b094efd
F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts2/fts2.c cb7ca4e320f9aa99dad1d160288944c13778a678
F ext/fts2/fts2.c c750b2db623587021a402631a7aa582d81852c44
F ext/fts2/fts2.h 591916a822cfb6426518fdbf6069359119bc46eb
F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b
F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e
@ -466,7 +466,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
P 7ddb82668906e33e2d6a796f2da1795032e036d5
R 6c84bdbf40bcc10c544725efed0e51c5
U drh
Z dbcc6f5007a9724aa47f910a6b885e46
P 27bf3fc3cf3c9c7acdbf9281a4669c9f642b0097
R 63d81c2be8a311270ed3c4c7a986450a
U shess
Z 814272b1a687282acf6dc59544d4e360

View File

@ -1 +1 @@
27bf3fc3cf3c9c7acdbf9281a4669c9f642b0097
cae844a01a1d87ffb00bba8b4e7b62a92e633aa9