Modify loadSegmentLeavesInt() to correctly handle prefix searching.
The new function docListUnion() is used to accumulate a union of the hits for the matching terms, which will be merged across segments using docListMerge(). (CVS 3891) FossilOrigin-Name: 72c796307338c2751a91c30f6fb16989afbf3816
This commit is contained in:
parent
c80f058db3
commit
cc3e986643
184
ext/fts2/fts2.c
184
ext/fts2/fts2.c
@ -708,6 +708,7 @@ static void docListValidate(DocListType iType, const char *pData, int nData,
|
||||
** dlwInit - initialize to write a given type doclist to a buffer.
|
||||
** dlwDestroy - clear the writer's memory. Does not free buffer.
|
||||
** dlwAppend - append raw doclist data to buffer.
|
||||
** dlwCopy - copy next doclist from reader to writer.
|
||||
** dlwAdd - construct doclist element and append to buffer.
|
||||
** Only apply dlwAdd() to DL_DOCIDS doclists (else use PLWriter).
|
||||
*/
|
||||
@ -771,6 +772,10 @@ static void dlwAppend(DLWriter *pWriter,
|
||||
}
|
||||
pWriter->iPrevDocid = iLastDocid;
|
||||
}
|
||||
/* Append the doclist element that pReader is currently positioned on
** to pWriter.  The element's docid is read once and passed for both
** of dlwAppend()'s docid arguments, since exactly one document is
** being copied.
*/
static void dlwCopy(DLWriter *pWriter, DLReader *pReader){
  sqlite_int64 iDocid = dlrDocid(pReader);
  dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader),
            iDocid, iDocid);
}
|
||||
static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){
|
||||
char c[VARINT_MAX];
|
||||
int n = putVarint(c, iDocid-pWriter->iPrevDocid);
|
||||
@ -886,6 +891,7 @@ static void plrDestroy(PLReader *pReader){
|
||||
** plwInit - init for writing a document's poslist.
|
||||
** plwDestroy - clear a writer.
|
||||
** plwAdd - append position and offset information.
|
||||
** plwCopy - copy next position's data from reader to writer.
|
||||
** plwTerminate - add any necessary doclist terminator.
|
||||
**
|
||||
** Calling plwAdd() after plwTerminate() may result in a corrupt
|
||||
@ -945,6 +951,10 @@ static void plwAdd(PLWriter *pWriter, int iColumn, int iPos,
|
||||
}
|
||||
dataBufferAppend(pWriter->dlw->b, c, n);
|
||||
}
|
||||
/* Copy the position entry pReader currently points at into pWriter.
** The column, position, start offset and end offset are read from
** the reader and handed to plwAdd() unchanged.
*/
static void plwCopy(PLWriter *pWriter, PLReader *pReader){
  int iColumn = plrColumn(pReader);
  int iPos = plrPosition(pReader);
  int iStartOffset = plrStartOffset(pReader);
  int iEndOffset = plrEndOffset(pReader);

  plwAdd(pWriter, iColumn, iPos, iStartOffset, iEndOffset);
}
|
||||
static void plwInit(PLWriter *pWriter, DLWriter *dlw, sqlite_int64 iDocid){
|
||||
char c[VARINT_MAX];
|
||||
int n;
|
||||
@ -1218,6 +1228,122 @@ static void docListMerge(DataBuffer *out,
|
||||
dlwDestroy(&writer);
|
||||
}
|
||||
|
||||
/* Helper function for posListUnion(). Compares the current position
|
||||
** between left and right, returning as standard C idiom of <0 if
|
||||
** left<right, >0 if left>right, and 0 if left==right. "End" always
|
||||
** compares greater.
|
||||
*/
|
||||
static int posListCmp(PLReader *pLeft, PLReader *pRight){
|
||||
assert( pLeft->iType==pRight->iType );
|
||||
if( pLeft->iType==DL_DOCIDS ) return 0;
|
||||
|
||||
if( plrAtEnd(pLeft) ) return plrAtEnd(pRight) ? 0 : 1;
|
||||
if( plrAtEnd(pRight) ) return -1;
|
||||
|
||||
if( plrColumn(pLeft)<plrColumn(pRight) ) return -1;
|
||||
if( plrColumn(pLeft)>plrColumn(pRight) ) return 1;
|
||||
|
||||
if( plrPosition(pLeft)<plrPosition(pRight) ) return -1;
|
||||
if( plrPosition(pLeft)>plrPosition(pRight) ) return 1;
|
||||
if( pLeft->iType==DL_POSITIONS ) return 0;
|
||||
|
||||
if( plrStartOffset(pLeft)<plrStartOffset(pRight) ) return -1;
|
||||
if( plrStartOffset(pLeft)>plrStartOffset(pRight) ) return 1;
|
||||
|
||||
if( plrEndOffset(pLeft)<plrEndOffset(pRight) ) return -1;
|
||||
if( plrEndOffset(pLeft)>plrEndOffset(pRight) ) return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Write the union of position lists in pLeft and pRight to pOut.
|
||||
** "Union" in this case meaning "All unique position tuples". Should
|
||||
** work with any doclist type, though both inputs and the output
|
||||
** should be the same type.
|
||||
*/
|
||||
static void posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){
|
||||
PLReader left, right;
|
||||
PLWriter writer;
|
||||
|
||||
assert( dlrDocid(pLeft)==dlrDocid(pRight) );
|
||||
assert( pLeft->iType==pRight->iType );
|
||||
assert( pLeft->iType==pOut->iType );
|
||||
|
||||
plrInit(&left, pLeft);
|
||||
plrInit(&right, pRight);
|
||||
plwInit(&writer, pOut, dlrDocid(pLeft));
|
||||
|
||||
while( !plrAtEnd(&left) || !plrAtEnd(&right) ){
|
||||
int c = posListCmp(&left, &right);
|
||||
if( c<0 ){
|
||||
plwCopy(&writer, &left);
|
||||
plrStep(&left);
|
||||
}else if( c>0 ){
|
||||
plwCopy(&writer, &right);
|
||||
plrStep(&right);
|
||||
}else{
|
||||
plwCopy(&writer, &left);
|
||||
plrStep(&left);
|
||||
plrStep(&right);
|
||||
}
|
||||
}
|
||||
|
||||
plwTerminate(&writer);
|
||||
plwDestroy(&writer);
|
||||
plrDestroy(&left);
|
||||
plrDestroy(&right);
|
||||
}
|
||||
|
||||
/* Write the union of doclists in pLeft and pRight to pOut. For
|
||||
** docids in common between the inputs, the union of the position
|
||||
** lists is written. Inputs and outputs are always type DL_DEFAULT.
|
||||
*/
|
||||
static void docListUnion(
|
||||
const char *pLeft, int nLeft,
|
||||
const char *pRight, int nRight,
|
||||
DataBuffer *pOut /* Write the combined doclist here */
|
||||
){
|
||||
DLReader left, right;
|
||||
DLWriter writer;
|
||||
|
||||
if( nLeft==0 ){
|
||||
dataBufferAppend(pOut, pRight, nRight);
|
||||
return;
|
||||
}
|
||||
if( nRight==0 ){
|
||||
dataBufferAppend(pOut, pLeft, nLeft);
|
||||
return;
|
||||
}
|
||||
|
||||
dlrInit(&left, DL_DEFAULT, pLeft, nLeft);
|
||||
dlrInit(&right, DL_DEFAULT, pRight, nRight);
|
||||
dlwInit(&writer, DL_DEFAULT, pOut);
|
||||
|
||||
while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){
|
||||
if( dlrAtEnd(&right) ){
|
||||
dlwCopy(&writer, &left);
|
||||
dlrStep(&left);
|
||||
}else if( dlrAtEnd(&left) ){
|
||||
dlwCopy(&writer, &right);
|
||||
dlrStep(&right);
|
||||
}else if( dlrDocid(&left)<dlrDocid(&right) ){
|
||||
dlwCopy(&writer, &left);
|
||||
dlrStep(&left);
|
||||
}else if( dlrDocid(&left)>dlrDocid(&right) ){
|
||||
dlwCopy(&writer, &right);
|
||||
dlrStep(&right);
|
||||
}else{
|
||||
posListUnion(&left, &right, &writer);
|
||||
dlrStep(&left);
|
||||
dlrStep(&right);
|
||||
}
|
||||
}
|
||||
|
||||
dlrDestroy(&left);
|
||||
dlrDestroy(&right);
|
||||
dlwDestroy(&writer);
|
||||
}
|
||||
|
||||
/* pLeft and pRight are DLReaders positioned to the same docid.
|
||||
**
|
||||
** If there are no instances in pLeft or pRight where the position
|
||||
@ -1230,7 +1356,8 @@ static void docListMerge(DataBuffer *out,
|
||||
** include the positions from pRight that are one more than a
|
||||
** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1.
|
||||
*/
|
||||
static void mergePosList(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){
|
||||
static void posListPhraseMerge(DLReader *pLeft, DLReader *pRight,
|
||||
DLWriter *pOut){
|
||||
PLReader left, right;
|
||||
PLWriter writer;
|
||||
int match = 0;
|
||||
@ -1302,7 +1429,7 @@ static void docListPhraseMerge(
|
||||
}else if( dlrDocid(&right)<dlrDocid(&left) ){
|
||||
dlrStep(&right);
|
||||
}else{
|
||||
mergePosList(&left, &right, &writer);
|
||||
posListPhraseMerge(&left, &right, &writer);
|
||||
dlrStep(&left);
|
||||
dlrStep(&right);
|
||||
}
|
||||
@ -4757,9 +4884,11 @@ static void leafReaderStep(LeafReader *pReader){
|
||||
}
|
||||
}
|
||||
|
||||
/* strcmp-style comparison of pReader's current term against pTerm. */
|
||||
/* strcmp-style comparison of pReader's current term against pTerm.
|
||||
** If isPrefix, equality means equal through nTerm bytes.
|
||||
*/
|
||||
static int leafReaderTermCmp(LeafReader *pReader,
|
||||
const char *pTerm, int nTerm){
|
||||
const char *pTerm, int nTerm, int isPrefix){
|
||||
int c, n = pReader->term.nData<nTerm ? pReader->term.nData : nTerm;
|
||||
if( n==0 ){
|
||||
if( pReader->term.nData>0 ) return -1;
|
||||
@ -4769,6 +4898,7 @@ static int leafReaderTermCmp(LeafReader *pReader,
|
||||
|
||||
c = memcmp(pReader->term.pData, pTerm, n);
|
||||
if( c!=0 ) return c;
|
||||
if( isPrefix && n==nTerm ) return 0;
|
||||
return pReader->term.nData - nTerm;
|
||||
}
|
||||
|
||||
@ -4916,7 +5046,8 @@ static int leavesReaderTermCmp(LeavesReader *lr1, LeavesReader *lr2){
|
||||
if( leavesReaderAtEnd(lr2) ) return -1;
|
||||
|
||||
return leafReaderTermCmp(&lr1->leafReader,
|
||||
leavesReaderTerm(lr2), leavesReaderTermBytes(lr2));
|
||||
leavesReaderTerm(lr2), leavesReaderTermBytes(lr2),
|
||||
0);
|
||||
}
|
||||
|
||||
/* Similar to leavesReaderTermCmp(), with additional ordering by idx
|
||||
@ -5105,7 +5236,8 @@ static int segmentMerge(fulltext_vtab *v, int iLevel){
|
||||
** Internal function for loadSegmentLeaf().
|
||||
*/
|
||||
static int loadSegmentLeavesInt(fulltext_vtab *v, LeavesReader *pReader,
|
||||
const char *pTerm, int nTerm, DataBuffer *out){
|
||||
const char *pTerm, int nTerm, int isPrefix,
|
||||
DataBuffer *out){
|
||||
assert( nTerm>0 );
|
||||
|
||||
/* Process while the prefix matches. */
|
||||
@ -5115,14 +5247,25 @@ static int loadSegmentLeavesInt(fulltext_vtab *v, LeavesReader *pReader,
|
||||
** on a better name. [Meanwhile, break encapsulation rather than
|
||||
** use a confusing name.]
|
||||
*/
|
||||
int rc, c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm);
|
||||
int rc;
|
||||
int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix);
|
||||
if( c==0 ){
|
||||
const char *pData = leavesReaderData(pReader);
|
||||
int nData = leavesReaderDataBytes(pReader);
|
||||
assert( out->nData==0 );
|
||||
dataBufferReplace(out, pData, nData);
|
||||
if( out->nData==0 ){
|
||||
dataBufferReplace(out, pData, nData);
|
||||
}else{
|
||||
DataBuffer result;
|
||||
dataBufferInit(&result, out->nData+nData);
|
||||
docListUnion(out->pData, out->nData, pData, nData, &result);
|
||||
dataBufferDestroy(out);
|
||||
*out = result;
|
||||
/* TODO(shess) Rather than destroy out, we could retain it for
|
||||
** later reuse.
|
||||
*/
|
||||
}
|
||||
}
|
||||
if( c>=0 ) break; /* Past any possible matches. */
|
||||
if( c>0 ) break; /* Past any possible matches. */
|
||||
|
||||
rc = leavesReaderStep(v, pReader);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
@ -5132,7 +5275,8 @@ static int loadSegmentLeavesInt(fulltext_vtab *v, LeavesReader *pReader,
|
||||
|
||||
/* Call loadSegmentLeavesInt() with pData/nData as input. */
|
||||
static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData,
|
||||
const char *pTerm, int nTerm, DataBuffer *out){
|
||||
const char *pTerm, int nTerm, int isPrefix,
|
||||
DataBuffer *out){
|
||||
LeavesReader reader;
|
||||
int rc;
|
||||
|
||||
@ -5141,7 +5285,7 @@ static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData,
|
||||
rc = leavesReaderInit(v, 0, 0, 0, pData, nData, &reader);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
|
||||
rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, out);
|
||||
rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out);
|
||||
leavesReaderReset(&reader);
|
||||
leavesReaderDestroy(&reader);
|
||||
return rc;
|
||||
@ -5153,7 +5297,8 @@ static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData,
|
||||
*/
|
||||
static int loadSegmentLeaves(fulltext_vtab *v,
|
||||
sqlite_int64 iStartLeaf, sqlite_int64 iEndLeaf,
|
||||
const char *pTerm, int nTerm, DataBuffer *out){
|
||||
const char *pTerm, int nTerm, int isPrefix,
|
||||
DataBuffer *out){
|
||||
int rc;
|
||||
LeavesReader reader;
|
||||
|
||||
@ -5161,7 +5306,7 @@ static int loadSegmentLeaves(fulltext_vtab *v,
|
||||
rc = leavesReaderInit(v, 0, iStartLeaf, iEndLeaf, NULL, 0, &reader);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
|
||||
rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, out);
|
||||
rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out);
|
||||
leavesReaderReset(&reader);
|
||||
leavesReaderDestroy(&reader);
|
||||
return rc;
|
||||
@ -5258,8 +5403,7 @@ static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData,
|
||||
DataBuffer *out){
|
||||
/* Special case where root is a leaf. */
|
||||
if( *pData=='\0' ){
|
||||
assert( !isPrefix ); /* TODO(shess) Add prefix support. */
|
||||
return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, out);
|
||||
return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, isPrefix, out);
|
||||
}else{
|
||||
int rc;
|
||||
sqlite_int64 iStartChild, iEndChild;
|
||||
@ -5290,8 +5434,8 @@ static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData,
|
||||
assert( iStartChild<=iLeavesEnd );
|
||||
assert( iEndChild<=iLeavesEnd );
|
||||
|
||||
assert( !isPrefix ); /* TODO(shess) Add prefix support. */
|
||||
return loadSegmentLeaves(v, iStartChild, iEndChild, pTerm, nTerm, out);
|
||||
return loadSegmentLeaves(v, iStartChild, iEndChild,
|
||||
pTerm, nTerm, isPrefix, out);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5299,10 +5443,6 @@ static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData,
|
||||
** merge its doclist over *out (any duplicate doclists read from the
|
||||
** segment rooted at pData will overwrite those in *out).
|
||||
*/
|
||||
/* NOTE(shess) Previous code passed out down to sub-routines for use
|
||||
** in docListMerge(). This version deoptimizes things slightly, but
|
||||
** prefix searches require a different merge function entirely.
|
||||
*/
|
||||
static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
|
||||
sqlite_int64 iLeavesEnd,
|
||||
const char *pTerm, int nTerm, int isPrefix,
|
||||
|
14
manifest
14
manifest
@ -1,5 +1,5 @@
|
||||
C The\spager\stakes\sthe\ssector\ssize\sto\sbe\sthe\slarger\sof\sthe\ssector\ssize\nreported\sby\ssqlite3OsSectorSize()\sand\sthe\spage\ssize.\s(CVS\s3890)
|
||||
D 2007-05-01T16:59:49
|
||||
C Modify\sloadSegmentLeavesInt()\sto\scorrectly\shandle\sprefix\ssearching.\nThe\snew\sfunction\sdocListUnion()\sis\sused\sto\saccumulate\sa\sunion\sof\sthe\nhits\sfor\sthe\smatching\sterms,\swhich\swill\sbe\smerged\sacross\ssegments\nusing\sdocListMerge().\s(CVS\s3891)
|
||||
D 2007-05-01T17:14:59
|
||||
F Makefile.in 8cab54f7c9f5af8f22fd97ddf1ecfd1e1860de62
|
||||
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
|
||||
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
|
||||
@ -34,7 +34,7 @@ F ext/fts1/fulltext.h 08525a47852d1d62a0be81d3fc3fe2d23b094efd
|
||||
F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
|
||||
F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
|
||||
F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
|
||||
F ext/fts2/fts2.c c750b2db623587021a402631a7aa582d81852c44
|
||||
F ext/fts2/fts2.c a6762b7a6cc173eb83a0aa9506c1b7be66f00786
|
||||
F ext/fts2/fts2.h 591916a822cfb6426518fdbf6069359119bc46eb
|
||||
F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b
|
||||
F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e
|
||||
@ -466,7 +466,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
|
||||
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
|
||||
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
|
||||
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
|
||||
P cae844a01a1d87ffb00bba8b4e7b62a92e633aa9
|
||||
R 195e13614b6e5b993d99e293e129cc37
|
||||
U drh
|
||||
Z e4fd6258dd404077dd1b05dd5e45d3e2
|
||||
P e5e6af55ccc5c1a8a9206b42f1dd7bf547cb97ca
|
||||
R 2c25fe5630cee9d287b5318018624ff2
|
||||
U shess
|
||||
Z bcafc47e1cdd98b2861c3ec5b23e7e20
|
||||
|
@ -1 +1 @@
|
||||
e5e6af55ccc5c1a8a9206b42f1dd7bf547cb97ca
|
||||
72c796307338c2751a91c30f6fb16989afbf3816
|
Loading…
Reference in New Issue
Block a user