Implementation of "column:" modifiers in FTS1 queries. (CVS 3411)

FossilOrigin-Name: 820634f71e3a3499994f82b56b784d22a7e3cdcf
This commit is contained in:
drh 2006-09-13 16:02:43 +00:00
parent cbaac514bc
commit a3baa963bc
4 changed files with 111 additions and 14 deletions

View File

@ -1956,9 +1956,10 @@ static int fulltextNext(sqlite3_vtab_cursor *pCursor){
** the following structure.
*/
typedef struct QueryTerm {
int nPhrase; /* How many following terms are part of the same phrase */
int isOr; /* this term is preceded by "OR" */
int isNot; /* this term is preceded by "-" */
short int nPhrase; /* How many following terms are part of the same phrase */
/* iColumn is copied from Query.iColumn when the term is added.  A negative
** value means no "column:" qualifier preceded the term; fulltextQuery()
** then falls back to the column it was called with. */
short int iColumn; /* Column of the index that must match this term */
signed char isOr; /* this term is preceded by "OR" */
signed char isNot; /* this term is preceded by "-" */
char *pTerm; /* text of the term. '\000' terminated. malloced */
int nTerm; /* Number of bytes in pTerm[] */
} QueryTerm;
@ -2028,9 +2029,11 @@ static int docListOfTerm(
*
*/
typedef struct Query {
/* pFts is set by parseQuery() and handed to checkColumnSpecifier() by
** tokenizeSegment() in order to resolve "column:" prefixes. */
fulltext_vtab *pFts; /* The full text index */
int nTerms; /* Number of terms in the query */
QueryTerm *pTerms; /* Array of terms. Space obtained from malloc() */
/* nextIsOr is consumed by queryAdd(): it is copied into the new term's
** isOr flag and then cleared. */
int nextIsOr; /* Set the isOr flag on the next inserted term */
/* iColumn is the pending column restriction.  It is set when a "column:"
** prefix is recognized, copied into the next term by queryAdd(), and then
** reset to -1, which means "no restriction". */
int iColumn; /* Text word parsed must be in this column */
} Query;
/* Add a new term pTerm[0..nTerm-1] to the query *q.
@ -2051,6 +2054,8 @@ static void queryAdd(Query *q, const char *pTerm, int nTerm){
t->nTerm = nTerm;
t->isOr = q->nextIsOr;
q->nextIsOr = 0;
t->iColumn = q->iColumn;
q->iColumn = -1;
}
/* Free all of the memory that was malloced in order to build *q.
@ -2063,6 +2068,26 @@ static void queryDestroy(Query *q){
free(q->pTerms);
}
/*
** Check to see if the string zToken[0...nToken-1] matches any
** column name in the virtual table.   If it does,
** return the zero-indexed column number.  If not, return -1.
**
** The match is exact and byte-for-byte: the token must have the same
** length as the column name, so a token that is merely a prefix of a
** column name (or the reverse) does not match.
**
** The length of each column name is checked before any bytes are
** compared.  Calling memcmp() for nToken bytes on a column name that is
** shorter than the token would permit a read past the name's terminator.
** A negative nToken never matches.
*/
static int checkColumnSpecifier(
  fulltext_vtab *pVtab,    /* The virtual table */
  const char *zToken,      /* Text of the token */
  int nToken               /* Number of characters in the token */
){
  int i;
  size_t n;

  if( nToken<0 ) return -1;
  n = (size_t)nToken;
  for(i=0; i<pVtab->nColumn; i++){
    const char *zCol = pVtab->azColumn[i];
    /* Only compare bytes once both strings are known to be n bytes long */
    if( strlen(zCol)==n && memcmp(zCol, zToken, n)==0 ){
      return i;
    }
  }
  return -1;
}
/*
** Parse the text at pSegment[0..nSegment-1]. Add additional terms
** to the query being assembled in pQuery.
@ -2082,6 +2107,7 @@ static int tokenizeSegment(
const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
sqlite3_tokenizer_cursor *pCursor;
int firstIndex = pQuery->nTerms;
int iCol;
int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
if( rc!=SQLITE_OK ) return rc;
@ -2095,6 +2121,12 @@ static int tokenizeSegment(
&pToken, &nToken,
&iBegin, &iEnd, &iPos);
if( rc!=SQLITE_OK ) break;
if( !inPhrase &&
pSegment[iEnd]==':' &&
(iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
pQuery->iColumn = iCol;
continue;
}
if( !inPhrase && pQuery->nTerms>0 && nToken==2
&& pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
pQuery->nextIsOr = 1;
@ -2123,6 +2155,8 @@ static int parseQuery(fulltext_vtab *v, const char *pInput, int nInput,
pQuery->nTerms = 0;
pQuery->pTerms = NULL;
pQuery->nextIsOr = 0;
pQuery->iColumn = -1;
pQuery->pFts = v;
for(iInput=0; iInput<nInput; ++iInput){
int i;
@ -2149,6 +2183,9 @@ static int parseQuery(fulltext_vtab *v, const char *pInput, int nInput,
/* Perform a full-text query using the search expression in
** pInput[0..nInput-1]. Return a list of matching documents
** in pResult.
**
** Queries must match column iColumn, or, if iColumn>=nColumn,
** they are allowed to match against any column.  A term that carries
** its own "column:" qualifier is matched against that column instead.
*/
static int fulltextQuery(fulltext_vtab *v, int iColumn,
const char *pInput, int nInput, DocList **pResult){
@ -2157,6 +2194,7 @@ static int fulltextQuery(fulltext_vtab *v, int iColumn,
DocList *pLeft = NULL;
DocList *pRight, *pNew;
int nNot = 0;
int iCol;
rc = parseQuery(v, pInput, nInput, &q);
if( rc!=SQLITE_OK ) return rc;
@ -2170,7 +2208,9 @@ static int fulltextQuery(fulltext_vtab *v, int iColumn,
continue;
}
rc = docListOfTerm(v, iColumn, &q.pTerms[i], &pRight);
iCol = q.pTerms[i].iColumn;
if( iCol<0 ) iCol = iColumn;
rc = docListOfTerm(v, iCol, &q.pTerms[i], &pRight);
if( rc ){
queryDestroy(&q);
return rc;
@ -2198,7 +2238,9 @@ static int fulltextQuery(fulltext_vtab *v, int iColumn,
/* Do the EXCEPT terms */
for(i=0; i<q.nTerms; i += q.pTerms[i].nPhrase + 1){
if( !q.pTerms[i].isNot ) continue;
rc = docListOfTerm(v, iColumn, &q.pTerms[i], &pRight);
iCol = q.pTerms[i].iColumn;
if( iCol<0 ) iCol = iColumn;
rc = docListOfTerm(v, iCol, &q.pTerms[i], &pRight);
if( rc ){
queryDestroy(&q);
docListDelete(pLeft);

View File

@ -1,5 +1,5 @@
C Module\sspec\sparser\senhancements\sfor\sFTS1.\s\sNow\sable\sto\scope\swith\scolumn\nnames\sin\sthe\sspec\sthat\sare\sSQL\skeywords\sor\shave\sspecial\scharacters,\setc.\nAlso\sadded\ssupport\sfor\sadditional\scontrol\slines.\s\sColumn\snames\scan\sbe\nfollowed\sby\sa\stype\sspecifier\s(which\sis\signored.)\s(CVS\s3410)
D 2006-09-13T15:20:13
C Implementation\sof\s"column:"\smodifiers\sin\sFTS1\squeries.\s(CVS\s3411)
D 2006-09-13T16:02:44
F Makefile.in cabd42d34340f49260bc2a7668c38eba8d4cfd99
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@ -21,7 +21,7 @@ F ext/README.txt 913a7bd3f4837ab14d7e063304181787658b14e1
F ext/fts1/README.txt 20ac73b006a70bcfd80069bdaf59214b6cf1db5e
F ext/fts1/ft_hash.c 3927bd880e65329bdc6f506555b228b28924921b
F ext/fts1/ft_hash.h 1a35e654a235c2c662d3ca0dfc3138ad60b8b7d5
F ext/fts1/fts1.c de9c9027e0b4bfe84b6e304def7c90e7699ae309
F ext/fts1/fts1.c b5d7a61ae136f116427d63b4942540bdde20511e
F ext/fts1/fts1.h fe8e8f38dd6d2d2645b9b0d6972e80985249575f
F ext/fts1/fts1_hash.c 3196cee866edbebb1c0521e21672e6d599965114
F ext/fts1/fts1_hash.h 957d378355ed29f672cd5add012ce8b088a5e089
@ -191,7 +191,7 @@ F test/expr.test c78843f730ccbe973d0c2ad1c99978f936893131
F test/fkey1.test 153004438d51e6769fb1ce165f6313972d6263ce
F test/format4.test bf3bed3b13c63abfb3cfec232597a319a31d0bcc
F test/fts1a.test 54fd9451c00fb91074d5abdc207b05dcba6d2d65
F test/fts1b.test 7fed050efcf6ee7d8faaea6d97efdfb49e752135
F test/fts1b.test 5742c32c69ec9667c8d32df5bc79aa416d5f363a
F test/func.test 7f2c91a948a0a177635835dc9afa078413c54ae1
F test/hook.test 7e7645fd9a033f79cce8fdff151e32715e7ec50a
F test/in.test 369cb2aa1eab02296b4ec470732fe8c131260b1d
@ -398,7 +398,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
P 528036c828c93c78ca879bf89a52131b72e24067
R e6dd8f2c3c3eb768669b7417f023def9
P adb780e0dc8bc7dcd1102efbfa4bc17eefdf968e
R cb518158115dd87ece5a8e7690dc5840
U drh
Z 48ac5c689837f5905a3d6832e62238fa
Z e9d714f2273a2ad600591f248a85a42b

View File

@ -1 +1 @@
adb780e0dc8bc7dcd1102efbfa4bc17eefdf968e
820634f71e3a3499994f82b56b784d22a7e3cdcf

View File

@ -11,7 +11,7 @@
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS1 module.
#
# $Id: fts1b.test,v 1.2 2006/09/13 15:20:13 drh Exp $
# $Id: fts1b.test,v 1.3 2006/09/13 16:02:44 drh Exp $
#
set testdir [file dirname $argv0]
@ -89,4 +89,59 @@ do_test fts1b-2.1 {
}
} {{one two three} {four five six}}
# Return an SQL string literal (single-quoted) listing the words
# one, two, three, four, five that correspond to the bits set in $i.
# Bit 0 selects "one", bit 1 selects "two", and so on.  Bits above
# the lowest five are ignored.
#
proc wordset {i} {
  set words {}
  set bit 0
  foreach name {one two three four five} {
    if {$i & (1 << $bit)} {
      lappend words $name
    }
    incr bit
  }
  return '$words'
}
# Build the three-column FTS table t4 and fill it with 15 rows.
# For the i-th inserted row (i = 1..15) the columns hold:
#
#    norm:     wordset of i
#    plusone:  wordset of i+1
#    invert:   wordset of ~i
#
# The tests that follow address these rows by rowid.
#
db eval {
CREATE VIRTUAL TABLE t4 USING fts1([norm],'plusone',"invert");
}
for {set i 1} {$i<=15} {incr i} {
  set norm [wordset $i]
  set plusone [wordset [expr {$i+1}]]
  set invert [wordset [expr {~$i}]]
  set vset [list $norm $plusone $invert]
  db eval "INSERT INTO t4(norm,plusone,invert) VALUES([join $vset ,]);"
}
# 4.1 and 4.2: a "norm:" qualifier under _all is expected to give the same
# rows as a MATCH against the norm column itself.  norm contains "one"
# exactly when bit 0 of the row number is set, i.e. for the odd rows.
do_test fts1b-4.1 {
execsql {SELECT rowid FROM t4 WHERE _all MATCH 'norm:one'}
} {1 3 5 7 9 11 13 15}
do_test fts1b-4.2 {
execsql {SELECT rowid FROM t4 WHERE norm MATCH 'one'}
} {1 3 5 7 9 11 13 15}
# 4.3: an unqualified term under _all is expected to hit every row, since
# "one" appears in norm for odd rows and in plusone for even rows.
do_test fts1b-4.3 {
execsql {SELECT rowid FROM t4 WHERE _all MATCH 'one'}
} {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15}
# 4.4 and 4.5: same comparison as 4.1/4.2 but for the plusone column, which
# contains "one" exactly for the even rows.
do_test fts1b-4.4 {
execsql {SELECT rowid FROM t4 WHERE _all MATCH 'plusone:one'}
} {2 4 6 8 10 12 14}
do_test fts1b-4.5 {
execsql {SELECT rowid FROM t4 WHERE plusone MATCH 'one'}
} {2 4 6 8 10 12 14}
# 4.6: two terms, each with its own qualifier; both must hold.
do_test fts1b-4.6 {
execsql {SELECT rowid FROM t4 WHERE _all MATCH 'norm:one plusone:two'}
} {1 5 9 13}
# 4.7: the qualifier applies only to the term directly after it.  The
# unqualified "two" is not restricted to norm, so all odd rows are expected
# (restricting it to norm would leave only 3 7 11 15).
do_test fts1b-4.7 {
execsql {SELECT rowid FROM t4 WHERE _all MATCH 'norm:one two'}
} {1 3 5 7 9 11 13 15}
# 4.8 and 4.9: the queries of 4.6 and 4.7 with the terms in the opposite
# order; the expected rows are unchanged.
do_test fts1b-4.8 {
execsql {SELECT rowid FROM t4 WHERE _all MATCH 'plusone:two norm:one'}
} {1 5 9 13}
do_test fts1b-4.9 {
execsql {SELECT rowid FROM t4 WHERE _all MATCH 'two norm:one'}
} {1 3 5 7 9 11 13 15}
finish_test