Additional test cases and cleanup of FTS3 parenthesis processing. (CVS 6094)

FossilOrigin-Name: afac4293000f81410d105a99956605bf7102fa62
This commit is contained in:
drh 2009-01-01 12:34:45 +00:00
parent 758bc07c43
commit b39187ae89
6 changed files with 233 additions and 66 deletions

View File

@ -239,7 +239,7 @@ int registerTokenizer(
}
static
int queryTokenizer(
int queryFts2Tokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module **pp
@ -272,7 +272,7 @@ void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
** build unless SQLITE_TEST is defined.
**
** The purpose of this is to test that the fts2_tokenizer() function
** can be used as designed by the C-code in the queryTokenizer and
** can be used as designed by the C-code in the queryFts2Tokenizer and
** registerTokenizer() functions above. These two functions are repeated
** in the README.tokenizer file as an example, so it is important to
** test them.
@ -296,10 +296,10 @@ static void intTestFunc(
/* Test the query function */
sqlite3Fts2SimpleTokenizerModule(&p1);
rc = queryTokenizer(db, "simple", &p2);
rc = queryFts2Tokenizer(db, "simple", &p2);
assert( rc==SQLITE_OK );
assert( p1==p2 );
rc = queryTokenizer(db, "nosuchtokenizer", &p2);
rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
assert( rc==SQLITE_ERROR );
assert( p2==0 );
assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
@ -307,7 +307,7 @@ static void intTestFunc(
/* Test the storage function */
rc = registerTokenizer(db, "nosuchtokenizer", p1);
assert( rc==SQLITE_OK );
rc = queryTokenizer(db, "nosuchtokenizer", &p2);
rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
assert( rc==SQLITE_OK );
assert( p2==p1 );

View File

@ -83,7 +83,7 @@ struct ParseContext {
** any values that fall outside of the range of the unsigned char type (i.e.
** negative values).
*/
static int safe_isspace_expr(char c){
static int fts3isspace(char c){
return (c&0x80)==0 ? isspace(c) : 0;
}
@ -156,7 +156,12 @@ static int getNextToken(
return rc;
}
void *realloc_or_free(void *pOrig, int nNew){
/*
** Enlarge a memory allocation. If the reallocation fails (out of memory),
** then free the old allocation.
*/
void *fts3ReallocOrFree(void *pOrig, int nNew){
void *pRet = sqlite3_realloc(pOrig, nNew);
if( !pRet ){
sqlite3_free(pOrig);
@ -199,8 +204,8 @@ static int getNextString(
rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
if( rc==SQLITE_OK ){
int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
p = realloc_or_free(p, nByte+ii*sizeof(struct PhraseToken));
zTemp = realloc_or_free(zTemp, nTemp + nToken);
p = fts3ReallocOrFree(p, nByte+ii*sizeof(struct PhraseToken));
zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken);
if( !p || !zTemp ){
goto no_mem;
}
@ -233,7 +238,7 @@ static int getNextString(
int nNew = 0;
int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
nByte += (p->pPhrase->nToken-1) * sizeof(struct PhraseToken);
p = realloc_or_free(p, nByte + nTemp);
p = fts3ReallocOrFree(p, nByte + nTemp);
if( !p ){
goto no_mem;
}
@ -281,15 +286,16 @@ static int getNextNode(
Fts3Expr **ppExpr, /* OUT: expression */
int *pnConsumed /* OUT: Number of bytes consumed */
){
struct Fts3Keyword {
char *z;
int n;
int eType;
static const struct Fts3Keyword {
char z[4]; /* Keyword text */
u8 n; /* Length of the keyword */
u8 parenOnly; /* Only valid in paren mode */
u8 eType; /* Keyword code */
} aKeyword[] = {
{ "OR" , 2, FTSQUERY_OR },
{ "AND", 3, FTSQUERY_AND },
{ "NOT", 3, FTSQUERY_NOT },
{ "NEAR", 4, FTSQUERY_NEAR }
{ "OR" , 2, 0, FTSQUERY_OR },
{ "AND", 3, 1, FTSQUERY_AND },
{ "NOT", 3, 1, FTSQUERY_NOT },
{ "NEAR", 4, 0, FTSQUERY_NEAR }
};
int ii;
int iCol;
@ -303,18 +309,16 @@ static int getNextNode(
/* Skip over any whitespace before checking for a keyword, an open or
** close bracket, or a quoted string.
*/
while( nInput>0 && safe_isspace_expr(*zInput) ){
while( nInput>0 && fts3isspace(*zInput) ){
nInput--;
zInput++;
}
/* See if we are dealing with a keyword. */
for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
struct Fts3Keyword *pKey = &aKeyword[ii];
const struct Fts3Keyword *pKey = &aKeyword[ii];
if( (0==sqlite3_fts3_enable_parentheses)
&& (pKey->eType==FTSQUERY_AND || pKey->eType==FTSQUERY_NOT)
){
if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){
continue;
}
@ -336,10 +340,10 @@ static int getNextNode(
/* At this point this is probably a keyword. But for that to be true,
** the next byte must contain either whitespace, an open or close
** bracket, a quote character, or EOF.
** parenthesis, a quote character, or EOF.
*/
cNext = zInput[nKey];
if( safe_isspace_expr(cNext)
if( fts3isspace(cNext)
|| cNext=='"' || cNext=='(' || cNext==')' || cNext==0
){
pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr));
@ -707,7 +711,7 @@ void sqlite3Fts3ExprFree(Fts3Expr *p){
/*
** Function to query the hash-table of tokenizers (see README.tokenizers).
*/
static int queryTokenizer(
static int queryTestTokenizer(
sqlite3 *db,
const char *zName,
const sqlite3_tokenizer_module **pp
@ -796,7 +800,7 @@ static void fts3ExprTest(
sqlite3_value **argv
){
sqlite3_tokenizer_module const *pModule = 0;
sqlite3_tokenizer *pTokenizer;
sqlite3_tokenizer *pTokenizer = 0;
int rc;
char **azCol = 0;
const char *zExpr;
@ -813,7 +817,8 @@ static void fts3ExprTest(
return;
}
rc = queryTokenizer(db, (const char *)sqlite3_value_text(argv[0]), &pModule);
rc = queryTestTokenizer(db,
(const char *)sqlite3_value_text(argv[0]), &pModule);
if( rc==SQLITE_NOMEM ){
sqlite3_result_error_nomem(context);
goto exprtest_out;
@ -858,7 +863,7 @@ static void fts3ExprTest(
}
exprtest_out:
if( pTokenizer ){
if( pModule && pTokenizer ){
rc = pModule->xDestroy(pTokenizer);
}
sqlite3_free(azCol);

View File

@ -21,42 +21,52 @@
** generator. This module does not actually use lemon; it uses a
** custom parser.
**
** query ::= andexpr (OR andexpr)*.
**
** andexpr ::= notexpr (AND? notexpr)*.
**
** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
** notexpr ::= LP query RP.
**
** nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
**
** distance_opt ::= .
** distance_opt ::= / INTEGER.
**
** phrase ::= TOKEN.
** phrase ::= TOKEN:COLUMN.
** phrase ::= COLUMN:TOKEN.
** phrase ::= "TOKEN TOKEN TOKEN...".
** phrase ::= phrase near phrase.
**
** near ::= NEAR.
** near ::= NEAR / INTEGER.
**
** query ::= -TOKEN.
** query ::= phrase.
** query ::= LP query RP.
** query ::= query NOT query.
** query ::= query OR query.
** query ::= query AND query.
*/
typedef struct Fts3Expr Fts3Expr;
typedef struct Fts3Phrase Fts3Phrase;
/*
** A "phrase" is a sequence of one or more tokens that must match in
** sequence. A single token is the base case and the most common case.
** For a sequence of tokens contained in "...", nToken will be the number
** of tokens in the string.
*/
struct Fts3Phrase {
int nToken; /* Number of entries in aToken[] */
int nToken; /* Number of tokens in the phrase */
int iColumn; /* Index of column this phrase must match */
int isNot; /* Phrase prefixed by unary not (-) operator */
struct PhraseToken {
char *z;
int n; /* Number of bytes in buffer pointed to by z */
int isPrefix; /* True if token ends in with a "*" character */
} aToken[1];
char *z; /* Text of the token */
int n; /* Number of bytes in buffer pointed to by z */
int isPrefix; /* True if token ends with a "*" character */
} aToken[1]; /* One entry for each token in the phrase */
};
/*
** A tree of these objects forms the RHS of a MATCH operator.
*/
struct Fts3Expr {
int eType; /* One of the FTSQUERY_XXX values defined below */
int nNear; /* Valid if eType==FTSQUERY_NEAR */
Fts3Expr *pParent;
Fts3Expr *pLeft;
Fts3Expr *pRight;
Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */
Fts3Expr *pLeft; /* Left operand */
Fts3Expr *pRight; /* Right operand */
Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */
};
@ -84,4 +94,3 @@ void sqlite3Fts3ExprFree(Fts3Expr *);
#ifdef SQLITE_TEST
void sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
#endif

View File

@ -1,5 +1,5 @@
C Add\sa\scouple\sof\sextra\stests\sfor\sthe\sfts3\sexpression\sparser\sto\simprove\smcdc\scoverage.\s(CVS\s6093)
D 2009-01-01T07:42:49
C Additional\stest\scases\sand\scleanup\sof\sFTS3\sparenthesis\sprocessing.\s(CVS\s6094)
D 2009-01-01T12:34:46
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in 77635d0909c2067cee03889a1e04ce910d8fb809
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@ -46,7 +46,7 @@ F ext/fts2/fts2_hash.c 2689e42e1107ea67207f725cf69cf8972d00cf93
F ext/fts2/fts2_hash.h 9a5b1be94664139f93217a0770d7144425cffb3a
F ext/fts2/fts2_icu.c 1ea9993a39c9783c2e2d7446d055e9d64411dda0
F ext/fts2/fts2_porter.c 8a6369b0fae98c04db95e4fa95fac7c03d7182ec
F ext/fts2/fts2_tokenizer.c 5cec41326fabe65323945a46fa9495ee85c3d5fd
F ext/fts2/fts2_tokenizer.c 26e993a00b2bd5b6e73c155597361710b12ffe25
F ext/fts2/fts2_tokenizer.h a7e46462d935a314b2682287f12f27530a3ee08e
F ext/fts2/fts2_tokenizer1.c 8545ce12b41922004da46e91a7b023b92b76f94e
F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
@ -55,8 +55,8 @@ F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c 3aa6aef1eadc44606f6ed3c841062735a5210077
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3_expr.c ac57b3ae142da80527cccdc4ab4e100fac1737fb
F ext/fts3/fts3_expr.h 4dad4d87cf5d41ea924a815fe89a6f87dc76f277
F ext/fts3/fts3_expr.c 9394701f42abfa6abd58f446841aed65963ee1a1
F ext/fts3/fts3_expr.h b5412dcf565c6d90d6a8c22090ceb9ed8c745634
F ext/fts3/fts3_hash.c e15e84d18f8df149ab290029872d4559c4c7c15a
F ext/fts3/fts3_hash.h 004b759e1602ff16dfa02fea3ca1c77336ad6798
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
@ -368,7 +368,7 @@ F test/fts3b.test b3a25180a633873d37d86e1ccd00ed690d37237a
F test/fts3c.test 4c7ef29b37aca3e8ebb6a39b57910caa6506034e
F test/fts3d.test d92a47fe8ed59c9e53d2d8e6d2685bb380aadadc
F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851
F test/fts3expr.test 895b3f49679aaf7a9b463a7edc8589722ec3bf47
F test/fts3expr.test d8f8a3caabefe537461e877910c64e3f12239b79
F test/fts3expr2.test 8501de895a4c0631e7226c9bac055cd49c9f6646
F test/fts3near.test e8a9b4e16c63a795918b334b74d4aec14815bf8b
F test/func.test a50f0a4b69ac251debe1dce3ba29da7476dc8c52
@ -690,7 +690,7 @@ F tool/speedtest16.c c8a9c793df96db7e4933f0852abb7a03d48f2e81
F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
P 11c2d4686197fb3f0d601651d5bbb3492af8f0dd
R e1805dee74c86f472abd2ab7b7d394c8
U danielk1977
Z 720d3adfdda818abcd238892e17a9322
P 13146b34935d339d7b8379083e024647af07e2c1
R 02dfd71ec13a04d1ce31f05761dd721d
U drh
Z c17be682ffb102989648d0f6e43f5ed7

View File

@ -1 +1 @@
13146b34935d339d7b8379083e024647af07e2c1
afac4293000f81410d105a99956605bf7102fa62

View File

@ -11,7 +11,7 @@
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS3 module.
#
# $Id: fts3expr.test,v 1.4 2009/01/01 07:42:49 danielk1977 Exp $
# $Id: fts3expr.test,v 1.5 2009/01/01 12:34:46 drh Exp $
#
set testdir [file dirname $argv0]
@ -38,6 +38,9 @@ do_test fts3expr-1.1 {
do_test fts3expr-1.2 {
test_fts3expr "ab AND cd"
} {AND {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.2.1 {
test_fts3expr "ab cd"
} {AND {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.3 {
test_fts3expr "ab OR cd"
} {OR {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
@ -47,19 +50,67 @@ do_test fts3expr-1.4 {
do_test fts3expr-1.5 {
test_fts3expr "ab NEAR cd"
} {NEAR/10 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6 {
do_test fts3expr-1.6.1 {
test_fts3expr "ab NEAR/5 cd"
} {NEAR/5 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.2 {
test_fts3expr "ab NEAR/87654321 cd"
} {NEAR/87654321 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.3 {
test_fts3expr "ab NEAR/7654321 cd"
} {NEAR/7654321 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.4 {
test_fts3expr "ab NEAR/654321 cd"
} {NEAR/654321 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.5 {
test_fts3expr "ab NEAR/54321 cd"
} {NEAR/54321 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.6 {
test_fts3expr "ab NEAR/4321 cd"
} {NEAR/4321 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.7 {
test_fts3expr "ab NEAR/321 cd"
} {NEAR/321 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.8 {
test_fts3expr "ab NEAR/21 cd"
} {NEAR/21 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.7 {
test_fts3expr {"one two three"}
} {PHRASE 3 0 one two three}
do_test fts3expr-1.8 {
do_test fts3expr-1.8.1 {
test_fts3expr {zero "one two three" four}
} {AND {AND {PHRASE 3 0 zero} {PHRASE 3 0 one two three}} {PHRASE 3 0 four}}
do_test fts3expr-1.9 {
do_test fts3expr-1.8.2 {
test_fts3expr {zero AND "one two three" four}
} {AND {AND {PHRASE 3 0 zero} {PHRASE 3 0 one two three}} {PHRASE 3 0 four}}
do_test fts3expr-1.8.3 {
test_fts3expr {zero "one two three" AND four}
} {AND {AND {PHRASE 3 0 zero} {PHRASE 3 0 one two three}} {PHRASE 3 0 four}}
do_test fts3expr-1.8.4 {
test_fts3expr {zero AND "one two three" AND four}
} {AND {AND {PHRASE 3 0 zero} {PHRASE 3 0 one two three}} {PHRASE 3 0 four}}
do_test fts3expr-1.9.1 {
test_fts3expr {"one* two three"}
} {PHRASE 3 0 one+ two three}
do_test fts3expr-1.9.2 {
test_fts3expr {"one two* three"}
} {PHRASE 3 0 one two+ three}
do_test fts3expr-1.9.3 {
test_fts3expr {"one* two* three"}
} {PHRASE 3 0 one+ two+ three}
do_test fts3expr-1.9.4 {
test_fts3expr {"one two three*"}
} {PHRASE 3 0 one two three+}
do_test fts3expr-1.9.5 {
test_fts3expr {"one* two three*"}
} {PHRASE 3 0 one+ two three+}
do_test fts3expr-1.9.6 {
test_fts3expr {"one two* three*"}
} {PHRASE 3 0 one two+ three+}
do_test fts3expr-1.9.7 {
test_fts3expr {"one* two* three*"}
} {PHRASE 3 0 one+ two+ three+}
do_test fts3expr-1.10 {
test_fts3expr {one* two}
@ -75,6 +126,109 @@ do_test fts3expr-1.15 {
test_fts3expr {one b:two}
} {AND {PHRASE 3 0 one} {PHRASE 1 0 two}}
do_test fts3expr-1.16 {
test_fts3expr {one AND two AND three AND four AND five}
} [list AND \
[list AND \
[list AND \
[list AND {PHRASE 3 0 one} {PHRASE 3 0 two}] \
{PHRASE 3 0 three} \
] \
{PHRASE 3 0 four} \
] \
{PHRASE 3 0 five} \
]
do_test fts3expr-1.17 {
test_fts3expr {(one AND two) AND ((three AND four) AND five)}
} [list AND \
[list AND {PHRASE 3 0 one} {PHRASE 3 0 two}] \
[list AND \
[list AND {PHRASE 3 0 three} {PHRASE 3 0 four}] \
{PHRASE 3 0 five} \
] \
]
do_test fts3expr-1.18 {
test_fts3expr {(one AND two) OR ((three AND four) AND five)}
} [list OR \
[list AND {PHRASE 3 0 one} {PHRASE 3 0 two}] \
[list AND \
[list AND {PHRASE 3 0 three} {PHRASE 3 0 four}] \
{PHRASE 3 0 five} \
] \
]
do_test fts3expr-1.19 {
test_fts3expr {(one AND two) AND ((three AND four) OR five)}
} [list AND \
[list AND {PHRASE 3 0 one} {PHRASE 3 0 two}] \
[list OR \
[list AND {PHRASE 3 0 three} {PHRASE 3 0 four}] \
{PHRASE 3 0 five} \
] \
]
do_test fts3expr-1.20 {
test_fts3expr {(one OR two) AND ((three OR four) AND five)}
} [list AND \
[list OR {PHRASE 3 0 one} {PHRASE 3 0 two}] \
[list AND \
[list OR {PHRASE 3 0 three} {PHRASE 3 0 four}] \
{PHRASE 3 0 five} \
] \
]
do_test fts3expr-1.21 {
test_fts3expr {(one OR two) AND ((three NOT four) AND five)}
} [list AND \
[list OR {PHRASE 3 0 one} {PHRASE 3 0 two}] \
[list AND \
[list NOT {PHRASE 3 0 three} {PHRASE 3 0 four}] \
{PHRASE 3 0 five} \
] \
]
do_test fts3expr-1.22 {
test_fts3expr {(one OR two) NOT ((three OR four) AND five)}
} [list NOT \
[list OR {PHRASE 3 0 one} {PHRASE 3 0 two}] \
[list AND \
[list OR {PHRASE 3 0 three} {PHRASE 3 0 four}] \
{PHRASE 3 0 five} \
] \
]
do_test fts3expr-1.23 {
test_fts3expr {(((((one OR two))))) NOT (((((three OR four))) AND five))}
} [list NOT \
[list OR {PHRASE 3 0 one} {PHRASE 3 0 two}] \
[list AND \
[list OR {PHRASE 3 0 three} {PHRASE 3 0 four}] \
{PHRASE 3 0 five} \
] \
]
do_test fts3expr-1.24 {
test_fts3expr {one NEAR two}
} [list NEAR/10 {PHRASE 3 0 one} {PHRASE 3 0 two}]
do_test fts3expr-1.25 {
test_fts3expr {(one NEAR two)}
} [list NEAR/10 {PHRASE 3 0 one} {PHRASE 3 0 two}]
do_test fts3expr-1.26 {
test_fts3expr {((((((one NEAR two))))))}
} [list NEAR/10 {PHRASE 3 0 one} {PHRASE 3 0 two}]
do_test fts3expr-1.27 {
test_fts3expr {(one NEAR two) OR ((three OR four) AND five)}
} [list OR \
[list NEAR/10 {PHRASE 3 0 one} {PHRASE 3 0 two}] \
[list AND \
[list OR {PHRASE 3 0 three} {PHRASE 3 0 four}] \
{PHRASE 3 0 five} \
] \
]
do_test fts3expr-1.28 {
test_fts3expr {(one NEAR/321 two) OR ((three OR four) AND five)}
} [list OR \
[list NEAR/321 {PHRASE 3 0 one} {PHRASE 3 0 two}] \
[list AND \
[list OR {PHRASE 3 0 three} {PHRASE 3 0 four}] \
{PHRASE 3 0 five} \
] \
]
proc strip_phrase_data {L} {
if {[lindex $L 0] eq "PHRASE"} {
return [lrange $L 3 end]
@ -303,4 +457,3 @@ foreach {id expr res} {
set sqlite_fts3_enable_parentheses 0
finish_test