Rebalance FTS expressions after parsing to limit recursion during evaluation. Avoid recursion when deleting FTS expression trees. Enforce a limit on the depth of an expression tree.

FossilOrigin-Name: f968d43f80cc2f236e7d09ba1e8278343e2b6976
This commit is contained in:
dan 2013-04-25 20:34:02 +00:00
parent da8caa0b2d
commit aa29c86e49
5 changed files with 504 additions and 44 deletions

View File

@ -640,8 +640,10 @@ static int fts3ExprParse(
}
pNot->eType = FTSQUERY_NOT;
pNot->pRight = p;
p->pParent = pNot;
if( pNotBranch ){
pNot->pLeft = pNotBranch;
pNotBranch->pParent = pNot;
}
pNotBranch = pNot;
p = pPrev;
@ -729,6 +731,7 @@ static int fts3ExprParse(
pIter = pIter->pLeft;
}
pIter->pLeft = pRet;
pRet->pParent = pIter;
pRet = pNotBranch;
}
}
@ -745,6 +748,223 @@ exprparse_out:
return rc;
}
/*
** Verify that no root-to-leaf path in the expression tree rooted at p
** contains more than nMaxDepth nodes. An empty tree (p==0) always passes.
**
** Returns SQLITE_OK if the tree fits within the limit, or SQLITE_ERROR
** if it is too deep.
*/
static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){
  int rcLeft;

  /* Nothing to check below an absent child. */
  if( p==0 ) return SQLITE_OK;

  /* A node exists here but the depth budget is already used up. */
  if( nMaxDepth==0 ) return SQLITE_ERROR;

  /* Check the left sub-tree first; only look right if the left is fine. */
  rcLeft = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1);
  if( rcLeft!=SQLITE_OK ) return rcLeft;
  return fts3ExprCheckDepth(p->pRight, nMaxDepth-1);
}
/*
** This function attempts to transform the expression tree at (*pp) to
** an equivalent but more balanced form. The tree is modified in place.
** If successful, SQLITE_OK is returned and (*pp) set to point to the
** new root expression node.
**
** nMaxDepth is the maximum allowable depth of the balanced sub-tree.
**
** Only trees whose root is an FTSQUERY_AND or FTSQUERY_OR node are
** restructured. For any other root type (and nMaxDepth!=0) the tree is
** returned unchanged.
**
** Otherwise, if an error occurs, an SQLite error code is returned,
** expression (*pp) is freed and (*pp) is set to 0:
**
**   SQLITE_ERROR - nMaxDepth is zero, or the leaves cannot be arranged
**                  within nMaxDepth levels.
**   SQLITE_NOMEM - the temporary apLeaf[] array could not be allocated.
**
** The implementation walks the leaves of the chain of eType nodes from
** left to right. Each leaf is detached, balanced recursively and then
** inserted into apLeaf[]. Slot apLeaf[i] is either empty or holds a
** sub-tree assembled from 2^i leaves; inserting into an occupied slot
** joins the two sub-trees under a recycled internal node and carries the
** result to slot i+1. The original internal nodes are recycled via the
** pFree list (linked through pParent), so no new nodes are allocated.
*/
static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){
  int rc = SQLITE_OK;             /* Return code */
  Fts3Expr *pRoot = *pp;          /* Initial root node */
  Fts3Expr *pFree = 0;            /* List of free nodes. Linked by pParent. */
  int eType = pRoot->eType;       /* Type of node in this tree */

  if( nMaxDepth==0 ){
    rc = SQLITE_ERROR;
  }

  if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){
    Fts3Expr **apLeaf;
    apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth);
    if( 0==apLeaf ){
      rc = SQLITE_NOMEM;
    }else{
      memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth);
    }

    if( rc==SQLITE_OK ){
      int i;
      Fts3Expr *p;

      /* Set $p to point to the left-most leaf in the tree of eType nodes. */
      for(p=pRoot; p->eType==eType; p=p->pLeft){
        assert( p->pParent==0 || p->pParent->pLeft==p );
        assert( p->pLeft && p->pRight );
      }

      /* This loop runs once for each leaf in the tree of eType nodes. */
      while( 1 ){
        int iLvl;
        Fts3Expr *pParent = p->pParent;     /* Current parent of p */

        /* Detach leaf p from the original tree. */
        assert( pParent==0 || pParent->pLeft==p );
        p->pParent = 0;
        if( pParent ){
          pParent->pLeft = 0;
        }else{
          pRoot = 0;
        }

        /* Balance the leaf itself. On failure the callee has already
        ** freed it and set p to 0. */
        rc = fts3ExprBalance(&p, nMaxDepth-1);
        if( rc!=SQLITE_OK ) break;

        /* Insert p into apLeaf[], carrying upwards while slots are
        ** occupied. */
        for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){
          if( apLeaf[iLvl]==0 ){
            apLeaf[iLvl] = p;
            p = 0;
          }else{
            assert( pFree );
            pFree->pLeft = apLeaf[iLvl];
            pFree->pRight = p;
            pFree->pLeft->pParent = pFree;
            pFree->pRight->pParent = pFree;

            p = pFree;
            pFree = pFree->pParent;
            p->pParent = 0;
            apLeaf[iLvl] = 0;
          }
        }

        /* If p is still set, every level was occupied: the tree cannot
        ** be represented within nMaxDepth levels. */
        if( p ){
          sqlite3Fts3ExprFree(p);
          rc = SQLITE_ERROR;
          break;
        }

        /* If that was the last leaf node, break out of the loop */
        if( pParent==0 ) break;

        /* Set $p to point to the next leaf in the tree of eType nodes */
        for(p=pParent->pRight; p->eType==eType; p=p->pLeft);

        /* Remove pParent from the original tree. */
        assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent );
        pParent->pRight->pParent = pParent->pParent;
        if( pParent->pParent ){
          pParent->pParent->pLeft = pParent->pRight;
        }else{
          assert( pParent==pRoot );
          pRoot = pParent->pRight;
        }

        /* Link pParent into the free node list. It will be used as an
        ** internal node of the new tree. */
        pParent->pParent = pFree;
        pFree = pParent;
      }

      if( rc==SQLITE_OK ){
        /* Join whatever sub-trees remain in apLeaf[], lowest level first,
        ** into a single tree using the remaining free nodes. */
        p = 0;
        for(i=0; i<nMaxDepth; i++){
          if( apLeaf[i] ){
            if( p==0 ){
              p = apLeaf[i];
              p->pParent = 0;
            }else{
              pFree->pRight = p;
              pFree->pLeft = apLeaf[i];
              pFree->pLeft->pParent = pFree;
              pFree->pRight->pParent = pFree;

              p = pFree;
              pFree = pFree->pParent;
              p->pParent = 0;
            }
          }
        }
        pRoot = p;
      }else{
        /* An error occurred. Delete the contents of the apLeaf[] array
        ** and pFree list. Everything else is cleaned up by the call to
        ** sqlite3Fts3ExprFree(pRoot) below. */
        Fts3Expr *pDel;
        for(i=0; i<nMaxDepth; i++){
          sqlite3Fts3ExprFree(apLeaf[i]);
        }
        /* Explicit comparison: the assignment inside the condition is
        ** intentional. */
        while( (pDel=pFree)!=0 ){
          pFree = pDel->pParent;
          sqlite3_free(pDel);
        }
      }

      assert( pFree==0 );
      sqlite3_free( apLeaf );
    }
  }

  if( rc!=SQLITE_OK ){
    sqlite3Fts3ExprFree(pRoot);
    pRoot = 0;
  }
  *pp = pRoot;
  return rc;
}
/*
** This function is similar to sqlite3Fts3ExprParse(), with the following
** differences:
**
**   1. It does not do expression rebalancing.
**   2. It does not check that the expression does not exceed the
**      maximum allowable depth.
**   3. Even if it fails, *ppExpr may still be set to point to an
**      expression tree. It should be deleted using sqlite3Fts3ExprFree()
**      in this case.
**
** If z is NULL, *ppExpr is set to 0 and SQLITE_OK is returned. If n is
** negative, the length of z is determined using strlen().
**
** SQLITE_ERROR is returned if parsing succeeds but the parenthesis in the
** query are not balanced; this is the case referred to by (3) above.
*/
static int fts3ExprParseUnbalanced(
  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
  int iLangid,                        /* Language id for tokenizer */
  char **azCol,                       /* Array of column names for fts3 table */
  int bFts4,                          /* True to allow FTS4-only syntax */
  int nCol,                           /* Number of entries in azCol[] */
  int iDefaultCol,                    /* Default column to query */
  const char *z, int n,               /* Text of MATCH query */
  Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
){
  int nParsed;
  int rc;
  ParseContext sParse;

  memset(&sParse, 0, sizeof(ParseContext));
  sParse.pTokenizer = pTokenizer;
  sParse.iLangid = iLangid;
  sParse.azCol = (const char **)azCol;
  sParse.nCol = nCol;
  sParse.iDefaultCol = iDefaultCol;
  sParse.bFts4 = bFts4;
  if( z==0 ){
    *ppExpr = 0;
    return SQLITE_OK;
  }
  if( n<0 ){
    n = (int)strlen(z);
  }
  rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
  assert( rc==SQLITE_OK || *ppExpr==0 );

  /* Check for mismatched parenthesis */
  if( rc==SQLITE_OK && sParse.nNest ){
    rc = SQLITE_ERROR;
  }

  return rc;
}
/*
** Parameters z and n contain a pointer to and length of a buffer containing
** an fts3 query expression, respectively. This function attempts to parse the
@ -779,29 +999,20 @@ int sqlite3Fts3ExprParse(
const char *z, int n, /* Text of MATCH query */
Fts3Expr **ppExpr /* OUT: Parsed query structure */
){
int nParsed;
int rc;
ParseContext sParse;
memset(&sParse, 0, sizeof(ParseContext));
sParse.pTokenizer = pTokenizer;
sParse.iLangid = iLangid;
sParse.azCol = (const char **)azCol;
sParse.nCol = nCol;
sParse.iDefaultCol = iDefaultCol;
sParse.bFts4 = bFts4;
if( z==0 ){
*ppExpr = 0;
return SQLITE_OK;
static const int MAX_EXPR_DEPTH = 12;
int rc = fts3ExprParseUnbalanced(
pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr
);
/* Rebalance the expression. And check that its depth does not exceed
** MAX_EXPR_DEPTH. */
if( rc==SQLITE_OK && *ppExpr ){
rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH);
if( rc==SQLITE_OK ){
rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH);
}
}
if( n<0 ){
n = (int)strlen(z);
}
rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
/* Check for mismatched parenthesis */
if( rc==SQLITE_OK && sParse.nNest ){
rc = SQLITE_ERROR;
if( rc!=SQLITE_OK ){
sqlite3Fts3ExprFree(*ppExpr);
*ppExpr = 0;
}
@ -810,16 +1021,40 @@ int sqlite3Fts3ExprParse(
}
/*
** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
** Free a single node of an expression tree.
*/
void sqlite3Fts3ExprFree(Fts3Expr *p){
if( p ){
assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
sqlite3Fts3ExprFree(p->pLeft);
sqlite3Fts3ExprFree(p->pRight);
sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
sqlite3_free(p->aMI);
sqlite3_free(p);
static void fts3FreeExprNode(Fts3Expr *p){
  /* Only FTSQUERY_PHRASE nodes may carry a phrase object. */
  assert( p->pPhrase==0 || p->eType==FTSQUERY_PHRASE );

  /* Release the resources hanging off the node, then the node itself.
  ** Child nodes (pLeft/pRight) are not touched here. */
  sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
  sqlite3_free(p->aMI);
  sqlite3_free(p);
}
/*
** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
**
** This function would be simpler if it recursively called itself. But
** that would mean passing a sufficiently large expression to ExprParse()
** could cause a stack overflow.
**
** pDel must be the root of its tree (pDel->pParent==0) or NULL, in which
** case this is a no-op. The traversal relies on the pParent pointer of
** every node in the tree being set correctly.
**
** Nodes are released in post-order (children before their parent). The
** node to visit next is always determined before the current node is
** freed, so the value of a freed pointer is never inspected.
*/
void sqlite3Fts3ExprFree(Fts3Expr *pDel){
  Fts3Expr *p;
  assert( pDel==0 || pDel->pParent==0 );

  /* Descend to the first node in post-order: follow the left child where
  ** there is one, otherwise the right child, until a leaf is reached. */
  for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){
    assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft );
  }

  while( p ){
    Fts3Expr *pParent = p->pParent;
    Fts3Expr *pNext = pParent;    /* Default: move up to the parent */

    if( pParent && p==pParent->pLeft && pParent->pRight ){
      /* p is a left child with a sibling. The sibling's sub-tree must be
      ** freed before the parent, so continue from its first leaf. */
      pNext = pParent->pRight;
      while( pNext->pLeft || pNext->pRight ){
        assert( pNext==pNext->pParent->pRight || pNext==pNext->pParent->pLeft );
        pNext = (pNext->pLeft ? pNext->pLeft : pNext->pRight);
      }
    }

    fts3FreeExprNode(p);
    p = pNext;
  }
}
@ -871,6 +1106,9 @@ static int queryTestTokenizer(
** the returned expression text and then freed using sqlite3_free().
*/
static char *exprToString(Fts3Expr *pExpr, char *zBuf){
if( pExpr==0 ){
return sqlite3_mprintf("");
}
switch( pExpr->eType ){
case FTSQUERY_PHRASE: {
Fts3Phrase *pPhrase = pExpr->pPhrase;
@ -978,10 +1216,19 @@ static void fts3ExprTest(
azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
}
rc = sqlite3Fts3ExprParse(
pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
);
if( sqlite3_user_data(context) ){
rc = sqlite3Fts3ExprParse(
pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
);
assert( rc==SQLITE_OK || pExpr==0 );
}else{
rc = fts3ExprParseUnbalanced(
pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
);
}
if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
sqlite3Fts3ExprFree(pExpr);
sqlite3_result_error(context, "Error parsing expression", -1);
}else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
sqlite3_result_error_nomem(context);
@ -1004,9 +1251,15 @@ exprtest_out:
** with database connection db.
*/
int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
return sqlite3_create_function(
int rc = sqlite3_create_function(
db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
);
if( rc==SQLITE_OK ){
rc = sqlite3_create_function(db, "fts3_exprtest_rebalance",
-1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0
);
}
return rc;
}
#endif

View File

@ -1,5 +1,5 @@
C Fix\sharmless\scompiler\swarnings.
D 2013-04-22T23:38:50.007
C Rebalance\sFTS\sexpressions\safter\sparsing\sto\slimit\srecursion\sduring\sevaluation.\sAvoid\srecursion\swhen\sdeleting\sFTS\sexpression\strees.\sEnforce\sa\slimit\son\sthe\sdepth\sof\san\sexpression\stree.
D 2013-04-25T20:34:02.667
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 87591ea5bf7d6ed521ad42d5bc69c124debe11a5
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -59,7 +59,7 @@ F ext/fts3/fts3.c 784aadfb4c2a217c3eb1feaecac924989f29728f
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h 352c8a83ee4c6a14ced1759a39dd890ab947cbe0
F ext/fts3/fts3_aux.c b02632f6dd0e375ce97870206d914ea6d8df5ccd
F ext/fts3/fts3_expr.c 6cb4410f87676ae633bd7923bbc78526cb839c4d
F ext/fts3/fts3_expr.c a01c0a2e00d0e848f7cdda3b4114fe03b560f59b
F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914
F ext/fts3/fts3_hash.h 39cf6874dc239d6b4e30479b1975fe5b22a3caaf
F ext/fts3/fts3_icu.c e319e108661147bcca8dd511cd562f33a1ba81b5
@ -494,6 +494,7 @@ F test/fts3drop.test 1b906e293d6773812587b3dc458cb9e8f3f0c297
F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851
F test/fts3expr.test 5e745b2b6348499d9ef8d59015de3182072c564c
F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a
F test/fts3expr3.test aed4e71aa40ac1dcba860d1c43e7e23aa812eb1c
F test/fts3fault.test cb72dccb0a3b9f730f16c5240f3fcb9303eb1660
F test/fts3fault2.test 3198eef2804deea7cac8403e771d9cbcb752d887
F test/fts3first.test dbdedd20914c8d539aa3206c9b34a23775644641
@ -670,7 +671,7 @@ F test/pageropt.test 6b8f6a123a5572c195ad4ae40f2987007923bbd6
F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0
F test/pcache.test 065aa286e722ab24f2e51792c1f093bf60656b16
F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025
F test/permutations.test d2918e9ee6d0008292e5a70e90eed23b0440ca88
F test/permutations.test 3d0bab9c49c1ec08b868059e30a3e1956f2162e2
F test/pragma.test 60d29cd3d8098a2c20bf4c072810f99e3bf2757a
F test/pragma2.test 3a55f82b954242c642f8342b17dffc8b47472947
F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552
@ -1054,7 +1055,10 @@ F tool/vdbe-compress.tcl f12c884766bd14277f4fcedcae07078011717381
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381
F tool/win/sqlite.vsix 97894c2790eda7b5bce3cc79cb2a8ec2fde9b3ac
P 49cfa14fceeef2d55b449eb927c283ce6f650c07
R 83c630a254d9cc209b10e1a83d85002b
U drh
Z a7a8588b07b6fce1e57eced049fe41f1
P 1a1cf5aa86734c832d845e07780262a178188d56
R 821db2ca4e9de1c38e42323dffba1c12
T *branch * fts3-expr-rebalance
T *sym-fts3-expr-rebalance *
T -sym-trunk *
U dan
Z 130034041e106884ea2e83109b05e05e

View File

@ -1 +1 @@
1a1cf5aa86734c832d845e07780262a178188d56
f968d43f80cc2f236e7d09ba1e8278343e2b6976

202
test/fts3expr3.test Normal file
View File

@ -0,0 +1,202 @@
# 2009 January 1
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the part of the FTS3 expression
# parser that rebalances large expressions.
#
# $Id: fts3expr2.test,v 1.2 2009/06/05 17:09:12 drh Exp $
#
# Load the common test harness and the fault-injection helpers.
set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/malloc_common.tcl
set ::testprefix fts3expr3
# If FTS3 is not available (SQLITE_ENABLE_FTS3 is not defined), omit this file.
ifcapable !fts3 {
finish_test
return
}
# NOTE(review): presumably switches the FTS3 query parser to the syntax that
# accepts parentheses - confirm. It is set back to 0 at the end of this file.
set sqlite_fts3_enable_parentheses 1
# Reduce an expression tree to its bare structure.
#
# A list whose first element is "PHRASE" becomes {P <elements 3..end>}.
# Any other list is treated as {OP LEFT RIGHT}; the operator is kept and
# both operands are reduced recursively.
proc strip_phrase_data {L} {
  set kind [lindex $L 0]
  if {$kind ne "PHRASE"} {
    set lhs [strip_phrase_data [lindex $L 1]]
    set rhs [strip_phrase_data [lindex $L 2]]
    return [list $kind $lhs $rhs]
  }
  return [list P [lrange $L 3 end]]
}
# Parse $expr with the fts3_exprtest_rebalance() SQL function, using the
# 'simple' tokenizer and columns a, b and c, and return the resulting tree
# with the phrase details stripped by strip_phrase_data.
proc test_fts3expr2 {expr} {
  set sql {SELECT fts3_exprtest_rebalance('simple', $expr, 'a', 'b', 'c')}
  set parsed [db one $sql]
  return [strip_phrase_data $parsed]
}
# Return the shape of a balanced AND tree with $nEntry leaves, each leaf
# written as the placeholder "xxx".
#
# The tree is built bottom-up: at each pass adjacent elements are paired
# under an AND node, and an unpaired trailing element is carried over to
# the next pass unchanged.
proc balanced_exprtree_structure {nEntry} {
  set level [list]
  for {set k 0} {$k < $nEntry} {incr k} {
    lappend level xxx
  }

  while {[llength $level] > 1} {
    set cnt [llength $level]
    set next [list]
    for {set k 0} {$k+1 < $cnt} {incr k 2} {
      set a [lindex $level $k]
      set b [lindex $level [expr {$k+1}]]
      lappend next [list AND $a $b]
    }
    if {$cnt % 2} {
      lappend next [lindex $level end]
    }
    set level $next
  }

  return [lindex $level 0]
}
# Return the expected stripped tree for a balanced AND of the phrases
# 1..$nEntry: the balanced structure with its placeholders replaced, left
# to right, by {P 1}, {P 2}, ... A single-entry tree is just "P 1".
proc balanced_and_tree {nEntry} {
  set tree [balanced_exprtree_structure $nEntry]
  if {$tree != "xxx"} {
    set n 0
    while {$n < $nEntry} {
      incr n
      regsub xxx $tree "{P $n}" tree
    }
    return $tree
  }
  return "P 1"
}
# Build the text of a random query containing $nEntry "xxx" placeholders
# joined by operator $op.
#
# Starting from a single placeholder, each step optionally wraps the query
# so far in parentheses (only if $bParen is true, with probability 1/4) and
# then adds one more placeholder on a randomly chosen side.
proc random_tree_structure {nEntry bParen op} {
  set tree xxx
  set remaining [expr {$nEntry - 1}]
  while {$remaining > 0} {
    incr remaining -1
    set wrap    [expr {int(rand()*4.0)}]
    set prepend [expr {int(rand()*2.0)}]
    if {$wrap==0 && $bParen} {
      set tree "($tree)"
    }
    if {$prepend} {
      set tree "xxx $op $tree"
    } else {
      set tree "$tree $op xxx"
    }
  }
  return $tree
}
# Return a random AND query over the terms 1..$nEntry, numbered left to
# right. Parentheses are inserted at random points only if $bParen is true.
proc random_and_query {nEntry {bParen 0}} {
  set q [random_tree_structure $nEntry $bParen AND]
  set n 0
  while {$n < $nEntry} {
    incr n
    regsub xxx $q $n q
  }
  return $q
}
# Return a random OR query over the terms 1..$nEntry, numbered left to
# right, with parentheses inserted at random points.
proc random_or_query {nEntry} {
  set q [random_tree_structure $nEntry 1 OR]
  set n 0
  while {$n < $nEntry} {
    incr n
    regsub xxx $q $n q
  }
  return $q
}
# Return a random AND query with $nEntry operands, each of which is a
# parenthesised random OR query over the terms 1..$nEntry.
proc random_andor_query {nEntry} {
  set q [random_tree_structure $nEntry 1 AND]
  for {set left $nEntry} {$left > 0} {incr left -1} {
    set sub [random_or_query $nEntry]
    regsub xxx $q "($sub)" q
  }
  return $q
}
# Return the expected stripped tree for random_andor_query: a balanced AND
# tree with $nEntry leaves, where every leaf is a balanced OR tree over the
# phrases 1..$nEntry.
proc balanced_andor_tree {nEntry} {
  # The leaf: a balanced tree of phrases with every AND turned into OR.
  set leaf "{[balanced_and_tree $nEntry]}"
  regsub -all AND $leaf OR leaf

  # The outer AND structure, with each placeholder replaced by the leaf.
  set outer [balanced_exprtree_structure $nEntry]
  regsub -all xxx $outer $leaf outer
  return $outer
}
# Test that queries like "1 AND 2 AND 3 AND 4..." are transformed to
# balanced trees by FTS.
#
for {set i 1} {$i < 100} {incr i} {
  do_test 1.$i {
    test_fts3expr2 [random_and_query $i]
  } [balanced_and_tree $i]
}

# Same again, except with parenthesis inserted at arbitrary points.
#
for {set i 1} {$i < 100} {incr i} {
  do_test 2.$i {
    test_fts3expr2 [random_and_query $i 1]
  } [balanced_and_tree $i]
}

# Now attempt to balance two AND trees joined by an OR.
#
for {set i 1} {$i < 100} {incr i} {
  do_test 3.$i {
    test_fts3expr2 "[random_and_query $i 1] OR [random_and_query $i 1]"
  } [list OR [balanced_and_tree $i] [balanced_and_tree $i]]
}

# Try trees of AND nodes with leaves that are themselves trees of OR nodes.
#
# These tests are named 3a.* (and the next group 3b.*) so that their names
# do not collide with the 3.* tests above.
#
for {set i 2} {$i < 32} {incr i} {
  do_test 3a.$i {
    test_fts3expr2 [random_andor_query $i]
  } [balanced_andor_tree $i]
}

# These exceed the depth limit.
#
for {set i 33} {$i < 40} {incr i} {
  do_test 3b.$i {
    list [catch {test_fts3expr2 [random_andor_query $i]} msg] $msg
  } {1 {Error parsing expression}}
}

# This also exceeds the depth limit.
#
do_test 4.1 {
  set q "1"
  for {set i 2} {$i < 5000} {incr i} {
    append q " AND $i"
  }
  list [catch {test_fts3expr2 $q} msg] $msg
} {1 {Error parsing expression}}
# Return the text of a complete binary query tree $nDepth levels deep in
# which the operator alternates between levels: OR where the remaining
# depth is odd, AND where it is even. Leaves are the placeholder "xxx".
proc create_toggle_tree {nDepth} {
  if {$nDepth == 0} { return xxx }
  set sub [expr {$nDepth - 1}]
  if {$nDepth % 2} {
    set op OR
  } else {
    set op AND
  }
  return "([create_toggle_tree $sub]) $op ([create_toggle_tree $sub])"
}
# A tree of alternating OR/AND levels, 17 levels deep, is expected to be
# rejected with a parse error.
do_test 4.2 {
list [catch {test_fts3expr2 [create_toggle_tree 17]} msg] $msg
} {1 {Error parsing expression}}
# Fault-injection test: parse a 12-operand AND-of-ORs query under the
# "oom-*" fault cases. The query and its expected balanced tree are built
# once, up front, so each run of the -body script parses the same query.
set query [random_andor_query 12]
set result [balanced_andor_tree 12]
do_faultsim_test fts3expr3-fault-1 -faults oom-* -body {
test_fts3expr2 $::query
} -test {
faultsim_test_result [list 0 $::result]
}
# Set the variable that was set to 1 near the top of this script back to 0.
set sqlite_fts3_enable_parentheses 0
finish_test

View File

@ -186,6 +186,7 @@ test_suite "fts3" -prefix "" -description {
fts3ak.test fts3al.test fts3am.test fts3an.test fts3ao.test
fts3atoken.test fts3b.test fts3c.test fts3cov.test fts3d.test
fts3defer.test fts3defer2.test fts3e.test fts3expr.test fts3expr2.test
fts3expr3.test
fts3near.test fts3query.test fts3shared.test fts3snippet.test
fts3sort.test
fts3fault.test fts3malloc.test fts3matchinfo.test