From 99ebad90e3582641eb388daf9e440cb020770c8d Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 13 Jun 2011 09:11:01 +0000 Subject: [PATCH] Fix a bug exposed by combining matchinfo(), NEAR and "ORDER BY rowid DESC". FossilOrigin-Name: 5f6b87f420f21749aa7c72e020c50aca74890086 --- Makefile.in | 3 +- ext/fts3/fts3.c | 7 +- ext/fts3/fts3_test.c | 249 +++++++++++++++++++++++++++++++++++++++++++ main.mk | 1 + manifest | 22 ++-- manifest.uuid | 2 +- src/tclsqlite.c | 8 ++ test/fts3auto.test | 201 ++++++++++++++++++++++++++++++++++ test/tester.tcl | 4 +- 9 files changed, 481 insertions(+), 16 deletions(-) create mode 100644 ext/fts3/fts3_test.c create mode 100644 test/fts3auto.test diff --git a/Makefile.in b/Makefile.in index b0871ad940..4a81e15664 100644 --- a/Makefile.in +++ b/Makefile.in @@ -379,7 +379,8 @@ TESTSRC = \ $(TOP)/src/test_vfs.c \ $(TOP)/src/test_wholenumber.c \ $(TOP)/src/test_wsd.c \ - $(TOP)/ext/fts3/fts3_term.c + $(TOP)/ext/fts3/fts3_term.c \ + $(TOP)/ext/fts3/fts3_test.c # Source code to the library files needed by the test fixture # diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 18a87e73ce..4b37c4e645 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -2727,8 +2727,10 @@ static int fts3RollbackMethod(sqlite3_vtab *pVtab){ ** same position list. */ static void fts3ReversePoslist(char *pStart, char **ppPoslist){ - char *p = &(*ppPoslist)[-3]; - char c = p[1]; + char *p = &(*ppPoslist)[-2]; + char c; + + while( p>pStart && (c=*p--)==0 ); while( p>pStart && (*p & 0x80) | c ){ c = *p--; } @@ -3422,6 +3424,7 @@ void sqlite3Fts3DoclistPrev( iDocid += (iMul * iDelta); pNext = pDocid; fts3PoslistCopy(0, &pDocid); + while( pDocid +#include +#include + +#define NM_MAX_TOKEN 12 + +typedef struct NearPhrase NearPhrase; +typedef struct NearDocument NearDocument; +typedef struct NearToken NearToken; + +struct NearDocument { + int nToken; /* Length of token in bytes */ + NearToken *aToken; /* Token array */ +}; + +struct NearToken { + int n; /* Length of token in bytes */ + const char *z; /* Pointer to token string */ +}; + +struct NearPhrase { + int nNear; /* Preceding NEAR value */ + int nToken; /* Number of tokens in this phrase */ + NearToken aToken[NM_MAX_TOKEN]; /* Array of tokens in this phrase */ +}; + +static int nm_phrase_match( + NearPhrase *p, + NearToken *aToken +){ + int ii; + + for(ii=0; iinToken; ii++){ + NearToken *pToken = &p->aToken[ii]; + if( pToken->n>0 && pToken->z[pToken->n-1]=='*' ){ + if( aToken[ii].n<(pToken->n-1) ) return 0; + if( memcmp(aToken[ii].z, pToken->z, pToken->n-1) ) return 0; + }else{ + if( aToken[ii].n!=pToken->n ) return 0; + if( memcmp(aToken[ii].z, pToken->z, pToken->n) ) return 0; + } + } + + return 1; +} + +static int nm_near_chain( + int iDir, /* Direction to iterate through aPhrase[] */ + NearDocument *pDoc, /* Document to match against */ + int iPos, /* Position at which iPhrase was found */ + int nPhrase, /* Size of phrase array */ + NearPhrase *aPhrase, /* Phrase array */ + int iPhrase /* Index of phrase found */ +){ + int iStart; + int iStop; + int ii; + int nNear; + int iPhrase2; + NearPhrase *p; + NearPhrase *pPrev; + + assert( iDir==1 || iDir==-1 ); + + if( iDir==1 ){ + if( (iPhrase+1)==nPhrase ) return 1; + nNear = aPhrase[iPhrase+1].nNear; + }else{ + if( iPhrase==0 ) return 1; + nNear = aPhrase[iPhrase].nNear; + } + pPrev = &aPhrase[iPhrase]; + iPhrase2 = iPhrase+iDir; + p = &aPhrase[iPhrase2]; + + iStart = iPos - nNear - p->nToken; + iStop = iPos + nNear + pPrev->nToken; + + if( iStart<0 ) iStart = 0; + if( iStop > pDoc->nToken - p->nToken ) iStop = pDoc->nToken - p->nToken; + + for(ii=iStart; ii<=iStop; ii++){ + if( nm_phrase_match(p, &pDoc->aToken[ii]) ){ + if( nm_near_chain(iDir, pDoc, ii, nPhrase, aPhrase, iPhrase2) ) return 1; + } + } + + return 0; +} + +static int nm_match_count( + NearDocument *pDoc, /* Document to match against */ + int nPhrase, /* Size of phrase array */ + NearPhrase *aPhrase, /* Phrase array */ + int iPhrase /* Index of phrase to count matches for */ +){ + int nOcc = 0; + int ii; + NearPhrase *p = &aPhrase[iPhrase]; + + for(ii=0; ii<(pDoc->nToken + 1 - p->nToken); ii++){ + if( nm_phrase_match(p, &pDoc->aToken[ii]) ){ + /* Test forward NEAR chain (i>iPhrase) */ + if( 0==nm_near_chain(1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue; + + /* Test reverse NEAR chain (iNM_MAX_TOKEN ){ + Tcl_AppendResult(interp, "Too many tokens in phrase", 0); + rc = TCL_ERROR; + goto near_match_out; + } + for(jj=0; jjz = Tcl_GetStringFromObj(apToken[jj], &pT->n); + } + aPhrase[ii].nToken = nToken; + } + for(ii=1; ii0)); + + near_match_out: + ckfree((char *)aPhrase); + ckfree((char *)doc.aToken); + return rc; +} + +int Sqlitetestfts3_Init(Tcl_Interp *interp){ + Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0); + return TCL_OK; +} + diff --git a/main.mk b/main.mk index f84e0dea8b..ec743be439 100644 --- a/main.mk +++ b/main.mk @@ -220,6 +220,7 @@ SRC += \ # Source code to the test files. # TESTSRC = \ + $(TOP)/ext/fts3/fts3_test.c \ $(TOP)/src/test1.c \ $(TOP)/src/test2.c \ $(TOP)/src/test3.c \ diff --git a/manifest b/manifest index 4398869eb3..c96551fae9 100644 --- a/manifest +++ b/manifest @@ -1,7 +1,7 @@ -C Fix\sproblems\sto\sdo\swith\susing\sboth\sOR\sand\sNEAR\soperators\sin\sa\ssingle\sexpression. -D 2011-06-09T10:48:02.352 +C Fix\sa\sbug\sexposed\sby\scombining\smatchinfo(),\sNEAR\sand\s"ORDER\sBY\srowid\sDESC". +D 2011-06-13T09:11:01.953 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f -F Makefile.in 11dcc00a8d0e5202def00e81732784fb0cc4fe1d +F Makefile.in c1d7a7f4fd8da6b1815032efca950e3d5125407e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 F Makefile.vxworks c85ec1d8597fe2f7bc225af12ac1666e21379151 F README cd04a36fbc7ea56932a4052d7d0b7f09f27c33d6 @@ -61,7 +61,7 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 5df3b5797522d3d17949ee12d5918d6d213b5114 +F ext/fts3/fts3.c e71dafb1f324358d12fd02ea12644d8c6cea577a F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe F ext/fts3/fts3Int.h a999cfbf605efec293a88519f74192f5204c84d6 F ext/fts3/fts3_aux.c baed9dab7fb4604ae8cafdb2d7700abe93beffbe @@ -72,6 +72,7 @@ F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295 F ext/fts3/fts3_porter.c d61cfd81fb0fd8fbcb25adcaee0ba671aefaa5c2 F ext/fts3/fts3_snippet.c 82e2c1e420c871c02f6e85ea438570118d7105c8 F ext/fts3/fts3_term.c 6c7f33ab732a2a0f281898685650e3a492e1e2f1 +F ext/fts3/fts3_test.c 9376cc865447e63c671f0f9ffd1a2c9a29678230 F ext/fts3/fts3_tokenizer.c 055f3dc7369585350b28db1ee0f3b214dca6724d F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d @@ -102,7 +103,7 @@ F ext/rtree/tkt3363.test 142ab96eded44a3615ec79fba98c7bde7d0f96de F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 -F main.mk 6111163d4e9720e4212ef288e967b4aa2c2ce379 +F main.mk 6de0d92dcae3d399a6bafaeb23b57f6ef0f41955 F mkdll.sh 7d09b23c05d56532e9d44a50868eb4b12ff4f74a F mkextu.sh 416f9b7089d80e5590a29692c9d9280a10dbad9f F mkextw.sh 4123480947681d9b434a5e7b1ee08135abe409ac @@ -185,7 +186,7 @@ F src/sqliteInt.h 6e58c558c57c8f44011736d5fa5295eb3130f9de F src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d F src/status.c 7ac64842c86cec2fc1a1d0e5c16d3beb8ad332bf F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e -F src/tclsqlite.c 501c9a200fd998a268be475be5858febc90b725b +F src/tclsqlite.c c83f5b4a15ed92cb35aa04320aa4a30b95071b2f F src/test1.c efca486a25fb894988e7a82e84579a4e57388a02 F src/test2.c 80d323d11e909cf0eb1b6fbb4ac22276483bcf31 F src/test3.c 124ff9735fb6bb7d41de180d6bac90e7b1509432 @@ -454,6 +455,7 @@ F test/fts3am.test 218aa6ba0dfc50c7c16b2022aac5c6be593d08d8 F test/fts3an.test a49ccadc07a2f7d646ec1b81bc09da2d85a85b18 F test/fts3ao.test b83f99f70e9eec85f27d75801a974b3f820e01f9 F test/fts3atoken.test 402ef2f7c2fb4b3d4fa0587df6441c1447e799b3 +F test/fts3auto.test 696a2d32dd64a03aa47818c26ea64f8f27e7eb07 F test/fts3aux1.test 0b02743955d56fc0d4d66236a26177bd1b726de0 F test/fts3b.test e93bbb653e52afde110ad53bbd793f14fe7a8984 F test/fts3c.test fc723a9cf10b397fdfc2b32e73c53c8b1ec02958 @@ -692,7 +694,7 @@ F test/tclsqlite.test 8c154101e704170c2be10f137a5499ac2c6da8d3 F test/tempdb.test 19d0f66e2e3eeffd68661a11c83ba5e6ace9128c F test/temptable.test f42121a0d29a62f00f93274464164177ab1cc24a F test/temptrigger.test b0273db072ce5f37cf19140ceb1f0d524bbe9f05 -F test/tester.tcl a791ee74cb6b8f8613079ccc018bf2c8b952a26c +F test/tester.tcl 76222602e59047c6ef119473c7a2ea7c6ee73d09 F test/thread001.test a3e6a7254d1cb057836cb3145b60c10bf5b7e60f F test/thread002.test afd20095e6e845b405df4f2c920cb93301ca69db F test/thread003.test b824d4f52b870ae39fc5bae4d8070eca73085dca @@ -943,7 +945,7 @@ F tool/split-sqlite3c.tcl d9be87f1c340285a3e081eb19b4a247981ed290c F tool/symbols.sh bc2a3709940d47c8ac8e0a1fdf17ec801f015a00 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings.sh 347d974d143cf132f953b565fbc03026f19fcb4d -P 3972a787df5ec253b99b148385655e7b68d851fa -R 9f88ec7fe13b9820fcb74aa4a46dd50c +P 4e8dd19eef04777d800977faf1859a405e396f30 +R 6b4754b974de210b10ad796c5876a1bc U dan -Z 78f71a9a33e1daee77ff848d72b67bf4 +Z f1c93614cce7c70636f14d06ee6a3496 diff --git a/manifest.uuid b/manifest.uuid index f91e3008ee..5c8deb0eac 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4e8dd19eef04777d800977faf1859a405e396f30 \ No newline at end of file +5f6b87f420f21749aa7c72e020c50aca74890086 \ No newline at end of file diff --git a/src/tclsqlite.c b/src/tclsqlite.c index 575651d7e5..49ff8bcd8d 100644 --- a/src/tclsqlite.c +++ b/src/tclsqlite.c @@ -3585,6 +3585,10 @@ static void init_all(Tcl_Interp *interp){ extern int Sqlitetestfuzzer_Init(Tcl_Interp*); extern int Sqlitetestwholenumber_Init(Tcl_Interp*); +#ifdef SQLITE_ENABLE_FTS3 + extern int Sqlitetestfts3_Init(Tcl_Interp *interp); +#endif + #ifdef SQLITE_ENABLE_ZIPVFS extern int Zipvfs_Init(Tcl_Interp*); Zipvfs_Init(interp); @@ -3625,6 +3629,10 @@ static void init_all(Tcl_Interp *interp){ Sqlitetestfuzzer_Init(interp); Sqlitetestwholenumber_Init(interp); +#ifdef SQLITE_ENABLE_FTS3 + Sqlitetestfts3_Init(interp); +#endif + Tcl_CreateObjCommand(interp,"load_testfixture_extensions",init_all_cmd,0,0); #ifdef SQLITE_SSE diff --git a/test/fts3auto.test b/test/fts3auto.test new file mode 100644 index 0000000000..fe87d9f2c7 --- /dev/null +++ b/test/fts3auto.test @@ -0,0 +1,201 @@ +# 2011 June 10 +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# If this build does not include FTS3, skip the tests in this file. +# +ifcapable !fts3 { finish_test ; return } +source $testdir/fts3_common.tcl +source $testdir/malloc_common.tcl + +set testprefix fts3rnd2 + +proc test_fts3_near_match {tn doc expr res} { + fts3_near_match $doc $expr -phrasecountvar p + uplevel do_test [list $tn] [list [list set {} $p]] [list $res] +} + +# Simple test cases for C routine [fts3_near_match]. +# +test_fts3_near_match 1.1.1 {a b c a b} a {2} +test_fts3_near_match 1.1.2 {a b c a b} {a 5 b 6 c} {2 2 1} +test_fts3_near_match 1.1.3 {a b c a b} {"a b"} {2} +test_fts3_near_match 1.1.4 {a b c a b} {"b c"} {1} +test_fts3_near_match 1.1.5 {a b c a b} {"c c"} {0} + +test_fts3_near_match 1.2.1 "a b c d e f g" {b 2 f} {0 0} +test_fts3_near_match 1.2.2 "a b c d e f g" {b 3 f} {1 1} +test_fts3_near_match 1.2.3 "a b c d e f g" {f 2 b} {0 0} +test_fts3_near_match 1.2.4 "a b c d e f g" {f 3 b} {1 1} +test_fts3_near_match 1.2.5 "a b c d e f g" {"a b" 2 "f g"} {0 0} +test_fts3_near_match 1.2.6 "a b c d e f g" {"a b" 3 "f g"} {1 1} + +set A "a b c d e f g h i j k l m n o p q r s t u v w x y z" +test_fts3_near_match 1.3.1 $A {"c d" 5 "i j" 1 "e f"} {0 0 0} +test_fts3_near_match 1.3.2 $A {"c d" 5 "i j" 2 "e f"} {1 1 1} + +proc mit {blob} { + set scan(littleEndian) i* + set scan(bigEndian) I* + binary scan $blob $scan($::tcl_platform(byteOrder)) r + return $r +} +db func mit mit + +proc fix_near_expr {expr} { + set out [list] + lappend out [lindex $expr 0] + foreach {a b} [lrange $expr 1 end] { + if {[string match -nocase near $a]} { set a 10 } + if {[string match -nocase near/* $a]} { set a [string range $a 5 end] } + lappend out $a + lappend out $b + } + return $out +} + +proc do_near_test {tn tbl expr} { + + set expr [fix_near_expr $expr] + + # Create the MATCH expression from $expr + # + set match [lindex $expr 0] + if {[llength $match]>1} { + set match "\"$match\"" + } + foreach {nNear phrase} [lrange $expr 1 end] { + if {[llength $phrase]>1} { + append match " NEAR/$nNear \"$phrase\"" + } else { + append match " NEAR/$nNear $phrase" + } + } + + # Calculate the expected results using [fts3_near_match]. The following + # loop populates the "hits" and "counts" arrays as follows: + # + # 1. For each document in the table that matches the NEAR expression, + # hits($docid) is set to 1. The set of docids that match the expression + # can therefore be found using [array names hits]. + # + # 2. For each column of each document in the table, counts($docid,$iCol) + # is set to the -phrasecountvar output. + # + set res [list] + catch { array unset hits } + db eval "SELECT docid, * FROM $tbl" d { + set iCol 0 + foreach col [lrange $d(*) 1 end] { + set docid $d(docid) + set hit [fts3_near_match $d($col) $expr -p counts($docid,$iCol)] + if {$hit} { set hits($docid) 1 } + incr iCol + } + } + set nPhrase [expr ([llength $expr]+1)/2] + set nCol $iCol + + # This block populates the nHit and nDoc arrays. For each phrase/column + # in the query/table, array elements are set as follows: + # + # nHit($iPhrase,$iCol) - Total number of hits for phrase $iPhrase in + # column $iCol. + # + # nDoc($iPhrase,$iCol) - Number of documents with at least one hit for + # phrase $iPhrase in column $iCol. + # + for {set iPhrase 0} {$iPhrase < $nPhrase} {incr iPhrase} { + for {set iCol 0} {$iCol < $nCol} {incr iCol} { + set nHit($iPhrase,$iCol) 0 + set nDoc($iPhrase,$iCol) 0 + } + } + foreach key [array names counts] { + set iCol [lindex [split $key ,] 1] + set iPhrase 0 + foreach c $counts($key) { + if {$c>0} { incr nHit($iPhrase,$iCol) 1 } + incr nDoc($iPhrase,$iCol) $c + incr iPhrase + } + } + + # Set up the aMatchinfo array. For each document, set aMatchinfo($docid) to + # contain the output of matchinfo('x') for the document. + # + foreach docid [array names hits] { + set mi [list] + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { + for {set iCol 0} {$iCol<$nCol} {incr iCol} { + lappend mi [lindex $counts($docid,$iCol) $iPhrase] + lappend mi $nDoc($iPhrase,$iCol) + lappend mi $nHit($iPhrase,$iCol) + } + } + set aMatchinfo($docid) $mi + } + + set matchinfo_asc [list] + foreach docid [lsort -integer -incr [array names aMatchinfo]] { + lappend matchinfo_asc $docid $aMatchinfo($docid) + } + set matchinfo_desc [list] + foreach docid [lsort -integer -decr [array names aMatchinfo]] { + lappend matchinfo_desc $docid $aMatchinfo($docid) + } + + set title "(\"$match\" -> [llength [array names hits]] rows)" + + do_execsql_test $tn$title.1 " + SELECT docid FROM $tbl WHERE $tbl MATCH '$match' ORDER BY docid ASC + " [lsort -integer -incr [array names hits]] + + do_execsql_test $tn$title.2 " + SELECT docid FROM $tbl WHERE $tbl MATCH '$match' ORDER BY docid DESC + " [lsort -integer -decr [array names hits]] + + do_execsql_test $tn$title.3 " + SELECT docid, mit(matchinfo($tbl, 'x')) FROM $tbl + WHERE $tbl MATCH '$match' ORDER BY docid DESC + " $matchinfo_desc + + do_execsql_test $tn$title.4 " + SELECT docid, mit(matchinfo($tbl, 'x')) FROM $tbl + WHERE $tbl MATCH '$match' ORDER BY docid ASC + " $matchinfo_asc +} + +do_test 2.1 { + execsql { CREATE VIRTUAL TABLE t1 USING fts3(a, b) } + for {set i 0} {$i<32} {incr i} { + set doc [list] + if {$i&0x01} {lappend doc one} + if {$i&0x02} {lappend doc two} + if {$i&0x04} {lappend doc three} + if {$i&0x08} {lappend doc four} + if {$i&0x10} {lappend doc five} + execsql { INSERT INTO t1 VALUES($doc, null) } + } +} {} +foreach {tn expr} { + 1 {one} + 2 {one NEAR/1 five} + 3 {t*} + 4 {t* NEAR/0 five} + 5 {o* NEAR/1 f*} + 6 {one NEAR five NEAR two NEAR four NEAR three} +} { + do_near_test 2.2.$tn t1 $expr +} + +finish_test + diff --git a/test/tester.tcl b/test/tester.tcl index 0b66c3d788..a53723b9a1 100644 --- a/test/tester.tcl +++ b/test/tester.tcl @@ -374,11 +374,11 @@ proc fix_testname {varname} { proc do_execsql_test {testname sql {result {}}} { fix_testname testname - uplevel do_test $testname [list "execsql {$sql}"] [list [list {*}$result]] + uplevel do_test [list $testname] [list "execsql {$sql}"] [list [list {*}$result]] } proc do_catchsql_test {testname sql result} { fix_testname testname - uplevel do_test $testname [list "catchsql {$sql}"] [list $result] + uplevel do_test [list $testname] [list "catchsql {$sql}"] [list $result] } proc do_eqp_test {name sql res} { uplevel do_execsql_test $name [list "EXPLAIN QUERY PLAN $sql"] [list $res]