Optimize "ORDER BY rowid/docid DESC/ASC" clauses on FTS tables.

FossilOrigin-Name: 13395121e3d17ab6581dc5f6736ea324321a374c
This commit is contained in:
dan 2011-05-04 12:52:59 +00:00
parent c6055c7374
commit 0f599faa29
7 changed files with 213 additions and 24 deletions

View File

@ -419,6 +419,28 @@ static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){
*pVal += iVal;
}
/*
** Step backwards over one varint in a delta-encoded list.
**
** On entry, *pp points to the first byte following a varint stored in the
** buffer that starts at pStart. This function moves *pp back to the first
** byte of that varint, reads its value with sqlite3Fts3GetVarint() and
** subtracts that value from *pVal. It is the reverse counterpart of
** fts3GetDeltaVarint(), which reads forwards and adds the value to *pVal.
**
** The caller must ensure that *pp is greater than pStart on entry.
*/
static void fts3GetReverseDeltaVarint(
  char **pp,                      /* IN/OUT: Current position in the list */
  char *pStart,                   /* First byte of the buffer */
  sqlite3_int64 *pVal             /* IN/OUT: Value the delta is taken from */
){
  sqlite3_int64 iVal;
  char *p = (*pp) - 1;

  /* Pointer p now points at the final byte of the varint we are
  ** interested in. So, unless the doclist is corrupt, the 0x80 bit is
  ** clear on that byte. Walk backwards for as long as the preceding byte
  ** has the 0x80 bit set (i.e. belongs to the same varint). The test
  ** "p>pStart" stops the scan at the start of the buffer without ever
  ** forming a pointer that lies before pStart. */
  while( p>pStart && (p[-1] & 0x80) ) p--;
  *pp = p;

  sqlite3Fts3GetVarint(p, &iVal);
  *pVal -= iVal;
}
/*
** As long as *pp has not reached its end (pEnd), then do the same
** as fts3GetDeltaVarint(): read a single varint and add it to *pVal.
@ -1094,6 +1116,22 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
pInfo->aConstraintUsage[iCons].argvIndex = 1;
pInfo->aConstraintUsage[iCons].omit = 1;
}
/* Regardless of the strategy selected, FTS can deliver rows in rowid (or
** docid) order. Both ascending and descending are possible.
**
** The ORDER BY clause is only reported as consumed when it consists of a
** single term on the rowid (iColumn<0) or on column p->nColumn+1. An
** ORDER BY on any other column is left for SQLite to sort, so the
** orderByConsumed flag must be set inside the column check, not after it.
*/
if( pInfo->nOrderBy==1 ){
  struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0];
  if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){
    if( pOrder->desc ){
      pInfo->idxStr = "DESC";
    }else{
      pInfo->idxStr = "ASC";
    }
    pInfo->orderByConsumed = 1;
  }
}
return SQLITE_OK;
}
@ -2998,12 +3036,20 @@ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
}
pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0);
}else{
if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){
pCsr->isEof = 1;
break;
if( pCsr->desc==0 ){
if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){
pCsr->isEof = 1;
break;
}
fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId);
}else{
fts3GetReverseDeltaVarint(&pCsr->pNextId,pCsr->aDoclist,&pCsr->iPrevId);
if( pCsr->pNextId<=pCsr->aDoclist ){
pCsr->isEof = 1;
break;
}
}
sqlite3_reset(pCsr->pStmt);
fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId);
pCsr->isRequireSeek = 1;
pCsr->isMatchinfoNeeded = 1;
}
@ -3036,8 +3082,8 @@ static int fts3FilterMethod(
sqlite3_value **apVal /* Arguments for the indexing scheme */
){
const char *azSql[] = {
"SELECT %s FROM %Q.'%q_content' AS x WHERE docid = ?", /* non-full-scan */
"SELECT %s FROM %Q.'%q_content' AS x ", /* full-scan */
"SELECT %s FROM %Q.'%q_content' AS x WHERE docid = ?", /* non-full-scan */
"SELECT %s FROM %Q.'%q_content' AS x ORDER BY docid %s", /* full-scan */
};
int rc; /* Return code */
char *zSql; /* SQL statement used to access %_content */
@ -3093,7 +3139,9 @@ static int fts3FilterMethod(
** row by docid.
*/
zSql = (char *)azSql[idxNum==FTS3_FULLSCAN_SEARCH];
zSql = sqlite3_mprintf(zSql, p->zReadExprlist, p->zDb, p->zName);
zSql = sqlite3_mprintf(
zSql, p->zReadExprlist, p->zDb, p->zName, (idxStr ? idxStr : "ASC")
);
if( !zSql ){
rc = SQLITE_NOMEM;
}else{
@ -3105,7 +3153,22 @@ static int fts3FilterMethod(
}
pCsr->eSearch = (i16)idxNum;
assert( pCsr->desc==0 );
if( rc!=SQLITE_OK ) return rc;
if( rc==SQLITE_OK && pCsr->nDoclist>0 && idxStr && idxStr[0]=='D' ){
sqlite3_int64 iDocid = 0;
char *csr = pCsr->aDoclist;
while( csr<&pCsr->aDoclist[pCsr->nDoclist] ){
fts3GetDeltaVarint(&csr, &iDocid);
}
pCsr->pNextId = csr;
pCsr->iPrevId = iDocid;
pCsr->desc = 1;
pCsr->isRequireSeek = 1;
pCsr->isMatchinfoNeeded = 1;
pCsr->eEvalmode = FTS3_EVAL_NEXT;
return SQLITE_OK;
}
return fts3NextMethod(pCursor);
}
@ -3264,6 +3327,7 @@ int sqlite3Fts3ExprLoadFtDoclist(
** stored in pExpr->aDoclist.
*/
char *sqlite3Fts3FindPositions(
Fts3Cursor *pCursor, /* Associate FTS3 cursor */
Fts3Expr *pExpr, /* Access this expressions doclist */
sqlite3_int64 iDocid, /* Docid associated with requested pos-list */
int iCol /* Column of requested pos-list */
@ -3273,7 +3337,7 @@ char *sqlite3Fts3FindPositions(
char *pEnd = &pExpr->aDoclist[pExpr->nDoclist];
char *pCsr;
if( pExpr->pCurrent==0 ){
if( pExpr->pCurrent==0 || pCursor->desc ){
pExpr->pCurrent = pExpr->aDoclist;
pExpr->iCurrent = 0;
pExpr->pCurrent += sqlite3Fts3GetVarint(pExpr->pCurrent,&pExpr->iCurrent);

View File

@ -171,6 +171,7 @@ struct Fts3Cursor {
char *pNextId; /* Pointer into the body of aDoclist */
char *aDoclist; /* List of docids for full-text queries */
int nDoclist; /* Size of buffer at aDoclist */
int desc; /* True to sort in descending order */
int eEvalmode; /* An FTS3_EVAL_XX constant */
int nRowAvg; /* Average size of database rows, in pages */
@ -353,7 +354,7 @@ int sqlite3Fts3GetVarint32(const char *, int *);
int sqlite3Fts3VarintLen(sqlite3_uint64);
void sqlite3Fts3Dequote(char *);
char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int);
char *sqlite3Fts3FindPositions(Fts3Cursor *, Fts3Expr *, sqlite3_int64, int);
int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *);
int sqlite3Fts3ExprLoadFtDoclist(Fts3Cursor *, Fts3Expr *, char **, int *);
int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int);

View File

@ -415,7 +415,7 @@ static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
pPhrase->nToken = pExpr->pPhrase->nToken;
pCsr = sqlite3Fts3FindPositions(pExpr, p->pCsr->iPrevId, p->iCol);
pCsr = sqlite3Fts3FindPositions(p->pCsr, pExpr, p->pCsr->iPrevId, p->iCol);
if( pCsr ){
int iFirst = 0;
pPhrase->pList = pCsr;
@ -888,7 +888,7 @@ static int fts3ExprLocalHitsCb(
if( pExpr->aDoclist ){
char *pCsr;
pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1);
pCsr = sqlite3Fts3FindPositions(p->pCursor, pExpr, p->pCursor->iPrevId, -1);
if( pCsr ){
fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0);
}
@ -1055,7 +1055,7 @@ static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
LcsIterator *pIter = &aIter[i];
nToken -= pIter->pExpr->pPhrase->nToken;
pIter->iPosOffset = nToken;
pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1);
pIter->pRead = sqlite3Fts3FindPositions(pCsr,pIter->pExpr,pCsr->iPrevId,-1);
if( pIter->pRead ){
pIter->iPos = pIter->iPosOffset;
fts3LcsIteratorAdvance(&aIter[i]);
@ -1408,6 +1408,7 @@ struct TermOffset {
};
struct TermOffsetCtx {
Fts3Cursor *pCsr;
int iCol; /* Column of table to populate aTerm for */
int iTerm;
sqlite3_int64 iDocid;
@ -1425,7 +1426,7 @@ static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
int iPos = 0; /* First position in position-list */
UNUSED_PARAMETER(iPhrase);
pList = sqlite3Fts3FindPositions(pExpr, p->iDocid, p->iCol);
pList = sqlite3Fts3FindPositions(p->pCsr, pExpr, p->iDocid, p->iCol);
nTerm = pExpr->pPhrase->nToken;
if( pList ){
fts3GetDeltaPosition(&pList, &iPos);
@ -1478,6 +1479,7 @@ void sqlite3Fts3Offsets(
goto offsets_out;
}
sCtx.iDocid = pCsr->iPrevId;
sCtx.pCsr = pCsr;
/* Loop through the table columns, appending offset information to
** string-buffer res for each column.

View File

@ -10,6 +10,9 @@
**
******************************************************************************
**
** This file is not part of the production FTS code. It is only used for
** testing. It contains a virtual table implementation that provides direct
** access to the full-text index of an FTS table.
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
@ -134,7 +137,18 @@ static int fts3termBestIndexMethod(
sqlite3_index_info *pInfo
){
UNUSED_PARAMETER(pVTab);
UNUSED_PARAMETER(pInfo);
/* This vtab naturally does "ORDER BY term, docid, col, pos". An ORDER BY
** clause is therefore already satisfied when its k-th term (counting from
** zero) sorts on column k in ascending order, for every term it contains.
*/
if( pInfo->nOrderBy ){
  int iTerm = 0;
  while( iTerm<pInfo->nOrderBy
      && pInfo->aOrderBy[iTerm].iColumn==iTerm
      && !pInfo->aOrderBy[iTerm].desc
  ){
    iTerm++;
  }
  /* Every term matched the natural order: no separate sort is needed. */
  if( iTerm==pInfo->nOrderBy ) pInfo->orderByConsumed = 1;
}
return SQLITE_OK;
}

View File

@ -1,5 +1,5 @@
C Have\sr-tree\svirtual\stables\ssupport\son-conflict\sclauses.
D 2011-04-28T18:46:46.861
C Optimize\s"ORDER\sBY\srowid/docid\sDESC/ASC"\sclauses\son\sFTS\stables.
D 2011-05-04T12:52:59.896
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 7a4d9524721d40ef9ee26f93f9bd6a51dba106f2
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -61,17 +61,17 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c ce37973c86f15711a020fa629d8f95cfd642ebc3
F ext/fts3/fts3.c 47e4f4da599e0ccd7b7fea08aaf2c77544e278e3
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h 945926ea4b6a686c3e9834640a252d9870b7191e
F ext/fts3/fts3Int.h 8c2ac39ee17362571c58ab2c4f0667324c31f738
F ext/fts3/fts3_aux.c 9e931f55eed8498dafe7bc1160f10cbb1a652fdf
F ext/fts3/fts3_expr.c 5f49e0deaf723724b08100bb3ff40aab02ad0c93
F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c
F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
F ext/fts3/fts3_porter.c d61cfd81fb0fd8fbcb25adcaee0ba671aefaa5c2
F ext/fts3/fts3_snippet.c e857c6a89d81d3b89df59f3b44b35c68d8ed5c62
F ext/fts3/fts3_term.c c1dbc904ab1c2d687b97643c671795456228ab22
F ext/fts3/fts3_snippet.c a4a3c7d2ab15ca9188e2d9b51a5e3927bf76580d
F ext/fts3/fts3_term.c f115f5a5f4298303d3b22fc6c524b8d565c7b950
F ext/fts3/fts3_tokenizer.c 055f3dc7369585350b28db1ee0f3b214dca6724d
F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d
@ -475,6 +475,7 @@ F test/fts3query.test ef79d31fdb355d094baec1c1b24b60439a1fb8a2
F test/fts3rnd.test 2b1a579be557ab8ac54a51b39caa4aa8043cc4ad
F test/fts3shared.test 8bb266521d7c5495c0ae522bb4d376ad5387d4a2
F test/fts3snippet.test a12f22a3ba4dd59751a57c79b031d07ab5f51ddd
F test/fts3sort.test b33d4650e8d4bff2dc00d14359a29cd1c25769f8
F test/fts4aa.test eadf85621c0a113d4c7ad3ccbf8441130e007b8f
F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca
F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f
@ -932,7 +933,7 @@ F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/split-sqlite3c.tcl d9be87f1c340285a3e081eb19b4a247981ed290c
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P abdd70ae0424ccadb7edaf16e970c78b5257d23c
R c8f4a330c2633adfb2862fa8f9efbab2
P 822ab52f1023b1c4973c806cc75454acd4e95fd0
R bb599d547c2452f152daafa893566415
U dan
Z e17a424fc4dea753d2cbffe0969a3e63
Z 8f8ca68f3fd8a51bab340e5fb86e9e14

View File

@ -1 +1 @@
822ab52f1023b1c4973c806cc75454acd4e95fd0
13395121e3d17ab6581dc5f6736ea324321a374c

107
test/fts3sort.test Normal file
View File

@ -0,0 +1,107 @@
# 2011 May 04
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS3 module.
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {
finish_test
return
}
set testprefix fts3sort
# Recreate test.db from scratch and fill FTS4 table t1 with $nRow rows.
#
# The random number generator is seeded with 0 before any rows are built.
# For each row one random integer below 1000000 is drawn, and each of its
# six decimal digits (least significant first) selects one word from the
# ten-entry vocabulary. The six words form the document that is inserted.
proc build_database {nRow} {
  db close
  forcedelete test.db
  sqlite3 db test.db

  set vocab [list aa ab ac ba bb bc ca cb cc da]
  expr {srand(0)}

  execsql { CREATE VIRTUAL TABLE t1 USING fts4 }
  for {set iRow 0} {$iRow < $nRow} {incr iRow} {
    set digits [expr {int(rand()*1000000)}]
    set doc [list]
    foreach place {1 10 100 1000 10000 100000} {
      lappend doc [lindex $vocab [expr {($digits/$place) % 10}]]
    }
    execsql { INSERT INTO t1 VALUES($doc) }
  }
}
# Number of rows that build_database is asked to insert into t1.
set nRow 1000
# Test 1.0: build the database and check that t1 holds the expected number
# of rows.
do_test 1.0 {
build_database $nRow
execsql { SELECT count(*) FROM t1 }
} $nRow
# For each numbered query below, run it with four different ORDER BY clauses
# appended (rowid ASC, rowid DESC, docid ASC, docid DESC) and once with no
# ORDER BY at all. Tests 1.N.1 to 1.N.4 check the order in which docids were
# returned. Tests 1.N.5 to 1.N.8 check that each ordered run produced the
# same rows as the run without an ORDER BY.
foreach {tn query} {
1 "SELECT docid, * FROM t1"
2 "SELECT docid, * FROM t1 WHERE t1 MATCH 'aa'"
3 "SELECT docid, * FROM t1 WHERE t1 MATCH 'a*'"
4 "SELECT docid, quote(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'a*'"
5 "SELECT docid, quote(matchinfo(t1,'pcnxals')) FROM t1 WHERE t1 MATCH 'b*'"
6 "SELECT docid, * FROM t1 WHERE t1 MATCH 'a* b* c*'"
7 "SELECT docid, * FROM t1 WHERE t1 MATCH 'aa OR da'"
8 "SELECT docid, * FROM t1 WHERE t1 MATCH 'nosuchtoken'"
9 "SELECT docid, snippet(t1) FROM t1 WHERE t1 MATCH 'aa OR da'"
} {
# Clear the result arrays and docid lists left by the previous query.
# Arrays A, B, C and D map a docid to the row returned for it; the
# matching *_list variables record docids in the order they were returned.
unset -nocomplain A B C D
set A_list [list]
set B_list [list]
set C_list [list]
set D_list [list]
# Run A: ORDER BY rowid ASC.
unset -nocomplain X
db eval "$query ORDER BY rowid ASC" X {
set A($X(docid)) [array get X]
lappend A_list $X(docid)
}
# Run B: ORDER BY rowid DESC.
unset -nocomplain X
db eval "$query ORDER BY rowid DESC" X {
set B($X(docid)) [array get X]
lappend B_list $X(docid)
}
# Run C: ORDER BY docid ASC.
unset -nocomplain X
db eval "$query ORDER BY docid ASC" X {
set C($X(docid)) [array get X]
lappend C_list $X(docid)
}
# Run D: ORDER BY docid DESC.
unset -nocomplain X
db eval "$query ORDER BY docid DESC" X {
set D($X(docid)) [array get X]
lappend D_list $X(docid)
}
# Each docid list must already be in the order that lsort would give it.
do_test 1.$tn.1 { set A_list } [lsort -integer -increasing $A_list]
do_test 1.$tn.2 { set B_list } [lsort -integer -decreasing $B_list]
do_test 1.$tn.3 { set C_list } [lsort -integer -increasing $C_list]
do_test 1.$tn.4 { set D_list } [lsort -integer -decreasing $D_list]
# Reference run: the same query with no ORDER BY clause.
unset -nocomplain DATA
unset -nocomplain X
db eval "$query" X {
set DATA($X(docid)) [array get X]
}
# Compare the contents of each ordered run against the reference run.
# Both sides are passed through lsort so that only content is compared.
do_test 1.$tn.5 { lsort [array get A] } [lsort [array get DATA]]
do_test 1.$tn.6 { lsort [array get B] } [lsort [array get DATA]]
do_test 1.$tn.7 { lsort [array get C] } [lsort [array get DATA]]
do_test 1.$tn.8 { lsort [array get D] } [lsort [array get DATA]]
}
finish_test