Delete all fts3 index data when the table becomes empty. Previously,

deleting all rows from an fts3 table would leave a bunch of index data
describing the terms of the original data, plus deletions of those
terms, perhaps with some amount of it merged together so the deletions
knocked out the originals.  Even when all rows were deleted that
original data would hang out, though eventually it would mostly be
overwritten if new data contained the same set of terms. (CVS 5413)

FossilOrigin-Name: 8b872e426091d9ef108e52dbec0d968ed7452907
This commit is contained in:
shess 2008-07-14 20:43:15 +00:00
parent 65cef1affc
commit c2c66a030d
4 changed files with 196 additions and 9 deletions

View File

@ -1905,10 +1905,12 @@ typedef enum fulltext_statement {
CONTENT_SELECT_STMT,
CONTENT_UPDATE_STMT,
CONTENT_DELETE_STMT,
CONTENT_EXISTS_STMT,
BLOCK_INSERT_STMT,
BLOCK_SELECT_STMT,
BLOCK_DELETE_STMT,
BLOCK_DELETE_ALL_STMT,
SEGDIR_MAX_INDEX_STMT,
SEGDIR_SET_STMT,
@ -1917,6 +1919,7 @@ typedef enum fulltext_statement {
SEGDIR_DELETE_STMT,
SEGDIR_SELECT_SEGMENT_STMT,
SEGDIR_SELECT_ALL_STMT,
SEGDIR_DELETE_ALL_STMT,
MAX_STMT /* Always at end! */
} fulltext_statement;
@ -1931,11 +1934,13 @@ static const char *const fulltext_zStatement[MAX_STMT] = {
/* CONTENT_SELECT */ NULL, /* generated in contentSelectStatement() */
/* CONTENT_UPDATE */ NULL, /* generated in contentUpdateStatement() */
/* CONTENT_DELETE */ "delete from %_content where docid = ?",
/* CONTENT_EXISTS */ "select docid from %_content limit 1",
/* BLOCK_INSERT */
"insert into %_segments (blockid, block) values (null, ?)",
/* BLOCK_SELECT */ "select block from %_segments where blockid = ?",
/* BLOCK_DELETE */ "delete from %_segments where blockid between ? and ?",
/* BLOCK_DELETE_ALL */ "delete from %_segments",
/* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?",
/* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)",
@ -1956,7 +1961,7 @@ static const char *const fulltext_zStatement[MAX_STMT] = {
/* SEGDIR_SELECT_ALL */
"select start_block, leaves_end_block, root from %_segdir "
" order by level desc, idx asc",
/* SEGDIR_DELETE_ALL */ "delete from %_segdir",
};
/*
@ -2250,6 +2255,25 @@ static int content_delete(fulltext_vtab *v, sqlite_int64 iDocid){
return sql_single_step(s);
}
/* Probe %_content for at least one row.
**
** Result codes:
**   SQLITE_ROW   - the table holds at least one row
**   SQLITE_DONE  - the table is empty
**   otherwise    - the error reported while preparing or stepping
**                  the CONTENT_EXISTS statement
*/
static int content_exists(fulltext_vtab *v){
  sqlite3_stmt *pStmt;
  int rc;

  rc = sql_get_statement(v, CONTENT_EXISTS_STMT, &pStmt);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  rc = sqlite3_step(pStmt);
  if( rc!=SQLITE_ROW ){
    /* Either SQLITE_DONE (no rows) or a genuine error. */
    return rc;
  }

  /* The query is limited to one row, so a second step should finish
  ** the statement.  Running the statement to completion matters:
  ** leaving it mid-iteration would keep the table locked.
  */
  rc = sqlite3_step(pStmt);
  switch( rc ){
    case SQLITE_DONE:
      /* Exactly one row was seen, as expected. */
      return SQLITE_ROW;
    case SQLITE_ROW:
      /* A second row from a "limit 1" query is unexpected. */
      return SQLITE_ERROR;
    default:
      return rc;
  }
}
/* insert into %_segments values ([pData])
** returns assigned blockid in *piBlockid
*/
@ -2424,6 +2448,23 @@ static int segdir_delete(fulltext_vtab *v, int iLevel){
return sql_single_step(s);
}
/* Wipe the whole fts index by running the SEGDIR_DELETE_ALL statement
** followed by the BLOCK_DELETE_ALL statement.
**
** Returns SQLITE_OK when both statements succeed.  The first failure
** (preparing or stepping either statement) is returned immediately and
** the remaining work is not attempted.
*/
static int segdir_delete_all(fulltext_vtab *v){
  sqlite3_stmt *pStmt;
  int rc;

  /* Each stage only runs if everything before it returned SQLITE_OK. */
  rc = sql_get_statement(v, SEGDIR_DELETE_ALL_STMT, &pStmt);
  if( rc==SQLITE_OK ){
    rc = sql_single_step(pStmt);
  }
  if( rc==SQLITE_OK ){
    rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &pStmt);
  }
  if( rc==SQLITE_OK ){
    rc = sql_single_step(pStmt);
  }
  return rc;
}
/* TODO(shess) clearPendingTerms() is far down the file because
** writeZeroSegment() is far down the file because LeafWriter is far
** down the file. Consider refactoring the code to move the non-vtab
@ -6112,6 +6153,23 @@ static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
if( nArg<2 ){
rc = index_delete(v, sqlite3_value_int64(ppArg[0]));
if( rc==SQLITE_OK ){
/* If we just deleted the last row in the table, clear out the
** index data.
*/
rc = content_exists(v);
if( rc==SQLITE_ROW ){
rc = SQLITE_OK;
}else if( rc==SQLITE_DONE ){
/* Clear the pending terms so we don't flush a useless level-0
** segment when the transaction closes.
*/
rc = clearPendingTerms(v);
if( rc==SQLITE_OK ){
rc = segdir_delete_all(v);
}
}
}
} else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
/* An update:
* ppArg[0] = old rowid

View File

@ -1,5 +1,5 @@
C Add\sa\snew\stest\sscript\sto\sexercise\sthe\sdeadlock\savoidance\scode\sin\sthe\nbtree\smutex\slogic.\s(CVS\s5412)
D 2008-07-14T19:39:17
C Delete\sall\sfts3\sindex\sdata\sthe\stable\sbecomes\sempty.\s\sPreviously,\ndeleting\sall\srows\sfrom\san\sfts3\stable\swould\sleave\sa\sbunch\sof\sindex\sdata\ndescribing\sthe\sterms\sof\sthe\soriginal\sdata,\splus\sdeletions\sof\sthose\nterms,\sperhaps\swith\ssome\samount\sof\sit\smerged\stogether\sso\sthe\sdeletions\nknocked\sout\sthe\soriginals.\s\sEven\swhen\sall\srows\swere\sdeleted\sthat\noriginal\sdata\swould\shang\sout,\sthough\seventually\sit\swould\smostly\sbe\noverwritten\sif\snew\sdata\scontained\sthe\ssame\sset\sof\sterms.\s(CVS\s5413)
D 2008-07-14T20:43:15
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in a03f7cb4f7ad50bc53a788c6c544430e81f95de4
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@ -51,7 +51,7 @@ F ext/fts2/fts2_tokenizer1.c 8a545c232bdffafd117c4eeaf59789691909f26a
F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
F ext/fts3/README.tokenizers 226644a0eab97724e8de83061912e8bb248461b6
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c 192a65d39c2904873c13dc7a8e50b00cd190ec3a
F ext/fts3/fts3.c c4037314d324d900638665e802b0e97725a0bd07
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3_hash.c 83e7bb4042106b32811681dd2859b4577a7a6b35
F ext/fts3/fts3_hash.h 004b759e1602ff16dfa02fea3ca1c77336ad6798
@ -333,6 +333,7 @@ F test/fts3ao.test 0aa29dd4fc1c8d46b1f7cfe5926f7ac97551bea9
F test/fts3atoken.test 25c2070e1e8755d414bf9c8200427b277a9f99fa
F test/fts3b.test b3a25180a633873d37d86e1ccd00ed690d37237a
F test/fts3c.test 4c7ef29b37aca3e8ebb6a39b57910caa6506034e
F test/fts3d.test 12ad44b84e2b71b4217288747a9744201f779892
F test/fts3near.test 2d4dadcaac5025ab65bb87e66c45f39e92966194
F test/func.test 1a2476c57e34c79aeb6323d3e3700a8c7a1ee0af
F test/fuzz.test 62fc19dd36a427777fd671b569df07166548628a
@ -606,7 +607,7 @@ F tool/speedtest16.c c8a9c793df96db7e4933f0852abb7a03d48f2e81
F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 1dbced29de5f59ba2ebf877edcadf171540374d1
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
P 3dc72a46171020c62e6028d113b0e0f5ab05d159
R 6d734a63548e6f48975b58c9676b7c06
U drh
Z 77913ffe58a126c4bbfbbf4bc5ff271d
P 7d5e1c4375599a913d23e5954fa63c10ac9d7688
R 7766dad7431ce3dddf87a600080fac2f
U shess
Z ce089cda99d0ad780780b717294a1a22

View File

@ -1 +1 @@
7d5e1c4375599a913d23e5954fa63c10ac9d7688
8b872e426091d9ef108e52dbec0d968ed7452907

128
test/fts3d.test Normal file
View File

@ -0,0 +1,128 @@
# 2008 June 26
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The focus
# of this script is testing the FTS3 module's optimize() function.
#
# $Id: fts3d.test,v 1.1 2008/07/14 20:43:15 shess Exp $
#
# Load the shared test harness (tester.tcl) from the directory that
# contains this script.
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
# finish_test is still called so the harness sees a cleanly ended script.
ifcapable !fts3 {
finish_test
return
}
#*************************************************************************
# Probe to see if support for the FTS3 dump_* functions is compiled in.
# TODO(shess): Change main.mk to do the right thing and remove this test.
#
# A throwaway one-row table is created so that dump_terms() can be
# invoked.  The rest of the script is skipped when the call fails with
# the specific error captured in r below.
db eval {
DROP TABLE IF EXISTS t1;
CREATE VIRTUAL TABLE t1 USING fts3(c);
INSERT INTO t1 (docid, c) VALUES (1, 'x');
}
# s is the probe query; r is the catchsql result (error flag 1 plus
# message) that indicates dump_terms is not usable.
set s {SELECT dump_terms(t1, 1) FROM t1 LIMIT 1}
set r {1 {unable to use function dump_terms in the requested context}}
if {[catchsql $s]==$r} {
finish_test
return
}
#*************************************************************************
# Utility function to check for the expected terms in the segment
# level/index. _all version does same but for entire index.
#
# Arguments:
#   test   - test name prefix; the case is registered as "$test.terms"
#   level  - segment level passed to dump_terms()
#   index  - segment index passed to dump_terms()
#   terms  - expected dump_terms() output, compared as a single-element
#            result list
#
# The query always runs against table t1.
proc check_terms {test level index terms} {
# TODO(shess): Figure out why uplevel in do_test can't catch
# $level and $index directly.
# Workaround: copy the arguments into global variables so the SQL in
# the do_test body can reference them with a :: qualifier.
set ::level $level
set ::index $index
do_test $test.terms {
execsql {
SELECT dump_terms(t1, $::level, $::index) FROM t1 LIMIT 1;
}
} [list $terms]
}
# Whole-index counterpart of check_terms: calls dump_terms() with only
# the table argument (no level/index) and expects the result to be the
# single value given in terms.  The case is registered as "$test.terms"
# and always queries table t1.
proc check_terms_all {test terms} {
do_test $test.terms {
execsql {
SELECT dump_terms(t1) FROM t1 LIMIT 1;
}
} [list $terms]
}
# Utility function to check for the expected doclist for the term in
# segment level/index. _all version does same for entire index.
#
# Arguments:
#   test     - full test name handed to do_test
#   level    - segment level passed to dump_doclist()
#   index    - segment index passed to dump_doclist()
#   term     - term whose doclist is dumped
#   doclist  - expected dump_doclist() output, compared as a
#              single-element result list
#
# The query always runs against table t1.
proc check_doclist {test level index term doclist} {
# TODO(shess): Again, why can't the non-:: versions work?
# As in check_terms, the arguments are published as globals so the SQL
# text can reference them.
set ::term $term
set ::level $level
set ::index $index
do_test $test {
execsql {
SELECT dump_doclist(t1, $::term, $::level, $::index) FROM t1 LIMIT 1;
}
} [list $doclist]
}
# Whole-index counterpart of check_doclist: calls dump_doclist() with
# the table and term only (no level/index) and expects the result to be
# the single value given in doclist.  The term is passed to the SQL via
# the global ::term; the query always runs against table t1.
proc check_doclist_all {test term doclist} {
set ::term $term
do_test $test {
execsql {
SELECT dump_doclist(t1, $::term) FROM t1 LIMIT 1;
}
} [list $doclist]
}
#*************************************************************************
# Test results when all rows are deleted and one is added back.
# Previously older segments would continue to exist, but now the index
# should be dropped when the table is empty. The results should look
# exactly like we never added the earlier rows in the first place.
#
# Fixture: three documents are inserted, every row is deleted through a
# WHERE clause, and docid 1 is then inserted again with its original
# text.  The checks that follow therefore expect only that one document
# to be visible in the index.
db eval {
DROP TABLE IF EXISTS t1;
CREATE VIRTUAL TABLE t1 USING fts3(c);
INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
INSERT INTO t1 (docid, c) VALUES (2, 'That was a test');
INSERT INTO t1 (docid, c) VALUES (3, 'This is a test');
DELETE FROM t1 WHERE 1=1; -- Delete each row rather than dropping table.
INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
}
# Should be a single initial segment.
# The expected result is one t1_segdir row with level 0 and idx 0.
do_test fts3d-1.segments {
execsql {
SELECT level, idx FROM t1_segdir ORDER BY level, idx;
}
} {0 0}
# Only one matching row is expected.  The query also asks for "that"
# and "was", which occur only in the deleted docid 2.
# NOTE(review): the expected value reads as four groups of four integers,
# presumably column / query-term number / byte offset / byte length as
# described for offsets() in the FTS3 documentation - confirm.
do_test fts3d-1.matches {
execsql {
SELECT OFFSETS(t1) FROM t1
WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
}
} {{0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}}
# Whole-index view: only the four terms of the re-inserted document.
# NOTE(review): each doclist appears to encode docid 1, column 0 and the
# token position in brackets - confirm against dump_doclist().
check_terms_all fts3d-1.1 {a is test this}
check_doclist_all fts3d-1.1.1 a {[1 0[2]]}
check_doclist_all fts3d-1.1.2 is {[1 0[1]]}
check_doclist_all fts3d-1.1.3 test {[1 0[3]]}
check_doclist_all fts3d-1.1.4 this {[1 0[0]]}
# Same expectations when addressing the level 0, index 0 segment
# directly.
check_terms fts3d-1.2 0 0 {a is test this}
check_doclist fts3d-1.2.1 0 0 a {[1 0[2]]}
check_doclist fts3d-1.2.2 0 0 is {[1 0[1]]}
check_doclist fts3d-1.2.3 0 0 test {[1 0[3]]}
check_doclist fts3d-1.2.4 0 0 this {[1 0[0]]}
# TODO(shess): optimize() tests here.
finish_test