Re-use deleted rowids for new segments. This has a somewhat
surprising impact on performance, I believe because it keeps the index
smaller (by keeping rowids smaller), and also because it improves
locality in the table (deleting a row means we've already touched the
pages leading to that rowid). (CVS 3405)

FossilOrigin-Name: 2f5f6290c9ef99c7b060aecc4d996c976c50c9d7
This commit is contained in:
shess 2006-09-11 21:39:21 +00:00
parent 66b224cbb0
commit 4240240f12
3 changed files with 37 additions and 19 deletions

View File

@ -796,7 +796,7 @@ static const char *const fulltext_zStatement[MAX_STMT] = {
/* TERM_SELECT_ALL */
"select doclist from %_term where term = ? order by segment",
/* TERM_INSERT */
"insert into %_term (term, segment, doclist) values (?, ?, ?)",
"insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
/* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
/* TERM_DELETE */ "delete from %_term where rowid = ?",
};
@ -1036,21 +1036,34 @@ static int term_select_all(fulltext_vtab *v, const char *pTerm, int nTerm,
return SQLITE_OK;
}
/* insert into %_term (term, segment, doclist)
values ([pTerm], [iSegment], [doclist]) */
static int term_insert(fulltext_vtab *v, const char *pTerm, int nTerm,
/* insert into %_term (rowid, term, segment, doclist)
values ([piRowid], [pTerm], [iSegment], [doclist])
** Lets SQLite choose the rowid if piRowid is NULL, else uses *piRowid.
**
** NOTE(shess) piRowid is an IN parameter: it is a pointer only so that
** NULL can mean "no rowid specified" in addition to any int64 value.
** It is never used to pass data back to the caller.
*/
static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid,
const char *pTerm, int nTerm,
int iSegment, DocList *doclist){
sqlite3_stmt *s;
int rc = sql_get_statement(v, TERM_INSERT_STMT, &s);
if( rc!=SQLITE_OK ) return rc;
rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
if( piRowid==NULL ){
rc = sqlite3_bind_null(s, 1);
}else{
rc = sqlite3_bind_int64(s, 1, *piRowid);
}
if( rc!=SQLITE_OK ) return rc;
rc = sqlite3_bind_int(s, 2, iSegment);
rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC);
if( rc!=SQLITE_OK ) return rc;
rc = sqlite3_bind_blob(s, 3, doclist->pData, doclist->nData, SQLITE_STATIC);
rc = sqlite3_bind_int(s, 3, iSegment);
if( rc!=SQLITE_OK ) return rc;
rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC);
if( rc!=SQLITE_OK ) return rc;
return sql_single_step_statement(v, TERM_INSERT_STMT, &s);
@ -1931,7 +1944,7 @@ static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm,
docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0);
docListUpdate(&doclist, d);
/* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
rc = term_insert(v, pTerm, nTerm, iSegment, &doclist);
rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
goto err;
}
if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
@ -1953,19 +1966,24 @@ static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm,
** bucket, and put results in the next bucket.
*/
iSegment++;
while( (rc=term_insert(v, pTerm, nTerm, iSegment, &doclist))!=SQLITE_OK ){
while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment,
&doclist))!=SQLITE_OK ){
sqlite_int64 iSegmentRow;
DocList old;
int rc2;
/* Retain old error in case the term_insert() error was really an
** error rather than a bounced insert.
*/
rc2 = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &old);
rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old);
if( rc2!=SQLITE_ROW ) goto err;
rc = term_delete(v, iIndexRow);
rc = term_delete(v, iSegmentRow);
if( rc!=SQLITE_OK ) goto err;
/* Reusing lowest-number deleted row keeps the index smaller. */
if( iSegmentRow<iIndexRow ) iIndexRow = iSegmentRow;
/* doclist contains the newer data, so accumulate it over old.
** Then steal accumulated data for doclist.
*/

View File

@ -1,5 +1,5 @@
C Get\sVACUUM\sworking\swith\svirtual\stables.\s(CVS\s3404)
D 2006-09-11T11:13:27
C Re-use\sdeleted\srowids\sfor\snew\ssegments.\s\sThis\shas\sa\ssomewhat\nsurprising\simpact\son\sperformance,\sI\sbelieve\sbecause\sit\skeeps\sthe\sindex\nsmaller\s(by\skeeping\srowids\ssmaller),\sand\salso\sbecause\sit\simproves\nlocality\sin\sthe\stable\s(deleting\sa\srow\smeans\swe've\salready\stouched\sthe\npages\sleading\sto\sthat\srowid).\s(CVS\s3405)
D 2006-09-11T21:39:22
F Makefile.in cabd42d34340f49260bc2a7668c38eba8d4cfd99
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@ -21,7 +21,7 @@ F ext/README.txt 913a7bd3f4837ab14d7e063304181787658b14e1
F ext/fts1/README.txt 20ac73b006a70bcfd80069bdaf59214b6cf1db5e
F ext/fts1/ft_hash.c 3927bd880e65329bdc6f506555b228b28924921b
F ext/fts1/ft_hash.h 1a35e654a235c2c662d3ca0dfc3138ad60b8b7d5
F ext/fts1/fts1.c 022a985bafaecdd6d245ddfeba68f9d268fccd9d
F ext/fts1/fts1.c 5c5e362ec08487a0bdcf58b7467a28321eed8025
F ext/fts1/fts1.h fe8e8f38dd6d2d2645b9b0d6972e80985249575f
F ext/fts1/fts1_hash.c 3196cee866edbebb1c0521e21672e6d599965114
F ext/fts1/fts1_hash.h 957d378355ed29f672cd5add012ce8b088a5e089
@ -397,7 +397,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
P 227dc3feb537e6efd5b0c1d2dad40193db07d5aa
R c821cee55c3f4e8fdfcd092fa19a7e1a
U drh
Z ab0686f184c2e0785ce95f3b8f82b4c4
P d5ffef3870f06d2dd744ce9470d3c0e68062e804
R 4f28d2c19780234e639fe3db42d9de5e
U shess
Z 323322cfc0451e1239808887226bd556

View File

@ -1 +1 @@
d5ffef3870f06d2dd744ce9470d3c0e68062e804
2f5f6290c9ef99c7b060aecc4d996c976c50c9d7