From d75e03df2b668d78adbd54390bbd6d12c7af56ba Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 3 Oct 2006 11:42:28 +0000 Subject: [PATCH] Add the option to omit offset information from posting lists in FTS1. (CVS 3456) FossilOrigin-Name: fdcea7b1ffd821f3f2b6d30997d3957f705a6d0c --- ext/fts1/fts1.c | 52 +++++++++++++++++++++++++++++++++++-------------- manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/ext/fts1/fts1.c b/ext/fts1/fts1.c index 24c6ec40d3..e62aec1cc5 100644 --- a/ext/fts1/fts1.c +++ b/ext/fts1/fts1.c @@ -179,6 +179,18 @@ typedef enum DocListType { DL_POSITIONS_OFFSETS /* docids + positions + offsets */ } DocListType; +/* +** By default, positions and offsets are stored in the doclists. +** To change this so that only positions are stored, compile +** with +** +** -DDL_DEFAULT=DL_POSITIONS +** +*/ +#ifndef DL_DEFAULT +# define DL_DEFAULT DL_POSITIONS_OFFSETS +#endif + typedef struct DocList { char *pData; int nData; @@ -273,18 +285,28 @@ static void docListAddPos(DocList *d, int iColumn, int iPos){ appendVarint(d, POS_END); /* add new terminator */ } -static void docListAddPosOffset(DocList *d, int iColumn, int iPos, - int iStartOffset, int iEndOffset){ - assert( d->iType==DL_POSITIONS_OFFSETS ); +/* +** Add a position and starting and ending offsets to a doclist. +** +** If the doclist is setup to handle only positions, then insert +** the position only and ignore the offsets. +*/ +static void docListAddPosOffset( + DocList *d, /* Doclist under construction */ + int iColumn, /* Column the inserted term is part of */ + int iPos, /* Position of the inserted term */ + int iStartOffset, /* Starting offset of inserted term */ + int iEndOffset /* Ending offset of inserted term */ +){ + assert( d->iType>=DL_POSITIONS ); addPos(d, iColumn, iPos); - - assert( iStartOffset>=d->iLastOffset ); - appendVarint(d, iStartOffset-d->iLastOffset); - d->iLastOffset = iStartOffset; - - assert( iEndOffset>=iStartOffset ); - appendVarint(d, iEndOffset-iStartOffset); - + if( d->iType==DL_POSITIONS_OFFSETS ){ + assert( iStartOffset>=d->iLastOffset ); + appendVarint(d, iStartOffset-d->iLastOffset); + d->iLastOffset = iStartOffset; + assert( iEndOffset>=iStartOffset ); + appendVarint(d, iEndOffset-iStartOffset); + } appendVarint(d, POS_END); /* add new terminator */ } @@ -1299,7 +1321,7 @@ static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm, if( rc!=SQLITE_ROW ) return rc; *rowid = sqlite3_column_int64(s, 0); - docListInit(out, DL_POSITIONS_OFFSETS, + docListInit(out, DL_DEFAULT, sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1)); /* We expect only one row. We must execute another sqlite3_step() @@ -1334,7 +1356,7 @@ static int term_select_all( rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC); if( rc!=SQLITE_OK ) return rc; - docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0); + docListInit(&doclist, DL_DEFAULT, 0, 0); /* TODO(shess) Handle schema and busy errors. */ while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){ @@ -2917,7 +2939,7 @@ static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid, p = fts1HashFind(terms, pToken, nTokenBytes); if( p==NULL ){ - p = docListNew(DL_POSITIONS_OFFSETS); + p = docListNew(DL_DEFAULT); docListAddDocid(p, iDocid); fts1HashInsert(terms, pToken, nTokenBytes, p); } @@ -2944,7 +2966,7 @@ static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm, rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist); if( rc==SQLITE_DONE ){ - docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0); + docListInit(&doclist, DL_DEFAULT, 0, 0); docListUpdate(&doclist, d); /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */ rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist); diff --git a/manifest b/manifest index 3122aa213b..fe803e17de 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Another\stypo\sin\sthe\sPorter\sstemmer\scheck-in.\s(CVS\s3455) -D 2006-10-01T20:41:03 +C Add\sthe\soption\sto\somit\soffset\sinformation\sfrom\sposting\slists\sin\sFTS1.\s(CVS\s3456) +D 2006-10-03T11:42:29 F Makefile.in cabd42d34340f49260bc2a7668c38eba8d4cfd99 F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 @@ -21,7 +21,7 @@ F ext/README.txt 913a7bd3f4837ab14d7e063304181787658b14e1 F ext/fts1/README.txt 20ac73b006a70bcfd80069bdaf59214b6cf1db5e F ext/fts1/ft_hash.c 3927bd880e65329bdc6f506555b228b28924921b F ext/fts1/ft_hash.h 1a35e654a235c2c662d3ca0dfc3138ad60b8b7d5 -F ext/fts1/fts1.c 4f6278a61dd57a2ead405188eb847b3f682850e3 +F ext/fts1/fts1.c 302d4fa00d5e3b1da0518c0b7f648d7cded900d0 F ext/fts1/fts1.h 6060b8f62c1d925ea8356cb1a6598073eb9159a6 F ext/fts1/fts1_hash.c 3196cee866edbebb1c0521e21672e6d599965114 F ext/fts1/fts1_hash.h 957d378355ed29f672cd5add012ce8b088a5e089 @@ -402,7 +402,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513 -P a7350bf86c41c428adbfe42dc9cbb3ecf0045c25 -R e8fc3b6778614d67cb9408a9cd7f6faa +P 6696bda11ccad9663b15206592116d638aa3ff0a +R c692c93a2339c5abd4c2802c5af8a551 U drh -Z f4756b3a2eb03f159a028654e46fb528 +Z 8d253c126c0d6e27101f1f52b6f86a2d diff --git a/manifest.uuid b/manifest.uuid index 09a7f3d20f..e2f2161b33 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -6696bda11ccad9663b15206592116d638aa3ff0a \ No newline at end of file +fdcea7b1ffd821f3f2b6d30997d3957f705a6d0c \ No newline at end of file