Merge the fts4-languageid branch with the trunk.

FossilOrigin-Name: 99a9073b5e411ce94f38ce49608baaa15de8b850
This commit is contained in:
dan 2012-03-05 15:33:32 +00:00
commit 3a1abde7c8
14 changed files with 955 additions and 137 deletions

View File

@ -465,6 +465,7 @@ static int fts3DisconnectMethod(sqlite3_vtab *pVtab){
sqlite3_free(p->zReadExprlist);
sqlite3_free(p->zWriteExprlist);
sqlite3_free(p->zContentTbl);
sqlite3_free(p->zLanguageid);
/* Invoke the tokenizer destructor to free the tokenizer. */
p->pTokenizer->pModule->xDestroy(p->pTokenizer);
@ -541,7 +542,9 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){
int rc; /* Return code */
char *zSql; /* SQL statement passed to declare_vtab() */
char *zCols; /* List of user defined columns */
const char *zLanguageid;
zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid");
sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
/* Create a list of user columns for the virtual table */
@ -552,7 +555,8 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){
/* Create the whole "CREATE TABLE" statement to pass to SQLite */
zSql = sqlite3_mprintf(
"CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName
"CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)",
zCols, p->zName, zLanguageid
);
if( !zCols || !zSql ){
rc = SQLITE_NOMEM;
@ -581,6 +585,7 @@ static int fts3CreateTables(Fts3Table *p){
sqlite3 *db = p->db; /* The database connection */
if( p->zContentTbl==0 ){
const char *zLanguageid = p->zLanguageid;
char *zContentCols; /* Columns of %_content table */
/* Create a list of user columns for the content table */
@ -589,6 +594,9 @@ static int fts3CreateTables(Fts3Table *p){
char *z = p->azColumn[i];
zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
}
if( zLanguageid && zContentCols ){
zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid);
}
if( zContentCols==0 ) rc = SQLITE_NOMEM;
/* Create the content table */
@ -788,14 +796,20 @@ static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){
for(i=0; i<p->nColumn; i++){
fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
}
if( p->zLanguageid ){
fts3Appendf(pRc, &zRet, ", x.%Q", "langid");
}
sqlite3_free(zFree);
}else{
fts3Appendf(pRc, &zRet, "rowid");
for(i=0; i<p->nColumn; i++){
fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
}
if( p->zLanguageid ){
fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid);
}
}
fts3Appendf(pRc, &zRet, "FROM '%q'.'%q%s' AS x",
fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x",
p->zDb,
(p->zContentTbl ? p->zContentTbl : p->zName),
(p->zContentTbl ? "" : "_content")
@ -838,6 +852,9 @@ static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){
for(i=0; i<p->nColumn; i++){
fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
}
if( p->zLanguageid ){
fts3Appendf(pRc, &zRet, ", ?");
}
sqlite3_free(zFree);
return zRet;
}
@ -1053,6 +1070,7 @@ static int fts3InitVtab(
char *zCompress = 0; /* compress=? parameter (or NULL) */
char *zUncompress = 0; /* uncompress=? parameter (or NULL) */
char *zContent = 0; /* content=? parameter (or NULL) */
char *zLanguageid = 0; /* languageid=? parameter (or NULL) */
assert( strlen(argv[0])==4 );
assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
@ -1102,7 +1120,8 @@ static int fts3InitVtab(
{ "compress", 8 }, /* 2 -> COMPRESS */
{ "uncompress", 10 }, /* 3 -> UNCOMPRESS */
{ "order", 5 }, /* 4 -> ORDER */
{ "content", 7 } /* 5 -> CONTENT */
{ "content", 7 }, /* 5 -> CONTENT */
{ "languageid", 10 } /* 6 -> LANGUAGEID */
};
int iOpt;
@ -1156,12 +1175,18 @@ static int fts3InitVtab(
bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
break;
default: /* CONTENT */
assert( iOpt==5 );
sqlite3_free(zUncompress);
case 5: /* CONTENT */
sqlite3_free(zContent);
zContent = zVal;
zVal = 0;
break;
case 6: /* LANGUAGEID */
assert( iOpt==6 );
sqlite3_free(zLanguageid);
zLanguageid = zVal;
zVal = 0;
break;
}
}
sqlite3_free(zVal);
@ -1191,8 +1216,20 @@ static int fts3InitVtab(
sqlite3_free((void*)aCol);
aCol = 0;
rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString);
/* If a languageid= option was specified, remove the language id
** column from the aCol[] array. */
if( rc==SQLITE_OK && zLanguageid ){
int j;
for(j=0; j<nCol; j++){
if( sqlite3_stricmp(zLanguageid, aCol[j])==0 ){
memmove(&aCol[j], &aCol[j+1], (nCol-j) * sizeof(aCol[0]));
nCol--;
break;
}
}
}
}
assert( rc!=SQLITE_OK || nCol>0 );
}
if( rc!=SQLITE_OK ) goto fts3_init_out;
@ -1239,7 +1276,9 @@ static int fts3InitVtab(
p->bHasStat = isFts4;
p->bDescIdx = bDescIdx;
p->zContentTbl = zContent;
p->zLanguageid = zLanguageid;
zContent = 0;
zLanguageid = 0;
TESTONLY( p->inTransaction = -1 );
TESTONLY( p->mxSavepoint = -1 );
@ -1302,6 +1341,7 @@ fts3_init_out:
sqlite3_free(zCompress);
sqlite3_free(zUncompress);
sqlite3_free(zContent);
sqlite3_free(zLanguageid);
sqlite3_free((void *)aCol);
if( rc!=SQLITE_OK ){
if( p ){
@ -1353,6 +1393,7 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
Fts3Table *p = (Fts3Table *)pVTab;
int i; /* Iterator variable */
int iCons = -1; /* Index of constraint to use */
int iLangidCons = -1; /* Index of langid=x constraint, if present */
/* By default use a full table scan. This is an expensive option,
** so search through the constraints to see if a more efficient
@ -1365,7 +1406,8 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
if( pCons->usable==0 ) continue;
/* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */
if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
if( iCons<0
&& pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
&& (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 )
){
pInfo->idxNum = FTS3_DOCID_SEARCH;
@ -1388,7 +1430,13 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;
pInfo->estimatedCost = 2.0;
iCons = i;
break;
}
/* Equality constraint on the langid column */
if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
&& pCons->iColumn==p->nColumn + 2
){
iLangidCons = i;
}
}
@ -1396,6 +1444,9 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
pInfo->aConstraintUsage[iCons].argvIndex = 1;
pInfo->aConstraintUsage[iCons].omit = 1;
}
if( iLangidCons>=0 ){
pInfo->aConstraintUsage[iLangidCons].argvIndex = 2;
}
/* Regardless of the strategy selected, FTS can deliver rows in rowid (or
** docid) order. Both ascending and descending are possible.
@ -2545,6 +2596,7 @@ static int fts3SegReaderCursorAppend(
*/
static int fts3SegReaderCursor(
Fts3Table *p, /* FTS3 table handle */
int iLangid, /* Language id */
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
int iLevel, /* Level of segments to scan */
const char *zTerm, /* Term to query for */
@ -2573,7 +2625,7 @@ static int fts3SegReaderCursor(
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
if( rc==SQLITE_OK ){
rc = sqlite3Fts3AllSegdirs(p, iIndex, iLevel, &pStmt);
rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt);
}
while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
@ -2618,6 +2670,7 @@ static int fts3SegReaderCursor(
*/
int sqlite3Fts3SegReaderCursor(
Fts3Table *p, /* FTS3 table handle */
int iLangid,
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
int iLevel, /* Level of segments to scan */
const char *zTerm, /* Term to query for */
@ -2642,7 +2695,7 @@ int sqlite3Fts3SegReaderCursor(
memset(pCsr, 0, sizeof(Fts3MultiSegReader));
return fts3SegReaderCursor(
p, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
);
}
@ -2654,11 +2707,14 @@ int sqlite3Fts3SegReaderCursor(
*/
static int fts3SegReaderCursorAddZero(
Fts3Table *p, /* FTS virtual table handle */
int iLangid,
const char *zTerm, /* Term to scan doclist of */
int nTerm, /* Number of bytes in zTerm */
Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */
){
return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr);
return fts3SegReaderCursor(p,
iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr
);
}
/*
@ -2694,8 +2750,9 @@ static int fts3TermSegReaderCursor(
for(i=1; bFound==0 && i<p->nIndex; i++){
if( p->aIndex[i].nPrefix==nTerm ){
bFound = 1;
rc = sqlite3Fts3SegReaderCursor(
p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr);
rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr
);
pSegcsr->bLookup = 1;
}
}
@ -2703,19 +2760,21 @@ static int fts3TermSegReaderCursor(
for(i=1; bFound==0 && i<p->nIndex; i++){
if( p->aIndex[i].nPrefix==nTerm+1 ){
bFound = 1;
rc = sqlite3Fts3SegReaderCursor(
p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
);
if( rc==SQLITE_OK ){
rc = fts3SegReaderCursorAddZero(p, zTerm, nTerm, pSegcsr);
rc = fts3SegReaderCursorAddZero(
p, pCsr->iLangid, zTerm, nTerm, pSegcsr
);
}
}
}
}
if( bFound==0 ){
rc = sqlite3Fts3SegReaderCursor(
p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
);
pSegcsr->bLookup = !isPrefix;
}
@ -2870,7 +2929,7 @@ static int fts3FilterMethod(
UNUSED_PARAMETER(nVal);
assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
assert( nVal==0 || nVal==1 );
assert( nVal==0 || nVal==1 || nVal==2 );
assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) );
assert( p->pSegments==0 );
@ -2895,8 +2954,11 @@ static int fts3FilterMethod(
return SQLITE_NOMEM;
}
rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->bHasStat,
p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
pCsr->iLangid = 0;
if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);
rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
p->azColumn, p->bHasStat, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
);
if( rc!=SQLITE_OK ){
if( rc==SQLITE_ERROR ){
@ -2967,10 +3029,17 @@ static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
/*
** This is the xColumn method, called by SQLite to request a value from
** the row that the supplied cursor currently points to.
**
** If:
**
** (iCol < p->nColumn) -> The value of the iCol'th user column.
** (iCol == p->nColumn) -> Magic column with the same name as the table.
** (iCol == p->nColumn+1) -> Docid column
** (iCol == p->nColumn+2) -> Langid column
*/
static int fts3ColumnMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
sqlite3_context *pContext, /* Context for sqlite3_result_xxx() calls */
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
int iCol /* Index of column to read value from */
){
int rc = SQLITE_OK; /* Return Code */
@ -2978,22 +3047,34 @@ static int fts3ColumnMethod(
Fts3Table *p = (Fts3Table *)pCursor->pVtab;
/* The column value supplied by SQLite must be in range. */
assert( iCol>=0 && iCol<=p->nColumn+1 );
assert( iCol>=0 && iCol<=p->nColumn+2 );
if( iCol==p->nColumn+1 ){
/* This call is a request for the "docid" column. Since "docid" is an
** alias for "rowid", use the xRowid() method to obtain the value.
*/
sqlite3_result_int64(pContext, pCsr->iPrevId);
sqlite3_result_int64(pCtx, pCsr->iPrevId);
}else if( iCol==p->nColumn ){
/* The extra column whose name is the same as the table.
** Return a blob which is a pointer to the cursor.
*/
sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
** Return a blob which is a pointer to the cursor. */
sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
}else if( iCol==p->nColumn+2 && pCsr->pExpr ){
sqlite3_result_int64(pCtx, pCsr->iLangid);
}else{
/* The requested column is either a user column (one that contains
** indexed data), or the language-id column. */
rc = fts3CursorSeek(0, pCsr);
if( rc==SQLITE_OK && sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1));
if( rc==SQLITE_OK ){
if( iCol==p->nColumn+2 ){
int iLangid = 0;
if( p->zLanguageid ){
iLangid = sqlite3_column_int(pCsr->pStmt, p->nColumn+1);
}
sqlite3_result_int(pCtx, iLangid);
}else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
}
}
}

View File

@ -192,11 +192,12 @@ struct Fts3Table {
char **azColumn; /* column names. malloced */
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
char *zContentTbl; /* content=xxx option, or NULL */
char *zLanguageid; /* languageid=xxx option, or NULL */
/* Precompiled statements used by the implementation. Each of these
** statements is run and reset within a single virtual table API call.
*/
sqlite3_stmt *aStmt[27];
sqlite3_stmt *aStmt[28];
char *zReadExprlist;
char *zWriteExprlist;
@ -211,12 +212,12 @@ struct Fts3Table {
/* TODO: Fix the first paragraph of this comment.
**
** The following hash table is used to buffer pending index updates during
** transactions. Variable nPendingData estimates the memory size of the
** pending data, including hash table overhead, but not malloc overhead.
** When nPendingData exceeds nMaxPendingData, the buffer is flushed
** automatically. Variable iPrevDocid is the docid of the most recently
** inserted record.
** The following array of hash tables is used to buffer pending index
** updates during transactions. Variable nPendingData estimates the memory
** size of the pending data, including hash table overhead, not including
** malloc overhead. When nPendingData exceeds nMaxPendingData, the buffer
** is flushed automatically. Variable iPrevDocid is the docid of the most
** recently inserted record.
**
** A single FTS4 table may have multiple full-text indexes. For each index
** there is an entry in the aIndex[] array. Index 0 is an index of all the
@ -231,12 +232,13 @@ struct Fts3Table {
int nMaxPendingData; /* Max pending data before flush to disk */
int nPendingData; /* Current bytes of pending data */
sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */
int iPrevLangid; /* Langid of recently inserted document */
#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
/* State variables used for validating that the transaction control
** methods of the virtual table are called at appropriate times. These
** values do not contribution to the FTS computation; they are used for
** verifying the SQLite core.
** values do not contribute to FTS functionality; they are used for
** verifying the operation of the SQLite core.
*/
int inTransaction; /* True after xBegin but before xCommit/xRollback */
int mxSavepoint; /* Largest valid xSavepoint integer */
@ -255,6 +257,7 @@ struct Fts3Cursor {
u8 isRequireSeek; /* True if must seek pStmt to %_content row */
sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
Fts3Expr *pExpr; /* Parsed MATCH query string */
int iLangid; /* Language being queried for */
int nPhrase; /* Number of matchable phrases in query */
Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */
sqlite3_int64 iPrevId; /* Previous id read from aDoclist */
@ -406,7 +409,7 @@ int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64,
int sqlite3Fts3SegReaderPending(
Fts3Table*,int,const char*,int,int,Fts3SegReader**);
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, sqlite3_stmt **);
int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **);
int sqlite3Fts3ReadLock(Fts3Table *);
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
@ -427,8 +430,8 @@ int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*);
int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);
int sqlite3Fts3SegReaderCursor(
Fts3Table *, int, int, const char *, int, int, int, Fts3MultiSegReader *);
int sqlite3Fts3SegReaderCursor(Fts3Table *,
int, int, int, const char *, int, int, int, Fts3MultiSegReader *);
/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
#define FTS3_SEGMENT_REQUIRE_POS 0x00000001
@ -495,7 +498,7 @@ void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);
/* fts3_expr.c */
int sqlite3Fts3ExprParse(sqlite3_tokenizer *,
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
char **, int, int, int, const char *, int, Fts3Expr **
);
void sqlite3Fts3ExprFree(Fts3Expr *);
@ -504,6 +507,10 @@ int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
int sqlite3Fts3InitTerm(sqlite3 *db);
#endif
int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int,
sqlite3_tokenizer_cursor **
);
/* fts3_aux.c */
int sqlite3Fts3InitAux(sqlite3 *db);

View File

@ -376,7 +376,7 @@ static int fts3auxFilterMethod(
if( pCsr->zStop==0 ) return SQLITE_NOMEM;
}
rc = sqlite3Fts3SegReaderCursor(pFts3, 0, FTS3_SEGCURSOR_ALL,
rc = sqlite3Fts3SegReaderCursor(pFts3, 0, 0, FTS3_SEGCURSOR_ALL,
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
);
if( rc==SQLITE_OK ){

View File

@ -92,6 +92,7 @@ int sqlite3_fts3_enable_parentheses = 0;
typedef struct ParseContext ParseContext;
struct ParseContext {
sqlite3_tokenizer *pTokenizer; /* Tokenizer module */
int iLangid; /* Language id used with tokenizer */
const char **azCol; /* Array of column names for fts3 table */
int bFts4; /* True to allow FTS4-only syntax */
int nCol; /* Number of entries in azCol[] */
@ -127,6 +128,33 @@ static void *fts3MallocZero(int nByte){
return pRet;
}
int sqlite3Fts3OpenTokenizer(
sqlite3_tokenizer *pTokenizer,
int iLangid,
const char *z,
int n,
sqlite3_tokenizer_cursor **ppCsr
){
sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
sqlite3_tokenizer_cursor *pCsr = 0;
int rc;
rc = pModule->xOpen(pTokenizer, z, n, &pCsr);
assert( rc==SQLITE_OK || pCsr==0 );
if( rc==SQLITE_OK ){
pCsr->pTokenizer = pTokenizer;
if( pModule->iVersion>=1 ){
rc = pModule->xLanguageid(pCsr, iLangid);
if( rc!=SQLITE_OK ){
pModule->xClose(pCsr);
pCsr = 0;
}
}
}
*ppCsr = pCsr;
return rc;
}
/*
** Extract the next token from buffer z (length n) using the tokenizer
@ -154,15 +182,13 @@ static int getNextToken(
Fts3Expr *pRet = 0;
int nConsumed = 0;
rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
if( rc==SQLITE_OK ){
const char *zToken;
int nToken, iStart, iEnd, iPosition;
int nByte; /* total space to allocate */
pCursor->pTokenizer = pTokenizer;
rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
if( rc==SQLITE_OK ){
nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
pRet = (Fts3Expr *)fts3MallocZero(nByte);
@ -268,10 +294,10 @@ static int getNextString(
** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
** structures.
*/
rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
rc = sqlite3Fts3OpenTokenizer(
pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
if( rc==SQLITE_OK ){
int ii;
pCursor->pTokenizer = pTokenizer;
for(ii=0; rc==SQLITE_OK; ii++){
const char *zByte;
int nByte, iBegin, iEnd, iPos;
@ -745,6 +771,7 @@ exprparse_out:
*/
int sqlite3Fts3ExprParse(
sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
int iLangid, /* Language id for tokenizer */
char **azCol, /* Array of column names for fts3 table */
int bFts4, /* True to allow FTS4-only syntax */
int nCol, /* Number of entries in azCol[] */
@ -755,11 +782,13 @@ int sqlite3Fts3ExprParse(
int nParsed;
int rc;
ParseContext sParse;
memset(&sParse, 0, sizeof(ParseContext));
sParse.pTokenizer = pTokenizer;
sParse.iLangid = iLangid;
sParse.azCol = (const char **)azCol;
sParse.nCol = nCol;
sParse.iDefaultCol = iDefaultCol;
sParse.nNest = 0;
sParse.bFts4 = bFts4;
if( z==0 ){
*ppExpr = 0;
@ -950,7 +979,7 @@ static void fts3ExprTest(
}
rc = sqlite3Fts3ExprParse(
pTokenizer, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
);
if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
sqlite3_result_error(context, "Error parsing expression", -1);

View File

@ -532,6 +532,7 @@ static int fts3StringAppend(
*/
static int fts3SnippetShift(
Fts3Table *pTab, /* FTS3 table snippet comes from */
int iLangid, /* Language id to use in tokenizing */
int nSnippet, /* Number of tokens desired for snippet */
const char *zDoc, /* Document text to extract snippet from */
int nDoc, /* Size of buffer zDoc in bytes */
@ -567,11 +568,10 @@ static int fts3SnippetShift(
/* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
** or more tokens in zDoc/nDoc.
*/
rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
if( rc!=SQLITE_OK ){
return rc;
}
pC->pTokenizer = pTab->pTokenizer;
while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
@ -631,11 +631,10 @@ static int fts3SnippetText(
/* Open a token cursor on the document. */
pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
if( rc!=SQLITE_OK ){
return rc;
}
pC->pTokenizer = pTab->pTokenizer;
while( rc==SQLITE_OK ){
int iBegin; /* Offset in zDoc of start of token */
@ -657,7 +656,9 @@ static int fts3SnippetText(
if( !isShiftDone ){
int n = nDoc - iBegin;
rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
rc = fts3SnippetShift(
pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
);
isShiftDone = 1;
/* Now that the shift has been done, check if the initial "..." are
@ -1390,9 +1391,10 @@ void sqlite3Fts3Offsets(
}
/* Initialize a tokenizer iterator to iterate through column iCol. */
rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
zDoc, nDoc, &pC
);
if( rc!=SQLITE_OK ) goto offsets_out;
pC->pTokenizer = pTab->pTokenizer;
rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
while( rc==SQLITE_OK ){

View File

@ -271,7 +271,7 @@ static int fts3termFilterMethod(
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
rc = sqlite3Fts3SegReaderCursor(pFts3, p->iIndex, FTS3_SEGCURSOR_ALL,
rc = sqlite3Fts3SegReaderCursor(pFts3, 0, p->iIndex, FTS3_SEGCURSOR_ALL,
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr
);
if( rc==SQLITE_OK ){

View File

@ -13,6 +13,9 @@
** This file is not part of the production FTS code. It is only used for
** testing. It contains a Tcl command that can be used to test if a document
** matches an FTS NEAR expression.
**
** As of March 2012, it also contains a version 1 tokenizer used for testing
** that the sqlite3_tokenizer_module.xLanguage() method is invoked correctly.
*/
#include <tcl.h>
@ -314,11 +317,207 @@ static int fts3_configure_incr_load_cmd(
return TCL_OK;
}
/**************************************************************************
** Beginning of test tokenizer code.
**
** For language 0, this tokenizer is similar to the default 'simple'
** tokenizer. For other languages L, the following:
**
** * Odd numbered languages are case-sensitive. Even numbered
** languages are not.
**
** * Language ids 100 or greater are considered an error.
**
** The implementation assumes that the input contains only ASCII characters
** (i.e. those that may be encoded in UTF-8 using a single byte).
*/
typedef struct test_tokenizer {
sqlite3_tokenizer base;
} test_tokenizer;
typedef struct test_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *aInput; /* Input being tokenized */
int nInput; /* Size of the input in bytes */
int iInput; /* Current offset in aInput */
int iToken; /* Index of next token to be returned */
char *aBuffer; /* Buffer containing current token */
int nBuffer; /* Number of bytes allocated at pToken */
int iLangid; /* Configured language id */
} test_tokenizer_cursor;
static int testTokenizerCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
test_tokenizer *pNew;
pNew = sqlite3_malloc(sizeof(test_tokenizer));
if( !pNew ) return SQLITE_NOMEM;
memset(pNew, 0, sizeof(test_tokenizer));
*ppTokenizer = (sqlite3_tokenizer *)pNew;
return SQLITE_OK;
}
static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){
test_tokenizer *p = (test_tokenizer *)pTokenizer;
sqlite3_free(p);
return SQLITE_OK;
}
static int testTokenizerOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *pInput, int nBytes, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
int rc = SQLITE_OK; /* Return code */
test_tokenizer_cursor *pCsr; /* New cursor object */
UNUSED_PARAMETER(pTokenizer);
pCsr = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor));
if( pCsr==0 ){
rc = SQLITE_NOMEM;
}else{
memset(pCsr, 0, sizeof(test_tokenizer_cursor));
pCsr->aInput = pInput;
if( nBytes<0 ){
pCsr->nInput = strlen(pInput);
}else{
pCsr->nInput = nBytes;
}
}
*ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
return rc;
}
static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){
test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
sqlite3_free(pCsr->aBuffer);
sqlite3_free(pCsr);
return SQLITE_OK;
}
static int testIsTokenChar(char c){
return (c>='a' && c<='z') || (c>='A' && c<='Z');
}
static int testTolower(char c){
char ret = c;
if( ret>='A' && ret<='Z') ret = ret - ('A'-'a');
return ret;
}
static int testTokenizerNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by testTokenizerOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
int rc = SQLITE_OK;
const char *p;
const char *pEnd;
p = &pCsr->aInput[pCsr->iInput];
pEnd = &pCsr->aInput[pCsr->nInput];
/* Skip past any white-space */
assert( p<=pEnd );
while( p<pEnd && testIsTokenChar(*p)==0 ) p++;
if( p==pEnd ){
rc = SQLITE_DONE;
}else{
/* Advance to the end of the token */
const char *pToken = p;
int nToken;
while( p<pEnd && testIsTokenChar(*p) ) p++;
nToken = p-pToken;
/* Copy the token into the buffer */
if( nToken>pCsr->nBuffer ){
sqlite3_free(pCsr->aBuffer);
pCsr->aBuffer = sqlite3_malloc(nToken);
}
if( pCsr->aBuffer==0 ){
rc = SQLITE_NOMEM;
}else{
int i;
if( pCsr->iLangid & 0x00000001 ){
for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i];
}else{
for(i=0; i<nToken; i++) pCsr->aBuffer[i] = testTolower(pToken[i]);
}
pCsr->iToken++;
pCsr->iInput = p - pCsr->aInput;
*ppToken = pCsr->aBuffer;
*pnBytes = nToken;
*piStartOffset = pToken - pCsr->aInput;
*piEndOffset = p - pCsr->aInput;
*piPosition = pCsr->iToken;
}
}
return rc;
}
static int testTokenizerLanguage(
sqlite3_tokenizer_cursor *pCursor,
int iLangid
){
int rc = SQLITE_OK;
test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
pCsr->iLangid = iLangid;
if( pCsr->iLangid>=100 ){
rc = SQLITE_ERROR;
}
return rc;
}
static int fts3_test_tokenizer_cmd(
ClientData clientData,
Tcl_Interp *interp,
int objc,
Tcl_Obj *CONST objv[]
){
static const sqlite3_tokenizer_module testTokenizerModule = {
1,
testTokenizerCreate,
testTokenizerDestroy,
testTokenizerOpen,
testTokenizerClose,
testTokenizerNext,
testTokenizerLanguage
};
const sqlite3_tokenizer_module *pPtr = &testTokenizerModule;
if( objc!=1 ){
Tcl_WrongNumArgs(interp, 1, objv, "");
return TCL_ERROR;
}
Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(
(const unsigned char *)&pPtr, sizeof(sqlite3_tokenizer_module *)
));
return TCL_OK;
}
/*
** End of tokenizer code.
**************************************************************************/
int Sqlitetestfts3_Init(Tcl_Interp *interp){
Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
Tcl_CreateObjCommand(interp,
"fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
);
Tcl_CreateObjCommand(
interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0
);
return TCL_OK;
}
#endif /* ifdef SQLITE_TEST */

View File

@ -288,11 +288,10 @@ static void testFunc(
goto finish;
}
pTokenizer->pModule = p;
if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
zErr = "error in xOpen()";
goto finish;
}
pCsr->pTokenizer = pTokenizer;
while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));

View File

@ -52,7 +52,7 @@ typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
struct sqlite3_tokenizer_module {
/*
** Structure version. Should always be set to 0.
** Structure version. Should always be set to 0 or 1.
*/
int iVersion;
@ -133,6 +133,15 @@ struct sqlite3_tokenizer_module {
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
int *piPosition /* OUT: Number of tokens returned before this one */
);
/***********************************************************************
** Methods below this point are only available if iVersion>=1.
*/
/*
** Configure the language id of a tokenizer cursor.
*/
int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
};
struct sqlite3_tokenizer {

View File

@ -232,6 +232,8 @@ struct SegmentNode {
#define SQL_DELETE_SEGDIR_RANGE 26
#define SQL_SELECT_ALL_LANGID 27
/*
** This function is used to obtain an SQLite prepared statement handle
** for the statement identified by the second argument. If successful,
@ -285,6 +287,7 @@ static int fts3SqlStmt(
/* 25 */ "",
/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",
/* 27 */ "SELECT DISTINCT level / (1024 * ?) FROM %Q.'%q_segdir'",
};
int rc = SQLITE_OK;
@ -431,6 +434,19 @@ int sqlite3Fts3ReadLock(Fts3Table *p){
return rc;
}
static sqlite3_int64 getAbsoluteLevel(
Fts3Table *p,
int iLangid,
int iIndex,
int iLevel
){
assert( iLangid>=0 );
assert( p->nIndex>0 );
assert( iIndex>=0 && iIndex<p->nIndex );
return (iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL + iLevel;
}
/*
** Set *ppStmt to a statement handle that may be used to iterate through
** all rows in the %_segdir table, from oldest to newest. If successful,
@ -450,6 +466,7 @@ int sqlite3Fts3ReadLock(Fts3Table *p){
*/
int sqlite3Fts3AllSegdirs(
Fts3Table *p, /* FTS3 table */
int iLangid, /* Language being queried */
int iIndex, /* Index for p->aIndex[] */
int iLevel, /* Level to select */
sqlite3_stmt **ppStmt /* OUT: Compiled statement */
@ -465,14 +482,16 @@ int sqlite3Fts3AllSegdirs(
/* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */
rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL-1);
sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
sqlite3_bind_int(pStmt, 2,
getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
);
}
}else{
/* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */
rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int(pStmt, 1, iLevel+iIndex*FTS3_SEGDIR_MAXLEVEL);
sqlite3_bind_int(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel));
}
}
*ppStmt = pStmt;
@ -638,6 +657,7 @@ static int fts3PendingTermsAddOne(
*/
static int fts3PendingTermsAdd(
Fts3Table *p, /* Table into which text will be inserted */
int iLangid, /* Language id to use */
const char *zText, /* Text of document to be inserted */
int iCol, /* Column into which text is being inserted */
u32 *pnWord /* OUT: Number of tokens inserted */
@ -667,11 +687,10 @@ static int fts3PendingTermsAdd(
return SQLITE_OK;
}
rc = pModule->xOpen(pTokenizer, zText, -1, &pCsr);
rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr);
if( rc!=SQLITE_OK ){
return rc;
}
pCsr->pTokenizer = pTokenizer;
xNext = pModule->xNext;
while( SQLITE_OK==rc
@ -714,18 +733,28 @@ static int fts3PendingTermsAdd(
** fts3PendingTermsAdd() are to add term/position-list pairs for the
** contents of the document with docid iDocid.
*/
static int fts3PendingTermsDocid(Fts3Table *p, sqlite_int64 iDocid){
static int fts3PendingTermsDocid(
Fts3Table *p, /* Full-text table handle */
int iLangid, /* Language id of row being written */
sqlite_int64 iDocid /* Docid of row being written */
){
assert( iLangid>=0 );
/* TODO(shess) Explore whether partially flushing the buffer on
** forced-flush would provide better performance. I suspect that if
** we ordered the doclists by size and flushed the largest until the
** buffer was half empty, that would let the less frequent terms
** generate longer doclists.
*/
if( iDocid<=p->iPrevDocid || p->nPendingData>p->nMaxPendingData ){
if( iDocid<=p->iPrevDocid
|| p->iPrevLangid!=iLangid
|| p->nPendingData>p->nMaxPendingData
){
int rc = sqlite3Fts3PendingTermsFlush(p);
if( rc!=SQLITE_OK ) return rc;
}
p->iPrevDocid = iDocid;
p->iPrevLangid = iLangid;
return SQLITE_OK;
}
@ -754,11 +783,16 @@ void sqlite3Fts3PendingTermsClear(Fts3Table *p){
** Argument apVal is the same as the similarly named argument passed to
** fts3InsertData(). Parameter iDocid is the docid of the new row.
*/
static int fts3InsertTerms(Fts3Table *p, sqlite3_value **apVal, u32 *aSz){
static int fts3InsertTerms(
Fts3Table *p,
int iLangid,
sqlite3_value **apVal,
u32 *aSz
){
int i; /* Iterator variable */
for(i=2; i<p->nColumn+2; i++){
const char *zText = (const char *)sqlite3_value_text(apVal[i]);
int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]);
int rc = fts3PendingTermsAdd(p, iLangid, zText, i-2, &aSz[i-2]);
if( rc!=SQLITE_OK ){
return rc;
}
@ -779,6 +813,7 @@ static int fts3InsertTerms(Fts3Table *p, sqlite3_value **apVal, u32 *aSz){
** apVal[p->nColumn+1] Right-most user-defined column
** apVal[p->nColumn+2] Hidden column with same name as table
** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid)
** apVal[p->nColumn+4] Hidden languageid column
*/
static int fts3InsertData(
Fts3Table *p, /* Full-text table */
@ -809,9 +844,13 @@ static int fts3InsertData(
** defined columns in the FTS3 table, plus one for the docid field.
*/
rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]);
if( rc!=SQLITE_OK ){
return rc;
if( rc==SQLITE_OK && p->zLanguageid ){
rc = sqlite3_bind_int(
pContentInsert, p->nColumn+2,
sqlite3_value_int(apVal[p->nColumn+4])
);
}
if( rc!=SQLITE_OK ) return rc;
/* There is a quirk here. The users INSERT statement may have specified
** a value for the "rowid" field, for the "docid" field, or for both.
@ -871,6 +910,15 @@ static int fts3DeleteAll(Fts3Table *p, int bContent){
return rc;
}
/*
**
*/
static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){
int iLangid = 0;
if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1);
return iLangid;
}
/*
** The first element in the apVal[] array is assumed to contain the docid
** (an integer) of a row about to be deleted. Remove all terms from the
@ -890,16 +938,18 @@ static void fts3DeleteTerms(
if( rc==SQLITE_OK ){
if( SQLITE_ROW==sqlite3_step(pSelect) ){
int i;
for(i=1; i<=p->nColumn; i++){
int iLangid = langidFromSelect(p, pSelect);
rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0));
for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
const char *zText = (const char *)sqlite3_column_text(pSelect, i);
rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]);
if( rc!=SQLITE_OK ){
sqlite3_reset(pSelect);
*pRC = rc;
return;
}
rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[i-1]);
aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
}
if( rc!=SQLITE_OK ){
sqlite3_reset(pSelect);
*pRC = rc;
return;
}
}
rc = sqlite3_reset(pSelect);
}else{
@ -912,7 +962,7 @@ static void fts3DeleteTerms(
** Forward declaration to account for the circular dependency between
** functions fts3SegmentMerge() and fts3AllocateSegdirIdx().
*/
static int fts3SegmentMerge(Fts3Table *, int, int);
static int fts3SegmentMerge(Fts3Table *, int, int, int);
/*
** This function allocates a new level iLevel index in the segdir table.
@ -931,6 +981,7 @@ static int fts3SegmentMerge(Fts3Table *, int, int);
*/
static int fts3AllocateSegdirIdx(
Fts3Table *p,
int iLangid, /* Language id */
int iIndex, /* Index for p->aIndex */
int iLevel,
int *piIdx
@ -939,10 +990,15 @@ static int fts3AllocateSegdirIdx(
sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */
int iNext = 0; /* Result of query pNextIdx */
assert( iLangid>=0 );
assert( p->nIndex>=1 );
/* Set variable iNext to the next available segdir index at level iLevel. */
rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int(pNextIdx, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel);
sqlite3_bind_int64(
pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
);
if( SQLITE_ROW==sqlite3_step(pNextIdx) ){
iNext = sqlite3_column_int(pNextIdx, 0);
}
@ -956,7 +1012,7 @@ static int fts3AllocateSegdirIdx(
** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext.
*/
if( iNext>=FTS3_MERGE_COUNT ){
rc = fts3SegmentMerge(p, iIndex, iLevel);
rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel);
*piIdx = 0;
}else{
*piIdx = iNext;
@ -2179,7 +2235,12 @@ static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){
**
** Return SQLITE_OK if successful, or an SQLite error code if not.
*/
static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){
static int fts3SegmentMaxLevel(
Fts3Table *p,
int iLangid,
int iIndex,
int *pnMax
){
sqlite3_stmt *pStmt;
int rc;
assert( iIndex>=0 && iIndex<p->nIndex );
@ -2192,8 +2253,10 @@ static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){
*/
rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
if( rc!=SQLITE_OK ) return rc;
sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL - 1);
sqlite3_bind_int(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
sqlite3_bind_int(pStmt, 2,
getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
);
if( SQLITE_ROW==sqlite3_step(pStmt) ){
*pnMax = sqlite3_column_int(pStmt, 0);
}
@ -2216,6 +2279,7 @@ static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){
*/
static int fts3DeleteSegdir(
Fts3Table *p, /* Virtual table handle */
int iLangid, /* Language id */
int iIndex, /* Index for p->aIndex */
int iLevel, /* Level of %_segdir entries to delete */
Fts3SegReader **apSegment, /* Array of SegReader objects */
@ -2243,13 +2307,15 @@ static int fts3DeleteSegdir(
if( iLevel==FTS3_SEGCURSOR_ALL ){
rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
sqlite3_bind_int(pDelete, 2, (iIndex+1) * FTS3_SEGDIR_MAXLEVEL - 1);
sqlite3_bind_int(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
sqlite3_bind_int(pDelete, 2,
getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
);
}
}else{
rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel);
sqlite3_bind_int(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel));
}
}
@ -2718,13 +2784,18 @@ void sqlite3Fts3SegReaderFinish(
** Otherwise, if successful, SQLITE_OK is returned. If an error occurs,
** an SQLite error code is returned.
*/
static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){
static int fts3SegmentMerge(
Fts3Table *p,
int iLangid, /* Language id to merge */
int iIndex, /* Index in p->aIndex[] to merge */
int iLevel /* Level to merge */
){
int rc; /* Return code */
int iIdx = 0; /* Index of new segment */
int iNewLevel = 0; /* Level/index to create new segment at */
SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */
Fts3SegFilter filter; /* Segment term filter condition */
Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */
Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */
int bIgnoreEmpty = 0; /* True to ignore empty segments */
assert( iLevel==FTS3_SEGCURSOR_ALL
@ -2734,7 +2805,7 @@ static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){
assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
assert( iIndex>=0 && iIndex<p->nIndex );
rc = sqlite3Fts3SegReaderCursor(p, iIndex, iLevel, 0, 0, 1, 0, &csr);
rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
if( iLevel==FTS3_SEGCURSOR_ALL ){
@ -2746,24 +2817,24 @@ static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){
rc = SQLITE_DONE;
goto finished;
}
rc = fts3SegmentMaxLevel(p, iIndex, &iNewLevel);
rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel);
bIgnoreEmpty = 1;
}else if( iLevel==FTS3_SEGCURSOR_PENDING ){
iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL;
rc = fts3AllocateSegdirIdx(p, iIndex, 0, &iIdx);
iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
}else{
/* This call is to merge all segments at level iLevel. find the next
** available segment index at level iLevel+1. The call to
** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
** a single iLevel+2 segment if necessary. */
rc = fts3AllocateSegdirIdx(p, iIndex, iLevel+1, &iIdx);
iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL + iLevel+1;
rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
}
if( rc!=SQLITE_OK ) goto finished;
assert( csr.nSegment>0 );
assert( iNewLevel>=(iIndex*FTS3_SEGDIR_MAXLEVEL) );
assert( iNewLevel<((iIndex+1)*FTS3_SEGDIR_MAXLEVEL) );
assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) );
memset(&filter, 0, sizeof(Fts3SegFilter));
filter.flags = FTS3_SEGMENT_REQUIRE_POS;
@ -2780,7 +2851,9 @@ static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){
assert( pWriter );
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
rc = fts3DeleteSegdir(p, iIndex, iLevel, csr.apSegment, csr.nSegment);
rc = fts3DeleteSegdir(
p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment
);
if( rc!=SQLITE_OK ) goto finished;
}
rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
@ -2799,7 +2872,7 @@ int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
int rc = SQLITE_OK;
int i;
for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_PENDING);
rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING);
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
sqlite3Fts3PendingTermsClear(p);
@ -2954,17 +3027,34 @@ static void fts3UpdateDocTotals(
sqlite3_free(a);
}
/*
** Merge the entire database so that there is one segment for each
** iIndex/iLangid combination.
*/
static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
int i;
int bSeenDone = 0;
int rc = SQLITE_OK;
for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_ALL);
if( rc==SQLITE_DONE ){
bSeenDone = 1;
rc = SQLITE_OK;
int rc;
sqlite3_stmt *pAllLangid = 0;
rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
if( rc==SQLITE_OK ){
int rc2;
sqlite3_bind_int(pAllLangid, 1, p->nIndex);
while( sqlite3_step(pAllLangid)==SQLITE_ROW ){
int i;
int iLangid = sqlite3_column_int(pAllLangid, 0);
for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL);
if( rc==SQLITE_DONE ){
bSeenDone = 1;
rc = SQLITE_OK;
}
}
}
rc2 = sqlite3_reset(pAllLangid);
if( rc==SQLITE_OK ) rc = rc2;
}
sqlite3Fts3SegmentsClose(p);
sqlite3Fts3PendingTermsClear(p);
@ -3015,11 +3105,12 @@ static int fts3DoRebuild(Fts3Table *p){
while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
int iCol;
rc = fts3PendingTermsDocid(p, sqlite3_column_int64(pStmt, 0));
int iLangid = langidFromSelect(p, pStmt);
rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0));
aSz[p->nColumn] = 0;
for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
rc = fts3PendingTermsAdd(p, z, iCol, &aSz[iCol]);
rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
}
if( p->bHasDocsize ){
@ -3138,14 +3229,13 @@ int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){
const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
sqlite3_tokenizer_cursor *pTC = 0;
rc = pModule->xOpen(pT, zText, -1, &pTC);
rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
while( rc==SQLITE_OK ){
char const *zToken; /* Buffer containing token */
int nToken; /* Number of bytes in token */
int iDum1, iDum2; /* Dummy variables */
int iPos; /* Position of token in zText */
pTC->pTokenizer = pT;
rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
Fts3PhraseToken *pPT = pDef->pToken;
@ -3245,8 +3335,6 @@ static int fts3DeleteByRowid(
rc = fts3DeleteAll(p, 1);
*pnDoc = *pnDoc - 1;
}else{
sqlite3_int64 iRemove = sqlite3_value_int64(pRowid);
rc = fts3PendingTermsDocid(p, iRemove);
fts3DeleteTerms(&rc, p, pRowid, aSzDel);
if( p->zContentTbl==0 ){
fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
@ -3265,7 +3353,16 @@ static int fts3DeleteByRowid(
/*
** This function does the work for the xUpdate method of FTS3 virtual
** tables.
** tables. The schema of the virtual table being:
**
** CREATE TABLE <table name>(
** <user COLUMns>,
** <table name> HIDDEN,
** docid HIDDEN,
** <langid> HIDDEN
** );
**
**
*/
int sqlite3Fts3UpdateMethod(
sqlite3_vtab *pVtab, /* FTS3 vtab object */
@ -3282,6 +3379,10 @@ int sqlite3Fts3UpdateMethod(
int bInsertDone = 0;
assert( p->pSegments==0 );
assert(
nArg==1 /* DELETE operations */
|| nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */
);
/* Check for a "special" INSERT operation. One of the form:
**
@ -3295,6 +3396,11 @@ int sqlite3Fts3UpdateMethod(
goto update_out;
}
if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){
rc = SQLITE_CONSTRAINT;
goto update_out;
}
/* Allocate space to hold the change in document sizes */
aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 );
if( aSzIns==0 ){
@ -3362,6 +3468,7 @@ int sqlite3Fts3UpdateMethod(
/* If this is an INSERT or UPDATE operation, insert the new record. */
if( nArg>1 && rc==SQLITE_OK ){
int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]);
if( bInsertDone==0 ){
rc = fts3InsertData(p, apVal, pRowid);
if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
@ -3369,11 +3476,11 @@ int sqlite3Fts3UpdateMethod(
}
}
if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
rc = fts3PendingTermsDocid(p, *pRowid);
rc = fts3PendingTermsDocid(p, iLangid, *pRowid);
}
if( rc==SQLITE_OK ){
assert( p->iPrevDocid==*pRowid );
rc = fts3InsertTerms(p, apVal, aSzIns);
rc = fts3InsertTerms(p, iLangid, apVal, aSzIns);
}
if( p->bHasDocsize ){
fts3InsertDocsize(&rc, p, aSzIns);

View File

@ -1,5 +1,5 @@
C Add\sa\stest\scase\sfor\sticket\s[3557ad65a076c].
D 2012-03-03T01:44:12.761
C Merge\sthe\sfts4-languageid\sbranch\swith\sthe\strunk.
D 2012-03-05T15:33:32.751
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 3f79a373e57c3b92dabf76f40b065e719d31ac34
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -63,22 +63,22 @@ F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c 96ae9e273a770e2c249117360ec68e38aa0b13fa
F ext/fts3/fts3.c 806632fd0020eed966ab82ea25fe09f1a4c86907
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h ce958a6fa92a95462853aa3acc0b69bcda39102f
F ext/fts3/fts3_aux.c 0ebfa7b86cf8ff6a0861605fcc63b83ec1b70691
F ext/fts3/fts3_expr.c f5df26bddf46a5916b2a5f80c4027996e92b7b15
F ext/fts3/fts3Int.h d1d7f964ddee067bcd16a6af4ba7ecf66220056d
F ext/fts3/fts3_aux.c 72de4cb43db7bfc2f68fbda04b7d8095ae9a6239
F ext/fts3/fts3_expr.c dbc7ba4c3a6061adde0f38ed8e9b349568299551
F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914
F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec
F ext/fts3/fts3_icu.c 6c8f395cdf9e1e3afa7fadb7e523dbbf381c6dfa
F ext/fts3/fts3_porter.c b7e5276f9f0a5fc7018b6fa55ce0f31f269ef881
F ext/fts3/fts3_snippet.c 1f9ee6a8e0e242649645968dcec4deb253d86c2a
F ext/fts3/fts3_term.c a5457992723455a58804cb75c8cbd8978db5c2ef
F ext/fts3/fts3_test.c 24fa13f330db011500acb95590da9eee24951894
F ext/fts3/fts3_tokenizer.c 9ff7ec66ae3c5c0340fa081958e64f395c71a106
F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
F ext/fts3/fts3_snippet.c c9e126c20760988aa7c43c6ea1379db34738282e
F ext/fts3/fts3_term.c d3466cf99432291be08e379d89645462431809d6
F ext/fts3/fts3_test.c a026412a41450a014ccb7abdd5efaa7c9711d49e
F ext/fts3/fts3_tokenizer.c 3da7254a9881f7e270ab28e2004e0d22b3212bce
F ext/fts3/fts3_tokenizer.h 66dec98e365854b6cd2d54f1a96bb6d428fc5a68
F ext/fts3/fts3_tokenizer1.c 0dde8f307b8045565cf63797ba9acfaff1c50c68
F ext/fts3/fts3_write.c 1721187a4dec29ef9ae648ad8478da741085af18
F ext/fts3/fts3_write.c f87bb2d27d31cb7a7bf306747079095393c9d073
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9
@ -496,6 +496,7 @@ F test/fts3snippet.test 8e956051221a34c7daeb504f023cb54d5fa5a8b2
F test/fts3sort.test 95be0b19d7e41c44b29014f13ea8bddd495fd659
F test/fts4aa.test 6e7f90420b837b2c685f3bcbe84c868492d40a68
F test/fts4content.test 17b2360f7d1a9a7e5aa8022783f5c5731b6dfd4f
F test/fts4langid.test fabdd5a8db0fa00292e0704809f566e3fb6dba3a
F test/func.test 6c5ce11e3a0021ca3c0649234e2d4454c89110ca
F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f
F test/func3.test 001021e5b88bd02a3b365a5c5fd8f6f49d39744a
@ -634,7 +635,7 @@ F test/pageropt.test 9191867ed19a2b3db6c42d1b36b6fbc657cd1ab0
F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0
F test/pcache.test 065aa286e722ab24f2e51792c1f093bf60656b16
F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025
F test/permutations.test fa6f0e5f13fe0b1d3f7a7613179b7f7b20028184
F test/permutations.test 2b5a1b64a8e5114757457fbce9010387d1fe7682
F test/pragma.test f11c59ec935a52edb4d3d5676d456588121fcefa
F test/pragma2.test 3a55f82b954242c642f8342b17dffc8b47472947
F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552
@ -991,7 +992,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a
F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381
P dec9a995d7012a1f85be4edb71240b9d4f07c60e
R b4cefaed66b163e0022c525667456b34
U drh
Z 8282af4a85e83e5f9dade049cda1072b
P 4f34d7077b9acf7926c5e7375ca870d4ed3c60b9 f8e9c445dd358c40e5a7bf3756b9f291909dbea7
R 6e003d0f8aa6b8eacd50254a31c50e6a
U dan
Z c6f6d9c6291c4c182b564ddd4b9e0a68

View File

@ -1 +1 @@
4f34d7077b9acf7926c5e7375ca870d4ed3c60b9
99a9073b5e411ce94f38ce49608baaa15de8b850

385
test/fts4langid.test Normal file
View File

@ -0,0 +1,385 @@
# 2012 March 01
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the languageid=xxx FTS4 option.
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
set ::testprefix fts4content
# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts3 {
finish_test
return
}
set ::testprefix fts4langid
#---------------------------------------------------------------------------
# Test plan:
#
# 1.* - Warm-body tests created for specific purposes during development.
# Passing these doesn't really prove much.
#
# 2.1.* - Test that FTS queries only ever return rows associated with
# the requested language.
#
# 2.2.* - Same as 2.1.*, after an 'optimize' command.
#
# 2.3.* - Same as 2.1.*, after a 'rebuild' command.
#
# 3.* - Tests with content= tables. Both where there is a real
# underlying content table and where there is not.
#
# 4.* - Test that if one is provided, the tokenizer xLanguage method
# is called to configure the tokenizer before tokenizing query
# or document text.
#
# 5.* - Test the fts4aux table when the associated FTS4 table contains
# multiple languages.
#
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id);
}
do_execsql_test 1.2 {
SELECT sql FROM sqlite_master WHERE name = 't1_content';
} {{CREATE TABLE 't1_content'(docid INTEGER PRIMARY KEY, 'c0a', 'c1b', langid)}}
do_execsql_test 1.3 {SELECT docid FROM t1} {}
do_execsql_test 1.4 {SELECT lang_id FROM t1} {}
do_execsql_test 1.5 {INSERT INTO t1(a, b) VALUES('aaa', 'bbb')}
do_execsql_test 1.6 {SELECT lang_id FROM t1 } {0}
do_execsql_test 1.7 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 4)}
do_execsql_test 1.8 {SELECT lang_id FROM t1 } {0 4}
do_execsql_test 1.9 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 'xyz')}
do_execsql_test 1.10 {SELECT lang_id FROM t1} {0 4 0}
do_execsql_test 1.11 {
CREATE VIRTUAL TABLE t2 USING fts4;
INSERT INTO t2 VALUES('abc');
}
do_execsql_test 1.12 { SELECT rowid FROM t2 WHERE content MATCH 'abc' } 1
do_execsql_test 1.13 {
DROP TABLE t1;
CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id);
INSERT INTO t1(content) VALUES('a b c');
INSERT INTO t1(content, lang_id) VALUES('a b c', 1);
}
do_execsql_test 1.14 {
SELECT rowid FROM t1 WHERE t1 MATCH 'b';
} {1}
do_execsql_test 1.15 {
SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 0;
} {1}
do_execsql_test 1.16 {
SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 1;
} {2}
do_catchsql_test 1.17 {
INSERT INTO t1(content, lang_id) VALUES('123', -1);
} {1 {constraint failed}}
do_execsql_test 1.18 {
DROP TABLE t1;
CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id);
INSERT INTO t1(content, lang_id) VALUES('A', 13);
INSERT INTO t1(content, lang_id) VALUES('B', 13);
INSERT INTO t1(content, lang_id) VALUES('C', 13);
INSERT INTO t1(content, lang_id) VALUES('D', 13);
INSERT INTO t1(content, lang_id) VALUES('E', 13);
INSERT INTO t1(content, lang_id) VALUES('F', 13);
INSERT INTO t1(content, lang_id) VALUES('G', 13);
INSERT INTO t1(content, lang_id) VALUES('H', 13);
INSERT INTO t1(content, lang_id) VALUES('I', 13);
INSERT INTO t1(content, lang_id) VALUES('J', 13);
INSERT INTO t1(content, lang_id) VALUES('K', 13);
INSERT INTO t1(content, lang_id) VALUES('L', 13);
INSERT INTO t1(content, lang_id) VALUES('M', 13);
INSERT INTO t1(content, lang_id) VALUES('N', 13);
INSERT INTO t1(content, lang_id) VALUES('O', 13);
INSERT INTO t1(content, lang_id) VALUES('P', 13);
INSERT INTO t1(content, lang_id) VALUES('Q', 13);
INSERT INTO t1(content, lang_id) VALUES('R', 13);
INSERT INTO t1(content, lang_id) VALUES('S', 13);
SELECT rowid FROM t1 WHERE t1 MATCH 'A';
} {}
#-------------------------------------------------------------------------
# Test cases 2.*
#
proc build_multilingual_db_1 {db} {
$db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) }
set xwords [list zero one two three four five six seven eight nine ten]
set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa]
for {set i 0} {$i < 1000} {incr i} {
set iLangid [expr $i%9]
set x ""
set y ""
set x [list]
lappend x [lindex $xwords [expr ($i / 1000) % 10]]
lappend x [lindex $xwords [expr ($i / 100) % 10]]
lappend x [lindex $xwords [expr ($i / 10) % 10]]
lappend x [lindex $xwords [expr ($i / 1) % 10]]
set y [list]
lappend y [lindex $ywords [expr ($i / 1000) % 10]]
lappend y [lindex $ywords [expr ($i / 100) % 10]]
lappend y [lindex $ywords [expr ($i / 10) % 10]]
lappend y [lindex $ywords [expr ($i / 1) % 10]]
$db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) }
}
$db eval {
CREATE TABLE data(x, y, l);
INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2;
}
}
proc rowid_list_set_langid {langid} {
set ::rowid_list_langid $langid
}
proc rowid_list {pattern} {
set langid $::rowid_list_langid
set res [list]
db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} {
if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} {
lappend res $rowid
}
}
return $res
}
proc or_merge_list {list1 list2} {
set res [list]
set i1 0
set i2 0
set n1 [llength $list1]
set n2 [llength $list2]
while {$i1 < $n1 && $i2 < $n2} {
set e1 [lindex $list1 $i1]
set e2 [lindex $list2 $i2]
if {$e1==$e2} {
lappend res $e1
incr i1
incr i2
} elseif {$e1 < $e2} {
lappend res $e1
incr i1
} else {
lappend res $e2
incr i2
}
}
concat $res [lrange $list1 $i1 end] [lrange $list2 $i2 end]
}
proc or_merge_lists {args} {
set res [lindex $args 0]
for {set i 1} {$i < [llength $args]} {incr i} {
set res [or_merge_list $res [lindex $args $i]]
}
set res
}
proc and_merge_list {list1 list2} {
foreach i $list2 { set a($i) 1 }
set res [list]
foreach i $list1 {
if {[info exists a($i)]} {lappend res $i}
}
set res
}
proc and_merge_lists {args} {
set res [lindex $args 0]
for {set i 1} {$i < [llength $args]} {incr i} {
set res [and_merge_list $res [lindex $args $i]]
}
set res
}
proc filter_list {list langid} {
set res [list]
foreach i $list {
if {($i % 9) == $langid} {lappend res $i}
}
set res
}
do_test 2.0 {
reset_db
build_multilingual_db_1 db
} {}
proc do_test_query1 {tn query res_script} {
for {set langid 0} {$langid < 10} {incr langid} {
rowid_list_set_langid $langid
set res [eval $res_script]
set actual [
execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid}
]
do_test $tn.$langid [list set {} $actual] $res
}
}
# Run some queries.
do_test_query1 2.1.1 {delta} { rowid_list delta }
do_test_query1 2.1.2 {"zero one two"} { rowid_list "zero one two" }
do_test_query1 2.1.3 {zero one two} {
and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 2.1.4 {"zero one" OR "one two"} {
or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}
# Now try the same tests as above, but after running the 'optimize'
# command on the FTS table.
#
do_execsql_test 2.2 {
INSERT INTO t2(t2) VALUES('optimize');
SELECT count(*) FROM t2_segdir;
} {9}
do_test_query1 2.2.1 {delta} { rowid_list delta }
do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" }
do_test_query1 2.2.3 {zero one two} {
and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 2.2.4 {"zero one" OR "one two"} {
or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}
# And rebuild.
#
do_test 2.3 {
reset_db
build_multilingual_db_1 db
execsql { INSERT INTO t2(t2) VALUES('rebuild') }
} {}
do_test_query1 2.3.1 {delta} { rowid_list delta }
do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" }
do_test_query1 2.3.3 {zero one two} {
and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 2.3.4 {"zero one" OR "one two"} {
or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}
#-------------------------------------------------------------------------
# Test cases 3.*
#
do_test 3.0 {
reset_db
build_multilingual_db_1 db
execsql {
CREATE TABLE t3_data(l, x, y);
INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2;
DROP TABLE t2;
}
} {}
do_execsql_test 3.1 {
CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l);
INSERT INTO t2(t2) VALUES('rebuild');
}
do_test_query1 3.1.1 {delta} { rowid_list delta }
do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" }
do_test_query1 3.1.3 {zero one two} {
and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 3.1.4 {"zero one" OR "one two"} {
or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}
do_execsql_test 3.2.1 {
DROP TABLE t2;
CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable);
}
do_execsql_test 3.2.2 {
INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data;
}
do_execsql_test 3.2.3 {
DROP TABLE t3_data;
}
do_test_query1 3.3.1 {delta} { rowid_list delta }
do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" }
do_test_query1 3.3.3 {zero one two} {
and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 3.3.4 {"zero one" OR "one two"} {
or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}
#-------------------------------------------------------------------------
# Test cases 4.*
#
proc build_multilingual_db_2 {db} {
$db eval {
CREATE VIRTUAL TABLE t4 USING fts4(
tokenize=testtokenizer,
languageid=lid
);
}
for {set i 0} {$i < 50} {incr i} {
execsql {
INSERT INTO t4(docid, content, lid) VALUES($i, 'The Quick Brown Fox', $i)
}
}
}
do_test 4.1.0 {
reset_db
set ptr [fts3_test_tokenizer]
execsql { SELECT fts3_tokenizer('testtokenizer', $ptr) }
build_multilingual_db_2 db
} {}
do_execsql_test 4.1.1 {
SELECT docid FROM t4 WHERE t4 MATCH 'quick';
} {0}
do_execsql_test 4.1.2 {
SELECT docid FROM t4 WHERE t4 MATCH 'quick' AND lid=1;
} {}
do_execsql_test 4.1.3 {
SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1;
} {1}
for {set i 0} {$i < 50} {incr i} {
do_execsql_test 4.1.4.$i {
SELECT count(*) FROM t4 WHERE t4 MATCH 'fox' AND lid=$i;
} [expr 0==($i%2)]
}
do_catchsql_test 4.1.5 {
INSERT INTO t4(content, lid) VALUES('hello world', 101)
} {1 {SQL logic error or missing database}}
finish_test

View File

@ -184,8 +184,7 @@ test_suite "fts3" -prefix "" -description {
fts3aux1.test fts3comp1.test fts3auto.test
fts4aa.test fts4content.test
fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
fts3corrupt2.test
fts3first.test
fts3corrupt2.test fts3first.test fts4langid.test
}