Remove the iPos parameter from the tokenizer callback. Fix the "tokenchars" and "separators" options on the simple tokenizer.

FossilOrigin-Name: 65f0262fb82dbfd9f80233ac7c3108e2f2716c0a
This commit is contained in:
dan 2015-01-06 19:08:26 +00:00
parent 2a28e507f7
commit aacf3d1a3b
35 changed files with 234 additions and 243 deletions

View File

@ -1130,7 +1130,7 @@ static int fts5UpdateMethod(
}else if( nArg>1 ){
sqlite3_value *pCmd = apVal[2 + pConfig->nCol];
if( SQLITE_NULL!=sqlite3_value_type(pCmd) ){
const char *z = sqlite3_value_text(pCmd);
const char *z = (const char*)sqlite3_value_text(pCmd);
if( pConfig->eContent!=FTS5_CONTENT_NORMAL
&& 0==sqlite3_stricmp("delete", z)
){
@ -1220,7 +1220,7 @@ static int fts5ApiTokenize(
Fts5Context *pCtx,
const char *pText, int nText,
void *pUserData,
int (*xToken)(void*, const char*, int, int, int, int)
int (*xToken)(void*, const char*, int, int, int)
){
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);

View File

@ -195,7 +195,7 @@ struct Fts5ExtensionApi {
int (*xTokenize)(Fts5Context*,
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int, int) /* Callback */
int (*xToken)(void*, const char*, int, int, int) /* Callback */
);
int (*xPhraseCount)(Fts5Context*);
@ -291,8 +291,7 @@ struct fts5_tokenizer {
const char *pToken, /* Pointer to buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Byte offset of token within input text */
int iEnd, /* Byte offset of end of token within input text */
int iPos /* Position of token in input (first token is 0) */
int iEnd /* Byte offset of end of token within input text */
)
);
};

View File

@ -107,7 +107,7 @@ int sqlite3Fts5Tokenize(
Fts5Config *pConfig, /* FTS5 Configuration object */
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int, int) /* Callback */
int (*xToken)(void*, const char*, int, int, int) /* Callback */
);
void sqlite3Fts5Dequote(char *z);

View File

@ -46,13 +46,6 @@ struct CInstIter {
int iEnd; /* Last token in coalesced phrase instance */
};
/*
** Return non-zero if the iterator is at EOF, or zero otherwise.
*/
static int fts5CInstIterEof(CInstIter *pIter){
return (pIter->iStart < 0);
}
/*
** Advance the iterator to the next coalesced phrase instance. Return
** an SQLite error code if an error occurs, or SQLITE_OK otherwise.
@ -117,6 +110,7 @@ static int fts5CInstIterInit(
typedef struct HighlightContext HighlightContext;
struct HighlightContext {
CInstIter iter; /* Coalesced Instance Iterator */
int iPos; /* Current token offset in zIn[] */
int iRangeStart; /* First token to include */
int iRangeEnd; /* If non-zero, last token to include */
const char *zOpen; /* Opening highlight */
@ -156,11 +150,11 @@ static int fts5HighlightCb(
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStartOff, /* Start offset of token */
int iEndOff, /* End offset of token */
int iPos /* Position offset of token */
int iEndOff /* End offset of token */
){
HighlightContext *p = (HighlightContext*)pContext;
int rc = SQLITE_OK;
int iPos = p->iPos++;
if( p->iRangeEnd>0 ){
if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;

View File

@ -58,7 +58,6 @@ int sqlite3Fts5Get32(const u8 *aBuf){
}
void sqlite3Fts5BufferAppend32(int *pRc, Fts5Buffer *pBuf, int iVal){
char *a;
if( sqlite3Fts5BufferGrow(pRc, pBuf, 4) ) return;
sqlite3Fts5Put32(&pBuf->p[pBuf->n], iVal);
pBuf->n += 4;

View File

@ -364,7 +364,7 @@ static int fts5ConfigParseSpecial(
return rc;
}
*pzErr = sqlite3_mprintf("unrecognized directive: \"%s\"", zCmd);
*pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
return SQLITE_ERROR;
}
@ -588,7 +588,7 @@ int sqlite3Fts5Tokenize(
Fts5Config *pConfig, /* FTS5 Configuration object */
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int, int) /* Callback */
int (*xToken)(void*, const char*, int, int, int) /* Callback */
){
return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken);
}

View File

@ -457,9 +457,11 @@ static int fts5LookaheadReaderInit(
return fts5LookaheadReaderNext(p);
}
/*
** Return non-zero if the iPos field of lookahead reader p is equal to the
** FTS5_LOOKAHEAD_EOF sentinel, or zero otherwise.
**
** The function is excluded from the build by the "#if 0" guard below.
** NOTE(review): presumably it is disabled because it has no remaining
** callers - confirm before re-enabling or deleting it.
*/
#if 0
static int fts5LookaheadReaderEof(Fts5LookaheadReader *p){
return (p->iPos==FTS5_LOOKAHEAD_EOF);
}
#endif
typedef struct Fts5NearTrimmer Fts5NearTrimmer;
struct Fts5NearTrimmer {
@ -1141,8 +1143,7 @@ static int fts5ParseTokenize(
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd, /* End offset of token */
int iPos /* Position offset of token */
int iEnd /* End offset of token */
){
int rc = SQLITE_OK;
const int SZALLOC = 8;

View File

@ -842,12 +842,14 @@ static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
** And discard any cached reads. This function is called at the end of
** a read transaction or when any sub-transaction is rolled back.
*/
#if 0
/* Excluded from the build by the surrounding "#if 0" guard.
** NOTE(review): presumably disabled because nothing calls it at present -
** confirm before re-enabling or deleting it. */
static void fts5DataReset(Fts5Index *p){
if( p->pReader ){
/* Close the open blob handle and clear the pointer to it. */
sqlite3_blob_close(p->pReader);
p->pReader = 0;
}
}
#endif
/*
** Remove all records associated with segment iSegid in index iIdx.

View File

@ -282,12 +282,11 @@ static int fts5StorageInsertCallback(
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd, /* End offset of token */
int iPos /* Position offset of token */
int iEnd /* End offset of token */
){
Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
Fts5Index *pIdx = pCtx->pStorage->pIndex;
pCtx->szCol = iPos+1;
int iPos = pCtx->szCol++;
return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken);
}
@ -312,6 +311,7 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){
ctx.iCol = -1;
rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(pConfig,
(const char*)sqlite3_column_text(pSeek, iCol),
sqlite3_column_bytes(pSeek, iCol),
@ -474,6 +474,7 @@ int sqlite3Fts5StorageSpecialDelete(
rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(pConfig,
(const char*)sqlite3_value_text(apVal[iCol]),
sqlite3_value_bytes(apVal[iCol]),
@ -651,14 +652,13 @@ static int fts5StorageIntegrityCallback(
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd, /* End offset of token */
int iPos /* Position offset of token */
int iEnd /* End offset of token */
){
Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
int iPos = pCtx->szCol++;
pCtx->cksum ^= sqlite3Fts5IndexCksum(
pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
);
pCtx->szCol = iPos+1;
return SQLITE_OK;
}
@ -695,6 +695,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
ctx.iCol = i;
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(
pConfig,
(const char*)sqlite3_column_text(pScan, i+1),

View File

@ -112,7 +112,7 @@ struct F5tAuxData {
static int xTokenizeCb(
void *pCtx,
const char *zToken, int nToken,
int iStart, int iEnd, int iPos
int iStart, int iEnd
){
F5tFunction *p = (F5tFunction*)pCtx;
Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript);
@ -122,7 +122,6 @@ static int xTokenizeCb(
Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zToken, nToken));
Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iStart));
Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iEnd));
Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iPos));
rc = Tcl_EvalObjEx(p->interp, pEval, 0);
Tcl_DecrRefCount(pEval);
@ -528,11 +527,10 @@ struct F5tTokenizeCtx {
static int xTokenizeCb2(
void *pCtx,
const char *zToken, int nToken,
int iStart, int iEnd, int iPos
int iStart, int iEnd
){
F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx;
if( p->bSubst ){
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iPos));
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
Tcl_ListObjAppendElement(
0, p->pRet, Tcl_NewStringObj(&p->zInput[iStart], iEnd-iStart)
@ -541,7 +539,6 @@ static int xTokenizeCb2(
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iStart));
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iEnd));
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iPos));
}
return SQLITE_OK;
}
@ -637,7 +634,7 @@ typedef struct F5tTokenizerModule F5tTokenizerInstance;
struct F5tTokenizerContext {
void *pCtx;
int (*xToken)(void*, const char*, int, int, int, int);
int (*xToken)(void*, const char*, int, int, int);
};
struct F5tTokenizerModule {
@ -693,11 +690,11 @@ static int f5tTokenizerTokenize(
Fts5Tokenizer *p,
void *pCtx,
const char *pText, int nText,
int (*xToken)(void*, const char*, int, int, int, int)
int (*xToken)(void*, const char*, int, int, int)
){
F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
void *pOldCtx;
int (*xOldToken)(void*, const char*, int, int, int, int);
int (*xOldToken)(void*, const char*, int, int, int);
Tcl_Obj *pEval;
int rc;
@ -733,14 +730,13 @@ static int f5tTokenizerReturn(
F5tTokenizerContext *p = (F5tTokenizerContext*)clientData;
int iStart;
int iEnd;
int iPos;
int nToken;
char *zToken;
int rc;
assert( p );
if( objc!=5 ){
Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END POS");
if( objc!=4 ){
Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END");
return TCL_ERROR;
}
if( p->xToken==0 ){
@ -753,12 +749,11 @@ static int f5tTokenizerReturn(
zToken = Tcl_GetStringFromObj(objv[1], &nToken);
if( Tcl_GetIntFromObj(interp, objv[2], &iStart)
|| Tcl_GetIntFromObj(interp, objv[3], &iEnd)
|| Tcl_GetIntFromObj(interp, objv[4], &iPos)
){
return TCL_ERROR;
}
rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd, iPos);
rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd);
Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
return TCL_OK;
}

View File

@ -16,28 +16,9 @@
#include <assert.h>
/**************************************************************************
** Start of unicode61 tokenizer implementation.
** Start of simple tokenizer implementation.
*/
/*
** Create a "simple" tokenizer.
*/
static int fts5SimpleCreate(
void *pCtx,
const char **azArg, int nArg,
Fts5Tokenizer **ppOut
){
*ppOut = 0;
return SQLITE_OK;
}
/*
** Delete a "simple" tokenizer.
*/
static void fts5SimpleDelete(Fts5Tokenizer *p){
return;
}
/*
** For tokenizers with no "unicode" modifier, the set of token characters
** is the same as the set of ASCII range alphanumeric characters.
@ -53,6 +34,69 @@ static unsigned char aSimpleTokenChar[128] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70..0x7F */
};
/*
** Per-instance state of a "simple" tokenizer.
*/
typedef struct SimpleTokenizer SimpleTokenizer;
struct SimpleTokenizer {
  unsigned char aTokenChar[128];  /* One flag per 7-bit ASCII codepoint */
};

/*
** Overwrite the aTokenChar[] flag of every ASCII character that appears
** in the nul-terminated string zArg with the value bTokenChars (cast to
** unsigned char). Bytes of zArg that have the 0x80 bit set are ignored.
** Entries for characters that do not appear in zArg are left unchanged.
*/
static void fts5SimpleAddExceptions(
  SimpleTokenizer *p,
  const char *zArg,
  int bTokenChars
){
  const unsigned char flag = (unsigned char)bTokenChars;
  const char *z;
  for(z=zArg; *z; z++){
    if( *z & 0x80 ) continue;       /* Not a 7-bit ASCII byte - skip it */
    p->aTokenChar[(int)*z] = flag;
  }
}
/*
** Create a "simple" tokenizer.
**
** azArg[] contains nArg strings that are interpreted as a list of
** (option, value) pairs. Option names are compared case-insensitively.
** The recognised options are:
**
**   tokenchars   Each ASCII character in the value is flagged as a
**                token character.
**   separators   Each ASCII character in the value is flagged as a
**                separator (non-token) character.
**
** Options are applied in the order in which they appear, starting from
** the default classification in aSimpleTokenChar[].
**
** If successful, SQLITE_OK is returned and *ppOut is set to point to the
** new tokenizer. If nArg is odd or an option name is not recognised,
** SQLITE_ERROR is returned. If the allocation fails, SQLITE_NOMEM is
** returned. Whenever an error is returned *ppOut is set to NULL and no
** memory is left allocated.
*/
static int fts5SimpleCreate(
  void *pCtx,
  const char **azArg, int nArg,
  Fts5Tokenizer **ppOut
){
  int rc = SQLITE_OK;
  SimpleTokenizer *p = 0;
  int i;

  if( nArg%2 ){
    rc = SQLITE_ERROR;
  }

  /* Check every option name before allocating anything. This way an
  ** unrecognised option can never leave a half-built tokenizer behind
  ** for the caller to clean up (or leak). */
  for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
    if( sqlite3_stricmp(azArg[i], "tokenchars")
     && sqlite3_stricmp(azArg[i], "separators")
    ){
      rc = SQLITE_ERROR;
    }
  }

  if( rc==SQLITE_OK ){
    p = sqlite3_malloc(sizeof(SimpleTokenizer));
    if( p==0 ){
      rc = SQLITE_NOMEM;
    }else{
      memset(p, 0, sizeof(SimpleTokenizer));
      memcpy(p->aTokenChar, aSimpleTokenChar, sizeof(aSimpleTokenChar));
      for(i=0; i<nArg; i+=2){
        /* Names were validated above, so anything that is not
        ** "tokenchars" must be "separators". */
        int bTokenChars = (0==sqlite3_stricmp(azArg[i], "tokenchars"));
        fts5SimpleAddExceptions(p, azArg[i+1], bTokenChars);
      }
    }
  }

  *ppOut = (Fts5Tokenizer*)p;
  return rc;
}
/*
** Destroy a "simple" tokenizer: release the allocation that p points to
** by passing it to sqlite3_free().
*/
static void fts5SimpleDelete(Fts5Tokenizer *p){
  sqlite3_free((void*)p);
}
static void simpleFold(char *aOut, const char *aIn, int nByte){
int i;
@ -70,29 +114,30 @@ static int fts5SimpleTokenize(
Fts5Tokenizer *pTokenizer,
void *pCtx,
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
SimpleTokenizer *p = (SimpleTokenizer*)pTokenizer;
int rc = SQLITE_OK;
int ie;
int is = 0;
int iPos = 0;
char aFold[64];
int nFold = sizeof(aFold);
char *pFold = aFold;
unsigned char *a = p->aTokenChar;
while( is<nText && rc==SQLITE_OK ){
int nByte;
/* Skip any leading divider characters. */
while( is<nText && ((pText[is]&0x80) || aSimpleTokenChar[pText[is]]==0 ) ){
while( is<nText && ((pText[is]&0x80) || a[(int)pText[is]]==0) ){
is++;
}
if( is==nText ) break;
/* Count the token characters */
ie = is+1;
while( ie<nText && ((pText[ie]&0x80)==0 && aSimpleTokenChar[pText[ie]] ) ){
while( ie<nText && ((pText[ie]&0x80)==0 && a[(int)pText[ie]] ) ){
ie++;
}
@ -110,8 +155,7 @@ static int fts5SimpleTokenize(
simpleFold(pFold, &pText[is], nByte);
/* Invoke the token callback */
rc = xToken(pCtx, pFold, nByte, is, ie, iPos);
iPos++;
rc = xToken(pCtx, pFold, nByte, is, ie);
is = ie+1;
}
@ -328,7 +372,7 @@ static int fts5UnicodeTokenize(
Fts5Tokenizer *pTokenizer,
void *pCtx,
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
const unsigned char *zInput = (const unsigned char*)pText;
@ -338,7 +382,6 @@ static int fts5UnicodeTokenize(
int nBuf = 0;
unsigned char *zBuf = 0;
unsigned char *zOut = 0;
int iPos = 0;
while( rc==SQLITE_OK && z<zTerm ){
int iCode;
@ -378,9 +421,8 @@ static int fts5UnicodeTokenize(
if( zOut>zBuf && (bAlnum==0 || z>=zTerm) ){
int ie = (bAlnum ? z : zCode) - zInput;
rc = xToken(pCtx, (const char*)zBuf, zOut-zBuf, zStart-zInput, ie, iPos);
rc = xToken(pCtx, (const char*)zBuf, zOut-zBuf, zStart-zInput, ie);
zOut = zBuf;
iPos++;
}
}
@ -390,7 +432,7 @@ static int fts5UnicodeTokenize(
}
/**************************************************************************
** Start of porter2 stemmer implementation.
** Start of porter stemmer implementation.
*/
/* Any tokens larger than this (in bytes) are passed through without
@ -452,7 +494,7 @@ static int fts5PorterCreate(
typedef struct PorterContext PorterContext;
struct PorterContext {
void *pCtx;
int (*xToken)(void*, const char*, int, int, int, int);
int (*xToken)(void*, const char*, int, int, int);
char *aBuf;
};
@ -470,7 +512,6 @@ static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){
int nBuf = *pnBuf;
PorterRule *p;
for(p=aRule; p->zSuffix; p++){
assert( strlen(p->zSuffix)==p->nSuffix );
assert( strlen(p->zOutput)==p->nOutput );
@ -577,8 +618,7 @@ static int fts5PorterCb(
const char *pToken,
int nToken,
int iStart,
int iEnd,
int iPos
int iEnd
){
PorterContext *p = (PorterContext*)pCtx;
@ -716,10 +756,10 @@ static int fts5PorterCb(
nBuf--;
}
return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd, iPos);
return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd);
pass_through:
return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd, iPos);
return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd);
}
/*
@ -729,7 +769,7 @@ static int fts5PorterTokenize(
Fts5Tokenizer *pTokenizer,
void *pCtx,
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
PorterContext sCtx;

View File

@ -10,7 +10,10 @@
#***********************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
proc fts5_test_poslist {cmd} {
@ -45,7 +48,7 @@ proc fts5_test_columntotalsize {cmd} {
set res
}
proc test_append_token {varname token iStart iEnd iPos} {
proc test_append_token {varname token iStart iEnd} {
upvar $varname var
lappend var $token
}

View File

@ -12,10 +12,7 @@
# focus of this script is testing the FTS5 module.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5aa
# If SQLITE_ENABLE_FTS5 is defined, omit this file.

View File

@ -13,10 +13,7 @@
#
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ab
# If SQLITE_ENABLE_FTS5 is defined, omit this file.

View File

@ -13,10 +13,7 @@
#
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ac
# If SQLITE_ENABLE_FTS5 is defined, omit this file.

View File

@ -13,10 +13,7 @@
#
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ad
# If SQLITE_ENABLE_FTS5 is defined, omit this file.

View File

@ -13,10 +13,7 @@
#
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ae
# If SQLITE_ENABLE_FTS5 is defined, omit this file.

View File

@ -15,10 +15,7 @@
# snippet() function.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5af
# If SQLITE_ENABLE_FTS5 is defined, omit this file.

View File

@ -12,10 +12,7 @@
# focus of this script is testing the FTS5 module.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ag
# If SQLITE_ENABLE_FTS5 is defined, omit this file.

View File

@ -12,10 +12,7 @@
# focus of this script is testing the FTS5 module.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ah
# If SQLITE_ENABLE_FTS5 is defined, omit this file.

View File

@ -14,10 +14,7 @@
# Specifically, it tests transactions and savepoints
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ai
# If SQLITE_ENABLE_FTS5 is defined, omit this file.

View File

@ -16,10 +16,7 @@
# and deleted,
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5aj
# If SQLITE_ENABLE_FTS5 is defined, omit this file.

View File

@ -14,10 +14,7 @@
# Specifically, the auxiliary function "highlight".
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ak
# If SQLITE_ENABLE_FTS5 is defined, omit this file.

View File

@ -14,10 +14,7 @@
# Specifically, this function tests the %_config table.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5al
# If SQLITE_ENABLE_FTS5 is defined, omit this file.

View File

@ -12,10 +12,7 @@
# Tests focusing on the fts5 xSetAuxdata() and xGetAuxdata() APIs.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5auxdata
do_execsql_test 1.0 {

View File

@ -11,10 +11,7 @@
#
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5content
#-------------------------------------------------------------------------

View File

@ -10,10 +10,7 @@
#*************************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ea
# If SQLITE_ENABLE_FTS5 is defined, omit this file.

View File

@ -12,10 +12,7 @@
# focus of this script is testing the FTS5 module.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault1

View File

@ -14,10 +14,7 @@
# http://tartarus.org/martin/PorterStemmer/
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5porter
set test_vocab {

View File

@ -12,14 +12,10 @@
# Tests focusing on the fts5 tokenizers
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5tokenizer
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter);
DROP TABLE ft1;
@ -51,7 +47,6 @@ do_execsql_test 2.3 {
SELECT rowid FROM ft1 WHERE ft1 MATCH 'database embedding'
} 1
proc tcl_create {args} {
set ::targs $args
error "failed"
@ -70,7 +65,6 @@ foreach {tn directive expected} {
do_test 3.$tn.2 { set ::targs } $expected
}
do_catchsql_test 4.1 {
CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize = tcl abc);
} {1 {parse error in "tokenize = tcl abc"}}
@ -78,5 +72,26 @@ do_catchsql_test 4.2 {
CREATE VIRTUAL TABLE ft2 USING fts5(x y)
} {1 {parse error in "x y"}}
#-------------------------------------------------------------------------
# Test the "separators" and "tokenchars" options a bit.
#
# Run the same checks against both the "simple" and "unicode61" tokenizers.
# Each table is created with ',', '.' and ':' added as token characters
# and 'x', 'y' and 'z' added as separators.
foreach {tn tokenizer} {1 simple 2 unicode61} {
reset_db
set T "$tokenizer tokenchars ',.:' separators 'xyz'"
execsql "CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = \"$T\")"
do_execsql_test 5.$tn.1 {
INSERT INTO t1 VALUES('abcxdefyghizjkl.mno,pqr:stu/vwx+yz');
}
# The expectations below correspond to the inserted text being split into
# the tokens: abc def ghi jkl.mno,pqr:stu vw
# so "jkl", "mno", "pqr" and "stu" on their own are expected to match
# nothing, while the combined "jkl.mno,pqr:stu" matches rowid 1.
foreach {tn2 token res} {
1 abc 1 2 def 1 3 ghi 1 4 jkl {}
5 mno {} 6 pqr {} 7 stu {} 8 jkl.mno,pqr:stu 1
9 vw 1
} {
# Query for $token as a quoted phrase and compare the matching rowids.
do_execsql_test 5.$tn.2.$tn2 "
SELECT rowid FROM t1 WHERE t1 MATCH '\"$token\"'
" $res
}
}
finish_test

View File

@ -12,16 +12,13 @@
# Tests focusing on the fts5 tokenizers
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unicode
proc tokenize_test {tn tokenizer input output} {
uplevel [list do_test $tn [subst -nocommands {
set ret {}
foreach {z s e p} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] {
foreach {z s e} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] {
lappend ret [set z]
}
set ret

View File

@ -14,10 +14,7 @@
# This is a modified copy of FTS4 test file "fts4_unicode.test".
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unicode2
proc do_unicode_token_test {tn input res} {
@ -41,37 +38,37 @@ proc do_unicode_token_test3 {tn args} {
] [list {*}$res]]
}
do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}
do_unicode_token_test 1.0 {a B c D} {a a b B c c d D}
do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \
"0 \uE4 \uC4 1 \uF6 \uD6 2 \uFC \uDC"
"\uE4 \uC4 \uF6 \uD6 \uFC \uDC"
do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \
"0 x\uE4x x\uC4x 1 x\uF6x x\uD6x 2 x\uFCx x\uDCx"
"x\uE4x x\uC4x x\uF6x x\uD6x x\uFCx x\uDCx"
# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "0 \uDF \u1E9E"
do_unicode_token_test 1.3 "\uDF" "\uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "\uDF \u1E9E"
do_unicode_token_test 1.5 "The quick brown fox" {
0 the The 1 quick quick 2 brown brown 3 fox fox
the The quick quick brown brown fox fox
}
do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {
0 the The 1 quick quick 2 brown brown 3 fox fox
the The quick quick brown brown fox fox
}
do_unicode_token_test2 1.7 {a B c D} {0 a a 1 b B 2 c c 3 d D}
do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "0 a \uC4 1 o \uD6 2 u \uDC"
do_unicode_token_test2 1.7 {a B c D} {a a b B c c d D}
do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "a \uC4 o \uD6 u \uDC"
do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \
"0 xax x\uC4x 1 xox x\uD6x 2 xux x\uDCx"
"xax x\uC4x xox x\uD6x xux x\uDCx"
# Check that diacritics are removed if remove_diacritics=1 is specified.
# And that they do not break tokens.
do_unicode_token_test2 1.10 "xx\u0301xx" "0 xxxx xx\u301xx"
do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx"
# Title-case mappings work
do_unicode_token_test 1.11 "\u01c5" "0 \u01c6 \u01c5"
do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5"
#-------------------------------------------------------------------------
#
@ -263,45 +260,45 @@ breakpoint
do_unicode_token_test3 5.1 {tokenchars {}} {
sqlite3_reset sqlite3_column_int
} {
0 sqlite3 sqlite3
1 reset reset
2 sqlite3 sqlite3
3 column column
4 int int
sqlite3 sqlite3
reset reset
sqlite3 sqlite3
column column
int int
}
do_unicode_token_test3 5.2 {tokenchars _} {
sqlite3_reset sqlite3_column_int
} {
0 sqlite3_reset sqlite3_reset
1 sqlite3_column_int sqlite3_column_int
sqlite3_reset sqlite3_reset
sqlite3_column_int sqlite3_column_int
}
do_unicode_token_test3 5.3 {separators xyz} {
Laotianxhorseyrunszfast
} {
0 laotian Laotian
1 horse horse
2 runs runs
3 fast fast
laotian Laotian
horse horse
runs runs
fast fast
}
do_unicode_token_test3 5.4 {tokenchars xyz} {
Laotianxhorseyrunszfast
} {
0 laotianxhorseyrunszfast Laotianxhorseyrunszfast
laotianxhorseyrunszfast Laotianxhorseyrunszfast
}
do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} {
sqlite3_resetxsqlite3_column_intyhonda_phantom
} {
0 sqlite3_reset sqlite3_reset
1 sqlite3_column_int sqlite3_column_int
2 honda_phantom honda_phantom
sqlite3_reset sqlite3_reset
sqlite3_column_int sqlite3_column_int
honda_phantom honda_phantom
}
do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" {
0 abc abc 1 def def
abc abc def def
}
do_unicode_token_test3 5.7 \
@ -309,38 +306,37 @@ do_unicode_token_test3 5.7 \
"separators \u05D0\u05D1\u05D2" \
"\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \
[list \
0 \u2444fre\u2445sh \u2444fre\u2445sh \
1 water water \
2 fish fish \
3 \u2445timer \u2445timer \
\u2444fre\u2445sh \u2444fre\u2445sh \
water water \
fish fish \
\u2445timer \u2445timer \
]
# Check that it is not possible to add a standalone diacritic codepoint
# to either separators or tokenchars.
do_unicode_token_test3 5.8 "separators \u0301" \
"hello\u0301world \u0301helloworld" \
"0 helloworld hello\u0301world 1 helloworld helloworld"
"helloworld hello\u0301world helloworld helloworld"
do_unicode_token_test3 5.9 "tokenchars \u0301" \
"hello\u0301world \u0301helloworld" \
"0 helloworld hello\u0301world 1 helloworld helloworld"
"helloworld hello\u0301world helloworld helloworld"
do_unicode_token_test3 5.10 "separators \u0301" \
"remove_diacritics 0" \
"hello\u0301world \u0301helloworld" \
"0 hello\u0301world hello\u0301world 1 helloworld helloworld"
"hello\u0301world hello\u0301world helloworld helloworld"
do_unicode_token_test3 5.11 "tokenchars \u0301" \
"remove_diacritics 0" \
"hello\u0301world \u0301helloworld" \
"0 hello\u0301world hello\u0301world 1 helloworld helloworld"
"hello\u0301world hello\u0301world helloworld helloworld"
#-------------------------------------------------------------------------
proc do_tokenize {tokenizer txt} {
set res [list]
foreach {a b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] {
foreach {b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] {
lappend res $b
}
set res
@ -391,6 +387,7 @@ foreach T $tokenizers {
do_isspace_test 6.$T.23 $T {8287 12288}
}
#-------------------------------------------------------------------------
# Test that the private use ranges are treated as alphanumeric.
#
@ -398,8 +395,8 @@ foreach {tn1 c} {
1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff
} {
foreach {tn2 config res} {
1 "" "0 hello*world hello*world"
2 "separators *" "0 hello hello 1 world world"
1 "" "hello*world hello*world"
2 "separators *" "hello hello world world"
} {
set config [string map [list * $c] $config]
set input [string map [list * $c] "hello*world"]

View File

@ -1,5 +1,5 @@
C Further\sfixes\sand\stest\scases\srelated\sto\sexternal\scontent\stables.
D 2015-01-06T14:38:34.378
C Remove\sthe\siPos\sparameter\sfrom\sthe\stokenizer\scallback.\sFix\sthe\s"tokenchars"\sand\s"separators"\soptions\son\sthe\ssimple\stokenizer.
D 2015-01-06T19:08:26.571
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -104,40 +104,41 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e
F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786
F ext/fts5/fts5.c e2c19b2c5ab96650732bb6904892a6fb9a27ab42
F ext/fts5/fts5.h 4f9d2c477c0ee1907164642471329a82cb6b203b
F ext/fts5/fts5Int.h 9aafe97064e9c3380991abad4f51bee51021d18d
F ext/fts5/fts5_aux.c a74523025a553f57c99c699b9e2d83c4506503b4
F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1
F ext/fts5/fts5_config.c ecd2f2efca1cda58525087a1a0e0bc1d34aad7a0
F ext/fts5/fts5_expr.c 317093f00a2ccdaaee0a5290f9f228c600189c41
F ext/fts5/fts5.c 9f6f6597410d9fe76db385955ad6be171c454331
F ext/fts5/fts5.h cfafdf6f43f9402f999334382085e46f89d85ecf
F ext/fts5/fts5Int.h 8b338037a968da542a98bbbcdbb10bcf361ee2fe
F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f
F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9
F ext/fts5/fts5_config.c 33534ca25198cc62c54ff7d285d455c57ad19399
F ext/fts5/fts5_expr.c 0320ae948e82cf7dca800463de7f5b6a808ba7c3
F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279
F ext/fts5/fts5_index.c a0f370b7843183c040dbbf724e1080a615ee05cc
F ext/fts5/fts5_storage.c 9b6b8afde63ccc7e8f2f37252bf47a0ea00f468c
F ext/fts5/fts5_tcl.c 664e710e2bbeed505cb91848772ca7538623a67f
F ext/fts5/fts5_tokenize.c 5a0ad46408d09bcda2bf0addb5af42fdb75ebabb
F ext/fts5/fts5_index.c b58bcfba3fe4e53fbf2dc525ec25aa37b77ac9f0
F ext/fts5/fts5_storage.c cd72f2839049d5277df0edd0cf5c801f33542b07
F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5
F ext/fts5/fts5_tokenize.c 4c30cf32c63e59bec5b38533e0a65987df262851
F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
F ext/fts5/test/fts5aa.test 2affb47c0efa9cd39e1589ff8d8d78bcc7792952
F ext/fts5/test/fts5ab.test 7a58a954cae2ae50cef3ee525c57bc8eb3eb50b3
F ext/fts5/test/fts5ac.test d3de838f48d2ac8c26386832f6d93a3a3dbb5d4b
F ext/fts5/test/fts5ad.test a8311d6ce46964fa1686937793dd81d284317324
F ext/fts5/test/fts5ae.test e576e646013489ce458a5b276caa787035efb175
F ext/fts5/test/fts5af.test 7e4c679bc6337ddcde6a3c9b9d81c81d2f7e77bd
F ext/fts5/test/fts5ag.test c79ee7707d120b79869fa2ac1538639b9fa1b997
F ext/fts5/test/fts5ah.test e510c741e9833d6335c87bef2e7f93fecfcc7c1d
F ext/fts5/test/fts5ai.test 6a22f43776e1612591392721b535ca28d2c1a19f
F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3
F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f
F ext/fts5/test/fts5al.test bc873766fec3baae05ba6e76b379bc2f5e8eaf75
F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05
F ext/fts5/test/fts5content.test 55f760043ab3b066b9d91a9bf5f518198d31cc1f
F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8
F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7
F ext/fts5/test/fts5porter.test d8f7591b733bcc1f02ca0dd313bc891a4b289562
F ext/fts5/test/fts5tokenizer.test a1f3128e0d42c93632122c76cbe0d07a901591ca
F ext/fts5/test/fts5unicode.test b9c7bb982e0ee242a0774e636e1888ca32947a83
F ext/fts5/test/fts5unicode2.test 7b0d64bbb7bfb7b5080e032e068404b42432ee02
F ext/fts5/test/fts5_common.tcl 08e939096a07eb77a7a986613e960f31d3cab2cc w test/fts5_common.tcl
F ext/fts5/test/fts5aa.test 3941b54d7585153be0c5cf0026f7dd8cfef13ea9
F ext/fts5/test/fts5ab.test 91a3faac09ad9fab5f71494db6e4071963281536
F ext/fts5/test/fts5ac.test 48181b7c873da0e3b4a3316760fcb90d88e7fbd8
F ext/fts5/test/fts5ad.test 3b01eec8516d5631909716514e2e585a45ef0eb1
F ext/fts5/test/fts5ae.test 014d5be2f5f70407fb032d4f27704116254797c3
F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a
F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505
F ext/fts5/test/fts5ah.test 749855d1f457ecbf8e54b25a92e55a84cc689151
F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37
F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8
F ext/fts5/test/fts5ak.test dc7bcd087dea0451ec40bba173962a0ba3a1d8ce
F ext/fts5/test/fts5al.test 633fdb3d974629d01ba7734d180dbc2ad8ed772a
F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b
F ext/fts5/test/fts5content.test ed6a141b1fcaa8fc1cf719492a9e38b29f2a830b
F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c
F ext/fts5/test/fts5fault1.test f3f4c6ed15cc7a4dc8d517c0d1969d8e5a35a65c
F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e
F ext/fts5/test/fts5tokenizer.test f951bb9be29232bd057b0ac4d535b879d9cd9a89
F ext/fts5/test/fts5unicode.test 9ae93296e59917c1210336388f6d3b98051b50c9
F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb
F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37
@ -646,7 +647,6 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7
F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b
F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849
F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36
F test/fts5_common.tcl 2488117cd80b7a4de7c20054b89f082b77b4189c
F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d
F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef
F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f
@ -939,7 +939,7 @@ F test/tclsqlite.test 37a61c2da7e3bfe3b8c1a2867199f6b860df5d43
F test/tempdb.test 19d0f66e2e3eeffd68661a11c83ba5e6ace9128c
F test/temptable.test d2c9b87a54147161bcd1822e30c1d1cd891e5b30
F test/temptrigger.test 8ec228b0db5d7ebc4ee9b458fc28cb9e7873f5e1
F test/tester.tcl a9cb43af36b13ec12587e3579bc13eda98cfb6b2
F test/tester.tcl ed77454e6c7b40eb501db7e79d1c6fbfd3eebbff
F test/thread001.test 9f22fd3525a307ff42a326b6bc7b0465be1745a5
F test/thread002.test e630504f8a06c00bf8bbe68528774dd96aeb2e58
F test/thread003.test ee4c9efc3b86a6a2767516a37bd64251272560a7
@ -1271,7 +1271,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 047aaf830d1e72f0fdad3832a0b617e769d66468
R 4c2c7726c7891be9cd96464f52b4b676
P ce6a899baff7265a60c880098a9a57ea352b5415
R ee708c3acc09a58536cb486296b83967
U dan
Z b55a8c4b3246d78dc3224ac9cef3d20c
Z 63a8748a3e94829622d4a3b2bf209e0e

View File

@ -1 +1 @@
ce6a899baff7265a60c880098a9a57ea352b5415
65f0262fb82dbfd9f80233ac7c3108e2f2716c0a

View File

@ -1921,4 +1921,3 @@ database_never_corrupt
source $testdir/thread_common.tcl
source $testdir/malloc_common.tcl
source $testdir/fts5_common.tcl