Another change to the fts5 tokenizer API.
FossilOrigin-Name: fc71868496f45f9c7a79ed2bf2d164a7c4718ce1
This commit is contained in:
parent
57e0add3f9
commit
ee0c0a8de3
@ -217,7 +217,7 @@ struct Fts5ExtensionApi {
|
||||
int (*xTokenize)(Fts5Context*,
|
||||
const char *pText, int nText, /* Text to tokenize */
|
||||
void *pCtx, /* Context passed to xToken() */
|
||||
int (*xToken)(void*, const char*, int, int, int, int) /* Callback */
|
||||
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
|
||||
);
|
||||
|
||||
int (*xPhraseCount)(Fts5Context*);
|
||||
@ -313,20 +313,25 @@ struct fts5_tokenizer {
|
||||
const char *pText, int nText,
|
||||
int (*xToken)(
|
||||
void *pCtx, /* Copy of 2nd argument to xTokenize() */
|
||||
int tflags, /* Mask of FTS5_TOKEN_* flags */
|
||||
const char *pToken, /* Pointer to buffer containing token */
|
||||
int nToken, /* Size of token in bytes */
|
||||
int iStart, /* Byte offset of token within input text */
|
||||
int iEnd, /* Byte offset of end of token within input text */
|
||||
int iPos /* Number of tokens before this one in input text */
|
||||
int iEnd /* Byte offset of end of token within input text */
|
||||
)
|
||||
);
|
||||
};
|
||||
|
||||
/* Flags that may be passed as the third argument to xTokenize() */
|
||||
#define FTS5_TOKENIZE_QUERY 0x0001
|
||||
#define FTS5_TOKENIZE_PREFIX 0x0002
|
||||
#define FTS5_TOKENIZE_DOCUMENT 0x0004
|
||||
#define FTS5_TOKENIZE_AUX 0x0008
|
||||
|
||||
/* Flags that may be passed by the tokenizer implementation back to FTS5
|
||||
** as the second argument to the supplied xToken callback. */
|
||||
#define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */
|
||||
|
||||
/*
|
||||
** END OF CUSTOM TOKENIZERS
|
||||
*************************************************************************/
|
||||
|
@ -169,7 +169,7 @@ int sqlite3Fts5Tokenize(
|
||||
int flags, /* FTS5_TOKENIZE_* flags */
|
||||
const char *pText, int nText, /* Text to tokenize */
|
||||
void *pCtx, /* Context passed to xToken() */
|
||||
int (*xToken)(void*, const char*, int, int, int, int) /* Callback */
|
||||
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
|
||||
);
|
||||
|
||||
void sqlite3Fts5Dequote(char *z);
|
||||
|
@ -148,17 +148,18 @@ static void fts5HighlightAppend(
|
||||
*/
|
||||
static int fts5HighlightCb(
|
||||
void *pContext, /* Pointer to HighlightContext object */
|
||||
int tflags, /* Mask of FTS5_TOKEN_* flags */
|
||||
const char *pToken, /* Buffer containing token */
|
||||
int nToken, /* Size of token in bytes */
|
||||
int iStartOff, /* Start offset of token */
|
||||
int iEndOff, /* End offset of token */
|
||||
int iPos
|
||||
int iEndOff /* End offset of token */
|
||||
){
|
||||
HighlightContext *p = (HighlightContext*)pContext;
|
||||
int rc = SQLITE_OK;
|
||||
int iPos;
|
||||
|
||||
if( iPos<p->iPos ) return SQLITE_OK;
|
||||
p->iPos = iPos+1;
|
||||
if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
|
||||
iPos = p->iPos++;
|
||||
|
||||
if( p->iRangeEnd>0 ){
|
||||
if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
|
||||
|
@ -648,7 +648,7 @@ int sqlite3Fts5Tokenize(
|
||||
int flags, /* FTS5_TOKENIZE_* flags */
|
||||
const char *pText, int nText, /* Text to tokenize */
|
||||
void *pCtx, /* Context passed to xToken() */
|
||||
int (*xToken)(void*, const char*, int, int, int, int) /* Callback */
|
||||
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
|
||||
){
|
||||
if( pText==0 ) return SQLITE_OK;
|
||||
return pConfig->pTokApi->xTokenize(
|
||||
|
@ -1338,11 +1338,11 @@ struct TokenCtx {
|
||||
*/
|
||||
static int fts5ParseTokenize(
|
||||
void *pContext, /* Pointer to TokenCtx object */
|
||||
int tflags, /* Mask of FTS5_TOKEN_* flags */
|
||||
const char *pToken, /* Buffer containing token */
|
||||
int nToken, /* Size of token in bytes */
|
||||
int iStart, /* Start offset of token */
|
||||
int iEnd, /* End offset of token */
|
||||
int iPos
|
||||
int iEnd /* End offset of token */
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
const int SZALLOC = 8;
|
||||
@ -1350,6 +1350,8 @@ static int fts5ParseTokenize(
|
||||
Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
|
||||
Fts5ExprTerm *pTerm;
|
||||
|
||||
if( tflags & FTS5_TOKEN_COLOCATED ) return rc;
|
||||
|
||||
if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
|
||||
Fts5ExprPhrase *pNew;
|
||||
int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
|
||||
|
@ -1498,7 +1498,7 @@ static int fts5ApiTokenize(
|
||||
Fts5Context *pCtx,
|
||||
const char *pText, int nText,
|
||||
void *pUserData,
|
||||
int (*xToken)(void*, const char*, int, int, int, int)
|
||||
int (*xToken)(void*, int, const char*, int, int, int)
|
||||
){
|
||||
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
|
||||
Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
|
||||
@ -1657,14 +1657,16 @@ static int fts5ApiColumnText(
|
||||
|
||||
static int fts5ColumnSizeCb(
|
||||
void *pContext, /* Pointer to int */
|
||||
int tflags,
|
||||
const char *pToken, /* Buffer containing token */
|
||||
int nToken, /* Size of token in bytes */
|
||||
int iStart, /* Start offset of token */
|
||||
int iEnd, /* End offset of token */
|
||||
int iPos
|
||||
int iEnd /* End offset of token */
|
||||
){
|
||||
int *pCnt = (int*)pContext;
|
||||
*pCnt = iPos+1;
|
||||
if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
|
||||
(*pCnt)++;
|
||||
}
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
|
@ -359,17 +359,18 @@ struct Fts5InsertCtx {
|
||||
*/
|
||||
static int fts5StorageInsertCallback(
|
||||
void *pContext, /* Pointer to Fts5InsertCtx object */
|
||||
int tflags,
|
||||
const char *pToken, /* Buffer containing token */
|
||||
int nToken, /* Size of token in bytes */
|
||||
int iStart, /* Start offset of token */
|
||||
int iEnd, /* End offset of token */
|
||||
int iPos
|
||||
int iEnd /* End offset of token */
|
||||
){
|
||||
Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
|
||||
Fts5Index *pIdx = pCtx->pStorage->pIndex;
|
||||
assert( iPos+1>=pCtx->szCol );
|
||||
pCtx->szCol = iPos+1;
|
||||
return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken);
|
||||
if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
|
||||
pCtx->szCol++;
|
||||
}
|
||||
return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -844,17 +845,18 @@ struct Fts5IntegrityCtx {
|
||||
*/
|
||||
static int fts5StorageIntegrityCallback(
|
||||
void *pContext, /* Pointer to Fts5IntegrityCtx object */
|
||||
int tflags,
|
||||
const char *pToken, /* Buffer containing token */
|
||||
int nToken, /* Size of token in bytes */
|
||||
int iStart, /* Start offset of token */
|
||||
int iEnd, /* End offset of token */
|
||||
int iPos
|
||||
int iEnd /* End offset of token */
|
||||
){
|
||||
Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
|
||||
assert( iPos+1>=pCtx->szCol );
|
||||
pCtx->szCol = iPos+1;
|
||||
if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
|
||||
pCtx->szCol++;
|
||||
}
|
||||
pCtx->cksum ^= sqlite3Fts5IndexCksum(
|
||||
pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
|
||||
pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken
|
||||
);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
@ -141,8 +141,9 @@ struct F5tAuxData {
|
||||
|
||||
static int xTokenizeCb(
|
||||
void *pCtx,
|
||||
int tflags,
|
||||
const char *zToken, int nToken,
|
||||
int iStart, int iEnd, int iPos
|
||||
int iStart, int iEnd
|
||||
){
|
||||
F5tFunction *p = (F5tFunction*)pCtx;
|
||||
Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript);
|
||||
@ -584,8 +585,9 @@ struct F5tTokenizeCtx {
|
||||
|
||||
static int xTokenizeCb2(
|
||||
void *pCtx,
|
||||
int tflags,
|
||||
const char *zToken, int nToken,
|
||||
int iStart, int iEnd, int iPos
|
||||
int iStart, int iEnd
|
||||
){
|
||||
F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx;
|
||||
if( p->bSubst ){
|
||||
@ -694,7 +696,7 @@ typedef struct F5tTokenizerModule F5tTokenizerInstance;
|
||||
|
||||
struct F5tTokenizerContext {
|
||||
void *pCtx;
|
||||
int (*xToken)(void*, const char*, int, int, int);
|
||||
int (*xToken)(void*, int, const char*, int, int, int);
|
||||
};
|
||||
|
||||
struct F5tTokenizerModule {
|
||||
@ -752,11 +754,11 @@ static int f5tTokenizerTokenize(
|
||||
void *pCtx,
|
||||
int flags,
|
||||
const char *pText, int nText,
|
||||
int (*xToken)(void*, const char*, int, int, int, int)
|
||||
int (*xToken)(void*, int, const char*, int, int, int)
|
||||
){
|
||||
F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
|
||||
void *pOldCtx;
|
||||
int (*xOldToken)(void*, const char*, int, int, int);
|
||||
int (*xOldToken)(void*, int, const char*, int, int, int);
|
||||
Tcl_Obj *pEval;
|
||||
int rc;
|
||||
|
||||
@ -813,7 +815,7 @@ static int f5tTokenizerReturn(
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd);
|
||||
rc = p->xToken(p->pCtx, 0, zToken, nToken, iStart, iEnd);
|
||||
Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
|
||||
return TCL_OK;
|
||||
}
|
||||
|
@ -118,13 +118,12 @@ static int fts5AsciiTokenize(
|
||||
void *pCtx,
|
||||
int flags,
|
||||
const char *pText, int nText,
|
||||
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
|
||||
int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
|
||||
){
|
||||
AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
|
||||
int rc = SQLITE_OK;
|
||||
int ie;
|
||||
int is = 0;
|
||||
int iPos = 0;
|
||||
|
||||
char aFold[64];
|
||||
int nFold = sizeof(aFold);
|
||||
@ -160,7 +159,7 @@ static int fts5AsciiTokenize(
|
||||
asciiFold(pFold, &pText[is], nByte);
|
||||
|
||||
/* Invoke the token callback */
|
||||
rc = xToken(pCtx, pFold, nByte, is, ie, iPos++);
|
||||
rc = xToken(pCtx, 0, pFold, nByte, is, ie);
|
||||
is = ie+1;
|
||||
}
|
||||
|
||||
@ -389,12 +388,11 @@ static int fts5UnicodeTokenize(
|
||||
void *pCtx,
|
||||
int flags,
|
||||
const char *pText, int nText,
|
||||
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
|
||||
int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
|
||||
){
|
||||
Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
|
||||
int rc = SQLITE_OK;
|
||||
unsigned char *a = p->aTokenChar;
|
||||
int iPos = 0;
|
||||
|
||||
unsigned char *zTerm = (unsigned char*)&pText[nText];
|
||||
unsigned char *zCsr = (unsigned char *)pText;
|
||||
@ -479,7 +477,7 @@ static int fts5UnicodeTokenize(
|
||||
}
|
||||
|
||||
/* Invoke the token callback */
|
||||
rc = xToken(pCtx, aFold, zOut-aFold, is, ie, iPos++);
|
||||
rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie);
|
||||
}
|
||||
|
||||
tokenize_done:
|
||||
@ -557,7 +555,7 @@ static int fts5PorterCreate(
|
||||
typedef struct PorterContext PorterContext;
|
||||
struct PorterContext {
|
||||
void *pCtx;
|
||||
int (*xToken)(void*, const char*, int, int, int, int);
|
||||
int (*xToken)(void*, int, const char*, int, int, int);
|
||||
char *aBuf;
|
||||
};
|
||||
|
||||
@ -1122,11 +1120,11 @@ static void fts5PorterStep1A(char *aBuf, int *pnBuf){
|
||||
|
||||
static int fts5PorterCb(
|
||||
void *pCtx,
|
||||
int tflags,
|
||||
const char *pToken,
|
||||
int nToken,
|
||||
int iStart,
|
||||
int iEnd,
|
||||
int iPos
|
||||
int iEnd
|
||||
){
|
||||
PorterContext *p = (PorterContext*)pCtx;
|
||||
|
||||
@ -1180,10 +1178,10 @@ static int fts5PorterCb(
|
||||
nBuf--;
|
||||
}
|
||||
|
||||
return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd, iPos);
|
||||
return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd);
|
||||
|
||||
pass_through:
|
||||
return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd, iPos);
|
||||
return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1194,7 +1192,7 @@ static int fts5PorterTokenize(
|
||||
void *pCtx,
|
||||
int flags,
|
||||
const char *pText, int nText,
|
||||
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
|
||||
int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
|
||||
){
|
||||
PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
|
||||
PorterContext sCtx;
|
||||
|
31
manifest
31
manifest
@ -1,5 +1,5 @@
|
||||
C Change\sthe\sfts5\stokenizer\sAPI\sto\sallow\smore\sthan\sone\stoken\sto\soccupy\sa\ssingle\sposition\swithin\sa\sdocument.
|
||||
D 2015-08-28T19:56:47.300
|
||||
C Another\schange\sto\sthe\sfts5\stokenizer\sAPI.
|
||||
D 2015-08-29T15:44:27.938
|
||||
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
||||
F Makefile.in e2218eb228374422969de7b1680eda6864affcef
|
||||
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
||||
@ -105,19 +105,19 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
|
||||
F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252
|
||||
F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95
|
||||
F ext/fts5/extract_api_docs.tcl 06583c935f89075ea0b32f85efa5dd7619fcbd03
|
||||
F ext/fts5/fts5.h b9dfb487ada3caab4400210609b8309b71a4fb4d
|
||||
F ext/fts5/fts5Int.h b0cfe44ec9451f766b77c4e5f771e7919c6dc8d5
|
||||
F ext/fts5/fts5_aux.c 7d0e275ee94ad7afdd4208d6b071b4319e8f9ca0
|
||||
F ext/fts5/fts5.h 0784692f406588e6c90e13a78e1f36e7e3236e42
|
||||
F ext/fts5/fts5Int.h 9fd31e682acae32806f77e7c3b543c4294274c92
|
||||
F ext/fts5/fts5_aux.c 7a307760a9c57c750d043188ec0bad59f5b5ec7e
|
||||
F ext/fts5/fts5_buffer.c 80f9ba4431848cb857e3d2158f5280093dcd8015
|
||||
F ext/fts5/fts5_config.c ab81c8ccff6c0fb79f21c369e18e8e0dec365ec5
|
||||
F ext/fts5/fts5_expr.c f53917b6e68dee62e4c525466edacacf82eb7cbc
|
||||
F ext/fts5/fts5_config.c 80b61fd2c6844b64a3e72a64572d50a812da9384
|
||||
F ext/fts5/fts5_expr.c 7ea46f676491989069d31ae1f75c9439b0858711
|
||||
F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246
|
||||
F ext/fts5/fts5_index.c 076c4995bf06a6d1559a6e31f9a86b90f2105374
|
||||
F ext/fts5/fts5_main.c 7afdb84ac40b0e5bbb920a07a5cd5e062963816c
|
||||
F ext/fts5/fts5_storage.c 9c263323479a4aa554738e421813cd05615d379c
|
||||
F ext/fts5/fts5_tcl.c 41e2d6b455547a157085fd35fd59d4fd890dc7d3
|
||||
F ext/fts5/fts5_main.c b00834ac543431dc35edbe18018b4befe0c7fd42
|
||||
F ext/fts5/fts5_storage.c 9820e7b53ea12baf3c818485efd66346b73030c3
|
||||
F ext/fts5/fts5_tcl.c 058f8da51964458e9859edfc1ee13b1863edaeae
|
||||
F ext/fts5/fts5_test_mi.c 80a9e86fb4c5b6b58f8fefac05e9b96d1a6574e1
|
||||
F ext/fts5/fts5_tokenize.c 07a894410bc074685ddc0a9d89b5e7bf57ea4482
|
||||
F ext/fts5/fts5_tokenize.c 710541513ecf3fe6d9365326fc85aee6efe97229
|
||||
F ext/fts5/fts5_unicode2.c 78273fbd588d1d9bd0a7e4e0ccc9207348bae33c
|
||||
F ext/fts5/fts5_varint.c 3f86ce09cab152e3d45490d7586b7ed2e40c13f1
|
||||
F ext/fts5/fts5_vocab.c 4622e0b7d84a488a1585aaa56eb214ee67a988bc
|
||||
@ -1380,10 +1380,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
|
||||
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
|
||||
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
|
||||
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
|
||||
P 0fdc36fe35ae2fc8e9688fe6c53437f4d47502d9
|
||||
R 694c0e23ba08ed9bcc32d2c502ed8f13
|
||||
T *branch * fts5-incompatible
|
||||
T *sym-fts5-incompatible *
|
||||
T -sym-trunk *
|
||||
P 90b85b42f2b2dd3e939b129b7df2b822a05e243d
|
||||
R f343432805e01f14633e088d58d566cf
|
||||
U dan
|
||||
Z 745a50831400d199b74f44c2476ec260
|
||||
Z e4288542e4294b868813263b0597051d
|
||||
|
@ -1 +1 @@
|
||||
90b85b42f2b2dd3e939b129b7df2b822a05e243d
|
||||
fc71868496f45f9c7a79ed2bf2d164a7c4718ce1
|
Loading…
Reference in New Issue
Block a user