Merge trunk enhancements, and espeically the fix for allowing strings

as column identifers in CREATE INDEX statements.

FossilOrigin-Name: 5ff855293865c244ac632c630e8e7e8d7c05a5f6
This commit is contained in:
drh 2015-09-04 13:02:21 +00:00
commit aac39e1ded
34 changed files with 1848 additions and 733 deletions

View File

@ -108,13 +108,15 @@ proc get_tokenizer_docs {data} {
append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n"
continue
}
if {[regexp {SYNONYM SUPPORT} $line]} {
set line "</dl><h3>Synonym Support</h3>"
}
if {[string trim $line] == ""} {
append res "<p>\n"
} else {
append res "$line\n"
}
}
append res "</dl>\n"
set res
}
@ -208,6 +210,10 @@ proc main {data} {
fts5_tokenizer {
output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"]
output [get_fts5_struct $data \
"Flags that may be passed as the third argument to xTokenize()" \
"#define FTS5_TOKEN_COLOCATED"
]
}
fts5_extension {

View File

@ -217,7 +217,7 @@ struct Fts5ExtensionApi {
int (*xTokenize)(Fts5Context*,
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int) /* Callback */
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
);
int (*xPhraseCount)(Fts5Context*);
@ -278,18 +278,46 @@ struct Fts5ExtensionApi {
**
** xTokenize:
** This function is expected to tokenize the nText byte string indicated
** by argument pText. pText may not be nul-terminated. The first argument
** passed to this function is a pointer to an Fts5Tokenizer object returned
** by an earlier call to xCreate().
** by argument pText. pText may or may not be nul-terminated. The first
** argument passed to this function is a pointer to an Fts5Tokenizer object
** returned by an earlier call to xCreate().
**
** The second argument indicates the reason that FTS5 is requesting
** tokenization of the supplied text. This is always one of the following
** four values:
**
** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
** or removed from the FTS table. The tokenizer is being invoked to
** determine the set of tokens to add to (or delete from) the
** FTS index.
**
** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
** against the FTS index. The tokenizer is being called to tokenize
** a bareword or quoted string specified as part of the query.
**
** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
** followed by a "*" character, indicating that the last token
** returned by the tokenizer will be treated as a token prefix.
**
** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
** satisfy an fts5_api.xTokenize() request made by an auxiliary
** function. Or an fts5_api.xColumnSize() request made by the same
** on a columnsize=0 database.
** </ul>
**
** For each token in the input string, the supplied callback xToken() must
** be invoked. The first argument to it should be a copy of the pointer
** passed as the second argument to xTokenize(). The next two arguments
** are a pointer to a buffer containing the token text, and the size of
** the token in bytes. The 4th and 5th arguments are the byte offsets of
** the first byte of and first byte immediately following the text from
** passed as the second argument to xTokenize(). The third and fourth
** arguments are a pointer to a buffer containing the token text, and the
** size of the token in bytes. The 4th and 5th arguments are the byte offsets
** of the first byte of and first byte immediately following the text from
** which the token is derived within the input.
**
** The second argument passed to the xToken() callback ("tflags") should
** normally be set to 0. The exception is if the tokenizer supports
** synonyms. In this case see the discussion below for details.
**
** FTS5 assumes the xToken() callback is invoked for each token in the
** order that they occur within the input text.
**
@ -301,6 +329,112 @@ struct Fts5ExtensionApi {
** may abandon the tokenization and return any error code other than
** SQLITE_OK or SQLITE_DONE.
**
** SYNONYM SUPPORT
**
** Custom tokenizers may also support synonyms. Consider a case in which a
** user wishes to query for a phrase such as "first place". Using the
** built-in tokenizers, the FTS5 query 'first + place' will match instances
** of "first place" within the document set, but not alternative forms
** such as "1st place". In some applications, it would be better to match
** all instances of "first place" or "1st place" regardless of which form
** the user specified in the MATCH query text.
**
** There are several ways to approach this in FTS5:
**
** <ol><li> By mapping all synonyms to a single token. In this case, the
** In the above example, this means that the tokenizer returns the
** same token for inputs "first" and "1st". Say that token is in
** fact "first", so that when the user inserts the document "I won
** 1st place" entries are added to the index for tokens "i", "won",
** "first" and "place". If the user then queries for '1st + place',
** the tokenizer substitutes "first" for "1st" and the query works
** as expected.
**
** <li> By adding multiple synonyms for a single term to the FTS index.
** In this case, when tokenizing query text, the tokenizer may
** provide multiple synonyms for a single term within the document.
** FTS5 then queries the index for each synonym individually. For
** example, faced with the query:
**
** <codeblock>
** ... MATCH 'first place'</codeblock>
**
** the tokenizer offers both "1st" and "first" as synonyms for the
** first token in the MATCH query and FTS5 effectively runs a query
** similar to:
**
** <codeblock>
** ... MATCH '(first OR 1st) place'</codeblock>
**
** except that, for the purposes of auxiliary functions, the query
** still appears to contain just two phrases - "(first OR 1st)"
** being treated as a single phrase.
**
** <li> By adding multiple synonyms for a single term to the FTS index.
** Using this method, when tokenizing document text, the tokenizer
** provides multiple synonyms for each token. So that when a
** document such as "I won first place" is tokenized, entries are
** added to the FTS index for "i", "won", "first", "1st" and
** "place".
**
** This way, even if the tokenizer does not provide synonyms
** when tokenizing query text (it should not - to do would be
** inefficient), it doesn't matter if the user queries for
** 'first + place' or '1st + place', as there are entires in the
** FTS index corresponding to both forms of the first token.
** </ol>
**
** Whether is is parsing document or query text, any call to xToken that
** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
** is considered to supply a synonym for the previous token. For example,
** when parsing the document "I won first place", a tokenizer that supports
** synonyms would call xToken() 5 times, as follows:
**
** <codeblock>
** xToken(pCtx, 0, "i", 1, 0, 1);
** xToken(pCtx, 0, "won", 3, 2, 5);
** xToken(pCtx, 0, "first", 5, 6, 11);
** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11);
** xToken(pCtx, 0, "place", 5, 12, 17);
**</codeblock>
**
** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
** xToken() is called. Multiple synonyms may be specified for a single token
** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
** There is no limit to the number of synonyms that may be provided for a
** single token.
**
** In many cases, method (1) above is the best approach. It does not add
** extra data to the FTS index or require FTS5 to query for multiple terms,
** so it is efficient in terms of disk space and query speed. However, it
** does not support prefix queries very well. If, as suggested above, the
** token "first" is subsituted for "1st" by the tokenizer, then the query:
**
** <codeblock>
** ... MATCH '1s*'</codeblock>
**
** will not match documents that contain the token "1st" (as the tokenizer
** will probably not map "1s" to any prefix of "first").
**
** For full prefix support, method (3) may be preferred. In this case,
** because the index contains entries for both "first" and "1st", prefix
** queries such as 'fi*' or '1s*' will match correctly. However, because
** extra entries are added to the FTS index, this method uses more space
** within the database.
**
** Method (2) offers a midpoint between (1) and (3). Using this method,
** a query such as '1s*' will match documents that contain the literal
** token "1st", but not "first" (assuming the tokenizer is not able to
** provide synonyms for prefixes). However, a non-prefix query like '1st'
** will match against "1st" and "first". This method does not require
** extra disk space, as no extra entries are added to the FTS index.
** On the other hand, it may require more CPU cycles to run MATCH queries,
** as separate queries of the FTS index are required for each synonym.
**
** When using methods (2) or (3), it is important that the tokenizer only
** provide synonyms when tokenizing document text (method (2)) or query
** text (method (3)), not both. Doing so will not cause any errors, but is
** inefficient.
*/
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer fts5_tokenizer;
@ -309,9 +443,11 @@ struct fts5_tokenizer {
void (*xDelete)(Fts5Tokenizer*);
int (*xTokenize)(Fts5Tokenizer*,
void *pCtx,
int flags, /* Mask of FTS5_TOKENIZE_* flags */
const char *pText, int nText,
int (*xToken)(
void *pCtx, /* Copy of 2nd argument to xTokenize() */
int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Pointer to buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Byte offset of token within input text */
@ -320,6 +456,16 @@ struct fts5_tokenizer {
);
};
/* Flags that may be passed as the third argument to xTokenize() */
#define FTS5_TOKENIZE_QUERY 0x0001
#define FTS5_TOKENIZE_PREFIX 0x0002
#define FTS5_TOKENIZE_DOCUMENT 0x0004
#define FTS5_TOKENIZE_AUX 0x0008
/* Flags that may be passed by the tokenizer implementation back to FTS5
** as the third argument to the supplied xToken callback. */
#define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */
/*
** END OF CUSTOM TOKENIZERS
*************************************************************************/
@ -329,7 +475,7 @@ struct fts5_tokenizer {
*/
typedef struct fts5_api fts5_api;
struct fts5_api {
int iVersion; /* Currently always set to 1 */
int iVersion; /* Currently always set to 2 */
/* Create a new tokenizer */
int (*xCreateTokenizer)(

View File

@ -166,9 +166,10 @@ int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);
int sqlite3Fts5Tokenize(
Fts5Config *pConfig, /* FTS5 Configuration object */
int flags, /* FTS5_TOKENIZE_* flags */
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int) /* Callback */
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
);
void sqlite3Fts5Dequote(char *z);
@ -234,8 +235,10 @@ struct Fts5PoslistReader {
int n; /* Size of buffer at a[] in bytes */
int i; /* Current offset in a[] */
u8 bFlag; /* For client use (any custom purpose) */
/* Output variables */
int bEof; /* Set to true at EOF */
u8 bEof; /* Set to true at EOF */
i64 iPos; /* (iCol<<32) + iPos */
};
int sqlite3Fts5PoslistReaderInit(
@ -381,9 +384,9 @@ int sqlite3Fts5IndexErrcode(Fts5Index*);
void sqlite3Fts5IndexReset(Fts5Index*);
/*
** Get or set the "averages" record.
** Get or set the "averages" values.
*/
int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf);
int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize);
int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);
/*
@ -596,7 +599,7 @@ int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);
int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**);
int sqlite3Fts5ExprClonePhrase(Fts5Config*, Fts5Expr*, int, Fts5Expr**);
/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written

View File

@ -148,6 +148,7 @@ static void fts5HighlightAppend(
*/
static int fts5HighlightCb(
void *pContext, /* Pointer to HighlightContext object */
int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStartOff, /* Start offset of token */
@ -155,7 +156,10 @@ static int fts5HighlightCb(
){
HighlightContext *p = (HighlightContext*)pContext;
int rc = SQLITE_OK;
int iPos = p->iPos++;
int iPos;
if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
iPos = p->iPos++;
if( p->iRangeEnd>0 ){
if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;

View File

@ -645,12 +645,15 @@ int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){
*/
int sqlite3Fts5Tokenize(
Fts5Config *pConfig, /* FTS5 Configuration object */
int flags, /* FTS5_TOKENIZE_* flags */
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int) /* Callback */
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
){
if( pText==0 ) return SQLITE_OK;
return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken);
return pConfig->pTokApi->xTokenize(
pConfig->pTok, pCtx, flags, pText, nText, xToken
);
}
/*

View File

@ -22,6 +22,8 @@
*/
#define FTS5_EOF 0
#define FTS5_LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32))
typedef struct Fts5ExprTerm Fts5ExprTerm;
/*
@ -73,6 +75,7 @@ struct Fts5ExprTerm {
int bPrefix; /* True for a prefix term */
char *zTerm; /* nul-terminated term */
Fts5IndexIter *pIter; /* Iterator for this term */
Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */
};
/*
@ -181,6 +184,10 @@ static int fts5ExprGetToken(
default: {
const char *z2;
if( sqlite3Fts5IsBareword(z[0])==0 ){
sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
return FTS5_EOF;
}
tok = FTS5_STRING;
for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
pToken->n = (z2 - z);
@ -244,79 +251,6 @@ int sqlite3Fts5ExprNew(
return sParse.rc;
}
/*
** Create a new FTS5 expression by cloning phrase iPhrase of the
** expression passed as the second argument.
*/
int sqlite3Fts5ExprPhraseExpr(
Fts5Config *pConfig,
Fts5Expr *pExpr,
int iPhrase,
Fts5Expr **ppNew
){
int rc = SQLITE_OK; /* Return code */
Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */
Fts5ExprPhrase *pCopy; /* Copy of pOrig */
Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
pOrig = pExpr->apExprPhrase[iPhrase];
pCopy = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm
);
if( pCopy ){
int i; /* Used to iterate through phrase terms */
Fts5ExprPhrase **apPhrase;
Fts5ExprNode *pNode;
Fts5ExprNearset *pNear;
pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
apPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprPhrase*)
);
pNode = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNode));
pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)
);
for(i=0; i<pOrig->nTerm; i++){
pCopy->aTerm[i].zTerm = sqlite3Fts5Strndup(&rc, pOrig->aTerm[i].zTerm,-1);
pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
}
if( rc==SQLITE_OK ){
/* All the allocations succeeded. Put the expression object together. */
pNew->pIndex = pExpr->pIndex;
pNew->pRoot = pNode;
pNew->nPhrase = 1;
pNew->apExprPhrase = apPhrase;
pNew->apExprPhrase[0] = pCopy;
pNode->eType = (pOrig->nTerm==1 ? FTS5_TERM : FTS5_STRING);
pNode->pNear = pNear;
pNear->nPhrase = 1;
pNear->apPhrase[0] = pCopy;
pCopy->nTerm = pOrig->nTerm;
pCopy->pNode = pNode;
}else{
/* At least one allocation failed. Free them all. */
for(i=0; i<pOrig->nTerm; i++){
sqlite3_free(pCopy->aTerm[i].zTerm);
}
sqlite3_free(pCopy);
sqlite3_free(pNear);
sqlite3_free(pNode);
sqlite3_free(apPhrase);
sqlite3_free(pNew);
pNew = 0;
}
}
*ppNew = pNew;
return rc;
}
/*
** Free the expression node object passed as the only argument.
*/
@ -350,6 +284,115 @@ static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
return 0;
}
/*
** Argument pTerm must be a synonym iterator. Return the current rowid
** that it points to.
*/
static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){
i64 iRet = 0;
int bRetValid = 0;
Fts5ExprTerm *p;
assert( pTerm->pSynonym );
assert( bDesc==0 || bDesc==1 );
for(p=pTerm; p; p=p->pSynonym){
if( 0==sqlite3Fts5IterEof(p->pIter) ){
i64 iRowid = sqlite3Fts5IterRowid(p->pIter);
if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){
iRet = iRowid;
bRetValid = 1;
}
}
}
if( pbEof && bRetValid==0 ) *pbEof = 1;
return iRet;
}
/*
** Argument pTerm must be a synonym iterator.
*/
static int fts5ExprSynonymPoslist(
Fts5ExprTerm *pTerm,
i64 iRowid,
int *pbDel, /* OUT: Caller should sqlite3_free(*pa) */
u8 **pa, int *pn
){
Fts5PoslistWriter writer = {0};
Fts5PoslistReader aStatic[4];
Fts5PoslistReader *aIter = aStatic;
int nIter = 0;
int nAlloc = 4;
int rc = SQLITE_OK;
Fts5ExprTerm *p;
assert( pTerm->pSynonym );
for(p=pTerm; p; p=p->pSynonym){
Fts5IndexIter *pIter = p->pIter;
if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){
const u8 *a;
int n;
i64 dummy;
rc = sqlite3Fts5IterPoslist(pIter, &a, &n, &dummy);
if( rc!=SQLITE_OK ) goto synonym_poslist_out;
if( nIter==nAlloc ){
int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte);
if( aNew==0 ){
rc = SQLITE_NOMEM;
goto synonym_poslist_out;
}
memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
nAlloc = nAlloc*2;
if( aIter!=aStatic ) sqlite3_free(aIter);
aIter = aNew;
}
sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[nIter]);
assert( aIter[nIter].bEof==0 );
nIter++;
}
}
assert( *pbDel==0 );
if( nIter==1 ){
*pa = (u8*)aIter[0].a;
*pn = aIter[0].n;
}else{
Fts5PoslistWriter writer = {0};
Fts5Buffer buf = {0,0,0};
i64 iPrev = -1;
while( 1 ){
int i;
i64 iMin = FTS5_LARGEST_INT64;
for(i=0; i<nIter; i++){
if( aIter[i].bEof==0 ){
if( aIter[i].iPos==iPrev ){
if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue;
}
if( aIter[i].iPos<iMin ){
iMin = aIter[i].iPos;
}
}
}
if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break;
rc = sqlite3Fts5PoslistWriterAppend(&buf, &writer, iMin);
iPrev = iMin;
}
if( rc ){
sqlite3_free(buf.p);
}else{
*pa = buf.p;
*pn = buf.n;
*pbDel = 1;
}
}
synonym_poslist_out:
if( aIter!=aStatic ) sqlite3_free(aIter);
return rc;
}
/*
** All individual term iterators in pPhrase are guaranteed to be valid and
** pointing to the same rowid when this function is called. This function
@ -362,7 +405,7 @@ static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
** not a match.
*/
static int fts5ExprPhraseIsMatch(
Fts5Expr *pExpr, /* Expression pPhrase belongs to */
Fts5ExprNode *pNode, /* Node pPhrase belongs to */
Fts5ExprColset *pColset, /* Restrict matches to these columns */
Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */
int *pbMatch /* OUT: Set to true if really a match */
@ -388,16 +431,24 @@ static int fts5ExprPhraseIsMatch(
aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
if( !aIter ) return SQLITE_NOMEM;
}
memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);
/* Initialize a term iterator for each term in the phrase */
for(i=0; i<pPhrase->nTerm; i++){
Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
i64 dummy;
int n;
const u8 *a;
rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy);
if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){
goto ismatch_out;
int n = 0;
int bFlag = 0;
const u8 *a = 0;
if( pTerm->pSynonym ){
rc = fts5ExprSynonymPoslist(pTerm, pNode->iRowid, &bFlag, (u8**)&a, &n);
}else{
rc = sqlite3Fts5IterPoslist(pTerm->pIter, &a, &n, &dummy);
}
if( rc!=SQLITE_OK ) goto ismatch_out;
sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]);
aIter[i].bFlag = bFlag;
if( aIter[i].bEof ) goto ismatch_out;
}
while( 1 ){
@ -431,6 +482,9 @@ static int fts5ExprPhraseIsMatch(
ismatch_out:
*pbMatch = (pPhrase->poslist.n>0);
for(i=0; i<pPhrase->nTerm; i++){
if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a);
}
if( aIter!=aStatic ) sqlite3_free(aIter);
return rc;
}
@ -598,9 +652,45 @@ static int fts5ExprNearAdvanceFirst(
int bFromValid,
i64 iFrom
){
Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
int rc;
if( pTerm->pSynonym ){
int bEof = 1;
Fts5ExprTerm *p;
/* Find the firstest rowid any synonym points to. */
i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0);
/* Advance each iterator that currently points to iRowid. Or, if iFrom
** is valid - each iterator that points to a rowid before iFrom. */
for(p=pTerm; p; p=p->pSynonym){
if( sqlite3Fts5IterEof(p->pIter)==0 ){
i64 ii = sqlite3Fts5IterRowid(p->pIter);
if( ii==iRowid
|| (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc)
){
if( bFromValid ){
rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom);
}else{
rc = sqlite3Fts5IterNext(p->pIter);
}
if( rc!=SQLITE_OK ) break;
if( sqlite3Fts5IterEof(p->pIter)==0 ){
bEof = 0;
}
}else{
bEof = 0;
}
}
}
/* Set the EOF flag if either all synonym iterators are at EOF or an
** error has occurred. */
pNode->bEof = (rc || bEof);
}else{
Fts5IndexIter *pIter = pTerm->pIter;
assert( Fts5NodeIsString(pNode) );
if( bFromValid ){
rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
@ -609,6 +699,8 @@ static int fts5ExprNearAdvanceFirst(
}
pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
}
return rc;
}
@ -647,6 +739,35 @@ static int fts5ExprAdvanceto(
return 0;
}
static int fts5ExprSynonymAdvanceto(
Fts5ExprTerm *pTerm, /* Term iterator to advance */
int bDesc, /* True if iterator is "rowid DESC" */
i64 *piLast, /* IN/OUT: Lastest rowid seen so far */
int *pRc /* OUT: Error code */
){
int rc = SQLITE_OK;
i64 iLast = *piLast;
Fts5ExprTerm *p;
int bEof = 0;
for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){
if( sqlite3Fts5IterEof(p->pIter)==0 ){
i64 iRowid = sqlite3Fts5IterRowid(p->pIter);
if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
rc = sqlite3Fts5IterNextFrom(p->pIter, iLast);
}
}
}
if( rc!=SQLITE_OK ){
*pRc = rc;
bEof = 1;
}else{
*piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
}
return bEof;
}
/*
** IN/OUT parameter (*pa) points to a position list n bytes in size. If
** the position list contains entries for column iCol, then (*pa) is set
@ -717,9 +838,9 @@ static int fts5ExprNearTest(
** phrase is not a match, break out of the loop early. */
for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
if( pPhrase->nTerm>1 || pNear->pColset ){
if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){
int bMatch = 0;
rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch);
rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch);
if( bMatch==0 ) break;
}else{
rc = sqlite3Fts5IterPoslistBuffer(
@ -755,6 +876,7 @@ static int fts5ExprTokenTest(
assert( pNode->eType==FTS5_TERM );
assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );
assert( pPhrase->aTerm[0].pSynonym==0 );
rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);
@ -801,69 +923,99 @@ static int fts5ExprNearNextMatch(
i64 iLast; /* Lastest rowid any iterator points to */
int i, j; /* Phrase and token index, respectively */
int bMatch; /* True if all terms are at the same rowid */
const int bDesc = pExpr->bDesc;
assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 );
/* Check that this node should not be FTS5_TERM */
assert( pNear->nPhrase>1
|| pNear->apPhrase[0]->nTerm>1
|| pNear->apPhrase[0]->aTerm[0].pSynonym
);
/* Initialize iLast, the "lastest" rowid any iterator points to. If the
** iterator skips through rowids in the default ascending order, this means
** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
** means the minimum rowid. */
if( pLeft->aTerm[0].pSynonym ){
iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
}else{
iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);
}
do {
bMatch = 1;
for(i=0; i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
for(j=0; j<pPhrase->nTerm; j++){
Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
if( pTerm->pSynonym ){
Fts5ExprTerm *p;
int bEof = 1;
i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0);
if( iRowid==iLast ) continue;
bMatch = 0;
if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){
pNode->bEof = 1;
return rc;
}
}else{
Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
i64 iRowid = sqlite3Fts5IterRowid(pIter);
if( iRowid!=iLast ) bMatch = 0;
if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){
if( iRowid==iLast ) continue;
bMatch = 0;
if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
return rc;
}
}
}
}
}while( bMatch==0 );
pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));
pNode->iRowid = iLast;
pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));
return rc;
}
/*
** Initialize all term iterators in the pNear object. If any term is found
** to match no documents at all, set *pbEof to true and return immediately,
** without initializing any further iterators.
** to match no documents at all, return immediately without initializing any
** further iterators.
*/
static int fts5ExprNearInitAll(
Fts5Expr *pExpr,
Fts5ExprNode *pNode
){
Fts5ExprNearset *pNear = pNode->pNear;
Fts5ExprTerm *pTerm;
Fts5ExprPhrase *pPhrase;
int i, j;
int rc = SQLITE_OK;
for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
pPhrase = pNear->apPhrase[i];
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
for(j=0; j<pPhrase->nTerm; j++){
pTerm = &pPhrase->aTerm[j];
if( pTerm->pIter ){
sqlite3Fts5IterClose(pTerm->pIter);
pTerm->pIter = 0;
Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
Fts5ExprTerm *p;
int bEof = 1;
for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){
if( p->pIter ){
sqlite3Fts5IterClose(p->pIter);
p->pIter = 0;
}
rc = sqlite3Fts5IndexQuery(
pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm),
pExpr->pIndex, p->zTerm, strlen(p->zTerm),
(pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
(pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
&pTerm->pIter
&p->pIter
);
assert( rc==SQLITE_OK || pTerm->pIter==0 );
if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){
assert( rc==SQLITE_OK || p->pIter==0 );
if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){
bEof = 0;
}
}
if( bEof ){
pNode->bEof = 1;
break;
return rc;
}
}
}
@ -1029,10 +1181,17 @@ static int fts5ExprNodeNext(
};
case FTS5_TERM: {
rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom);
if( pNode->bEof==0 ){
Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
if( bFromValid ){
rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
}else{
rc = sqlite3Fts5IterNext(pIter);
}
if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){
assert( rc==SQLITE_OK );
rc = fts5ExprTokenTest(pExpr, pNode);
}else{
pNode->bEof = 1;
}
return rc;
};
@ -1266,10 +1425,16 @@ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
if( pPhrase ){
int i;
for(i=0; i<pPhrase->nTerm; i++){
Fts5ExprTerm *pSyn;
Fts5ExprTerm *pNext;
Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
sqlite3_free(pTerm->zTerm);
if( pTerm->pIter ){
sqlite3Fts5IterClose(pTerm->pIter);
for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
pNext = pSyn->pSynonym;
sqlite3Fts5IterClose(pSyn->pIter);
sqlite3_free(pSyn);
}
}
if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
@ -1331,6 +1496,7 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset(
typedef struct TokenCtx TokenCtx;
struct TokenCtx {
Fts5ExprPhrase *pPhrase;
int rc;
};
/*
@ -1338,17 +1504,36 @@ struct TokenCtx {
*/
static int fts5ParseTokenize(
void *pContext, /* Pointer to Fts5InsertCtx object */
int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd /* End offset of token */
int iUnused1, /* Start offset of token */
int iUnused2 /* End offset of token */
){
int rc = SQLITE_OK;
const int SZALLOC = 8;
TokenCtx *pCtx = (TokenCtx*)pContext;
Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
Fts5ExprTerm *pTerm;
/* If an error has already occurred, this is a no-op */
if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
assert( pPhrase==0 || pPhrase->nTerm>0 );
if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
Fts5ExprTerm *pSyn;
int nByte = sizeof(Fts5ExprTerm) + nToken+1;
pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
if( pSyn==0 ){
rc = SQLITE_NOMEM;
}else{
memset(pSyn, 0, nByte);
pSyn->zTerm = (char*)&pSyn[1];
memcpy(pSyn->zTerm, pToken, nToken);
pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
}
}else{
Fts5ExprTerm *pTerm;
if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
Fts5ExprPhrase *pNew;
int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
@ -1356,16 +1541,23 @@ static int fts5ParseTokenize(
pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase,
sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
);
if( pNew==0 ) return SQLITE_NOMEM;
if( pNew==0 ){
rc = SQLITE_NOMEM;
}else{
if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
pCtx->pPhrase = pPhrase = pNew;
pNew->nTerm = nNew - SZALLOC;
}
}
if( rc==SQLITE_OK ){
pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
memset(pTerm, 0, sizeof(Fts5ExprTerm));
pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
}
}
pCtx->rc = rc;
return rc;
}
@ -1417,11 +1609,14 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm(
rc = fts5ParseStringFromToken(pToken, &z);
if( rc==SQLITE_OK ){
int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_QUERY : 0);
int n;
sqlite3Fts5Dequote(z);
rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize);
n = strlen(z);
rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
}
sqlite3_free(z);
if( rc ){
if( rc || (rc = sCtx.rc) ){
pParse->rc = rc;
fts5ExprPhraseFree(sCtx.pPhrase);
sCtx.pPhrase = 0;
@ -1450,6 +1645,83 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm(
return sCtx.pPhrase;
}
/*
** Create a new FTS5 expression by cloning phrase iPhrase of the
** expression passed as the second argument.
*/
int sqlite3Fts5ExprClonePhrase(
Fts5Config *pConfig,
Fts5Expr *pExpr,
int iPhrase,
Fts5Expr **ppNew
){
int rc = SQLITE_OK; /* Return code */
Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */
Fts5ExprPhrase *pCopy; /* Copy of pOrig */
int i; /* Used to iterate through phrase terms */
Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
Fts5ExprPhrase **apPhrase; /* pNew->apPhrase */
Fts5ExprNode *pNode; /* pNew->pRoot */
Fts5ExprNearset *pNear; /* pNew->pRoot->pNear */
TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */
pOrig = pExpr->apExprPhrase[iPhrase];
pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
if( rc==SQLITE_OK ){
pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprPhrase*));
}
if( rc==SQLITE_OK ){
pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprNode));
}
if( rc==SQLITE_OK ){
pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*));
}
for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
int tflags = 0;
Fts5ExprTerm *p;
for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
const char *zTerm = p->zTerm;
rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, strlen(zTerm), 0, 0);
tflags = FTS5_TOKEN_COLOCATED;
}
if( rc==SQLITE_OK ){
sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
}
}
if( rc==SQLITE_OK ){
/* All the allocations succeeded. Put the expression object together. */
pNew->pIndex = pExpr->pIndex;
pNew->nPhrase = 1;
pNew->apExprPhrase[0] = sCtx.pPhrase;
pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
pNew->pRoot->pNear->nPhrase = 1;
sCtx.pPhrase->pNode = pNew->pRoot;
if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){
pNew->pRoot->eType = FTS5_TERM;
}else{
pNew->pRoot->eType = FTS5_STRING;
}
}else{
sqlite3Fts5ExprFree(pNew);
fts5ExprPhraseFree(sCtx.pPhrase);
pNew = 0;
}
*ppNew = pNew;
return rc;
}
/*
** Token pTok has appeared in a MATCH expression where the NEAR operator
** is expected. If token pTok does not contain "NEAR", store an error
@ -1630,7 +1902,10 @@ Fts5ExprNode *sqlite3Fts5ParseNode(
for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
pNear->apPhrase[iPhrase]->pNode = pRet;
}
if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){
if( pNear->nPhrase==1
&& pNear->apPhrase[0]->nTerm==1
&& pNear->apPhrase[0]->aTerm[0].pSynonym==0
){
pRet->eType = FTS5_TERM;
}
}else{
@ -1650,16 +1925,28 @@ Fts5ExprNode *sqlite3Fts5ParseNode(
}
static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2);
int nByte = 0;
Fts5ExprTerm *p;
char *zQuoted;
/* Determine the maximum amount of space required. */
for(p=pTerm; p; p=p->pSynonym){
nByte += strlen(pTerm->zTerm) * 2 + 3 + 2;
}
zQuoted = sqlite3_malloc(nByte);
if( zQuoted ){
int i = 0;
char *zIn = pTerm->zTerm;
for(p=pTerm; p; p=p->pSynonym){
char *zIn = p->zTerm;
zQuoted[i++] = '"';
while( *zIn ){
if( *zIn=='"' ) zQuoted[i++] = '"';
zQuoted[i++] = *zIn++;
}
zQuoted[i++] = '"';
if( p->pSynonym ) zQuoted[i++] = '|';
}
if( pTerm->bPrefix ){
zQuoted[i++] = ' ';
zQuoted[i++] = '*';

View File

@ -293,7 +293,6 @@ typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
@ -526,24 +525,6 @@ struct Fts5IndexIter {
};
/*
** Object for iterating through the conents of a single internal node in
** memory.
*/
struct Fts5NodeIter {
/* Internal. Set and managed by fts5NodeIterXXX() functions. Except,
** the EOF test for the iterator is (Fts5NodeIter.aData==0). */
const u8 *aData;
int nData;
int iOff;
/* Output variables */
Fts5Buffer term;
int nEmpty;
int iChild;
int bDlidx;
};
/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
@ -573,23 +554,6 @@ struct Fts5DlidxIter {
Fts5DlidxLvl aLvl[1];
};
/*
** The first argument passed to this macro is a pointer to an Fts5Buffer
** object.
*/
#define fts5BufferSize(pBuf,n) { \
if( pBuf->nSpace<n ) { \
u8 *pNew = sqlite3_realloc(pBuf->p, n); \
if( pNew==0 ){ \
sqlite3_free(pBuf->p); \
} \
pBuf->nSpace = n; \
pBuf->p = pNew; \
} \
}
static void fts5PutU16(u8 *aOut, u16 iVal){
aOut[0] = (iVal>>8);
aOut[1] = (iVal&0xFF);
@ -617,6 +581,7 @@ static void *fts5IdxMalloc(Fts5Index *p, int nByte){
**
** res = *pLeft - *pRight
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
Fts5Buffer *pLeft, /* Left hand side of comparison */
const u8 *pRight, int nRight /* Right hand side of comparison */
@ -625,7 +590,7 @@ static int fts5BufferCompareBlob(
int res = memcmp(pLeft->p, pRight, nCmp);
return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
/*
** Compare the contents of the two buffers using memcmp(). If one buffer
@ -665,11 +630,14 @@ static void fts5CloseReader(Fts5Index *p){
}
}
static Fts5Data *fts5DataReadOrBuffer(
Fts5Index *p,
Fts5Buffer *pBuf,
i64 iRowid
){
/*
** Retrieve a record from the %_data table.
**
** If an error occurs, NULL is returned and an error left in the
** Fts5Index object.
*/
static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
Fts5Data *pRet = 0;
if( p->rc==SQLITE_OK ){
int rc = SQLITE_OK;
@ -689,8 +657,8 @@ static Fts5Data *fts5DataReadOrBuffer(
if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
}
/* If the blob handle is not yet open, open and seek it. Otherwise, use
** the blob_reopen() API to reseek the existing blob handle. */
/* If the blob handle is not open at this point, open it and seek
** to the requested entry. */
if( p->pReader==0 && rc==SQLITE_OK ){
Fts5Config *pConfig = p->pConfig;
rc = sqlite3_blob_open(pConfig->db,
@ -708,23 +676,14 @@ static Fts5Data *fts5DataReadOrBuffer(
if( rc==SQLITE_OK ){
u8 *aOut = 0; /* Read blob data into this buffer */
int nByte = sqlite3_blob_bytes(p->pReader);
if( pBuf ){
fts5BufferSize(pBuf, MAX(nByte, p->pConfig->pgsz) + 20);
pBuf->n = nByte;
aOut = pBuf->p;
if( aOut==0 ){
rc = SQLITE_NOMEM;
}
}else{
int nSpace = nByte + FTS5_DATA_PADDING;
pRet = (Fts5Data*)sqlite3_malloc(nSpace+sizeof(Fts5Data));
int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
if( pRet ){
pRet->n = nByte;
aOut = pRet->p = (u8*)&pRet[1];
}else{
rc = SQLITE_NOMEM;
}
}
if( rc==SQLITE_OK ){
rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
@ -738,33 +697,10 @@ static Fts5Data *fts5DataReadOrBuffer(
p->nRead++;
}
return pRet;
}
/*
** Retrieve a record from the %_data table.
**
** If an error occurs, NULL is returned and an error left in the
** Fts5Index object.
*/
static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid);
assert( (pRet==0)==(p->rc!=SQLITE_OK) );
return pRet;
}
/*
** Read a record from the %_data table into the buffer supplied as the
** second argument.
**
** If an error occurs, an error is left in the Fts5Index object. If an
** error has already occurred when this function is called, it is a
** no-op.
*/
static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){
(void)fts5DataReadOrBuffer(p, pBuf, iRowid);
}
/*
** Release a reference to data record returned by an earlier call to
** fts5DataRead().
@ -1033,19 +969,18 @@ static Fts5Structure *fts5StructureRead(Fts5Index *p){
Fts5Config *pConfig = p->pConfig;
Fts5Structure *pRet = 0; /* Object to return */
int iCookie; /* Configuration cookie */
Fts5Data *pData;
Fts5Buffer buf = {0, 0, 0};
fts5DataBuffer(p, &buf, FTS5_STRUCTURE_ROWID);
if( buf.p==0 ) return 0;
assert( buf.nSpace>=(buf.n + FTS5_DATA_ZERO_PADDING) );
memset(&buf.p[buf.n], 0, FTS5_DATA_ZERO_PADDING);
p->rc = fts5StructureDecode(buf.p, buf.n, &iCookie, &pRet);
pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
if( p->rc ) return 0;
memset(&pData->p[pData->n], 0, FTS5_DATA_PADDING);
p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet);
if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
}
fts5BufferFree(&buf);
fts5DataRelease(pData);
if( p->rc!=SQLITE_OK ){
fts5StructureRelease(pRet);
pRet = 0;
@ -1228,62 +1163,6 @@ static void fts5StructurePromote(
}
/*
** If the pIter->iOff offset currently points to an entry indicating one
** or more term-less nodes, advance past it and set pIter->nEmpty to
** the number of empty child nodes.
*/
static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){
if( pIter->iOff<pIter->nData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){
pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01;
pIter->iOff++;
pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty);
}else{
pIter->nEmpty = 0;
pIter->bDlidx = 0;
}
}
/*
** Advance to the next entry within the node.
*/
static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){
if( pIter->iOff>=pIter->nData ){
pIter->aData = 0;
pIter->iChild += pIter->nEmpty;
}else{
int nPre, nNew;
pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nPre);
pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nNew);
pIter->term.n = nPre-2;
fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff);
pIter->iOff += nNew;
pIter->iChild += (1 + pIter->nEmpty);
fts5NodeIterGobbleNEmpty(pIter);
if( *pRc ) pIter->aData = 0;
}
}
/*
** Initialize the iterator object pIter to iterate through the internal
** segment node in pData.
*/
static void fts5NodeIterInit(const u8 *aData, int nData, Fts5NodeIter *pIter){
memset(pIter, 0, sizeof(*pIter));
pIter->aData = aData;
pIter->nData = nData;
pIter->iOff = fts5GetVarint32(aData, pIter->iChild);
fts5NodeIterGobbleNEmpty(pIter);
}
/*
** Free any memory allocated by the iterator object.
*/
static void fts5NodeIterFree(Fts5NodeIter *pIter){
fts5BufferFree(&pIter->term);
}
/*
** Advance the iterator passed as the only argument. If the end of the
** doclist-index page is reached, return non-zero.
@ -2041,119 +1920,6 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
}
#ifdef SQLITE_DEBUG
static void fts5AssertNodeSeekOk(
Fts5Buffer *pNode,
const u8 *pTerm, int nTerm, /* Term to search for */
int iExpectPg,
int bExpectDlidx
){
int bDlidx;
int iPg;
int rc = SQLITE_OK;
Fts5NodeIter node;
fts5NodeIterInit(pNode->p, pNode->n, &node);
assert( node.term.n==0 );
iPg = node.iChild;
bDlidx = node.bDlidx;
for(fts5NodeIterNext(&rc, &node);
node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0;
fts5NodeIterNext(&rc, &node)
){
iPg = node.iChild;
bDlidx = node.bDlidx;
}
fts5NodeIterFree(&node);
assert( rc!=SQLITE_OK || iPg==iExpectPg );
assert( rc!=SQLITE_OK || bDlidx==bExpectDlidx );
}
#else
#define fts5AssertNodeSeekOk(v,w,x,y,z)
#endif
/*
** Argument pNode is an internal b-tree node. This function searches
** within the node for the largest term that is smaller than or equal
** to (pTerm/nTerm).
**
** It returns the associated page number. Or, if (pTerm/nTerm) is smaller
** than all terms within the node, the leftmost child page number.
**
** Before returning, (*pbDlidx) is set to true if the last term on the
** returned child page number has a doclist-index. Or left as is otherwise.
*/
static int fts5NodeSeek(
Fts5Buffer *pNode, /* Node to search */
const u8 *pTerm, int nTerm, /* Term to search for */
int *pbDlidx /* OUT: True if dlidx flag is set */
){
int iPg;
u8 *pPtr = pNode->p;
u8 *pEnd = &pPtr[pNode->n];
int nMatch = 0; /* Number of bytes of pTerm already matched */
assert( *pbDlidx==0 );
pPtr += fts5GetVarint32(pPtr, iPg);
while( pPtr<pEnd ){
int nEmpty = 0;
int nKeep;
int nNew;
/* If there is a "no terms" record at pPtr, read it now. Store the
** number of termless pages in nEmpty. If it indicates a doclist-index,
** set (*pbDlidx) to true.*/
if( *pPtr<2 ){
*pbDlidx = (*pPtr==0x01);
pPtr++;
pPtr += fts5GetVarint32(pPtr, nEmpty);
if( pPtr>=pEnd ) break;
}
/* Read the next "term" pointer. Set nKeep to the number of bytes to
** keep from the previous term, and nNew to the number of bytes of
** new data that will be appended to it. */
nKeep = (int)*pPtr++;
nNew = (int)*pPtr++;
if( (nKeep | nNew) & 0x0080 ){
pPtr -= 2;
pPtr += fts5GetVarint32(pPtr, nKeep);
pPtr += fts5GetVarint32(pPtr, nNew);
}
nKeep -= 2;
/* Compare (pTerm/nTerm) to the current term on the node (the one described
** by nKeep/nNew). If the node term is larger, break out of the while()
** loop.
**
** Otherwise, if (pTerm/nTerm) is larger or the two terms are equal,
** leave variable nMatch set to the size of the largest prefix common to
** both terms in bytes. */
if( nKeep==nMatch ){
int nTst = MIN(nNew, nTerm-nMatch);
int i;
for(i=0; i<nTst; i++){
if( pTerm[nKeep+i]!=pPtr[i] ) break;
}
nMatch += i;
assert( nMatch<=nTerm );
if( i<nNew && (nMatch==nTerm || pPtr[i] > pTerm[nMatch]) ) break;
}else if( nKeep<nMatch ){
break;
}
iPg += 1 + nEmpty;
*pbDlidx = 0;
pPtr += nNew;
}
fts5AssertNodeSeekOk(pNode, pTerm, nTerm, iPg, *pbDlidx);
return iPg;
}
#define fts5IndexGetVarint32(a, iOff, nVal) { \
nVal = a[iOff++]; \
if( nVal & 0x80 ){ \
@ -2677,13 +2443,13 @@ static void fts5SegIterNextFrom(
}
}
while( p->rc==SQLITE_OK ){
do{
if( bMove ) fts5SegIterNext(p, pIter, 0);
if( pIter->pLeaf==0 ) break;
if( bRev==0 && pIter->iRowid>=iMatch ) break;
if( bRev!=0 && pIter->iRowid<=iMatch ) break;
bMove = 1;
}
}while( p->rc==SQLITE_OK );
}
@ -4459,13 +4225,9 @@ int sqlite3Fts5IndexRollback(Fts5Index *p){
*/
int sqlite3Fts5IndexReinit(Fts5Index *p){
Fts5Structure s;
assert( p->rc==SQLITE_OK );
p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0);
memset(&s, 0, sizeof(Fts5Structure));
fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
fts5StructureWrite(p, &s);
return fts5IndexReturn(p);
}
@ -4787,13 +4549,28 @@ void sqlite3Fts5IterClose(Fts5IndexIter *pIter){
}
/*
** Read the "averages" record into the buffer supplied as the second
** argument. Return SQLITE_OK if successful, or an SQLite error code
** if an error occurs.
** Read and decode the "averages" record from the database.
**
** Parameter anSize must point to an array of size nCol, where nCol is
** the number of user defined columns in the FTS table.
*/
int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){
assert( p->rc==SQLITE_OK );
fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID);
int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
int nCol = p->pConfig->nCol;
Fts5Data *pData;
*pnRow = 0;
memset(anSize, 0, sizeof(i64) * nCol);
pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
if( p->rc==SQLITE_OK && pData->n ){
int i = 0;
int iCol;
i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
for(iCol=0; i<pData->n && iCol<nCol; iCol++){
i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
}
}
fts5DataRelease(pData);
return fts5IndexReturn(p);
}
@ -5327,13 +5104,13 @@ static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
if( iSegid==0 ){
if( iKey==FTS5_AVERAGES_ROWID ){
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) ");
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
}else{
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(structure)");
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
}
}
else{
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(%ssegid=%d h=%d pgno=%d)",
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
);
}
@ -5487,16 +5264,14 @@ static void fts5DecodeFunction(
fts5DecodeStructure(&rc, &s, a, n);
}
}else{
Fts5Buffer term;
memset(&term, 0, sizeof(Fts5Buffer));
if( iHeight==0 ){
int iTermOff = 0;
int iRowidOff = 0;
int iOff;
int nKeep = 0;
memset(&term, 0, sizeof(Fts5Buffer));
if( n>=4 ){
iRowidOff = fts5GetU16(&a[0]);
iTermOff = fts5GetU16(&a[2]);
@ -5536,24 +5311,6 @@ static void fts5DecodeFunction(
}
}
fts5BufferFree(&term);
}else{
Fts5NodeIter ss;
for(fts5NodeIterInit(a, n, &ss); ss.aData; fts5NodeIterNext(&rc, &ss)){
if( ss.term.n==0 ){
sqlite3Fts5BufferAppendPrintf(&rc, &s, " left=%d", ss.iChild);
}else{
sqlite3Fts5BufferAppendPrintf(&rc,&s, " \"%.*s\"",
ss.term.n, ss.term.p
);
}
if( ss.nEmpty ){
sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d%s", ss.nEmpty,
ss.bDlidx ? "*" : ""
);
}
}
fts5NodeIterFree(&ss);
}
}
decode_out:

View File

@ -1498,11 +1498,13 @@ static int fts5ApiTokenize(
Fts5Context *pCtx,
const char *pText, int nText,
void *pUserData,
int (*xToken)(void*, const char*, int, int, int)
int (*xToken)(void*, int, const char*, int, int, int)
){
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
return sqlite3Fts5Tokenize(pTab->pConfig, pText, nText, pUserData, xToken);
return sqlite3Fts5Tokenize(
pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
);
}
static int fts5ApiPhraseCount(Fts5Context *pCtx){
@ -1655,13 +1657,16 @@ static int fts5ApiColumnText(
static int fts5ColumnSizeCb(
void *pContext, /* Pointer to int */
int tflags,
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd /* End offset of token */
){
int *pCnt = (int*)pContext;
*pCnt = *pCnt + 1;
if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
(*pCnt)++;
}
return SQLITE_OK;
}
@ -1691,7 +1696,9 @@ static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
pCsr->aColumnSize[i] = 0;
rc = fts5ApiColumnText(pCtx, i, &z, &n);
if( rc==SQLITE_OK ){
rc = sqlite3Fts5Tokenize(pConfig, z, n, p, fts5ColumnSizeCb);
rc = sqlite3Fts5Tokenize(
pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb
);
}
}
}
@ -1853,7 +1860,7 @@ static int fts5ApiQueryPhrase(
pNew->iFirstRowid = SMALLEST_INT64;
pNew->iLastRowid = LARGEST_INT64;
pNew->base.pVtab = (sqlite3_vtab*)pTab;
rc = sqlite3Fts5ExprPhraseExpr(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr);
rc = sqlite3Fts5ExprClonePhrase(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr);
}
if( rc==SQLITE_OK ){
@ -2344,7 +2351,7 @@ int sqlite3_fts5_init(
void *p = (void*)pGlobal;
memset(pGlobal, 0, sizeof(Fts5Global));
pGlobal->db = db;
pGlobal->api.iVersion = 1;
pGlobal->api.iVersion = 2;
pGlobal->api.xCreateFunction = fts5CreateAux;
pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
pGlobal->api.xFindTokenizer = fts5FindTokenizer;

View File

@ -359,6 +359,7 @@ struct Fts5InsertCtx {
*/
static int fts5StorageInsertCallback(
void *pContext, /* Pointer to Fts5InsertCtx object */
int tflags,
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
@ -366,8 +367,10 @@ static int fts5StorageInsertCallback(
){
Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
Fts5Index *pIdx = pCtx->pStorage->pIndex;
int iPos = pCtx->szCol++;
return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken);
if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
pCtx->szCol++;
}
return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
}
/*
@ -394,6 +397,7 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){
if( pConfig->abUnindexed[iCol-1] ) continue;
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT,
(const char*)sqlite3_column_text(pSeek, iCol),
sqlite3_column_bytes(pSeek, iCol),
(void*)&ctx,
@ -451,22 +455,7 @@ static int fts5StorageInsertDocsize(
static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){
int rc = SQLITE_OK;
if( p->bTotalsValid==0 ){
int nCol = p->pConfig->nCol;
Fts5Buffer buf;
memset(&buf, 0, sizeof(buf));
memset(p->aTotalSize, 0, sizeof(i64) * nCol);
p->nTotalRow = 0;
rc = sqlite3Fts5IndexGetAverages(p->pIndex, &buf);
if( rc==SQLITE_OK && buf.n ){
int i = 0;
int iCol;
i += fts5GetVarint(&buf.p[i], (u64*)&p->nTotalRow);
for(iCol=0; i<buf.n && iCol<nCol; iCol++){
i += fts5GetVarint(&buf.p[i], (u64*)&p->aTotalSize[iCol]);
}
}
sqlite3_free(buf.p);
rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize);
p->bTotalsValid = bCache;
}
return rc;
@ -565,6 +554,7 @@ int sqlite3Fts5StorageSpecialDelete(
if( pConfig->abUnindexed[iCol] ) continue;
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT,
(const char*)sqlite3_value_text(apVal[iCol]),
sqlite3_value_bytes(apVal[iCol]),
(void*)&ctx,
@ -654,6 +644,7 @@ int sqlite3Fts5StorageRebuild(Fts5Storage *p){
ctx.szCol = 0;
if( pConfig->abUnindexed[ctx.iCol]==0 ){
rc = sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT,
(const char*)sqlite3_column_text(pScan, ctx.iCol+1),
sqlite3_column_bytes(pScan, ctx.iCol+1),
(void*)&ctx,
@ -771,6 +762,7 @@ int sqlite3Fts5StorageInsert(
ctx.szCol = 0;
if( pConfig->abUnindexed[ctx.iCol]==0 ){
rc = sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT,
(const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
sqlite3_value_bytes(apVal[ctx.iCol+2]),
(void*)&ctx,
@ -838,15 +830,18 @@ struct Fts5IntegrityCtx {
*/
static int fts5StorageIntegrityCallback(
void *pContext, /* Pointer to Fts5InsertCtx object */
int tflags,
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd /* End offset of token */
){
Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
int iPos = pCtx->szCol++;
if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
pCtx->szCol++;
}
pCtx->cksum ^= sqlite3Fts5IndexCksum(
pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken
);
return SQLITE_OK;
}
@ -881,19 +876,23 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
int i;
ctx.iRowid = sqlite3_column_int64(pScan, 0);
ctx.szCol = 0;
if( pConfig->bColumnsize ){
rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
}
for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
if( pConfig->abUnindexed[i] ) continue;
ctx.iCol = i;
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(
pConfig,
rc = sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT,
(const char*)sqlite3_column_text(pScan, i+1),
sqlite3_column_bytes(pScan, i+1),
(void*)&ctx,
fts5StorageIntegrityCallback
);
if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT;
if( pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
rc = FTS5_CORRUPT;
}
aTotalSize[i] += ctx.szCol;
}
if( rc!=SQLITE_OK ) break;
@ -918,7 +917,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
rc = fts5StorageCount(p, "content", &nRow);
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
}
if( rc==SQLITE_OK ){
if( rc==SQLITE_OK && pConfig->bColumnsize ){
i64 nRow;
rc = fts5StorageCount(p, "docsize", &nRow);
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
@ -1002,9 +1001,12 @@ static int fts5StorageDecodeSizeArray(
** otherwise.
*/
int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
int nCol = p->pConfig->nCol;
sqlite3_stmt *pLookup = 0;
int rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0);
int nCol = p->pConfig->nCol; /* Number of user columns in table */
sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */
int rc; /* Return Code */
assert( p->pConfig->bColumnsize );
rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0);
if( rc==SQLITE_OK ){
int bCorrupt = 1;
sqlite3_bind_int64(pLookup, 1, iRowid);

View File

@ -141,6 +141,7 @@ struct F5tAuxData {
static int xTokenizeCb(
void *pCtx,
int tflags,
const char *zToken, int nToken,
int iStart, int iEnd
){
@ -584,6 +585,7 @@ struct F5tTokenizeCtx {
static int xTokenizeCb2(
void *pCtx,
int tflags,
const char *zToken, int nToken,
int iStart, int iEnd
){
@ -666,7 +668,9 @@ static int f5tTokenize(
ctx.bSubst = (objc==5);
ctx.pRet = pRet;
ctx.zInput = zText;
rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2);
rc = tokenizer.xTokenize(
pTok, (void*)&ctx, FTS5_TOKENIZE_DOCUMENT, zText, nText, xTokenizeCb2
);
tokenizer.xDelete(pTok);
if( rc!=SQLITE_OK ){
Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0);
@ -688,11 +692,11 @@ static int f5tTokenize(
typedef struct F5tTokenizerContext F5tTokenizerContext;
typedef struct F5tTokenizerCb F5tTokenizerCb;
typedef struct F5tTokenizerModule F5tTokenizerModule;
typedef struct F5tTokenizerModule F5tTokenizerInstance;
typedef struct F5tTokenizerInstance F5tTokenizerInstance;
struct F5tTokenizerContext {
void *pCtx;
int (*xToken)(void*, const char*, int, int, int);
int (*xToken)(void*, int, const char*, int, int, int);
};
struct F5tTokenizerModule {
@ -701,6 +705,12 @@ struct F5tTokenizerModule {
F5tTokenizerContext *pContext;
};
struct F5tTokenizerInstance {
Tcl_Interp *interp;
Tcl_Obj *pScript;
F5tTokenizerContext *pContext;
};
static int f5tTokenizerCreate(
void *pCtx,
const char **azArg,
@ -748,26 +758,53 @@ static void f5tTokenizerDelete(Fts5Tokenizer *p){
static int f5tTokenizerTokenize(
Fts5Tokenizer *p,
void *pCtx,
int flags,
const char *pText, int nText,
int (*xToken)(void*, const char*, int, int, int)
int (*xToken)(void*, int, const char*, int, int, int)
){
F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
void *pOldCtx;
int (*xOldToken)(void*, const char*, int, int, int);
int (*xOldToken)(void*, int, const char*, int, int, int);
Tcl_Obj *pEval;
int rc;
const char *zFlags;
pOldCtx = pInst->pContext->pCtx;
xOldToken = pInst->pContext->xToken;
pInst->pContext->pCtx = pCtx;
pInst->pContext->xToken = xToken;
assert(
flags==FTS5_TOKENIZE_DOCUMENT
|| flags==FTS5_TOKENIZE_AUX
|| flags==FTS5_TOKENIZE_QUERY
|| flags==(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)
);
pEval = Tcl_DuplicateObj(pInst->pScript);
Tcl_IncrRefCount(pEval);
rc = Tcl_ListObjAppendElement(
pInst->interp, pEval, Tcl_NewStringObj(pText, nText)
);
if( rc==TCL_OK ){
rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY);
switch( flags ){
case FTS5_TOKENIZE_DOCUMENT:
zFlags = "document";
break;
case FTS5_TOKENIZE_AUX:
zFlags = "aux";
break;
case FTS5_TOKENIZE_QUERY:
zFlags = "query";
break;
case (FTS5_TOKENIZE_PREFIX | FTS5_TOKENIZE_QUERY):
zFlags = "prefixquery";
break;
default:
assert( 0 );
zFlags = "invalid";
break;
}
Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(zFlags, -1));
Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(pText,nText));
rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY);
Tcl_DecrRefCount(pEval);
pInst->pContext->pCtx = pOldCtx;
@ -776,7 +813,7 @@ static int f5tTokenizerTokenize(
}
/*
** sqlite3_fts5_token TEXT START END POS
** sqlite3_fts5_token ?-colocated? TEXT START END
*/
static int f5tTokenizerReturn(
void * clientData,
@ -788,14 +825,29 @@ static int f5tTokenizerReturn(
int iStart;
int iEnd;
int nToken;
int tflags = 0;
char *zToken;
int rc;
assert( p );
if( objc!=4 ){
Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END");
if( objc==5 ){
int nArg;
char *zArg = Tcl_GetStringFromObj(objv[1], &nArg);
if( nArg<=10 && nArg>=2 && memcmp("-colocated", zArg, nArg)==0 ){
tflags |= FTS5_TOKEN_COLOCATED;
}else{
goto usage;
}
}else if( objc!=4 ){
goto usage;
}
zToken = Tcl_GetStringFromObj(objv[objc-3], &nToken);
if( Tcl_GetIntFromObj(interp, objv[objc-2], &iStart)
|| Tcl_GetIntFromObj(interp, objv[objc-1], &iEnd)
){
return TCL_ERROR;
}
if( p->xToken==0 ){
Tcl_AppendResult(interp,
"sqlite3_fts5_token may only be used by tokenizer callback", 0
@ -803,16 +855,13 @@ static int f5tTokenizerReturn(
return TCL_ERROR;
}
zToken = Tcl_GetStringFromObj(objv[1], &nToken);
if( Tcl_GetIntFromObj(interp, objv[2], &iStart)
|| Tcl_GetIntFromObj(interp, objv[3], &iEnd)
){
return TCL_ERROR;
}
rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd);
rc = p->xToken(p->pCtx, tflags, zToken, nToken, iStart, iEnd);
Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
return TCL_OK;
usage:
Tcl_WrongNumArgs(interp, 1, objv, "?-colocated? TEXT START END");
return TCL_ERROR;
}
static void f5tDelTokenizer(void *pCtx){

View File

@ -352,7 +352,7 @@ static void fts5MatchinfoFunc(
){
const char *zArg;
Fts5MatchinfoCtx *p;
int rc;
int rc = SQLITE_OK;
if( nVal>0 ){
zArg = (const char*)sqlite3_value_text(apVal[0]);
@ -363,11 +363,16 @@ static void fts5MatchinfoFunc(
p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
pApi->xSetAuxdata(pFts, p, sqlite3_free);
if( p==0 ) return;
if( p==0 ){
rc = SQLITE_NOMEM;
}else{
rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
}
}
if( rc==SQLITE_OK ){
rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
}
if( rc!=SQLITE_OK ){
sqlite3_result_error_code(pCtx, rc);
}else{

View File

@ -116,8 +116,9 @@ static void asciiFold(char *aOut, const char *aIn, int nByte){
static int fts5AsciiTokenize(
Fts5Tokenizer *pTokenizer,
void *pCtx,
int flags,
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
int rc = SQLITE_OK;
@ -158,7 +159,7 @@ static int fts5AsciiTokenize(
asciiFold(pFold, &pText[is], nByte);
/* Invoke the token callback */
rc = xToken(pCtx, pFold, nByte, is, ie);
rc = xToken(pCtx, 0, pFold, nByte, is, ie);
is = ie+1;
}
@ -385,8 +386,9 @@ static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
static int fts5UnicodeTokenize(
Fts5Tokenizer *pTokenizer,
void *pCtx,
int flags,
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
int rc = SQLITE_OK;
@ -475,7 +477,7 @@ static int fts5UnicodeTokenize(
}
/* Invoke the token callback */
rc = xToken(pCtx, aFold, zOut-aFold, is, ie);
rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie);
}
tokenize_done:
@ -553,7 +555,7 @@ static int fts5PorterCreate(
typedef struct PorterContext PorterContext;
struct PorterContext {
void *pCtx;
int (*xToken)(void*, const char*, int, int, int);
int (*xToken)(void*, int, const char*, int, int, int);
char *aBuf;
};
@ -1118,6 +1120,7 @@ static void fts5PorterStep1A(char *aBuf, int *pnBuf){
static int fts5PorterCb(
void *pCtx,
int tflags,
const char *pToken,
int nToken,
int iStart,
@ -1175,10 +1178,10 @@ static int fts5PorterCb(
nBuf--;
}
return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd);
return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd);
pass_through:
return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd);
return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd);
}
/*
@ -1187,8 +1190,9 @@ static int fts5PorterCb(
static int fts5PorterTokenize(
Fts5Tokenizer *pTokenizer,
void *pCtx,
int flags,
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
PorterContext sCtx;
@ -1196,7 +1200,7 @@ static int fts5PorterTokenize(
sCtx.pCtx = pCtx;
sCtx.aBuf = p->aBuf;
return p->tokenizer.xTokenize(
p->pTokenizer, (void*)&sCtx, pText, nText, fts5PorterCb
p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb
);
}
@ -1225,7 +1229,7 @@ int sqlite3Fts5TokenizerInit(fts5_api *pApi){
);
}
return SQLITE_OK;
return rc;
}

View File

@ -295,3 +295,36 @@ proc NOT {a b} {
return $a
}
#-------------------------------------------------------------------------
# This command is similar to [split], except that it also provides the
# start and end offsets of each token. For example:
#
# [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}
#
proc gobble_whitespace {textvar} {
upvar $textvar t
regexp {([ ]*)(.*)} $t -> space t
return [string length $space]
}
proc gobble_text {textvar wordvar} {
upvar $textvar t
upvar $wordvar w
regexp {([^ ]*)(.*)} $t -> w t
return [string length $w]
}
proc fts5_tokenize_split {text} {
set token ""
set ret [list]
set iOff [gobble_whitespace text]
while {[set nToken [gobble_text text word]]} {
lappend ret $word $iOff [expr $iOff+$nToken]
incr iOff $nToken
incr iOff [gobble_whitespace text]
}
set ret
}

View File

@ -51,7 +51,7 @@ do_execsql_test 2.1 {
do_test 2.2 {
execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{\(structure\) {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=0 leaves=1..1}}}/}
} {/{{structure} {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=0 leaves=1..1}}}/}
foreach w {a b c d e f} {
do_execsql_test 2.3.$w.asc {
@ -343,7 +343,7 @@ do_execsql_test 13.5 {
} {1}
do_execsql_test 13.6 {
SELECT rowid FROM t1 WHERE t1 MATCH '.';
SELECT rowid FROM t1 WHERE t1 MATCH '""';
} {}
#-------------------------------------------------------------------------
@ -506,6 +506,36 @@ do_execsql_test 18.3 {
SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
} {1 1}
#--------------------------------------------------------------------
# fts5 table in the temp schema.
#
reset_db
do_execsql_test 19.0 {
CREATE VIRTUAL TABLE temp.t1 USING fts5(x);
INSERT INTO t1 VALUES('x y z');
INSERT INTO t1 VALUES('w x 1');
SELECT rowid FROM t1 WHERE t1 MATCH 'x';
} {1 2}
#--------------------------------------------------------------------
# Test that 6 and 7 byte varints can be read.
#
reset_db
do_execsql_test 20.0 {
CREATE VIRTUAL TABLE temp.tmp USING fts5(x);
}
set ::ids [list \
0 [expr 1<<36] [expr 2<<36] [expr 1<<43] [expr 2<<43]
]
do_test 20.1 {
foreach id $::ids {
execsql { INSERT INTO tmp(rowid, x) VALUES($id, 'x y z') }
}
execsql { SELECT rowid FROM tmp WHERE tmp MATCH 'y' }
} $::ids
finish_test

View File

@ -90,13 +90,13 @@ foreach {tn q res} "
do_test 1.6.$tn.1 {
set n [execsql_reads $q]
puts -nonewline "(n=$n nReadX=$nReadX)"
#puts -nonewline "(n=$n nReadX=$nReadX)"
expr {$n < ($nReadX / 8)}
} {1}
do_test 1.6.$tn.2 {
set n [execsql_reads "$q ORDER BY rowid DESC"]
puts -nonewline "(n=$n nReadX=$nReadX)"
#puts -nonewline "(n=$n nReadX=$nReadX)"
expr {$n < ($nReadX / 8)}
} {1}

View File

@ -134,5 +134,18 @@ do_execsql_test 3.2.1 {
1 {-1 0 -1} 2 {-1 0 -1}
}
#-------------------------------------------------------------------------
# Test the integrity-check
#
do_execsql_test 4.1.1 {
CREATE VIRTUAL TABLE t5 USING fts5(x, columnsize=0);
INSERT INTO t5 VALUES('1 2 3 4');
INSERT INTO t5 VALUES('2 4 6 8');
}
breakpoint
do_execsql_test 4.1.2 {
INSERT INTO t5(t5) VALUES('integrity-check');
}
finish_test

View File

@ -87,6 +87,12 @@ do_execsql_test 4.0 {
SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"');
} {{"a" AND """"}}
#-------------------------------------------------------------------------
# Experiment with a tokenizer that considers " to be a token character.
#
do_catchsql_test 5.0 {
SELECT fts5_expr('abc | def');
} {1 {fts5: syntax error near "|"}}

View File

@ -31,16 +31,16 @@ proc do_syntax_test {tn expr res} {
foreach {tn expr res} {
1 {abc} {"abc"}
2 {abc .} {"abc"}
3 {.} {}
4 {abc OR .} {"abc"}
5 {abc NOT .} {"abc"}
6 {abc AND .} {"abc"}
7 {. OR abc} {"abc"}
8 {. NOT abc} {"abc"}
9 {. AND abc} {"abc"}
10 {abc + . + def} {"abc" + "def"}
11 {abc . def} {"abc" AND "def"}
2 {abc ""} {"abc"}
3 {""} {}
4 {abc OR ""} {"abc"}
5 {abc NOT ""} {"abc"}
6 {abc AND ""} {"abc"}
7 {"" OR abc} {"abc"}
8 {"" NOT abc} {"abc"}
9 {"" AND abc} {"abc"}
10 {abc + "" + def} {"abc" + "def"}
11 {abc "" def} {"abc" AND "def"}
12 {r+e OR w} {"r" + "e" OR "w"}
} {
do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]

View File

@ -22,6 +22,7 @@ ifcapable !fts5 {
return
}
#-------------------------------------------------------------------------
# OOM while rebuilding an FTS5 table.
#
@ -148,5 +149,149 @@ do_faultsim_test 4.1 -faults oom-t* -prep {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
#
# 5.2.* OOM while running a query that includes synonyms and matchinfo().
#
# 5.3.* OOM while running a query that returns a row containing instances
# of more than 4 synonyms for a single term.
#
proc mit {blob} {
set scan(littleEndian) i*
set scan(bigEndian) I*
binary scan $blob $scan($::tcl_platform(byteOrder)) r
return $r
}
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
if {$tflags=="query" && [string length $w]==1} {
for {set i 2} {$i < 7} {incr i} {
sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
}
}
}
}
proc tcl_create {args} { return "tcl_tokenize" }
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
db func mit mit
sqlite3_fts5_register_matchinfo db
do_test 5.0 {
execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl) }
execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 32) }
foreach {rowid text} {
1 {aaaa cc b aaaaa cc aa}
2 {aa aa bb a bbb}
3 {bb aaaaa aaaaa b aaaa aaaaa}
4 {aa a b aaaa aa}
5 {aa b ccc aaaaa cc}
6 {aa aaaaa bbbb cc aaa}
7 {aaaaa aa aa ccccc bb}
8 {ccc bbbbb ccccc bbb c}
9 {cccccc bbbb a aaa cccc c}
20 {ddd f ddd eeeee fff ffff eeee ddd fff eeeee dddddd eeee}
21 {fffff eee dddd fffff dd ee ee eeeee eee eeeeee ee dd e}
22 {fffff d eeee dddd fffff dddddd ffff ddddd eeeee ee eee dddd ddddd}
23 {ddddd fff ddd eeeee ffff eeee ddd ff ff ffffff eeeeee dddd ffffff}
24 {eee dd ee dddd dddd eeeeee e eee fff ffff}
25 {ddddd ffffff dddddd fff ddd ddddd ddd f eeee fff dddd f}
26 {f ffff fff fff eeeeee dddd d dddddd ddddd eee ff eeeee}
27 {eee fff dddddd eeeee eeeee dddd ddddd ffff f eeeee eee dddddd ddddd d}
28 {dd ddddd d ddd d fff d dddd ee dddd ee ddd dddddd dddddd}
29 {eeee dddd ee dddd eeee dddd dd fffff f ddd eeeee ddd ee}
30 {ff ffffff eeeeee eeeee eee ffffff ff ffff f fffff eeeee}
31 {fffff eeeeee dddd eeee eeee eeeeee eee fffff d ddddd ffffff ffff dddddd}
32 {dddddd fffff ee eeeeee eeee ee fff dddd fff eeee ffffff eeeeee ffffff}
33 {ddddd eeee dd ffff dddddd fff eeee ddddd ffff eeee ddd}
34 {ee dddd ddddd dddddd eeee eeeeee f dd ee dddddd ffffff}
35 {ee dddd dd eeeeee ddddd eee d eeeeee dddddd eee dddd fffff}
36 {eee ffffff ffffff e fffff eeeee ff dddddd dddddd fff}
37 {eeeee fffff dddddd dddd ffffff fff f dd ee dd dd eeeee}
38 {eeeeee ee d ff eeeeee eeeeee eee eeeee ee ffffff dddd eeee dddddd ee}
39 {eeeeee ddd fffff e dddd ee eee eee ffffff ee f d dddd}
40 {ffffff dddddd eee ee ffffff eee eeee ddddd ee eeeeee f}
41 {ddd ddd fff fffff ee fffff f fff ddddd fffff}
42 {dddd ee ff d f ffffff fff ffffff ff dd dddddd f eeee}
43 {d dd fff fffff d f fff e dddd ee ee}
44 {ff ffff eee ddd d dd ffff dddd d eeee d eeeeee}
45 {eeee f eeeee ee e ffff f ddd e fff}
46 {ffff d ffff eeee ffff eeeee f ffff ddddd eee}
47 {dd dd dddddd ddddd fffff dddddd ddd ddddd eeeeee ffff eeee eee ee}
48 {ffff ffff e dddd ffffff dd dd dddd f fffff}
49 {ffffff d dddddd ffff eeeee f ffff ffff d dd fffff eeeee}
50 {x e}
} {
execsql { INSERT INTO t1(rowid, a) VALUES($rowid, $text) }
}
} {}
set res [list {*}{
1 {3 24 8 2 12 6}
5 {2 24 8 2 12 6}
6 {3 24 8 1 12 6}
7 {3 24 8 1 12 6}
9 {2 24 8 3 12 6}
}]
do_execsql_test 5.1.1 {
SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
} $res
do_execsql_test 5.1.2 {
SELECT count(*) FROM t1 WHERE t1 MATCH 'd e f'
} 29
faultsim_save_and_close
do_faultsim_test 5.2 -faults oom* -prep {
faultsim_restore_and_reopen
sqlite3_fts5_create_tokenizer db tcl tcl_create
sqlite3_fts5_register_matchinfo db
db func mit mit
} -body {
db eval {
SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
}
} -test {
faultsim_test_result [list 0 $::res]
}
do_faultsim_test 5.3 -faults oom* -prep {
faultsim_restore_and_reopen
sqlite3_fts5_create_tokenizer db tcl tcl_create
} -body {
db eval {
SELECT count(*) FROM t1 WHERE t1 MATCH 'd AND e AND f'
}
} -test {
faultsim_test_result {0 29}
}
do_faultsim_test 5.4 -faults oom* -prep {
faultsim_restore_and_reopen
sqlite3_fts5_create_tokenizer db tcl tcl_create
} -body {
db eval {
SELECT count(*) FROM t1 WHERE t1 MATCH 'x + e'
}
} -test {
faultsim_test_result {0 1}
}
#-------------------------------------------------------------------------
catch { db close }
breakpoint
do_faultsim_test 6 -faults oom* -prep {
sqlite_orig db test.db
sqlite3_db_config_lookaside db 0 0 0
} -body {
load_static_extension db fts5
} -test {
faultsim_test_result {0 {}} {1 {initialization of fts5 failed: }}
if {$testrc==0} {
db eval { CREATE VIRTUAL TABLE temp.t1 USING fts5(x) }
}
db close
}
finish_test

View File

@ -0,0 +1,45 @@
# 2015 September 3
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault2
# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Test fault-injection on a query that uses xColumnSize() on columnsize=0
# table.
#
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0);
INSERT INTO t1 VALUES('a b c d e f g');
INSERT INTO t1 VALUES('a b c d');
INSERT INTO t1 VALUES('a b c d e f g h i j');
}
fts5_aux_test_functions db
do_faultsim_test 1 -faults oom* -body {
execsql { SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH 'b' }
} -test {
faultsim_test_result {0 {7 4 10}} {1 SQLITE_NOMEM}
}
finish_test

View File

@ -355,10 +355,10 @@ do_execsql_test 10.1 {
#---------------------------------------------------------------------------
# Test the 'y' matchinfo flag
#
set sqlite_fts3_enable_parentheses 1
reset_db
sqlite3_fts5_register_matchinfo db
do_execsql_test 11.0 {
CREATE VIRTUAL TABLE tt USING fts3(x, y);
CREATE VIRTUAL TABLE tt USING fts5(x, y);
INSERT INTO tt VALUES('c d a c d d', 'e a g b d a'); -- 1
INSERT INTO tt VALUES('c c g a e b', 'c g d g e c'); -- 2
INSERT INTO tt VALUES('b e f d e g', 'b a c b c g'); -- 3
@ -432,19 +432,18 @@ foreach {tn expr res} {
SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
} $r2
}
set sqlite_fts3_enable_parentheses 0
#---------------------------------------------------------------------------
# Test the 'b' matchinfo flag
#
set sqlite_fts3_enable_parentheses 1
reset_db
sqlite3_fts5_register_matchinfo db
db func mit mit
do_test 12.0 {
set cols [list]
for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" }
execsql "CREATE VIRTUAL TABLE tt USING fts3([join $cols ,])"
execsql "CREATE VIRTUAL TABLE tt USING fts5([join $cols ,])"
} {}
do_execsql_test 12.1 {
@ -452,6 +451,5 @@ do_execsql_test 12.1 {
SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc';
} [list [list [expr 1<<4] [expr 1<<(45-32)]]]
set sqlite_fts3_enable_parentheses 0
finish_test

View File

@ -0,0 +1,460 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on custom tokenizers that support synonyms.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5synonym
# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
foreach S {
{zero 0}
{one 1 i}
{two 2 ii}
{three 3 iii}
{four 4 iv}
{five 5 v}
{six 6 vi}
{seven 7 vii}
{eight 8 viii}
{nine 9 ix}
} {
foreach s $S {
set o [list]
foreach x $S {if {$x!=$s} {lappend o $x}}
set ::syn($s) $o
}
}
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
}
}
proc tcl_create {args} {
return "tcl_tokenize"
}
sqlite3_fts5_create_tokenizer db tcl tcl_create
#-------------------------------------------------------------------------
# Warm body test for the code in fts5_tcl.c.
#
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
INSERT INTO ft VALUES('abc def ghi');
INSERT INTO ft VALUES('jkl mno pqr');
SELECT rowid, x FROM ft WHERE ft MATCH 'def';
SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
} {1 {abc def ghi} {jkl mno pqr} 2}
#-------------------------------------------------------------------------
# Test a tokenizer that supports synonyms by adding extra entries to the
# FTS index.
#
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
if {$tflags=="document" && [info exists ::syn($w)]} {
foreach s $::syn($w) {
sqlite3_fts5_token -colo $s $iStart $iEnd
}
}
}
}
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
INSERT INTO ft VALUES('one two three');
INSERT INTO ft VALUES('four five six');
INSERT INTO ft VALUES('eight nine ten');
} {}
foreach {tn expr res} {
1 "3" 1
2 "eight OR 8 OR 5" {2 3}
3 "10" {}
4 "1*" {1}
5 "1 + 2" {1}
} {
do_execsql_test 2.1.$tn {
SELECT rowid FROM ft WHERE ft MATCH $expr
} $res
}
#-------------------------------------------------------------------------
# Test some broken tokenizers:
#
# 3.1.*: A tokenizer that declares the very first token to be colocated.
#
# 3.2.*: A tokenizer that reports two identical tokens at the same position.
# This is allowed.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
set bColo 1
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
if {$bColo} {
sqlite3_fts5_token -colo $w $iStart $iEnd
set bColo 0
} {
sqlite3_fts5_token $w $iStart $iEnd
}
}
}
do_execsql_test 3.1.0 {
CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
INSERT INTO ft VALUES('one two three');
CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
SELECT * FROM vv;
} {
one 1 1 three 1 1 two 1 1
}
do_execsql_test 3.1.1 {
INSERT INTO ft(ft) VALUES('integrity-check');
} {}
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
}
}
do_execsql_test 3.1.2 {
SELECT rowid FROM ft WHERE ft MATCH 'one two three'
} {1}
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
sqlite3_fts5_token -colo $w $iStart $iEnd
}
}
do_execsql_test 3.2.0 {
CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
INSERT INTO ft VALUES('one one two three');
CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
SELECT * FROM vv;
} {
one 1 4 three 1 2 two 1 2
}
do_execsql_test 3.2.1 {
SELECT rowid FROM ft WHERE ft MATCH 'one';
} {1}
do_execsql_test 3.2.2 {
SELECT rowid FROM ft WHERE ft MATCH 'one two three';
} {1}
do_execsql_test 3.2.3 {
SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three';
} {1}
do_execsql_test 3.2.4 {
SELECT rowid FROM ft WHERE ft MATCH 'one two two three';
} {1}
do_execsql_test 3.2.5 {
SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
} {}
#-------------------------------------------------------------------------
# Check that expressions with synonyms can be parsed and executed.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
if {$tflags=="query" && [info exists ::syn($w)]} {
foreach s $::syn($w) {
sqlite3_fts5_token -colo $s $iStart $iEnd
}
}
}
}
foreach {tn expr res} {
1 {abc} {"abc"}
2 {one} {"one"|"i"|"1"}
3 {3} {"3"|"iii"|"three"}
4 {3*} {"3"|"iii"|"three" *}
} {
do_execsql_test 4.1.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res]
}
do_execsql_test 4.2.1 {
CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tcl);
INSERT INTO xx VALUES('one two');
INSERT INTO xx VALUES('three four');
}
do_execsql_test 4.2.2 {
SELECT rowid FROM xx WHERE xx MATCH '2'
} {1}
do_execsql_test 4.2.3 {
SELECT rowid FROM xx WHERE xx MATCH '3'
} {2}
do_test 5.0 {
execsql {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tcl)
}
foreach {rowid a b} {
1 {four v 4 i three} {1 3 five five 4 one}
2 {5 1 3 4 i} {2 2 v two 4}
3 {5 i 5 2 four 4 1} {iii ii five two 1}
4 {ii four 4 one 5 three five} {one 5 1 iii 4 3}
5 {three i v i four 4 1} {ii five five five iii}
6 {4 2 ii two 2 iii} {three 1 four 4 iv 1 iv}
7 {ii ii two three 2 5} {iii i ii iii iii one one}
8 {2 ii i two 3 three 2} {two iv v iii 3 five}
9 {i 2 iv 3 five four v} {iii 4 three i three ii 1}
} {
execsql { INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b) }
}
} {}
foreach {tn q res} {
1 {one} {
1 {four v 4 [i] three} {[1] 3 five five 4 [one]}
2 {5 [1] 3 4 [i]} {2 2 v two 4}
3 {5 [i] 5 2 four 4 [1]} {iii ii five two [1]}
4 {ii four 4 [one] 5 three five} {[one] 5 [1] iii 4 3}
5 {three [i] v [i] four 4 [1]} {ii five five five iii}
6 {4 2 ii two 2 iii} {three [1] four 4 iv [1] iv}
7 {ii ii two three 2 5} {iii [i] ii iii iii [one] [one]}
8 {2 ii [i] two 3 three 2} {two iv v iii 3 five}
9 {[i] 2 iv 3 five four v} {iii 4 three [i] three ii [1]}
}
2 {five four} {
1 {[four] [v] [4] i three} {1 3 [five] [five] [4] one}
2 {[5] 1 3 [4] i} {2 2 [v] two [4]}
3 {[5] i [5] 2 [four] [4] 1} {iii ii [five] two 1}
4 {ii [four] [4] one [5] three [five]} {one [5] 1 iii [4] 3}
5 {three i [v] i [four] [4] 1} {ii [five] [five] [five] iii}
8 {2 ii i two 3 three 2} {two [iv] [v] iii 3 [five]}
9 {i 2 [iv] 3 [five] [four] [v]} {iii [4] three i three ii 1}
}
3 {one OR two OR iii OR 4 OR v} {
1 {[four] [v] [4] [i] [three]} {[1] [3] [five] [five] [4] [one]}
2 {[5] [1] [3] [4] [i]} {[2] [2] [v] [two] [4]}
3 {[5] [i] [5] [2] [four] [4] [1]} {[iii] [ii] [five] [two] [1]}
4 {[ii] [four] [4] [one] [5] [three] [five]} {[one] [5] [1] [iii] [4] [3]}
5 {[three] [i] [v] [i] [four] [4] [1]} {[ii] [five] [five] [five] [iii]}
6 {[4] [2] [ii] [two] [2] [iii]} {[three] [1] [four] [4] [iv] [1] [iv]}
7 {[ii] [ii] [two] [three] [2] [5]} {[iii] [i] [ii] [iii] [iii] [one] [one]}
8 {[2] [ii] [i] [two] [3] [three] [2]} {[two] [iv] [v] [iii] [3] [five]}
9 {[i] [2] [iv] [3] [five] [four] [v]} {[iii] [4] [three] [i] [three] [ii] [1]}
}
4 {5 + 1} {
2 {[5 1] 3 4 i} {2 2 v two 4}
3 {[5 i] 5 2 four 4 1} {iii ii five two 1}
4 {ii four 4 one 5 three five} {one [5 1] iii 4 3}
5 {three i [v i] four 4 1} {ii five five five iii}
}
5 {one + two + three} {
7 {ii ii two three 2 5} {iii [i ii iii] iii one one}
8 {2 ii [i two 3] three 2} {two iv v iii 3 five}
}
6 {"v v"} {
1 {four v 4 i three} {1 3 [five five] 4 one}
5 {three i v i four 4 1} {ii [five five five] iii}
}
} {
do_execsql_test 5.1.$tn {
SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']')
FROM t1 WHERE t1 MATCH $q
} $res
}
# Test that the xQueryPhrase() API works with synonyms.
#
proc mit {blob} {
set scan(littleEndian) i*
set scan(bigEndian) I*
binary scan $blob $scan($::tcl_platform(byteOrder)) r
return $r
}
db func mit mit
sqlite3_fts5_register_matchinfo db
foreach {tn q res} {
1 {one} {
1 {1 11 7 2 12 6} 2 {2 11 7 0 12 6}
3 {2 11 7 1 12 6} 4 {1 11 7 2 12 6}
5 {3 11 7 0 12 6} 6 {0 11 7 2 12 6}
7 {0 11 7 3 12 6} 8 {1 11 7 0 12 6}
9 {1 11 7 2 12 6}
}
} {
do_execsql_test 5.2.$tn {
SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q
} $res
}
#-------------------------------------------------------------------------
# Test terms with more than 4 synonyms.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
if {$tflags=="query" && [string length $w]==1} {
for {set i 2} {$i<=10} {incr i} {
sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
}
}
}
}
do_execsql_test 6.0.1 {
CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl);
INSERT INTO t1 VALUES('yy xx qq');
INSERT INTO t1 VALUES('yy xx xx');
}
do_execsql_test 6.0.2 {
SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)';
} {{yy xx qq}}
do_test 6.0.3 {
execsql {
CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl)
}
foreach {rowid a b} {
1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa}
2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj}
4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii}
5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n}
6 {iii dddd hh qqqq ddd ooo} {ttt d c b aaaaaa qqqq}
7 {jjjj rrrr v zzzzz u tt t} {ppppp pp dddd mm hhh uuu}
8 {gggg rrrrrr kkkk vvvv gggg jjjjjj b} {dddddd jj r w cccc wwwwww ss}
9 {kkkkk qqq oooo e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
} {
execsql { INSERT INTO t2(rowid, a, b) VALUES($rowid, $a, $b) }
}
} {}
foreach {tn q res} {
1 {a} {
1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq [aaaa]}
3 {zzzz llll gggggg cccc uu} {hhhhhh [aaaa] ppppp rr ee jjjj}
4 {r f i rrrrrr ww hhh} {[aa] yyy t x [aaaaa] ii}
6 {iii dddd hh qqqq ddd ooo} {ttt d c b [aaaaaa] qqqq}
}
2 {a AND q} {
1 {yyyy vvvvv [qq] oo yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
6 {iii dddd hh [qqqq] ddd ooo} {ttt d c b [aaaaaa] [qqqq]}
}
3 {o OR (q AND a)} {
1 {yyyy vvvvv [qq] [oo] yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
2 {ww [oooooo] bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
5 {fffff mm vvvv [ooo] ffffff kkkk tttt} {cccccc bb e zzz d n}
6 {iii dddd hh [qqqq] ddd [ooo]} {ttt d c b [aaaaaa] [qqqq]}
9 {kkkkk qqq [oooo] e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
}
4 {NEAR(q y, 20)} {
1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa}
2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]}
}
} {
do_execsql_test 6.1.$tn.asc {
SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
FROM t2 WHERE t2 MATCH $q
} $res
set res2 [list]
foreach {rowid a b} $res {
set res2 [concat [list $rowid $a $b] $res2]
}
do_execsql_test 6.1.$tn.desc {
SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
FROM t2 WHERE t2 MATCH $q ORDER BY rowid DESC
} $res2
}
do_execsql_test 6.2.1 {
INSERT INTO t2(rowid, a, b) VALUES(13,
'x xx xxx xxxx xxxxx xxxxxx xxxxxxx', 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy'
);
SELECT rowid, highlight(t2, 0, '<', '>'), highlight(t2, 1, '(', ')')
FROM t2 WHERE t2 MATCH 'x OR y'
} {
1 {<yyyy> vvvvv qq oo <yyyyyy> vvvv eee} {ffff uu r qq aaaa}
2 {ww oooooo bbbbb ssssss mm} {ffffff (yy) iiii rr s ccc qqqqq}
4 {r f i rrrrrr ww hhh} {aa (yyy) t (x) aaaaa ii}
13 {<x> <xx> <xxx> <xxxx> <xxxxx> <xxxxxx> <xxxxxxx>}
{(y) (yy) (yyy) (yyyy) (yyyyy) (yyyyyy) (yyyyyyy)}
}
#-------------------------------------------------------------------------
# Test that the xColumnSize() API is not confused by colocated tokens.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
fts5_aux_test_functions db
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
if {[string length $w]==1} {
for {set i 2} {$i<=10} {incr i} {
sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
}
}
}
}
do_execsql_test 7.0.1 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl);
INSERT INTO t1 VALUES('0 2 3', '4 5 6 7');
INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}
do_execsql_test 7.0.2 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
do_execsql_test 7.1.1 {
CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl);
INSERT INTO t2 VALUES('0 2 3', '4 5 6 7');
INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}
do_execsql_test 7.1.2 {
INSERT INTO t2(t2) VALUES('integrity-check');
}
finish_test

View File

@ -5,14 +5,52 @@
# Process command line arguments.
#
proc usage {} {
puts stderr "usage: $::argv0 database table"
puts stderr "usage: $::argv0 ?OPTIONS? database table"
puts stderr ""
puts stderr " -nterm (count number of terms in each segment)"
puts stderr ""
exit 1
}
if {[llength $argv]!=2} usage
set database [lindex $argv 0]
set tbl [lindex $argv 1]
set O(nterm) 0
if {[llength $argv]<2} usage
foreach a [lrange $argv 0 end-2] {
switch -- $a {
-nterm {
set O(nterm) 1
}
default {
usage
}
}
}
set database [lindex $argv end-1]
set tbl [lindex $argv end]
#-------------------------------------------------------------------------
# Count the number of terms in each segment of fts5 table $tbl. Store the
# counts in the array variable in the parent context named by parameter
# $arrayname, indexed by segment-id. Example:
#
# count_terms fts_tbl A
# foreach {k v} [array get A] { puts "segid=$k nTerm=$v" }
#
proc count_terms {tbl arrayname} {
upvar A $arrayname
array unset A
db eval "SELECT fts5_decode(rowid, block) AS d FROM ${tbl}_data" {
set desc [lindex $d 0]
if {[regexp {^segid=([0-9]*)} $desc -> id]} {
foreach i [lrange $d 1 end] {
if {[string match {term=*} $i]} { incr A($id) }
}
}
}
}
#-------------------------------------------------------------------------
@ -21,11 +59,21 @@ set tbl [lindex $argv 1]
sqlite3 db $database
catch { load_static_extension db fts5 }
if {$O(nterm)} { count_terms $tbl A }
db eval "SELECT fts5_decode(rowid, block) AS d FROM ${tbl}_data WHERE id=10" {
foreach lvl [lrange $d 1 end] {
puts [lrange $lvl 0 2]
foreach seg [lrange $lvl 3 end] {
puts " $seg"
if {$::O(nterm)} {
regexp {^id=([0-9]*)} $seg -> id
set nTerm 0
catch { set nTerm $A($id) }
puts [format " % -28s nTerm=%d" $seg $nTerm]
} else {
puts [format " % -28s" $seg]
}
}
}
}

46
main.mk
View File

@ -47,6 +47,7 @@
TCCX = $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP)
TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3
TCCX += -I$(TOP)/ext/async -I$(TOP)/ext/userauth
TCCX += -I$(TOP)/ext/fts5
# Object files for the SQLite library.
#
@ -230,6 +231,29 @@ SRC += \
$(TOP)/ext/rbu/sqlite3rbu.h
# FTS5 things
#
FTS5_HDR = \
$(TOP)/ext/fts5/fts5.h \
$(TOP)/ext/fts5/fts5Int.h \
fts5parse.h
FTS5_SRC = \
$(TOP)/ext/fts5/fts5_aux.c \
$(TOP)/ext/fts5/fts5_buffer.c \
$(TOP)/ext/fts5/fts5_main.c \
$(TOP)/ext/fts5/fts5_config.c \
$(TOP)/ext/fts5/fts5_expr.c \
$(TOP)/ext/fts5/fts5_hash.c \
$(TOP)/ext/fts5/fts5_index.c \
fts5parse.c \
$(TOP)/ext/fts5/fts5_storage.c \
$(TOP)/ext/fts5/fts5_tokenize.c \
$(TOP)/ext/fts5/fts5_unicode2.c \
$(TOP)/ext/fts5/fts5_varint.c \
$(TOP)/ext/fts5/fts5_vocab.c \
# Generated source code files
#
SRC += \
@ -636,25 +660,6 @@ fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c
# FTS5 things
#
FTS5_SRC = \
$(TOP)/ext/fts5/fts5.h \
$(TOP)/ext/fts5/fts5Int.h \
$(TOP)/ext/fts5/fts5_aux.c \
$(TOP)/ext/fts5/fts5_buffer.c \
$(TOP)/ext/fts5/fts5_main.c \
$(TOP)/ext/fts5/fts5_config.c \
$(TOP)/ext/fts5/fts5_expr.c \
$(TOP)/ext/fts5/fts5_hash.c \
$(TOP)/ext/fts5/fts5_index.c \
fts5parse.c fts5parse.h \
$(TOP)/ext/fts5/fts5_storage.c \
$(TOP)/ext/fts5/fts5_tokenize.c \
$(TOP)/ext/fts5/fts5_unicode2.c \
$(TOP)/ext/fts5/fts5_varint.c \
$(TOP)/ext/fts5/fts5_vocab.c \
fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon
cp $(TOP)/ext/fts5/fts5parse.y .
rm -f fts5parse.h
@ -662,11 +667,10 @@ fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon
fts5parse.h: fts5parse.c
fts5.c: $(FTS5_SRC)
fts5.c: $(FTS5_SRC) $(FTS5_HDR)
tclsh $(TOP)/ext/fts5/tool/mkfts5c.tcl
cp $(TOP)/ext/fts5/fts5.h .
userauth.o: $(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c

View File

@ -1,5 +1,5 @@
C Merge\senhancements\sfrom\strunk.
D 2015-09-03T14:18:12.100
C Merge\strunk\senhancements,\sand\sespeically\sthe\sfix\sfor\sallowing\sstrings\s\nas\scolumn\sidentifers\sin\sCREATE\sINDEX\sstatements.
D 2015-09-04T13:02:21.922
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in f85066ce844a28b671aaeeff320921cd0ce36239
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -104,34 +104,34 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252
F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95
F ext/fts5/extract_api_docs.tcl 06583c935f89075ea0b32f85efa5dd7619fcbd03
F ext/fts5/fts5.h 1950ec0544de667a24c1d8af9b2fde5db7db3bc9
F ext/fts5/fts5Int.h 45f2ceb3c030f70e2cc4c199e9f700c2f2367f77
F ext/fts5/fts5_aux.c 044cb176a815f4388308738437f6e130aa384fb0
F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0
F ext/fts5/fts5.h f04659e0df5af83731b102189a32280f74f4a6bc
F ext/fts5/fts5Int.h f65d41f66accad0a289d6bd66b13c07d2932f9be
F ext/fts5/fts5_aux.c 7a307760a9c57c750d043188ec0bad59f5b5ec7e
F ext/fts5/fts5_buffer.c 80f9ba4431848cb857e3d2158f5280093dcd8015
F ext/fts5/fts5_config.c fdfa63ae8e527ecfaa50f94063c610429cc887cf
F ext/fts5/fts5_expr.c d075d36c84975a1cfcf070442d28e28027b61c25
F ext/fts5/fts5_config.c 80b61fd2c6844b64a3e72a64572d50a812da9384
F ext/fts5/fts5_expr.c 1c24e1a2ffb286bfe37e537a43b7fadabfe993d4
F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246
F ext/fts5/fts5_index.c 076c4995bf06a6d1559a6e31f9a86b90f2105374
F ext/fts5/fts5_main.c fc47ad734dfb55765b7542a345cee981170e7caa
F ext/fts5/fts5_storage.c 22ec9b5d35a39e2b5b65daf4ba7cd47fbb2d0df5
F ext/fts5/fts5_tcl.c 96a3b9e982c4a64a242eefd752fa6669cd405a67
F ext/fts5/fts5_test_mi.c 80a9e86fb4c5b6b58f8fefac05e9b96d1a6574e1
F ext/fts5/fts5_tokenize.c 2836f6728bd74c7efac7487f5d9c27ca3e1b509c
F ext/fts5/fts5_index.c 950e37028cc81ae21534819e79c73aea7efa6c8e
F ext/fts5/fts5_main.c e9d0892424bb7f0a8b58613d4ff75cb650cf286e
F ext/fts5/fts5_storage.c 120f7b143688b5b7710dacbd48cff211609b8059
F ext/fts5/fts5_tcl.c 6da58d6e8f42a93c4486b5ba9b187a7f995dee37
F ext/fts5/fts5_test_mi.c e96be827aa8f571031e65e481251dc1981d608bf
F ext/fts5/fts5_tokenize.c f380f46f341af9c9a9908e1aade685ba1eaa157a
F ext/fts5/fts5_unicode2.c 78273fbd588d1d9bd0a7e4e0ccc9207348bae33c
F ext/fts5/fts5_varint.c 3f86ce09cab152e3d45490d7586b7ed2e40c13f1
F ext/fts5/fts5_vocab.c 4622e0b7d84a488a1585aaa56eb214ee67a988bc
F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
F ext/fts5/test/fts5_common.tcl 3338968de1880ca12b0451ae8f9b8b12d14e0ba7
F ext/fts5/test/fts5aa.test c6e680a0d1b6c2616a382f1006d5d91eca697bd0
F ext/fts5/test/fts5_common.tcl b6e6a40ef5d069c8e86ca4fbad491e1195485dbc
F ext/fts5/test/fts5aa.test f558e1e5ccffa75d69e9a4814245d468ec6b6608
F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad
F ext/fts5/test/fts5ac.test 9737992d08c56bfd4803e933744d2d764e23795c
F ext/fts5/test/fts5ad.test b2edee8b7de0c21d2c88f8a18c195034aad6952d
F ext/fts5/test/fts5ae.test 0a9984fc3479f89f8c63d9848d6ed0c465dfcebe
F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a
F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505
F ext/fts5/test/fts5ah.test b9e78fa986a7bd564ebadfb244de02c84d7ac3ae
F ext/fts5/test/fts5ah.test e592c4978622dbc4de552cd0f9395df60ac5d54c
F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37
F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8
F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592
@ -141,7 +141,7 @@ F ext/fts5/test/fts5auto.test caa5bcf917db11944655a2a9bd38c67c520376ca
F ext/fts5/test/fts5aux.test 8c687c948cc98e9a94be014df7d518acc1b3b74f
F ext/fts5/test/fts5auxdata.test 141a7cbffcceb1bd2799b4b29c183ff8780d586e
F ext/fts5/test/fts5bigpl.test 04ee0d7eebbebf17c31f5a0b5c5f9494eac3a0cb
F ext/fts5/test/fts5columnsize.test 97dc6bd66c91009d00407aa078dd5e9e8eb22f99
F ext/fts5/test/fts5columnsize.test a8cfef21ffa1c264b9f670a7d94eeaccb5341c07
F ext/fts5/test/fts5config.test ad2ff42ddc856aed2d05bf89dc1c578c8a39ea3b
F ext/fts5/test/fts5content.test 9a952c95518a14182dc3b59e3c8fa71cda82a4e1
F ext/fts5/test/fts5corrupt.test 928c9c91d40690d301f943a7ed0ffc19e0d0e7b6
@ -149,18 +149,19 @@ F ext/fts5/test/fts5corrupt2.test 1a830ccd6dbe1b601c7e3f5bbc1cf77bd8c8803b
F ext/fts5/test/fts5corrupt3.test 1ccf575f5126e79f9fec7979fd02a1f40a076be3
F ext/fts5/test/fts5dlidx.test 59b80bbe34169a082c575d9c26f0a7019a7b79c1
F ext/fts5/test/fts5doclist.test 8edb5b57e5f144030ed74ec00ef6fa4294fed79b
F ext/fts5/test/fts5ea.test 451bb37310ee6df8ef72e4354fda5621b3b51448
F ext/fts5/test/fts5eb.test 46f49497edc25ef3b2bff9fb6d75b6d201e2b39e
F ext/fts5/test/fts5ea.test b01e3a18cdfabbff8104a96a5242a06a68a998a0
F ext/fts5/test/fts5eb.test 3e5869af2008cbc4ad03a175a0b6f6e58134cd43
F ext/fts5/test/fts5fault1.test 7a562367cb4a735b57b410dbdb62dcc8d971faec
F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341
F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3
F ext/fts5/test/fts5fault4.test 762991d526ee67c2b374351a17248097ea38bee7
F ext/fts5/test/fts5fault5.test 54da9fd4c3434a1d4f6abdcb6469299d91cf5875
F ext/fts5/test/fts5fault6.test 234dc6355f8d3f8b5be2763f30699d770247c215
F ext/fts5/test/fts5fault6.test 97bce1a36b7a64e3203fea504ae8e5cfd5ada423
F ext/fts5/test/fts5fault7.test f4a9b796f8b20c78ec7cf9f4e11144d15d7c3fd4
F ext/fts5/test/fts5full.test 6f6143af0c6700501d9fd597189dfab1555bb741
F ext/fts5/test/fts5hash.test 42eb066f667e9a389a63437cb7038c51974d4fc6
F ext/fts5/test/fts5integrity.test 29f41d2c7126c6122fbb5d54e556506456876145
F ext/fts5/test/fts5matchinfo.test ee6e7b130096c708c12049fa9c1ceb628954c4f9
F ext/fts5/test/fts5matchinfo.test 2163b0013e824bba65499da9e34ea4da41349cc2
F ext/fts5/test/fts5merge.test 8f3cdba2ec9c5e7e568246e81b700ad37f764367
F ext/fts5/test/fts5near.test b214cddb1c1f1bddf45c75af768f20145f7e71cc
F ext/fts5/test/fts5optimize.test 42741e7c085ee0a1276140a752d4407d97c2c9f5
@ -172,6 +173,7 @@ F ext/fts5/test/fts5rank.test 11dcebba31d822f7e99685b4ea2c2ae3ec0b16f1
F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b
F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17
F ext/fts5/test/fts5rowid.test 6f9833b23b176dc4aa15b7fc02afeb2b220fd460
F ext/fts5/test/fts5synonym.test cf88c0a56d5ea9591e3939ef1f6e294f7f2d0671
F ext/fts5/test/fts5tokenizer.test ea4df698b35cc427ebf2ba22829d0e28386d8c89
F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841
F ext/fts5/test/fts5unicode2.test c1dd890ba32b7609adba78e420faa847abe43b59
@ -181,7 +183,7 @@ F ext/fts5/test/fts5version.test 205beb2a67d9496af64df959e6a19238f69b83e8
F ext/fts5/test/fts5vocab.test cdf97b9678484e9bad5062edf9c9106e5c3b0c5c
F ext/fts5/tool/loadfts5.tcl 95edf0b6b92a09f9ed85595038b1108127987556
F ext/fts5/tool/mkfts5c.tcl 5745072c7de346e18c7f491e4c3281fe8a1cfe51
F ext/fts5/tool/showfts5.tcl fb62e8eae6d862afdd22f367e286fb886d5e1ab6
F ext/fts5/tool/showfts5.tcl 9eaf6c3df352f98a2ab5ce1921dd94128ab1381d
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
F ext/icu/icu.c b2732aef0b076e4276d9b39b5a33cec7a05e1413
F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37
@ -258,7 +260,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x
F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8
F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60
F main.mk 8da13ed011a7ae19450b7554910ff4afa3bd22b7
F main.mk 61821e43596648bfacce2d6283377bee35986131
F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea
F mkopcodeh.awk 0e7f04a8eb90f92259e47d80110e4e98d7ce337a
F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83
@ -282,7 +284,7 @@ F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79
F src/btree.c 4084d9eed2817331f6e6a82230ba30e448cad497
F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1
F src/btreeInt.h 8177c9ab90d772d6d2c6c517e05bed774b7c92c0
F src/build.c 77da53936388346bc5864eab54066c6f3988770a
F src/build.c 5566b3410080a54e5c302c55d3de53fd080cfc7d
F src/callback.c 7b44ce59674338ad48b0e84e7b72f935ea4f68b0
F src/complete.c addcd8160b081131005d5bc2d34adf20c1c5c92f
F src/ctime.c 5a0b735dc95604766f5dac73973658eef782ee8b
@ -324,12 +326,12 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa
F src/os_unix.c 76f493ed71c4154338049dee1bf6e47f69c74a55
F src/os_win.c 40b3af7a47eb1107d0d69e592bec345a3b7b798a
F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca
F src/pager.c aa916ca28606ccf4b6877dfc2b643ccbca86589f
F src/pager.c 4784012f80b2197c61ff6eaf4f5c7026d93253fd
F src/pager.h 6d435f563b3f7fcae4b84433b76a6ac2730036e2
F src/parse.y f599aa5e871a493330d567ced93de696f61f48f7
F src/pcache.c cde06aa50962595e412d497e22fd2e07878ba1f0
F src/pcache.c 24be750c79272e0ca7b6e007bc94999700f3e5ef
F src/pcache.h 9968603796240cdf83da7e7bef76edf90619cea9
F src/pcache1.c b31af9dbc83b9c68e87d681b8453a9605f28e734
F src/pcache1.c bf2afe64a3dedb8643c8dcbd94a145cc80ab2a67
F src/pragma.c d71b813e67bf03f3116b9dd5164fbfd81ec673a2
F src/pragma.h 631a91c8b0e6ca8f051a1d8a4a0da4150e04620a
F src/prepare.c 82e5db1013846a819f198336fed72c44c974e7b1
@ -773,8 +775,8 @@ F test/incrvacuum2.test 676c41428765d58f1da7dbe659ef27726d3d30ac
F test/incrvacuum3.test 75256fb1377e7c39ef2de62bfc42bbff67be295a
F test/incrvacuum_ioerr.test 6ae2f783424e47a0033304808fe27789cf93e635
F test/index.test fe3c7a1aad82af92623747e9c3f3aa94ccd51238
F test/index2.test ee83c6b5e3173a3d7137140d945d9a5d4fdfb9d6
F test/index3.test b6ec456cf3b81d9a32123fe7e449bde434db338b
F test/index2.test f835d5e13ca163bd78c4459ca15fd2e4ed487407
F test/index3.test fa3e49bbaa4f38091c9c742e36a1abe67c4ef1fc
F test/index4.test ab92e736d5946840236cd61ac3191f91a7856bf6
F test/index5.test 8621491915800ec274609e42e02a97d67e9b13e7
F test/index6.test 7102ec371414c42dfb1d5ca37eb4519aa9edc23a
@ -1287,10 +1289,10 @@ F test/walro.test 34422d1d95aaff0388f0791ec20edb34e2a3ed57
F test/walshared.test 0befc811dcf0b287efae21612304d15576e35417
F test/walslow.test e7be6d9888f83aa5d3d3c7c08aa9b5c28b93609a
F test/walthread.test de8dbaf6d9e41481c460ba31ca61e163d7348f8e
F test/where.test 1ff3d9f8da0a6c0dc5ccfd38d9225b2cdb5b6afb
F test/where.test 66d4c107e82dfe86c01a96277b77e7a8809aff0b
F test/where2.test af78c55589cbc82d793449493adba0dc3d659f23
F test/where3.test 1ad55ba900bd7747f98b6082e65bd3e442c5004e
F test/where4.test 68aa5ad796e33816db2078bc0f6de719c7a0e21f
F test/where4.test 44f506bf1737cf0fa4fc795e340208250f1fcd89
F test/where5.test fdf66f96d29a064b63eb543e28da4dfdccd81ad2
F test/where6.test 5da5a98cec820d488e82708301b96cb8c18a258b
F test/where7.test 5a4b0abc207d71da4deecd734ad8579e8dd40aa8
@ -1381,7 +1383,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P ff5137a6dd8cb2a9b629b3a244f52665e9c9ebce 847387ec8e6fef283899578fb232b2c23b00ee5b
R e19b7f210220f2833a87a0bc81a47608
P 1ab10cbf27245961b40eda1ce70f35646f0a9966 3d3df79bfaf9dbc7aa711c08a19d2f6dbe744b32
R 46fafd1e16150312779d43bb23b81793
U drh
Z 58f787bccb7c921ddef6f60f047a7162
Z 1a60e3fe37bc43a63ce49e32494691dc

View File

@ -1 +1 @@
1ab10cbf27245961b40eda1ce70f35646f0a9966
5ff855293865c244ac632c630e8e7e8d7c05a5f6

View File

@ -2850,6 +2850,30 @@ Index *sqlite3AllocateIndexObject(
return p;
}
/*
** Backwards Compatibility Hack:
**
** Historical versions of SQLite accepted strings as column names in
** indexes and PRIMARY KEY constraints and in UNIQUE constraints. Example:
**
** CREATE TABLE xyz(a,b,c,d,e,PRIMARY KEY('a'),UNIQUE('b','c' COLLATE trim)
** CREATE INDEX abc ON xyz('c','d' DESC,'e' COLLATE nocase DESC);
**
** This is goofy. But to preserve backwards compatibility we continue to
** accept it. This routine does the necessary conversion. It converts
** the expression given in its argument from a TK_STRING into a TK_ID
** if the expression is just a TK_STRING with an optional COLLATE clause.
** If the epxression is anything other than TK_STRING, the expression is
** unchanged.
*/
static void sqlite3StringToId(Expr *p){
if( p->op==TK_STRING ){
p->op = TK_ID;
}else if( p->op==TK_COLLATE && p->pLeft->op==TK_STRING ){
p->pLeft->op = TK_ID;
}
}
/*
** Create a new index for an SQL table. pName1.pName2 is the name of the index
** and pTblList is the name of the table that is to be indexed. Both will
@ -3118,6 +3142,7 @@ Index *sqlite3CreateIndex(
int requestedSortOrder; /* ASC or DESC on the i-th expression */
char *zColl; /* Collation sequence name */
sqlite3StringToId(pListItem->pExpr);
sqlite3ResolveSelfReference(pParse, pTab, NC_IdxExpr, pListItem->pExpr, 0);
if( pParse->nErr ) goto exit_create_index;
pCExpr = sqlite3ExprSkipCollate(pListItem->pExpr);

View File

@ -647,7 +647,7 @@ struct Pager {
u8 doNotSpill; /* Do not spill the cache when non-zero */
u8 subjInMemory; /* True to use in-memory sub-journals */
u8 bUseFetch; /* True to use xFetch() */
u8 hasBeenUsed; /* True if any content previously read */
u8 hasHeldSharedLock; /* True if a shared lock has ever been held */
Pgno dbSize; /* Number of pages in the database */
Pgno dbOrigSize; /* dbSize before the current transaction */
Pgno dbFileSize; /* Number of pages in the database file */
@ -5097,10 +5097,10 @@ int sqlite3PagerSharedLock(Pager *pPager){
);
}
if( !pPager->tempFile && pPager->hasBeenUsed ){
if( !pPager->tempFile && pPager->hasHeldSharedLock ){
/* The shared-lock has just been acquired then check to
** see if the database has been modified. If the database has changed,
** flush the cache. The pPager->hasBeenUsed flag prevents this from
** flush the cache. The hasHeldSharedLock flag prevents this from
** occurring on the very first access to a file, in order to save a
** single unnecessary sqlite3OsRead() call at the start-up.
**
@ -5170,6 +5170,7 @@ int sqlite3PagerSharedLock(Pager *pPager){
assert( pPager->eState==PAGER_OPEN );
}else{
pPager->eState = PAGER_READER;
pPager->hasHeldSharedLock = 1;
}
return rc;
}
@ -5253,21 +5254,25 @@ int sqlite3PagerAcquire(
** page 1 if there is no write-transaction open or the ACQUIRE_READONLY
** flag was specified by the caller. And so long as the db is not a
** temporary or in-memory database. */
const int bMmapOk = (pgno!=1 && USEFETCH(pPager)
const int bMmapOk = (pgno>1 && USEFETCH(pPager)
&& (pPager->eState==PAGER_READER || (flags & PAGER_GET_READONLY))
#ifdef SQLITE_HAS_CODEC
&& pPager->xCodec==0
#endif
);
/* Optimization note: Adding the "pgno<=1" term before "pgno==0" here
** allows the compiler optimizer to reuse the results of the "pgno>1"
** test in the previous statement, and avoid testing pgno==0 in the
** common case where pgno is large. */
if( pgno<=1 && pgno==0 ){
return SQLITE_CORRUPT_BKPT;
}
assert( pPager->eState>=PAGER_READER );
assert( assert_pager_state(pPager) );
assert( noContent==0 || bMmapOk==0 );
if( pgno==0 ){
return SQLITE_CORRUPT_BKPT;
}
pPager->hasBeenUsed = 1;
assert( pPager->hasHeldSharedLock==1 );
/* If the pager is in the error state, return an error immediately.
** Otherwise, request the page from the PCache layer. */
@ -5422,7 +5427,7 @@ DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
assert( pgno!=0 );
assert( pPager->pPCache!=0 );
pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0);
assert( pPage==0 || pPager->hasBeenUsed );
assert( pPage==0 || pPager->hasHeldSharedLock );
if( pPage==0 ) return 0;
return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage);
}
@ -6389,7 +6394,7 @@ u8 sqlite3PagerIsreadonly(Pager *pPager){
#ifdef SQLITE_DEBUG
/*
** Return the number of references to the pager.
** Return the sum of the reference counts for all pages held by pPager.
*/
int sqlite3PagerRefcount(Pager *pPager){
return sqlite3PcacheRefCount(pPager->pPCache);

View File

@ -19,7 +19,7 @@
struct PCache {
PgHdr *pDirty, *pDirtyTail; /* List of dirty pages in LRU order */
PgHdr *pSynced; /* Last synced page in dirty page list */
int nRef; /* Number of referenced pages */
int nRefSum; /* Sum of ref counts over all pages */
int szCache; /* Configured cache size */
int szPage; /* Size of every page in this cache */
int szExtra; /* Size of extra space for each page */
@ -184,7 +184,7 @@ int sqlite3PcacheOpen(
** are no outstanding page references when this function is called.
*/
int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){
assert( pCache->nRef==0 && pCache->pDirty==0 );
assert( pCache->nRefSum==0 && pCache->pDirty==0 );
if( pCache->szPage ){
sqlite3_pcache *pNew;
pNew = sqlite3GlobalConfig.pcache2.xCreate(
@ -351,9 +351,7 @@ PgHdr *sqlite3PcacheFetchFinish(
if( !pPgHdr->pPage ){
return pcacheFetchFinishWithInit(pCache, pgno, pPage);
}
if( 0==pPgHdr->nRef ){
pCache->nRef++;
}
pCache->nRefSum++;
pPgHdr->nRef++;
return pPgHdr;
}
@ -364,9 +362,8 @@ PgHdr *sqlite3PcacheFetchFinish(
*/
void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){
assert( p->nRef>0 );
p->nRef--;
if( p->nRef==0 ){
p->pCache->nRef--;
p->pCache->nRefSum--;
if( (--p->nRef)==0 ){
if( p->flags&PGHDR_CLEAN ){
pcacheUnpin(p);
}else if( p->pDirtyPrev!=0 ){
@ -382,6 +379,7 @@ void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){
void sqlite3PcacheRef(PgHdr *p){
assert(p->nRef>0);
p->nRef++;
p->pCache->nRefSum++;
}
/*
@ -394,7 +392,7 @@ void sqlite3PcacheDrop(PgHdr *p){
if( p->flags&PGHDR_DIRTY ){
pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE);
}
p->pCache->nRef--;
p->pCache->nRefSum--;
sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 1);
}
@ -490,11 +488,11 @@ void sqlite3PcacheTruncate(PCache *pCache, Pgno pgno){
sqlite3PcacheMakeClean(p);
}
}
if( pgno==0 && pCache->nRef ){
if( pgno==0 && pCache->nRefSum ){
sqlite3_pcache_page *pPage1;
pPage1 = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache,1,0);
if( ALWAYS(pPage1) ){ /* Page 1 is always available in cache, because
** pCache->nRef>0 */
** pCache->nRefSum>0 */
memset(pPage1->pBuf, 0, pCache->szPage);
pgno = 1;
}
@ -600,10 +598,13 @@ PgHdr *sqlite3PcacheDirtyList(PCache *pCache){
}
/*
** Return the total number of referenced pages held by the cache.
** Return the total number of references to all pages held by the cache.
**
** This is not the total number of pages referenced, but the sum of the
** reference count for all pages.
*/
int sqlite3PcacheRefCount(PCache *pCache){
return pCache->nRef;
return pCache->nRefSum;
}
/*

View File

@ -87,6 +87,24 @@ typedef struct PgHdr1 PgHdr1;
typedef struct PgFreeslot PgFreeslot;
typedef struct PGroup PGroup;
/*
** Each cache entry is represented by an instance of the following
** structure. Unless SQLITE_PCACHE_SEPARATE_HEADER is defined, a buffer of
** PgHdr1.pCache->szPage bytes is allocated directly before this structure
** in memory.
*/
struct PgHdr1 {
sqlite3_pcache_page page; /* Base class. Must be first. pBuf & pExtra */
unsigned int iKey; /* Key value (page number) */
u8 isPinned; /* Page in use, not on the LRU list */
u8 isBulkLocal; /* This page from bulk local storage */
u8 isAnchor; /* This is the PGroup.lru element */
PgHdr1 *pNext; /* Next in hash table chain */
PCache1 *pCache; /* Cache that currently owns this page */
PgHdr1 *pLruNext; /* Next in LRU list of unpinned pages */
PgHdr1 *pLruPrev; /* Previous in LRU list of unpinned pages */
};
/* Each page cache (or PCache) belongs to a PGroup. A PGroup is a set
** of one or more PCaches that are able to recycle each other's unpinned
** pages when they are under memory pressure. A PGroup is an instance of
@ -115,7 +133,7 @@ struct PGroup {
unsigned int nMinPage; /* Sum of nMin for purgeable caches */
unsigned int mxPinned; /* nMaxpage + 10 - nMinPage */
unsigned int nCurrentPage; /* Number of purgeable pages allocated */
PgHdr1 *pLruHead, *pLruTail; /* LRU list of unpinned pages */
PgHdr1 lru; /* The beginning and end of the LRU list */
};
/* Each page cache is an instance of the following object. Every
@ -153,23 +171,6 @@ struct PCache1 {
void *pBulk; /* Bulk memory used by pcache-local */
};
/*
** Each cache entry is represented by an instance of the following
** structure. Unless SQLITE_PCACHE_SEPARATE_HEADER is defined, a buffer of
** PgHdr1.pCache->szPage bytes is allocated directly before this structure
** in memory.
*/
struct PgHdr1 {
sqlite3_pcache_page page;
unsigned int iKey; /* Key value (page number) */
u8 isPinned; /* Page in use, not on the LRU list */
u8 isBulkLocal; /* This page from bulk local storage */
PgHdr1 *pNext; /* Next in hash table chain */
PCache1 *pCache; /* Cache that currently owns this page */
PgHdr1 *pLruNext; /* Next in LRU list of unpinned pages */
PgHdr1 *pLruPrev; /* Previous in LRU list of unpinned pages */
};
/*
** Free slots in the allocator used to divide up the global page cache
** buffer provided using the SQLITE_CONFIG_PAGECACHE mechanism.
@ -230,6 +231,7 @@ static SQLITE_WSD struct PCacheGlobal {
/******************************************************************************/
/******** Page Allocation/SQLITE_CONFIG_PCACHE Related Functions **************/
/*
** This function is called during initialization if a static buffer is
** supplied to use for the page-cache by passing the SQLITE_CONFIG_PAGECACHE
@ -289,6 +291,7 @@ static int pcache1InitBulk(PCache1 *pCache){
pX->page.pBuf = zBulk;
pX->page.pExtra = &pX[1];
pX->isBulkLocal = 1;
pX->isAnchor = 0;
pX->pNext = pCache->pFree;
pCache->pFree = pX;
zBulk += pCache->szAlloc;
@ -431,6 +434,7 @@ static PgHdr1 *pcache1AllocPage(PCache1 *pCache, int benignMalloc){
p->page.pBuf = pPg;
p->page.pExtra = &p[1];
p->isBulkLocal = 0;
p->isAnchor = 0;
}
if( pCache->bPurgeable ){
pCache->pGroup->nCurrentPage++;
@ -557,22 +561,16 @@ static PgHdr1 *pcache1PinPage(PgHdr1 *pPage){
assert( pPage!=0 );
assert( pPage->isPinned==0 );
pCache = pPage->pCache;
assert( pPage->pLruNext || pPage==pCache->pGroup->pLruTail );
assert( pPage->pLruPrev || pPage==pCache->pGroup->pLruHead );
assert( pPage->pLruNext );
assert( pPage->pLruPrev );
assert( sqlite3_mutex_held(pCache->pGroup->mutex) );
if( pPage->pLruPrev ){
pPage->pLruPrev->pLruNext = pPage->pLruNext;
}else{
pCache->pGroup->pLruHead = pPage->pLruNext;
}
if( pPage->pLruNext ){
pPage->pLruNext->pLruPrev = pPage->pLruPrev;
}else{
pCache->pGroup->pLruTail = pPage->pLruPrev;
}
pPage->pLruNext = 0;
pPage->pLruPrev = 0;
pPage->isPinned = 1;
assert( pPage->isAnchor==0 );
assert( pCache->pGroup->lru.isAnchor==1 );
pCache->nRecyclable--;
return pPage;
}
@ -605,9 +603,11 @@ static void pcache1RemoveFromHash(PgHdr1 *pPage, int freeFlag){
*/
static void pcache1EnforceMaxPage(PCache1 *pCache){
PGroup *pGroup = pCache->pGroup;
PgHdr1 *p;
assert( sqlite3_mutex_held(pGroup->mutex) );
while( pGroup->nCurrentPage>pGroup->nMaxPage && pGroup->pLruTail ){
PgHdr1 *p = pGroup->pLruTail;
while( pGroup->nCurrentPage>pGroup->nMaxPage
&& (p=pGroup->lru.pLruPrev)->isAnchor==0
){
assert( p->pCache->pGroup==pGroup );
assert( p->isPinned==0 );
pcache1PinPage(p);
@ -741,6 +741,10 @@ static sqlite3_pcache *pcache1Create(int szPage, int szExtra, int bPurgeable){
}else{
pGroup = &pcache1.grp;
}
if( pGroup->lru.isAnchor==0 ){
pGroup->lru.isAnchor = 1;
pGroup->lru.pLruPrev = pGroup->lru.pLruNext = &pGroup->lru;
}
pCache->pGroup = pGroup;
pCache->szPage = szPage;
pCache->szExtra = szExtra;
@ -848,11 +852,11 @@ static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2(
/* Step 4. Try to recycle a page. */
if( pCache->bPurgeable
&& pGroup->pLruTail
&& !pGroup->lru.pLruPrev->isAnchor
&& ((pCache->nPage+1>=pCache->nMax) || pcache1UnderMemoryPressure(pCache))
){
PCache1 *pOther;
pPage = pGroup->pLruTail;
pPage = pGroup->lru.pLruPrev;
assert( pPage->isPinned==0 );
pcache1RemoveFromHash(pPage, 0);
pcache1PinPage(pPage);
@ -961,7 +965,10 @@ static PgHdr1 *pcache1FetchNoMutex(
pPage = pCache->apHash[iKey % pCache->nHash];
while( pPage && pPage->iKey!=iKey ){ pPage = pPage->pNext; }
/* Step 2: Abort if no existing page is found and createFlag is 0 */
/* Step 2: If the page was found in the hash table, then return it.
** If the page was not in the hash table and createFlag is 0, abort.
** Otherwise (page not in hash and createFlag!=0) continue with
** subsequent steps to try to create the page. */
if( pPage ){
if( !pPage->isPinned ){
return pcache1PinPage(pPage);
@ -1038,21 +1045,16 @@ static void pcache1Unpin(
** part of the PGroup LRU list.
*/
assert( pPage->pLruPrev==0 && pPage->pLruNext==0 );
assert( pGroup->pLruHead!=pPage && pGroup->pLruTail!=pPage );
assert( pPage->isPinned==1 );
if( reuseUnlikely || pGroup->nCurrentPage>pGroup->nMaxPage ){
pcache1RemoveFromHash(pPage, 1);
}else{
/* Add the page to the PGroup LRU list. */
if( pGroup->pLruHead ){
pGroup->pLruHead->pLruPrev = pPage;
pPage->pLruNext = pGroup->pLruHead;
pGroup->pLruHead = pPage;
}else{
pGroup->pLruTail = pPage;
pGroup->pLruHead = pPage;
}
PgHdr1 **ppFirst = &pGroup->lru.pLruNext;
pPage->pLruPrev = &pGroup->lru;
(pPage->pLruNext = *ppFirst)->pLruPrev = pPage;
*ppFirst = pPage;
pCache->nRecyclable++;
pPage->isPinned = 0;
}
@ -1190,7 +1192,9 @@ int sqlite3PcacheReleaseMemory(int nReq){
if( sqlite3GlobalConfig.nPage==0 ){
PgHdr1 *p;
pcache1EnterMutex(&pcache1.grp);
while( (nReq<0 || nFree<nReq) && ((p=pcache1.grp.pLruTail)!=0) ){
while( (nReq<0 || nFree<nReq)
&& (p=pcache1.grp.lru.pLruPrev)->isAnchor==0
){
nFree += pcache1MemSize(p->page.pBuf);
#ifdef SQLITE_PCACHE_SEPARATE_HEADER
nFree += sqlite3MemSize(p);
@ -1218,7 +1222,7 @@ void sqlite3PcacheStats(
){
PgHdr1 *p;
int nRecyclable = 0;
for(p=pcache1.grp.pLruHead; p; p=p->pLruNext){
for(p=pcache1.grp.lru.pLruNext; !p->isAnchor; p=p->pLruNext){
assert( p->isPinned==0 );
nRecyclable++;
}

View File

@ -1,4 +1,4 @@
# 2005 January 11
# 2005-01-11
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
@ -11,7 +11,6 @@
# This file implements regression tests for SQLite library. The
# focus of this file is testing the CREATE INDEX statement.
#
# $Id: index2.test,v 1.3 2006/03/03 19:12:30 drh Exp $
set testdir [file dirname $argv0]
source $testdir/tester.tcl

View File

@ -1,4 +1,4 @@
# 2005 February 14
# 2005-02-14
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
@ -11,7 +11,6 @@
# This file implements regression tests for SQLite library. The
# focus of this file is testing the CREATE INDEX statement.
#
# $Id: index3.test,v 1.3 2008/03/19 13:03:34 drh Exp $
set testdir [file dirname $argv0]
@ -40,17 +39,42 @@ do_test index3-1.3 {
} {0 {}}
integrity_check index3-1.4
# Backwards compatibility test:
#
# Verify that CREATE INDEX statements that use strings instead of
# identifiers for the the column names continue to work correctly.
# This is undocumented behavior retained for backwards compatiblity.
#
do_execsql_test index3-2.1 {
DROP TABLE t1;
CREATE TABLE t1(a, b, c, d, e,
PRIMARY KEY('a'), UNIQUE('b' COLLATE nocase DESC));
CREATE INDEX t1c ON t1('c');
CREATE INDEX t1d ON t1('d' COLLATE binary ASC);
WITH RECURSIVE c(x) AS (VALUES(1) UNION SELECT x+1 FROM c WHERE x<30)
INSERT INTO t1(a,b,c,d,e)
SELECT x, printf('ab%03xxy',x), x, x, x FROM c;
} {}
do_execsql_test index3-2.2 {
SELECT a FROM t1 WHERE b='ab005xy' COLLATE nocase;
} {5}
do_execsql_test index3-2.2eqp {
EXPLAIN QUERY PLAN
SELECT a FROM t1 WHERE b='ab005xy' COLLATE nocase;
} {/USING INDEX/}
# This test corrupts the database file so it must be the last test
# in the series.
#
do_test index3-99.1 {
execsql {
PRAGMA writable_schema=on;
UPDATE sqlite_master SET sql='nonsense';
UPDATE sqlite_master SET sql='nonsense' WHERE name='t1d'
}
db close
catch { sqlite3 db test.db }
catchsql { DROP INDEX i1 }
} {1 {malformed database schema (t1)}}
catchsql { DROP INDEX t1c }
} {1 {malformed database schema (t1d)}}
finish_test

View File

@ -42,8 +42,8 @@ do_test where-1.0 {
}
execsql {
CREATE INDEX i1w ON t1(w);
CREATE INDEX i1xy ON t1(x,y);
CREATE INDEX i1w ON t1("w"); -- Verify quoted identifier names
CREATE INDEX i1xy ON t1(`x`,'y' ASC); -- Old MySQL compatibility
CREATE INDEX i2p ON t2(p);
CREATE INDEX i2r ON t2(r);
CREATE INDEX i2qs ON t2(q, s);

View File

@ -136,7 +136,7 @@ do_test where4-3.1 {
INSERT INTO t2 VALUES(1);
INSERT INTO t2 VALUES(2);
INSERT INTO t2 VALUES(3);
CREATE TABLE t3(x,y,UNIQUE(x,y));
CREATE TABLE t3(x,y,UNIQUE("x",'y' ASC)); -- Goofy syntax allowed
INSERT INTO t3 VALUES(1,11);
INSERT INTO t3 VALUES(2,NULL);
@ -200,7 +200,8 @@ do_test where4-4.4 {
ifcapable subquery {
do_test where4-5.1 {
execsql {
CREATE TABLE t4(x,y,z,PRIMARY KEY(x,y));
-- Allow the 'x' syntax for backwards compatibility
CREATE TABLE t4(x,y,z,PRIMARY KEY('x' ASC, "y" ASC));
}
execsql {
SELECT *
@ -304,4 +305,3 @@ do_execsql_test 8.2 { SELECT * FROM u9 WHERE a IS $null } {{} 1 {} 2}
finish_test