Add the "unindexed" column option to fts5.

FossilOrigin-Name: 86309961344f4076ddcf55d730d3600ec3b6e45c
This commit is contained in:
dan 2015-04-24 19:41:43 +00:00
parent def90aae18
commit df5bd1fed2
8 changed files with 258 additions and 105 deletions

View File

@ -92,6 +92,7 @@ struct Fts5Config {
char *zName; /* Name of FTS index */
int nCol; /* Number of columns */
char **azCol; /* Column names */
u8 *abUnindexed; /* True for unindexed columns */
int nPrefix; /* Number of prefix indexes */
int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */
int eContent; /* An FTS5_CONTENT value */

View File

@ -134,31 +134,50 @@ static const char *fts5ConfigSkipLiteral(const char *pIn){
return p;
}
/*
** The first character of the string pointed to by argument z is guaranteed
** to be an open-quote character (see function fts5_isopenquote()).
**
** This function searches for the corresponding close-quote character within
** the string and, if found, dequotes the string in place and adds a new
** nul-terminator byte. Within the quoted text, two consecutive close-quote
** characters are an escape that stands for a single literal quote character.
**
** If the close-quote is found, the value returned is the byte offset of
** the character immediately following it. Or, if the close-quote is not
** found, -1 is returned. If -1 is returned, the buffer is left in an
** undefined state.
*/
static int fts5Dequote(char *z){
  char q;                         /* Close-quote character to search for */
  int iIn = 1;                    /* Input cursor. Byte 0 is the open quote */
  int iOut = 0;                   /* Output cursor. Never overtakes iIn */

  /* Set stack variable q to the close-quote character */
  q = z[0];
  assert( q=='[' || q=='\'' || q=='"' || q=='`' );
  if( q=='[' ) q = ']';

  while( z[iIn] ){
    if( z[iIn]==q ){
      if( z[iIn+1]!=q ){
        /* Character iIn was the close quote. */
        z[iOut] = '\0';
        return iIn+1;
      }else{
        /* Character iIn and iIn+1 form an escaped quote character. Skip
        ** the input cursor past both and copy a single quote character
        ** to the output buffer. */
        iIn += 2;
        z[iOut++] = q;
      }
    }else{
      z[iOut++] = z[iIn++];
    }
  }

  /* Did not find the close-quote character. Return -1. */
  z[iOut] = '\0';
  return -1;
}
/*
@ -184,18 +203,6 @@ void sqlite3Fts5Dequote(char *z){
}
}
/*
** Strip white-space from both ends of nul-terminated string z.
**
** Trailing white-space is removed in place by overwriting it with
** nul-terminator bytes. Leading white-space is not moved; instead, the
** value returned is a pointer to the first non-white-space character
** within buffer z (or to its nul-terminator if nothing else remains).
*/
static char *fts5TrimString(char *z){
  char *zStart = z;
  int nByte = strlen(z);

  /* Shorten the string while its final byte is white-space. */
  for( ; nByte>0 && fts5_iswhitespace(z[nByte-1]); nByte--){
    z[nByte-1] = '\0';
  }

  /* Advance past any leading white-space. */
  while( fts5_iswhitespace(*zStart) ){
    zStart++;
  }
  return zStart;
}
/*
** Duplicate the string passed as the only argument into a buffer allocated
** by sqlite3_malloc().
@ -251,10 +258,10 @@ static int fts5ConfigParseSpecial(
Fts5Global *pGlobal,
Fts5Config *pConfig, /* Configuration object to update */
const char *zCmd, /* Special command to parse */
int nCmd, /* Size of zCmd in bytes */
const char *zArg, /* Argument to parse */
char **pzErr /* OUT: Error message */
){
int nCmd = strlen(zCmd);
if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){
const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES;
int rc = SQLITE_OK;
@ -384,6 +391,84 @@ static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){
);
}
/*
** Gobble up the first bareword or quoted word from the input buffer zIn.
** Return a pointer to the character immediately following the last in
** the gobbled word if successful, or a NULL pointer otherwise (failed
** to find close-quote character, or no word could be extracted).
**
** Before returning, set pzOut to point to a new buffer containing a
** nul-terminated, dequoted copy of the gobbled word. The buffer is
** obtained from sqlite3_malloc() and is only handed back when the return
** value is non-NULL; otherwise *pzOut is left set to NULL. *pbQuoted is
** set to 1 if the input begins with an open-quote character (even if the
** matching close-quote is then not found), or to 0 otherwise.
**
** If *pRc is other than SQLITE_OK when this function is called, it is
** a no-op (NULL is returned). Otherwise, if an OOM occurs within this
** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not*
** set if a parse error (failed to find close quote) occurs.
*/
static const char *fts5ConfigGobbleWord(
  int *pRc,                       /* IN/OUT: Error code */
  const char *zIn,                /* Buffer to gobble word from */
  char **pzOut,                   /* OUT: Dequoted copy of the word */
  int *pbQuoted                   /* OUT: True if input began with a quote */
){
  const char *zRet = 0;
  *pbQuoted = 0;
  *pzOut = 0;

  if( *pRc==SQLITE_OK ){
    int nIn = strlen(zIn);
    char *zOut = sqlite3_malloc(nIn+1);
    if( zOut==0 ){
      *pRc = SQLITE_NOMEM;
    }else{
      /* Work on a private copy so that zIn itself is never modified. */
      memcpy(zOut, zIn, nIn+1);
      if( fts5_isopenquote(zOut[0]) ){
        /* Offsets into the copy are equally valid offsets into zIn, as
        ** fts5Dequote() reports the offset within the original text. */
        int ii = fts5Dequote(zOut);
        if( ii>0 ) zRet = &zIn[ii];
        *pbQuoted = 1;
      }else{
        zRet = fts5ConfigSkipBareword(zIn);
        /* fts5ConfigSkipBareword() is defined outside this function. Guard
        ** against it reporting "no bareword here" with a NULL return, as
        ** (zRet-zIn) would then be an invalid offset into zOut. */
        if( zRet ){
          zOut[zRet-zIn] = '\0';
        }
      }
    }

    /* Either transfer the buffer to the caller or release it. Calling
    ** sqlite3_free() with a NULL pointer (the OOM case) is harmless. */
    if( zRet==0 ){
      sqlite3_free(zOut);
    }else{
      *pzOut = zOut;
    }
  }

  return zRet;
}
/*
** Add a column named zCol to configuration object p. Argument zArg is
** either NULL or the single option word that followed the column name.
**
** SQLITE_ERROR is returned, and *pzErr set to point to an error message
** obtained from sqlite3_mprintf(), if zCol is one of the reserved names
** FTS5_RANK_NAME or FTS5_ROWID_NAME, or if zArg is anything other than
** "unindexed" (compared case-insensitively). Otherwise SQLITE_OK is
** returned.
**
** Pointer zCol is stored in p->azCol[] and p->nCol incremented whether or
** not an error occurs, so the caller must not free zCol afterwards.
*/
static int fts5ConfigParseColumn(
  Fts5Config *p,
  char *zCol,
  char *zArg,
  char **pzErr
){
  int rc = SQLITE_OK;
  int bReserved = (
      sqlite3_stricmp(zCol, FTS5_RANK_NAME)==0
   || sqlite3_stricmp(zCol, FTS5_ROWID_NAME)==0
  );

  if( bReserved ){
    *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
    rc = SQLITE_ERROR;
  }else if( zArg!=0 ){
    if( sqlite3_stricmp(zArg, "unindexed")!=0 ){
      *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg);
      rc = SQLITE_ERROR;
    }else{
      /* Flag the slot that this column is about to occupy. */
      p->abUnindexed[p->nCol] = 1;
    }
  }

  /* Record the column name unconditionally (see header comment). */
  p->azCol[p->nCol] = zCol;
  p->nCol++;
  return rc;
}
/*
** Arguments nArg/azArg contain the string arguments passed to the xCreate
** or xConnect method of the virtual table. This function attempts to
@ -407,6 +492,7 @@ int sqlite3Fts5ConfigParse(
int rc = SQLITE_OK; /* Return code */
Fts5Config *pRet; /* New object to return */
int i;
int nByte;
*ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
if( pRet==0 ) return SQLITE_NOMEM;
@ -414,7 +500,9 @@ int sqlite3Fts5ConfigParse(
pRet->db = db;
pRet->iCookie = -1;
pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
nByte = nArg * (sizeof(char*) + sizeof(u8));
pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
pRet->abUnindexed = (u8*)&pRet->azCol[nArg];
pRet->zDb = fts5Strdup(&rc, azArg[1]);
pRet->zName = fts5Strdup(&rc, azArg[2]);
if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
@ -423,63 +511,48 @@ int sqlite3Fts5ConfigParse(
}
for(i=3; rc==SQLITE_OK && i<nArg; i++){
char *zDup = fts5Strdup(&rc, azArg[i]);
if( zDup ){
char *zCol = 0;
int bParseError = 0;
const char *zOrig = azArg[i];
const char *z;
char *zOne = 0;
char *zTwo = 0;
int bOption = 0;
int bMustBeCol = 0;
/* Check if this is a quoted column name */
if( fts5_isopenquote(zDup[0]) ){
bParseError = fts5Dequote(zDup);
zCol = zDup;
}else{
char *z = (char*)fts5ConfigSkipBareword(zDup);
if( *z=='\0' ){
zCol = zDup;
}else{
int nCmd = z - zDup;
z = (char*)fts5ConfigSkipWhitespace(z);
if( *z!='=' ){
bParseError = 1;
}else{
z++;
z = fts5TrimString(z);
if( fts5_isopenquote(*z) ){
if( fts5Dequote(z) ) bParseError = 1;
}else{
char *z2 = (char*)fts5ConfigSkipBareword(z);
if( *z2 ) bParseError = 1;
}
if( bParseError==0 ){
rc = fts5ConfigParseSpecial(pGlobal, pRet, zDup, nCmd, z, pzErr);
}
}
}
}
if( bParseError ){
assert( *pzErr==0 );
*pzErr = sqlite3_mprintf("parse error in \"%s\"", zDup);
rc = SQLITE_ERROR;
}else if( zCol ){
if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME)
|| 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME)
){
*pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
rc = SQLITE_ERROR;
}else{
pRet->azCol[pRet->nCol++] = zCol;
zDup = 0;
}
}
sqlite3_free(zDup);
z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
z = fts5ConfigSkipWhitespace(z);
if( z && *z=='=' ){
bOption = 1;
z++;
if( bMustBeCol ) z = 0;
}
z = fts5ConfigSkipWhitespace(z);
if( z && z[0] ){
int bDummy;
z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
if( z && z[0] ) z = 0;
}
if( rc==SQLITE_OK ){
if( z==0 ){
*pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig);
rc = SQLITE_ERROR;
}else{
if( bOption ){
rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo, pzErr);
}else{
rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr);
zOne = 0;
}
}
}
sqlite3_free(zOne);
sqlite3_free(zTwo);
}
/* If a tokenizer= option was successfully parsed, the tokenizer has
** already been allocated. Otherwise, allocate an instance of the default
** tokenizer (simple) now. */
** tokenizer (unicode61) now. */
if( rc==SQLITE_OK && pRet->pTok==0 ){
rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
}

View File

@ -1815,15 +1815,13 @@ static void fts5SegIterNext(
int bDummy;
i64 iDelta;
if( p->rc==SQLITE_OK ){
pIter->iRowidOffset--;
pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
iOff += nPos;
getVarint(&a[iOff], (u64*)&iDelta);
pIter->iRowid -= iDelta;
fts5SegIterLoadNPos(p, pIter);
}
pIter->iRowidOffset--;
pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
iOff += nPos;
getVarint(&a[iOff], (u64*)&iDelta);
pIter->iRowid -= iDelta;
fts5SegIterLoadNPos(p, pIter);
}else{
fts5SegIterReverseNewPage(p, pIter);
}

View File

@ -323,6 +323,7 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){
ctx.iCol = -1;
rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
if( pConfig->abUnindexed[iCol-1] ) continue;
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(pConfig,
(const char*)sqlite3_column_text(pSeek, iCol),
@ -486,6 +487,7 @@ int sqlite3Fts5StorageSpecialDelete(
rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){
if( pConfig->abUnindexed[iCol] ) continue;
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(pConfig,
(const char*)sqlite3_value_text(apVal[iCol]),
@ -564,12 +566,14 @@ int sqlite3Fts5StorageRebuild(Fts5Storage *p){
rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid);
for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(pConfig,
(const char*)sqlite3_column_text(pScan, ctx.iCol+1),
sqlite3_column_bytes(pScan, ctx.iCol+1),
(void*)&ctx,
fts5StorageInsertCallback
);
if( pConfig->abUnindexed[ctx.iCol]==0 ){
rc = sqlite3Fts5Tokenize(pConfig,
(const char*)sqlite3_column_text(pScan, ctx.iCol+1),
sqlite3_column_bytes(pScan, ctx.iCol+1),
(void*)&ctx,
fts5StorageInsertCallback
);
}
sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
}
@ -671,12 +675,14 @@ int sqlite3Fts5StorageInsert(
}
for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(pConfig,
(const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
sqlite3_value_bytes(apVal[ctx.iCol+2]),
(void*)&ctx,
fts5StorageInsertCallback
);
if( pConfig->abUnindexed[ctx.iCol]==0 ){
rc = sqlite3Fts5Tokenize(pConfig,
(const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
sqlite3_value_bytes(apVal[ctx.iCol+2]),
(void*)&ctx,
fts5StorageInsertCallback
);
}
sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
}
@ -783,6 +789,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
ctx.szCol = 0;
rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
if( pConfig->abUnindexed[i] ) continue;
ctx.iCol = i;
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(

View File

@ -70,7 +70,7 @@ do_catchsql_test 4.1 {
} {1 {parse error in "tokenize = tcl abc"}}
do_catchsql_test 4.2 {
CREATE VIRTUAL TABLE ft2 USING fts5(x y)
} {1 {parse error in "x y"}}
} {1 {unrecognized column option: y}}
#-------------------------------------------------------------------------
# Test the "separators" and "tokenchars" options a bit.

View File

@ -0,0 +1,73 @@
# 2015 Apr 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on "unindexed" columns.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unindexed
# Tests 1.*: table t1 has an indexed column "a" and an UNINDEXED column "b".
# Tokens found only in column b ('e', 'l') must never match, while tokens
# from column a ('b', 'i') do. The same expectations are checked again after
# a 'rebuild' and after a DELETE, with 'integrity-check' run along the way.
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b UNINDEXED);
INSERT INTO t1 VALUES('a b c', 'd e f');
INSERT INTO t1 VALUES('g h i', 'j k l');
} {}
do_execsql_test 1.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1}
do_execsql_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {}
do_execsql_test 1.4 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
do_execsql_test 1.5 { INSERT INTO t1(t1) VALUES('rebuild') } {}
do_execsql_test 1.6 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
do_execsql_test 1.7 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1}
do_execsql_test 1.8 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {}
# Delete the row that matches 'b' (rowid 1), then verify the index again.
do_execsql_test 1.9 { DELETE FROM t1 WHERE t1 MATCH 'b' } {}
do_execsql_test 1.10 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
do_execsql_test 1.11 { INSERT INTO t1(t1) VALUES('rebuild') } {}
do_execsql_test 1.12 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
do_execsql_test 1.13 { SELECT rowid FROM t1 WHERE t1 MATCH 'i' } {2}
do_execsql_test 1.14 { SELECT rowid FROM t1 WHERE t1 MATCH 'l' } {}
# Tests 2.*: table t2 consists solely of UNINDEXED columns. The rowids
# present in t2_data are expected to be {1 10} both before and after a
# 'rebuild'.
#
# NOTE(review): the two INSERT statements in test 2.1 target t1, not the
# newly created t2, so no rows are inserted into t2 by this test - confirm
# whether "INSERT INTO t2" was intended.
do_execsql_test 2.1 {
CREATE VIRTUAL TABLE t2 USING fts5(a UNINDEXED, b UNINDEXED);
INSERT INTO t1 VALUES('a b c', 'd e f');
INSERT INTO t1 VALUES('g h i', 'j k l');
SELECT rowid FROM t2_data;
} {1 10}
do_execsql_test 2.2 {
INSERT INTO t2(t2) VALUES('rebuild');
INSERT INTO t2(t2) VALUES('integrity-check');
SELECT rowid FROM t2_data;
} {1 10}
# Tests 3.*: an external-content table (content=x4) in which column b is
# UNINDEXED. Test 3.1 populates x4, builds the index with 'rebuild' and
# runs 'integrity-check'.
do_execsql_test 3.1 {
CREATE TABLE x4(i INTEGER PRIMARY KEY, a, b, c);
CREATE VIRTUAL TABLE t4 USING fts5(a, b UNINDEXED, c, content=x4);
INSERT INTO x4 VALUES(10, 'a b c', 'd e f', 'g h i');
INSERT INTO x4 VALUES(20, 'j k l', 'm n o', 'p q r');
INSERT INTO t4(t4) VALUES('rebuild');
INSERT INTO t4(t4) VALUES('integrity-check');
} {}
# Issue a 'delete' command for rowid 20, passing an empty string for the
# unindexed column b (the value stored in x4 is 'm n o'), then remove the
# row from x4. The integrity-check is expected to pass - presumably showing
# that the value supplied for an unindexed column does not matter here.
do_execsql_test 3.2 {
INSERT INTO t4(t4, rowid, a, b, c) VALUES('delete', 20, 'j k l', '', 'p q r');
DELETE FROM x4 WHERE rowid=20;
INSERT INTO t4(t4) VALUES('integrity-check');
} {}
finish_test

View File

@ -1,5 +1,5 @@
C Add\sextra\stests\sfor\scorrupt\sdatabase\shandling\sin\sfts5.
D 2015-04-24T15:56:09.379
C Add\sthe\s"unindexed"\scolumn\soption\sto\sfts5.
D 2015-04-24T19:41:43.259
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in faaf75b89840659d74501bea269c7e33414761c1
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -106,14 +106,14 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad
F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a
F ext/fts5/fts5.c 1eb8ca073be5222c43e4eee5408764c2cbb4200b
F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a
F ext/fts5/fts5Int.h 1309320cb233e1c5b38d7f1e2cab2138bbf497d8
F ext/fts5/fts5Int.h 803fd2fc03e3799a38ebb404f2f1309ded5d3e8b
F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22
F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23
F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894
F ext/fts5/fts5_config.c adf7110b0e8a9bdd64cb61c6f9da0bf6b80d9a1d
F ext/fts5/fts5_expr.c 05da381ab26031243266069302c6eb4094b2c5dd
F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529
F ext/fts5/fts5_index.c 1663ad6a9ae221f14f27442b9b1a9d5088a2c5fe
F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d
F ext/fts5/fts5_index.c 39810b25a017f2626ac72b3e44afe9b534e5d5db
F ext/fts5/fts5_storage.c b3a4cbbcd197fe587789398e51a631f92fc9196c
F ext/fts5/fts5_tcl.c 10bf0eb678d34c1bfdcfaf653d2e6dd92afa8b38
F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b
F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d
@ -148,9 +148,10 @@ F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e
F ext/fts5/test/fts5prefix.test 4610dfba4460d92f23a8014874a46493f1be77b5
F ext/fts5/test/fts5rebuild.test ee6792715c6c528cc188e7869d67c3c655889ddb
F ext/fts5/test/fts5rowid.test a1b2a6d76648c734c1aab11ee1a619067e8d90e6
F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c
F ext/fts5/test/fts5tokenizer.test 7a6ee24db908c09a0dc1eba634ffa17afcc05d86
F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d
F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee
F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944
F ext/fts5/tool/loadfts5.tcl 1e126891d14ab85dcdb0fac7755a4cd5ba52e8b8
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb
@ -1302,7 +1303,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 60045cedef109f03317dc878fe6bb3d03867ae69
R 9c5c238cdd6f30dc8d0223c36173d961
P 41449f7a0b5da6332eef48386c91ef63382c4783
R 6f8967f9e2552e4661a21d901cb3fab7
U dan
Z ec7cf237df9e7bd8116f5a496704530c
Z c90c5f944e45872447f2bae768eb92e5

View File

@ -1 +1 @@
41449f7a0b5da6332eef48386c91ef63382c4783
86309961344f4076ddcf55d730d3600ec3b6e45c