diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 0bbea3aa7d..1e08bd68e0 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -92,6 +92,7 @@ struct Fts5Config { char *zName; /* Name of FTS index */ int nCol; /* Number of columns */ char **azCol; /* Column names */ + u8 *abUnindexed; /* True for unindexed columns */ int nPrefix; /* Number of prefix indexes */ int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ int eContent; /* An FTS5_CONTENT value */ diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 0450db6913..9bdcdf9e44 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -134,31 +134,50 @@ static const char *fts5ConfigSkipLiteral(const char *pIn){ return p; } +/* +** The first character of the string pointed to by argument z is guaranteed +** to be an open-quote character (see function fts5_isopenquote()). +** +** This function searches for the corresponding close-quote character within +** the string and, if found, dequotes the string in place and adds a new +** nul-terminator byte. +** +** If the close-quote is found, the value returned is the byte offset of +** the character immediately following it. Or, if the close-quote is not +** found, -1 is returned. If -1 is returned, the buffer is left in an +** undefined state. +*/ static int fts5Dequote(char *z){ char q; int iIn = 1; int iOut = 0; - int bRet = 1; q = z[0]; + /* Set stack variable q to the close-quote character */ assert( q=='[' || q=='\'' || q=='"' || q=='`' ); if( q=='[' ) q = ']'; while( z[iIn] ){ if( z[iIn]==q ){ if( z[iIn+1]!=q ){ - if( z[iIn+1]=='\0' ) bRet = 0; - break; + /* Character iIn was the close quote. */ + z[iOut] = '\0'; + return iIn+1; + }else{ + /* Character iIn and iIn+1 form an escaped quote character. Skip + ** the input cursor past both and copy a single quote character + ** to the output buffer. */ + iIn += 2; + z[iOut++] = q; } - z[iOut++] = q; - iIn += 2; }else{ z[iOut++] = z[iIn++]; } } - z[iOut] = '\0'; - return bRet; + /* Did not find the close-quote character. Return -1. */ + z[iOut] = '\0'; + return -1; } /* @@ -184,18 +203,6 @@ void sqlite3Fts5Dequote(char *z){ } } -/* -** Trim any white-space from the right of nul-terminated string z. -*/ -static char *fts5TrimString(char *z){ - int n = strlen(z); - while( n>0 && fts5_iswhitespace(z[n-1]) ){ - z[--n] = '\0'; - } - while( fts5_iswhitespace(*z) ) z++; - return z; -} - /* ** Duplicate the string passed as the only argument into a buffer allocated ** by sqlite3_malloc(). @@ -251,10 +258,10 @@ static int fts5ConfigParseSpecial( Fts5Global *pGlobal, Fts5Config *pConfig, /* Configuration object to update */ const char *zCmd, /* Special command to parse */ - int nCmd, /* Size of zCmd in bytes */ const char *zArg, /* Argument to parse */ char **pzErr /* OUT: Error message */ ){ + int nCmd = strlen(zCmd); if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){ const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES; int rc = SQLITE_OK; @@ -384,6 +391,84 @@ static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){ ); } +/* +** Gobble up the first bareword or quoted word from the input buffer zIn. +** Return a pointer to the character immediately following the last in +** the gobbled word if successful, or a NULL pointer otherwise (failed +** to find close-quote character). +** +** Before returning, set pzOut to point to a new buffer containing a +** nul-terminated, dequoted copy of the gobbled word. If the word was +** quoted, *pbQuoted is also set to 1 before returning. +** +** If *pRc is other than SQLITE_OK when this function is called, it is +** a no-op (NULL is returned). Otherwise, if an OOM occurs within this +** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not* +** set if a parse error (failed to find close quote) occurs. +*/ +static const char *fts5ConfigGobbleWord( + int *pRc, + const char *zIn, + char **pzOut, + int *pbQuoted +){ + const char *zRet = 0; + *pbQuoted = 0; + *pzOut = 0; + + if( *pRc==SQLITE_OK ){ + int nIn = strlen(zIn); + char *zOut = sqlite3_malloc(nIn+1); + + if( zOut==0 ){ + *pRc = SQLITE_NOMEM; + }else{ + memcpy(zOut, zIn, nIn+1); + if( fts5_isopenquote(zOut[0]) ){ + int ii = fts5Dequote(zOut); + if( ii>0 ) zRet = &zIn[ii]; + *pbQuoted = 1; + }else{ + zRet = fts5ConfigSkipBareword(zIn); + zOut[zRet-zIn] = '\0'; + } + } + + if( zRet==0 ){ + sqlite3_free(zOut); + }else{ + *pzOut = zOut; + } + } + + return zRet; +} + +static int fts5ConfigParseColumn( + Fts5Config *p, + char *zCol, + char *zArg, + char **pzErr +){ + int rc = SQLITE_OK; + if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME) + || 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME) + ){ + *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol); + rc = SQLITE_ERROR; + }else if( zArg ){ + if( 0==sqlite3_stricmp(zArg, "unindexed") ){ + p->abUnindexed[p->nCol] = 1; + }else{ + *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg); + rc = SQLITE_ERROR; + } + } + + p->azCol[p->nCol++] = zCol; + return rc; +} + /* ** Arguments nArg/azArg contain the string arguments passed to the xCreate ** or xConnect method of the virtual table. This function attempts to @@ -407,6 +492,7 @@ int sqlite3Fts5ConfigParse( int rc = SQLITE_OK; /* Return code */ Fts5Config *pRet; /* New object to return */ int i; + int nByte; *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config)); if( pRet==0 ) return SQLITE_NOMEM; @@ -414,7 +500,9 @@ int sqlite3Fts5ConfigParse( pRet->db = db; pRet->iCookie = -1; - pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg); + nByte = nArg * (sizeof(char*) + sizeof(u8)); + pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); + pRet->abUnindexed = (u8*)&pRet->azCol[nArg]; pRet->zDb = fts5Strdup(&rc, azArg[1]); pRet->zName = fts5Strdup(&rc, azArg[2]); if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ @@ -423,63 +511,48 @@ int sqlite3Fts5ConfigParse( } for(i=3; rc==SQLITE_OK && iazCol[pRet->nCol++] = zCol; - zDup = 0; - } - } - - sqlite3_free(zDup); + z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol); + z = fts5ConfigSkipWhitespace(z); + if( z && *z=='=' ){ + bOption = 1; + z++; + if( bMustBeCol ) z = 0; } + z = fts5ConfigSkipWhitespace(z); + if( z && z[0] ){ + int bDummy; + z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy); + if( z && z[0] ) z = 0; + } + + if( rc==SQLITE_OK ){ + if( z==0 ){ + *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig); + rc = SQLITE_ERROR; + }else{ + if( bOption ){ + rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo, pzErr); + }else{ + rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr); + zOne = 0; + } + } + } + + sqlite3_free(zOne); + sqlite3_free(zTwo); } /* If a tokenizer= option was successfully parsed, the tokenizer has ** already been allocated. Otherwise, allocate an instance of the default - ** tokenizer (simple) now. */ + ** tokenizer (unicode61) now. */ if( rc==SQLITE_OK && pRet->pTok==0 ){ rc = fts5ConfigDefaultTokenizer(pGlobal, pRet); } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index abddf5b30d..c109cff57d 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1815,15 +1815,13 @@ static void fts5SegIterNext( int bDummy; i64 iDelta; - if( p->rc==SQLITE_OK ){ - pIter->iRowidOffset--; - pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; - iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); - iOff += nPos; - getVarint(&a[iOff], (u64*)&iDelta); - pIter->iRowid -= iDelta; - fts5SegIterLoadNPos(p, pIter); - } + pIter->iRowidOffset--; + pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; + iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); + iOff += nPos; + getVarint(&a[iOff], (u64*)&iDelta); + pIter->iRowid -= iDelta; + fts5SegIterLoadNPos(p, pIter); }else{ fts5SegIterReverseNewPage(p, pIter); } diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index 075b2eb66a..33eda7c3e7 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -323,6 +323,7 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){ ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ + if( pConfig->abUnindexed[iCol-1] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_column_text(pSeek, iCol), @@ -486,6 +487,7 @@ int sqlite3Fts5StorageSpecialDelete( rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=0; rc==SQLITE_OK && iColnCol; iCol++){ + if( pConfig->abUnindexed[iCol] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_value_text(apVal[iCol]), @@ -564,12 +566,14 @@ int sqlite3Fts5StorageRebuild(Fts5Storage *p){ rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid); for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ ctx.szCol = 0; - rc = sqlite3Fts5Tokenize(pConfig, - (const char*)sqlite3_column_text(pScan, ctx.iCol+1), - sqlite3_column_bytes(pScan, ctx.iCol+1), - (void*)&ctx, - fts5StorageInsertCallback - ); + if( pConfig->abUnindexed[ctx.iCol]==0 ){ + rc = sqlite3Fts5Tokenize(pConfig, + (const char*)sqlite3_column_text(pScan, ctx.iCol+1), + sqlite3_column_bytes(pScan, ctx.iCol+1), + (void*)&ctx, + fts5StorageInsertCallback + ); + } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; } @@ -671,12 +675,14 @@ int sqlite3Fts5StorageInsert( } for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ ctx.szCol = 0; - rc = sqlite3Fts5Tokenize(pConfig, - (const char*)sqlite3_value_text(apVal[ctx.iCol+2]), - sqlite3_value_bytes(apVal[ctx.iCol+2]), - (void*)&ctx, - fts5StorageInsertCallback - ); + if( pConfig->abUnindexed[ctx.iCol]==0 ){ + rc = sqlite3Fts5Tokenize(pConfig, + (const char*)sqlite3_value_text(apVal[ctx.iCol+2]), + sqlite3_value_bytes(apVal[ctx.iCol+2]), + (void*)&ctx, + fts5StorageInsertCallback + ); + } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; } @@ -783,6 +789,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ ctx.szCol = 0; rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); for(i=0; rc==SQLITE_OK && inCol; i++){ + if( pConfig->abUnindexed[i] ) continue; ctx.iCol = i; ctx.szCol = 0; rc = sqlite3Fts5Tokenize( diff --git a/ext/fts5/test/fts5tokenizer.test b/ext/fts5/test/fts5tokenizer.test index d8c4f20f0e..44de1690fe 100644 --- a/ext/fts5/test/fts5tokenizer.test +++ b/ext/fts5/test/fts5tokenizer.test @@ -70,7 +70,7 @@ do_catchsql_test 4.1 { } {1 {parse error in "tokenize = tcl abc"}} do_catchsql_test 4.2 { CREATE VIRTUAL TABLE ft2 USING fts5(x y) -} {1 {parse error in "x y"}} +} {1 {unrecognized column option: y}} #------------------------------------------------------------------------- # Test the "separators" and "tokenchars" options a bit. diff --git a/ext/fts5/test/fts5unindexed.test b/ext/fts5/test/fts5unindexed.test new file mode 100644 index 0000000000..e808064f05 --- /dev/null +++ b/ext/fts5/test/fts5unindexed.test @@ -0,0 +1,73 @@ +# 2015 Apr 24 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# The tests in this file focus on "unindexed" columns. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5unindexed + + +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b UNINDEXED); + INSERT INTO t1 VALUES('a b c', 'd e f'); + INSERT INTO t1 VALUES('g h i', 'j k l'); +} {} + +do_execsql_test 1.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1} +do_execsql_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {} + +do_execsql_test 1.4 { INSERT INTO t1(t1) VALUES('integrity-check') } {} +do_execsql_test 1.5 { INSERT INTO t1(t1) VALUES('rebuild') } {} +do_execsql_test 1.6 { INSERT INTO t1(t1) VALUES('integrity-check') } {} + +do_execsql_test 1.7 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1} +do_execsql_test 1.8 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {} + +do_execsql_test 1.9 { DELETE FROM t1 WHERE t1 MATCH 'b' } {} + +do_execsql_test 1.10 { INSERT INTO t1(t1) VALUES('integrity-check') } {} +do_execsql_test 1.11 { INSERT INTO t1(t1) VALUES('rebuild') } {} +do_execsql_test 1.12 { INSERT INTO t1(t1) VALUES('integrity-check') } {} + +do_execsql_test 1.13 { SELECT rowid FROM t1 WHERE t1 MATCH 'i' } {2} +do_execsql_test 1.14 { SELECT rowid FROM t1 WHERE t1 MATCH 'l' } {} + +do_execsql_test 2.1 { + CREATE VIRTUAL TABLE t2 USING fts5(a UNINDEXED, b UNINDEXED); + INSERT INTO t1 VALUES('a b c', 'd e f'); + INSERT INTO t1 VALUES('g h i', 'j k l'); + SELECT rowid FROM t2_data; +} {1 10} +do_execsql_test 2.2 { + INSERT INTO t2(t2) VALUES('rebuild'); + INSERT INTO t2(t2) VALUES('integrity-check'); + SELECT rowid FROM t2_data; +} {1 10} + +do_execsql_test 3.1 { + CREATE TABLE x4(i INTEGER PRIMARY KEY, a, b, c); + CREATE VIRTUAL TABLE t4 USING fts5(a, b UNINDEXED, c, content=x4); + INSERT INTO x4 VALUES(10, 'a b c', 'd e f', 'g h i'); + INSERT INTO x4 VALUES(20, 'j k l', 'm n o', 'p q r'); + INSERT INTO t4(t4) VALUES('rebuild'); + INSERT INTO t4(t4) VALUES('integrity-check'); +} {} + +do_execsql_test 3.2 { + INSERT INTO t4(t4, rowid, a, b, c) VALUES('delete', 20, 'j k l', '', 'p q r'); + DELETE FROM x4 WHERE rowid=20; + INSERT INTO t4(t4) VALUES('integrity-check'); +} {} + + +finish_test + diff --git a/manifest b/manifest index 9d446ac187..fe8819f3b1 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sextra\stests\sfor\scorrupt\sdatabase\shandling\sin\sfts5. -D 2015-04-24T15:56:09.379 +C Add\sthe\s"unindexed"\scolumn\soption\sto\sfts5. +D 2015-04-24T19:41:43.259 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in faaf75b89840659d74501bea269c7e33414761c1 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,14 +106,14 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c 1eb8ca073be5222c43e4eee5408764c2cbb4200b F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a -F ext/fts5/fts5Int.h 1309320cb233e1c5b38d7f1e2cab2138bbf497d8 +F ext/fts5/fts5Int.h 803fd2fc03e3799a38ebb404f2f1309ded5d3e8b F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22 F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 -F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 +F ext/fts5/fts5_config.c adf7110b0e8a9bdd64cb61c6f9da0bf6b80d9a1d F ext/fts5/fts5_expr.c 05da381ab26031243266069302c6eb4094b2c5dd F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c 1663ad6a9ae221f14f27442b9b1a9d5088a2c5fe -F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d +F ext/fts5/fts5_index.c 39810b25a017f2626ac72b3e44afe9b534e5d5db +F ext/fts5/fts5_storage.c b3a4cbbcd197fe587789398e51a631f92fc9196c F ext/fts5/fts5_tcl.c 10bf0eb678d34c1bfdcfaf653d2e6dd92afa8b38 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b F ext/fts5/fts5_unicode2.c f74f53316377068812a1fa5a37819e6b8124631d @@ -148,9 +148,10 @@ F ext/fts5/test/fts5porter.test 50322599823cb8080a99f0ec0c39f7d0c12bcb5e F ext/fts5/test/fts5prefix.test 4610dfba4460d92f23a8014874a46493f1be77b5 F ext/fts5/test/fts5rebuild.test ee6792715c6c528cc188e7869d67c3c655889ddb F ext/fts5/test/fts5rowid.test a1b2a6d76648c734c1aab11ee1a619067e8d90e6 -F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c +F ext/fts5/test/fts5tokenizer.test 7a6ee24db908c09a0dc1eba634ffa17afcc05d86 F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee +F ext/fts5/test/fts5unindexed.test f388605341a476b6ab622b4c267cd168f59a5944 F ext/fts5/tool/loadfts5.tcl 1e126891d14ab85dcdb0fac7755a4cd5ba52e8b8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -1302,7 +1303,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 60045cedef109f03317dc878fe6bb3d03867ae69 -R 9c5c238cdd6f30dc8d0223c36173d961 +P 41449f7a0b5da6332eef48386c91ef63382c4783 +R 6f8967f9e2552e4661a21d901cb3fab7 U dan -Z ec7cf237df9e7bd8116f5a496704530c +Z c90c5f944e45872447f2bae768eb92e5 diff --git a/manifest.uuid b/manifest.uuid index 62f07c3585..71decbb105 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -41449f7a0b5da6332eef48386c91ef63382c4783 \ No newline at end of file +86309961344f4076ddcf55d730d3600ec3b6e45c \ No newline at end of file