diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 034e6866db..90df81a016 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -352,6 +352,8 @@ int sqlite3Fts5IndexLoadConfig(Fts5Index *p); int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b) +int sqlite3Fts5GetVarintLen(u32 iVal); + /* ** End of interface to code in fts5_index.c. **************************************************************************/ diff --git a/ext/fts5/fts5_hash.c b/ext/fts5/fts5_hash.c index 841c4d3709..4b85b2af23 100644 --- a/ext/fts5/fts5_hash.c +++ b/ext/fts5/fts5_hash.c @@ -184,6 +184,23 @@ static int fts5HashResize(Fts5Hash *pHash){ return SQLITE_OK; } +static void fts5HashAddPoslistSize(Fts5HashEntry *p){ + if( p->iSzPoslist ){ + u8 *pPtr = (u8*)p; + int nSz = p->nData - p->iSzPoslist - 1; + + if( nSz<=127 ){ + pPtr[p->iSzPoslist] = nSz; + }else{ + int nByte = sqlite3Fts5GetVarintLen((u32)nSz); + memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); + sqlite3PutVarint(&pPtr[p->iSzPoslist], nSz); + p->nData += (nByte-1); + } + p->iSzPoslist = 0; + } +} + int sqlite3Fts5HashWrite( Fts5Hash *pHash, i64 iRowid, /* Rowid for this entry */ @@ -221,7 +238,7 @@ int sqlite3Fts5HashWrite( p->nData = nToken + 1 + sizeof(Fts5HashEntry); p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid); p->iSzPoslist = p->nData; - p->nData += 4; + p->nData += 1; p->iRowid = iRowid; p->pHashNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; @@ -233,7 +250,7 @@ int sqlite3Fts5HashWrite( ** is: ** ** + 9 bytes for a new rowid, - ** + 4 bytes reserved for the "poslist size" varint. + ** + 4 byte reserved for the "poslist size" varint. ** + 1 byte for a "new column" byte, ** + 3 bytes for a new column number (16-bit max) as a varint, ** + 5 bytes for the new position offset (32-bit max). @@ -255,11 +272,10 @@ int sqlite3Fts5HashWrite( /* If this is a new rowid, append the 4-byte size field for the previous ** entry, and the new rowid for this entry. */ if( iRowid!=p->iRowid ){ - assert( p->iSzPoslist>0 ); - fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4); + fts5HashAddPoslistSize(p); p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid - p->iRowid); p->iSzPoslist = p->nData; - p->nData += 4; + p->nData += 1; p->iCol = 0; p->iPos = 0; p->iRowid = iRowid; @@ -393,14 +409,13 @@ int sqlite3Fts5HashIterate( while( pList ){ Fts5HashEntry *pNext = pList->pScanNext; if( rc==SQLITE_OK ){ - const int nSz = pList->nData - pList->iSzPoslist - 4; const int nKey = strlen(pList->zKey); i64 iRowid = 0; u8 *pPtr = (u8*)pList; int iOff = sizeof(Fts5HashEntry) + nKey + 1; /* Fill in the final poslist size field */ - fts5Put4ByteVarint(&pPtr[pList->iSzPoslist], nSz); + fts5HashAddPoslistSize(pList); /* Issue the new-term callback */ rc = xTerm(pCtx, pList->zKey, nKey); @@ -412,10 +427,9 @@ int sqlite3Fts5HashIterate( int nVarint; iOff += getVarint(&pPtr[iOff], (u64*)&iDelta); iRowid += iDelta; - nPoslist = fts5Get4ByteVarint(&pPtr[iOff], &nVarint); - iOff += 4; - rc = xEntry(pCtx, iRowid, &pPtr[iOff-nVarint], nPoslist+nVarint); - iOff += nPoslist; + nVarint = fts5GetVarint32(&pPtr[iOff], nPoslist); + rc = xEntry(pCtx, iRowid, &pPtr[iOff], nPoslist+nVarint); + iOff += nVarint+nPoslist; } /* Issue the term-done callback */ @@ -445,8 +459,7 @@ int sqlite3Fts5HashQuery( } if( p ){ - u8 *pPtr = (u8*)p; - fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4); + fts5HashAddPoslistSize(p); *ppDoclist = &p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); }else{ @@ -482,9 +495,8 @@ void sqlite3Fts5HashScanEntry( ){ Fts5HashEntry *p; if( (p = pHash->pScan) ){ - u8 *pPtr = (u8*)p; int nTerm = strlen(p->zKey); - fts5Put4ByteVarint(&pPtr[p->iSzPoslist], p->nData - p->iSzPoslist - 4); + fts5HashAddPoslistSize(p); *pzTerm = p->zKey; *ppDoclist = &p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 6916c4254e..b90e5308d6 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -371,7 +371,7 @@ struct Fts5Structure { struct Fts5PageWriter { int pgno; /* Page number for this page */ Fts5Buffer buf; /* Buffer containing page data */ - Fts5Buffer term; /* Buffer containing previous term on page */ + Fts5Buffer term; /* Buffer containing previous term on page */ }; struct Fts5SegWriter { int iIdx; /* Index to write to */ @@ -667,6 +667,14 @@ int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){ } } +int sqlite3Fts5GetVarintLen(u32 iVal){ + if( iVal<(1 << 7 ) ) return 1; + if( iVal<(1 << 14) ) return 2; + if( iVal<(1 << 21) ) return 3; + if( iVal<(1 << 28) ) return 4; + return 5; +} + /* ** Allocate and return a buffer at least nByte bytes in size. ** diff --git a/ext/fts5/tool/loadfts5.tcl b/ext/fts5/tool/loadfts5.tcl index 97353ffa3b..feb92ec162 100644 --- a/ext/fts5/tool/loadfts5.tcl +++ b/ext/fts5/tool/loadfts5.tcl @@ -36,6 +36,7 @@ proc usage {} { puts stderr " -fts4 (use fts4 instead of fts5)" puts stderr " -fts5 (use fts5)" puts stderr " -porter (use porter tokenizer)" + puts stderr " -delete (delete the database file before starting)" puts stderr " -limit N (load no more than N documents)" puts stderr " -automerge N (set the automerge parameter to N)" puts stderr " -crisismerge N (set the crisismerge parameter to N)" @@ -45,6 +46,7 @@ proc usage {} { set O(vtab) fts5 set O(tok) "" set O(limit) 0 +set O(delete) 0 set O(automerge) -1 set O(crisismerge) -1 @@ -65,6 +67,10 @@ for {set i 0} {$i < $nOpt} {incr i} { set O(tok) ", tokenize=porter" } + -delete { + set O(delete) 1 + } + -limit { if { [incr i]>=$nOpt } usage set O(limit) [lindex $argv $i] @@ -86,7 +92,9 @@ for {set i 0} {$i < $nOpt} {incr i} { } } -sqlite3 db [lindex $argv end-1] +set dbfile [lindex $argv end-1] +if {$O(delete)} { file delete -force $dbfile } +sqlite3 db $dbfile db func loadfile loadfile db transaction { diff --git a/manifest b/manifest index 9aae583c5c..e43204b803 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Ensure\sgenerated\sheader\sfile\sfts5parse.h\sis\sincluded\sin\ssqlite3.c. -D 2015-02-02T11:58:21.261 +C Instead\sof\sthe\s4-byte\sfields,\suse\sregular\svarints\sfor\sthe\sposlist-size\sfield\sin\sfts5_hash.c. +D 2015-02-25T19:24:37.378 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,13 +106,13 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a F ext/fts5/fts5.c f2e899fba27ca33c8897635752c4c83a40dcb18d F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a -F ext/fts5/fts5Int.h 34040674eb25f3de8a0e57423a3155aef6312541 +F ext/fts5/fts5Int.h 7c2af493177b0e4e0290b869f19cd6d1d671d5ac F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c b92ba0eb67532d174934087f93716caf9a2168c7 F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 F ext/fts5/fts5_expr.c eee52c9df84eade48eaa3f50c8876f44b552ff9b -F ext/fts5/fts5_hash.c 57febfb06e59ae419ee9ba31667635f70d7c4dd0 -F ext/fts5/fts5_index.c beced7a9f360c2bf44a9f987c0a8735b6868ffbf +F ext/fts5/fts5_hash.c 9032dd35bf8da6f9d4fc8c955c348dd6d229d8e4 +F ext/fts5/fts5_index.c 220321d06ae87496e22bb92fe52b65d84549ef0e F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 0d108148c26132448487926fe683425002aee369 @@ -147,7 +147,7 @@ F ext/fts5/test/fts5rowid.test a1b2a6d76648c734c1aab11ee1a619067e8d90e6 F ext/fts5/test/fts5tokenizer.test b34ae592db66f6e89546d791ce1f905ba0b3395c F ext/fts5/test/fts5unicode.test 79b3e34eb29ce4929628aa514a40cb467fdabe4d F ext/fts5/test/fts5unicode2.test 64a5267fd6082fcb46439892ebd0cbaa5c38acee -F ext/fts5/tool/loadfts5.tcl 310cb6556b74eda5ce7829a539298c3f35003523 +F ext/fts5/tool/loadfts5.tcl 1e126891d14ab85dcdb0fac7755a4cd5ba52e8b8 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P fb10bbb9f9c4481e6043d323a3018a4ec68eb0ff -R 5aa72f287ea58e03f6201c3554f79f11 +P bc7be2fcfd29d6f1b567b69b3b20896eceb99798 +R 79d1ace6d8bc7671e8fd6ed98da16ffb U dan -Z faf1a2c366b2fc1b07a53967e5d081da +Z a35e490814ed578ad02ebd09e2509be2 diff --git a/manifest.uuid b/manifest.uuid index 419358cbac..c4c9f580d3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -bc7be2fcfd29d6f1b567b69b3b20896eceb99798 \ No newline at end of file +7eb022d7e5fdb180af823c82c47c938e4a7a355f \ No newline at end of file