Replace the hash table borrowed from fts3.

FossilOrigin-Name: 617e2fac1c128212254f71b1a8fddaf0d1d90262
This commit is contained in:
dan 2014-08-11 19:44:52 +00:00
parent c45f24b281
commit c7fe7a969b
7 changed files with 519 additions and 271 deletions

View File

@ -283,6 +283,47 @@ int sqlite3Fts5IndexReads(Fts5Index *p);
** End of interface to code in fts5_index.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_hash.c.
**
** Fts5Hash is an opaque in-memory hash table that maps each token to the
** rowids and position lists accumulated for it by sqlite3Fts5HashWrite().
*/
typedef struct Fts5Hash Fts5Hash;
/*
** Create a hash table, free a hash table.
**
** sqlite3Fts5HashNew() returns SQLITE_OK and sets the output pointer to the
** new table, or returns SQLITE_NOMEM and sets it to NULL. The second
** argument points to a caller-owned byte counter that is retained by the
** table and incremented by sqlite3Fts5HashWrite() as data is added.
**
** sqlite3Fts5HashFree() releases the table and all of its entries. Passing
** NULL is a harmless no-op.
*/
int sqlite3Fts5HashNew(Fts5Hash**, int *pnSize);
void sqlite3Fts5HashFree(Fts5Hash*);
/*
** Append an (iRowid, iCol, iPos) record to the data stored for token
** (pToken/nToken), creating the entry for the token if it does not exist
** yet. If iCol is negative no position is appended; only the rowid is
** recorded. Returns SQLITE_OK on success or SQLITE_NOMEM if an allocation
** fails.
*/
int sqlite3Fts5HashWrite(
Fts5Hash*,
i64 iRowid, /* Rowid for this entry */
int iCol, /* Column token appears in (-ve -> delete) */
int iPos, /* Position of token within column */
const char *pToken, int nToken /* Token to add or remove to or from index */
);
/*
** Empty (but do not delete) a hash table.
*/
void sqlite3Fts5HashClear(Fts5Hash*);
/*
** Iterate through the contents of the hash table.
**
** Terms are visited in ascending key order. For each term the callbacks
** are invoked as follows, with pCtx passed through as the first argument:
**
**   xTerm(pCtx, zTerm, nTerm)             once, before any entries
**   xEntry(pCtx, iRowid, aPoslist, nByte) once per rowid stored for the
**                                         term, last written rowid first
**   xTermDone(pCtx)                       once, after the last entry
**
** If a callback returns anything other than SQLITE_OK no further callbacks
** are made and that value is returned. Provided the internal sort
** succeeds, every entry is removed from the table and freed, whether or
** not a callback failed.
*/
int sqlite3Fts5HashIterate(
Fts5Hash*,
void *pCtx,
int (*xTerm)(void*, const char*, int),
int (*xEntry)(void*, i64, const u8*, int),
int (*xTermDone)(void*)
);
/*
** End of interface to code in fts5_hash.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains
** code to access the data stored in the %_content and %_docsize tables.

371
ext/fts5/fts5_hash.c Normal file
View File

@ -0,0 +1,371 @@
/*
** 2014 August 11
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
*/
#include "fts5Int.h"
typedef struct Fts5HashEntry Fts5HashEntry;
/*
** This file contains the implementation of an in-memory hash table used
** to accumulate "term -> doclist" content before it is flushed to a level-0
** segment.
**
** pnByte:
**   Points to a counter owned by the caller of sqlite3Fts5HashNew().
**   sqlite3Fts5HashWrite() adds to it the number of bytes of entry data
**   written by each call.
**
** nEntry:
**   NOTE(review): nothing in this file updates this field after it is
**   zeroed by sqlite3Fts5HashNew() - confirm before relying on it.
**
** aSlot[]:
**   Array of nSlot list heads. Entries whose keys hash to the same slot are
**   chained together through Fts5HashEntry.pNext.
*/
struct Fts5Hash {
int *pnByte; /* Pointer to bytes counter */
int nEntry; /* Number of entries currently in hash */
int nSlot; /* Size of aSlot[] array */
Fts5HashEntry **aSlot; /* Array of hash slots */
};
/*
** Each entry in the hash table is represented by an object of the
** following type. Each object, its key (zKey[]) and its current data
** are stored in a single memory allocation. The position list data
** immediately follows the key data in memory.
**
** The data that follows the key is in a similar, but not identical format
** to the doclist data stored in the database. For each rowid it is:
**
**   * Rowid, as a varint
**   * Position list, without 0x00 terminator.
**
** When a further rowid is appended, the record for the previous rowid is
** first closed off with:
**
**   * Size of previous position list and rowid, as a 4 byte integer in
**     native byte order (it is written and read back with memcpy()).
**
** The size fields allow the data to be walked backwards, from the last
** rowid written to the first.
**
** iRowidOff:
**   Offset of last rowid written to data area. Relative to first byte of
**   structure.
**
** nData:
**   Total number of bytes of the allocation in use so far, counted from
**   the first byte of the structure (so it includes the structure itself
**   and the nul-terminated key). New data is appended at this offset.
**
** zKey[]:
**   Declared as a C99 flexible array member; sizeof(Fts5HashEntry) does
**   not include any key bytes.
*/
struct Fts5HashEntry {
  Fts5HashEntry *pNext;       /* Next hash entry with same hash-key */
  int nAlloc;                 /* Total size of allocation */
  int iRowidOff;              /* Offset of last rowid written */
  int nData;                  /* Total bytes of data (incl. structure) */
  int iCol;                   /* Column of last value written */
  int iPos;                   /* Position of last value written */
  i64 iRowid;                 /* Rowid of last value written */
  char zKey[];                /* Nul-terminated entry key */
};
/*
** Allocate a new, empty hash table with 1024 slots.
**
** On success SQLITE_OK is returned and *ppNew is set to point to the new
** table. If either allocation fails, SQLITE_NOMEM is returned and *ppNew
** is set to NULL. Pointer pnByte is stored in the new object; it is the
** counter that sqlite3Fts5HashWrite() increments as data is added.
*/
int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){
  const int nInitSlot = 1024;
  Fts5Hash *pHash;
  int nSlotByte;

  pHash = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
  *ppNew = pHash;
  if( pHash==0 ) return SQLITE_NOMEM;

  memset(pHash, 0, sizeof(Fts5Hash));
  pHash->pnByte = pnByte;
  pHash->nSlot = nInitSlot;

  /* Allocate and zero the array of slot list-heads. */
  nSlotByte = sizeof(Fts5HashEntry*) * pHash->nSlot;
  pHash->aSlot = (Fts5HashEntry**)sqlite3_malloc(nSlotByte);
  if( pHash->aSlot==0 ){
    sqlite3_free(pHash);
    *ppNew = 0;
    return SQLITE_NOMEM;
  }
  memset(pHash->aSlot, 0, nSlotByte);

  return SQLITE_OK;
}
/*
** Free a hash table object, including the entries it currently holds and
** its slot array. Passing a NULL pointer is a no-op.
*/
void sqlite3Fts5HashFree(Fts5Hash *pHash){
  if( pHash==0 ) return;
  sqlite3Fts5HashClear(pHash);
  sqlite3_free(pHash->aSlot);
  sqlite3_free(pHash);
}
/*
** Empty (but do not delete) a hash table.
**
** Every entry in the table is freed. Entries that share a slot are chained
** through Fts5HashEntry.pNext, so the whole chain must be walked - freeing
** only the head of each slot would leak all entries that collided with it.
** The slot array itself is retained so that the table may be reused.
*/
void sqlite3Fts5HashClear(Fts5Hash *pHash){
  int i;
  for(i=0; i<pHash->nSlot; i++){
    Fts5HashEntry *pEntry = pHash->aSlot[i];
    while( pEntry ){
      /* Read the link before the entry is released */
      Fts5HashEntry *pNext = pEntry->pNext;
      sqlite3_free(pEntry);
      pEntry = pNext;
    }
    pHash->aSlot[i] = 0;
  }
  pHash->nEntry = 0;
}
/*
** Return the index of the slot in pHash->aSlot[] that the n byte key at p
** belongs to. The key bytes are mixed into the hash value starting with
** the last byte and working back to the first.
*/
static unsigned int fts5HashKey(Fts5Hash *pHash, const char *p, int n){
  unsigned int uHash = 13;
  int iByte = n;
  while( iByte>0 ){
    iByte--;
    uHash = (uHash << 3) ^ uHash ^ p[iByte];
  }
  return (uHash % pHash->nSlot);
}
/*
** Copy the 32-bit integer passed as the second argument into buffer p, in
** the byte order of the host machine. Return the number of bytes written
** (always 4).
*/
static int fts5PutNativeInt(u8 *p, int i){
  const int nInt = sizeof(int);
  assert( nInt==4 );
  memcpy(p, &i, nInt);
  return nInt;
}
/*
** Read and return the 32-bit integer stored in buffer p in the byte order
** of the host machine, as written by fts5PutNativeInt().
*/
static int fts5GetNativeU32(u8 *p){
  int iVal;
  assert( sizeof(iVal)==4 );
  memcpy(&iVal, p, sizeof(iVal));
  return iVal;
}
/*
** Add the record (iRowid, iCol, iPos) to the data stored in hash table
** pHash for token (pToken/nToken). An entry for the token is created if
** one does not already exist.
**
** If iRowid differs from the rowid most recently written for the token, a
** new rowid record is started. If iCol is negative, no position is
** appended - only the rowid is recorded. Otherwise iCol must be greater
** than or equal to the column most recently written for the current rowid.
**
** The counter supplied to sqlite3Fts5HashNew() is incremented by the
** number of bytes of entry data added by this call.
**
** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
** fails.
*/
int sqlite3Fts5HashWrite(
  Fts5Hash *pHash,
  i64 iRowid,                     /* Rowid for this entry */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
){
  unsigned int iHash = fts5HashKey(pHash, pToken, nToken);
  Fts5HashEntry *p;
  u8 *pPtr;
  int nIncr = 0;                  /* Amount to increment (*pHash->pnByte) by */

  /* Attempt to locate an existing hash object. An entry whose key is
  ** nToken bytes long always has more than (sizeof(Fts5HashEntry)+nToken)
  ** bytes in use, as the key is followed by a nul-terminator and at least
  ** one byte of rowid. Testing this first guarantees that memcmp() and the
  ** terminator check never read past the bytes in use within the entry
  ** when the token is longer than the entry being inspected. */
  for(p=pHash->aSlot[iHash]; p; p=p->pNext){
    if( ((int)sizeof(Fts5HashEntry) + nToken)<p->nData
     && memcmp(p->zKey, pToken, nToken)==0
     && p->zKey[nToken]==0
    ){
      break;
    }
  }

  /* If an existing hash entry cannot be found, create a new one. */
  if( p==0 ){
    int nByte = sizeof(Fts5HashEntry) + nToken + 1 + 64;
    if( nByte<128 ) nByte = 128;

    p = (Fts5HashEntry*)sqlite3_malloc(nByte);
    if( !p ) return SQLITE_NOMEM;
    memset(p, 0, sizeof(Fts5HashEntry));
    p->nAlloc = nByte;
    memcpy(p->zKey, pToken, nToken);
    p->zKey[nToken] = '\0';

    /* The first rowid is written immediately after the key */
    p->iRowidOff = p->nData = nToken + 1 + sizeof(Fts5HashEntry);
    p->nData += sqlite3PutVarint(&((u8*)p)[p->nData], iRowid);
    p->iRowid = iRowid;
    p->pNext = pHash->aSlot[iHash];
    pHash->aSlot[iHash] = p;

    nIncr += p->nData;
  }

  /* Check there is enough space to append a new entry. Worst case scenario
  ** is:
  **
  **     + 4 bytes for the previous entry size field,
  **     + 9 bytes for a new rowid,
  **     + 1 byte for a "new column" byte,
  **     + 3 bytes for a new column number (16-bit max) as a varint,
  **     + 5 bytes for the new position offset (32-bit max).
  */
  if( (p->nAlloc - p->nData) < (4 + 9 + 1 + 3 + 5) ){
    int nNew = p->nAlloc * 2;
    Fts5HashEntry *pNew;
    Fts5HashEntry **pp;
    pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew);
    if( pNew==0 ) return SQLITE_NOMEM;
    pNew->nAlloc = nNew;

    /* The entry may have moved. Find the pointer within the collision
    ** chain that still refers to the old address and redirect it. */
    for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pNext);
    *pp = pNew;
    p = pNew;
  }
  pPtr = (u8*)p;

  /* From this point on nIncr tracks only the growth of p->nData */
  nIncr -= p->nData;

  /* If this is a new rowid, append the 4-byte size field for the previous
  ** entry, and the new rowid for this entry.  */
  if( iRowid!=p->iRowid ){
    p->nData += fts5PutNativeInt(&pPtr[p->nData], p->nData - p->iRowidOff);
    p->iRowidOff = p->nData;
    p->nData += sqlite3PutVarint(&pPtr[p->nData], iRowid);
    p->iCol = 0;
    p->iPos = 0;
    p->iRowid = iRowid;
  }

  if( iCol>=0 ){
    /* Append a new column value, if necessary */
    assert( iCol>=p->iCol );
    if( iCol!=p->iCol ){
      pPtr[p->nData++] = 0x01;
      p->nData += sqlite3PutVarint(&pPtr[p->nData], iCol);
      p->iCol = iCol;
      p->iPos = 0;
    }

    /* Append the new position offset, stored as a delta from the previous
    ** position in the same column, plus 2 */
    p->nData += sqlite3PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
    p->iPos = iPos;
  }
  nIncr += p->nData;

  *pHash->pnByte += nIncr;
  return SQLITE_OK;
}
/*
** Merge two linked lists of hash-entry objects, each already sorted in
** ascending key order, into a single sorted list. The elements are
** relinked in place through their pNext pointers. Return a pointer to the
** first element of the merged list, or NULL if both inputs are empty.
**
** Keys are compared byte by byte as unsigned values. The two lists are
** expected never to contain the same key.
*/
static Fts5HashEntry *fts5HashEntryMerge(
  Fts5HashEntry *pLeft,
  Fts5HashEntry *pRight
){
  Fts5HashEntry *pHead = 0;           /* First element of merged list */
  Fts5HashEntry **ppTail = &pHead;    /* Where to link the next element */
  Fts5HashEntry *pA = pLeft;
  Fts5HashEntry *pB = pRight;

  while( pA && pB ){
    Fts5HashEntry *pPick;             /* Whichever of pA/pB is smaller */
    int iByte;

    /* Find the first byte at which the two keys differ */
    for(iByte=0; pA->zKey[iByte]==pB->zKey[iByte]; iByte++);

    if( ((u8)pA->zKey[iByte])>((u8)pB->zKey[iByte]) ){
      pPick = pB;
      pB = pB->pNext;
    }else{
      pPick = pA;
      pA = pA->pNext;
    }
    *ppTail = pPick;
    ppTail = &pPick->pNext;
    *ppTail = 0;
  }

  /* At most one input still has elements. Append it as is. */
  *ppTail = (pA ? pA : pB);
  return pHead;
}
/*
** Remove every entry from hash table pHash and link them all into a single
** list sorted by key. On success *ppSorted is set to point to the first
** element of the list (or to NULL if the table was empty) and SQLITE_OK is
** returned. The hash table is left empty and it becomes the
** responsibility of the caller to free the elements of the returned list.
**
** If the temporary array used by the merge sort cannot be allocated,
** SQLITE_NOMEM is returned, *ppSorted is set to NULL and the hash table
** is not modified.
*/
static int fts5HashEntrySort(Fts5Hash *pHash, Fts5HashEntry **ppSorted){
  const int nMergeSlot = 32;
  const int nBucketByte = sizeof(Fts5HashEntry*) * nMergeSlot;
  Fts5HashEntry **aBucket;        /* Partially merged sub-lists */
  Fts5HashEntry *pMerged = 0;     /* Final sorted list */
  int iSlot;
  int iBucket;

  *ppSorted = 0;
  aBucket = sqlite3_malloc(nBucketByte);
  if( aBucket==0 ) return SQLITE_NOMEM;
  memset(aBucket, 0, nBucketByte);

  for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
    Fts5HashEntry *pEntry;
    while( (pEntry = pHash->aSlot[iSlot])!=0 ){
      /* Detach the entry from its collision chain */
      pHash->aSlot[iSlot] = pEntry->pNext;
      pEntry->pNext = 0;

      /* Merge it with each occupied bucket in turn, emptying the buckets
      ** as they are consumed, then store the result in the first free
      ** bucket encountered. */
      iBucket = 0;
      while( aBucket[iBucket] ){
        pEntry = fts5HashEntryMerge(pEntry, aBucket[iBucket]);
        aBucket[iBucket] = 0;
        iBucket++;
      }
      aBucket[iBucket] = pEntry;
    }
  }

  /* Combine whatever is left in the buckets into the final list */
  for(iBucket=0; iBucket<nMergeSlot; iBucket++){
    pMerged = fts5HashEntryMerge(pMerged, aBucket[iBucket]);
  }

  sqlite3_free(aBucket);
  *ppSorted = pMerged;
  return SQLITE_OK;
}
/*
** Visit the contents of hash table pHash in ascending key order, removing
** and freeing each entry as it is visited.
**
** For each term, xTerm() is invoked once with the term, then xEntry() once
** for each rowid stored for the term, then xTermDone(). The rowid records
** of a term are walked backwards using the stored size fields, so xEntry()
** sees the most recently written rowid first. The buffer passed to
** xEntry() is the data that follows the rowid varint within the record.
**
** As soon as any callback returns a value other than SQLITE_OK no further
** callbacks are made, but the remaining entries are still freed, and that
** value is returned. If the entries cannot be sorted because of an
** allocation failure, SQLITE_NOMEM is returned and the table is left as
** it was.
*/
int sqlite3Fts5HashIterate(
  Fts5Hash *pHash,
  void *pCtx,
  int (*xTerm)(void*, const char*, int),
  int (*xEntry)(void*, i64, const u8*, int),
  int (*xTermDone)(void*)
){
  Fts5HashEntry *pEntry = 0;      /* Entry currently being visited */
  Fts5HashEntry *pFollow;         /* Entry to visit after pEntry */
  int rc;

  rc = fts5HashEntrySort(pHash, &pEntry);
  if( rc!=SQLITE_OK ) return rc;

  for( ; pEntry; pEntry=pFollow){
    pFollow = pEntry->pNext;

    if( rc==SQLITE_OK ){
      u8 *aEntry = (u8*)pEntry;
      int nTerm = strlen(pEntry->zKey);
      int iFirst = sizeof(Fts5HashEntry) + nTerm + 1;  /* Offset of 1st rowid */
      int iCur = pEntry->iRowidOff;                    /* Offset of this rowid */
      int nChunk = pEntry->nData - pEntry->iRowidOff;  /* Size of this record */

      rc = xTerm(pCtx, pEntry->zKey, nTerm);
      while( rc==SQLITE_OK && iCur ){
        i64 iRowid;
        int nRowid;
        nRowid = getVarint(&aEntry[iCur], (u64*)&iRowid);
        rc = xEntry(pCtx, iRowid, &aEntry[iCur+nRowid], nChunk-nRowid);
        if( iCur==iFirst ){
          /* That was the first record written. All done. */
          iCur = 0;
        }else{
          /* The 4 bytes before this record hold the size of the previous
          ** one. Use it to step backwards. */
          nChunk = fts5GetNativeU32(&aEntry[iCur-sizeof(int)]);
          iCur = iCur - sizeof(int) - nChunk;
        }
      }
      if( rc==SQLITE_OK ){
        rc = xTermDone(pCtx);
      }
    }

    sqlite3_free(pEntry);
  }

  return rc;
}

View File

@ -17,7 +17,6 @@
*/
#include "fts5Int.h"
#include "fts3_hash.h"
/*
** Overview:
@ -276,8 +275,6 @@ typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5MultiSegIter Fts5MultiSegIter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5PendingDoclist Fts5PendingDoclist;
typedef struct Fts5PendingPoslist Fts5PendingPoslist;
typedef struct Fts5PosIter Fts5PosIter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
@ -300,7 +297,7 @@ struct Fts5Index {
** Variables related to the accumulation of tokens and doclists within the
** in-memory hash tables before they are flushed to disk.
*/
Fts3Hash *aHash; /* One hash for terms, one for each prefix */
Fts5Hash **apHash; /* Array of hash tables */
int nMaxPendingData; /* Max pending data before flush to disk */
int nPendingData; /* Current bytes of pending data */
i64 iWriteRowid; /* Rowid for current doc being written */
@ -347,26 +344,6 @@ struct Fts5Data {
int nRef; /* Ref count */
};
/*
** Before it is flushed to a level-0 segment, term data is collected in
** the hash tables in the Fts5Index.aHash[] array. Hash table keys are
** terms (or, for prefix indexes, term prefixes) and values are instances
** of type Fts5PendingDoclist.
**
** The term is stored in the same allocation as the structure, directly
** after it. New position lists are pushed onto the front of the pPoslist
** list, so the list runs from the most recently started rowid to the
** oldest.
*/
struct Fts5PendingDoclist {
u8 *pTerm; /* Term for this entry */
int nTerm; /* Bytes of data at pTerm */
Fts5PendingPoslist *pPoslist; /* Linked list of position lists */
int iCol; /* Column for last entry in first poslist of pPoslist */
int iPos; /* Pos value for last entry in first poslist of pPoslist */
Fts5PendingDoclist *pNext; /* Used during merge sort */
};
/*
** The position list accumulated for a single rowid of a single term. The
** buffer holds a sequence of varints.
*/
struct Fts5PendingPoslist {
i64 iRowid; /* Rowid for this doclist entry */
Fts5Buffer buf; /* Current doclist contents */
Fts5PendingPoslist *pNext; /* Previous poslist for same term */
};
/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the
@ -2458,18 +2435,6 @@ static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){
return (p->rc || pIter->chunk.pLeaf==0);
}
/*
** Allocate nByte bytes of memory using fts5IdxMalloc(), first adding nByte
** to the Fts5Index.nPendingData counter. The counter is incremented
** whether or not the allocation succeeds. This function should be used
** for all allocations that store pending-data within the in-memory hash
** tables.
*/
static void *fts5PendingMalloc(Fts5Index *p, int nByte){
  void *pNew;
  p->nPendingData += nByte;
  pNew = fts5IdxMalloc(p, nByte);
  return pNew;
}
/*
** Add an entry for (iRowid/iCol/iPos) to the doclist for (pToken/nToken)
** in hash table for index iIdx. If iIdx is zero, this is the main terms
@ -2485,78 +2450,11 @@ static void fts5AddTermToHash(
int iPos, /* Position of token within column */
const char *pToken, int nToken /* Token to add or remove to or from index */
){
Fts5Config *pConfig = p->pConfig;
Fts3Hash *pHash;
Fts5PendingDoclist *pDoclist;
Fts5PendingPoslist *pPoslist;
i64 iRowid = p->iWriteRowid; /* Rowid associated with these tokens */
/* If an error has already occured this call is a no-op. */
if( p->rc!=SQLITE_OK ) return;
/* Find the hash table to use. It has already been allocated. */
assert( iIdx<=pConfig->nPrefix );
assert( iIdx==0 || nToken==pConfig->aPrefix[iIdx-1] );
pHash = &p->aHash[iIdx];
/* Find the doclist to append to. Allocate a new doclist object if
** required. */
pDoclist = (Fts5PendingDoclist*)fts3HashFind(pHash, pToken, nToken);
if( pDoclist==0 ){
Fts5PendingDoclist *pDel;
pDoclist = fts5PendingMalloc(p, sizeof(Fts5PendingDoclist) + nToken);
if( pDoclist==0 ) return;
pDoclist->pTerm = (u8*)&pDoclist[1];
pDoclist->nTerm = nToken;
memcpy(pDoclist->pTerm, pToken, nToken);
pDel = fts3HashInsert(pHash, pDoclist->pTerm, nToken, pDoclist);
if( pDel ){
assert( pDoclist==pDel );
sqlite3_free(pDel);
p->rc = SQLITE_NOMEM;
return;
}
if( p->rc==SQLITE_OK ){
p->rc = sqlite3Fts5HashWrite(
p->apHash[iIdx], p->iWriteRowid, iCol, iPos, pToken, nToken
);
}
/* Find the poslist to append to. Allocate a new object if required. */
pPoslist = pDoclist->pPoslist;
if( pPoslist==0 || pPoslist->iRowid!=iRowid ){
pPoslist = fts5PendingMalloc(p, sizeof(Fts5PendingPoslist));
if( pPoslist==0 ) return;
pPoslist->pNext = pDoclist->pPoslist;
pPoslist->iRowid = iRowid;
pDoclist->pPoslist = pPoslist;
pDoclist->iCol = 0;
pDoclist->iPos = 0;
}
/* Append the values to the position list. */
if( iCol>=0 ){
p->nPendingData -= pPoslist->buf.nSpace;
if( iCol!=pDoclist->iCol ){
fts5BufferAppendVarint(&p->rc, &pPoslist->buf, 1);
fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iCol);
pDoclist->iCol = iCol;
pDoclist->iPos = 0;
}
fts5BufferAppendVarint(&p->rc, &pPoslist->buf, iPos + 2 - pDoclist->iPos);
p->nPendingData += pPoslist->buf.nSpace;
pDoclist->iPos = iPos;
}
}
/*
** Free the pending-doclist object passed as the only argument, along with
** every position list linked from it and the buffer owned by each.
*/
static void fts5FreePendingDoclist(Fts5PendingDoclist *p){
  Fts5PendingPoslist *pCur = p->pPoslist;
  while( pCur ){
    /* Save the link before the object that holds it is released */
    Fts5PendingPoslist *pFollow = pCur->pNext;
    fts5BufferFree(&pCur->buf);
    sqlite3_free(pCur);
    pCur = pFollow;
  }
  sqlite3_free(p);
}
/*
@ -2582,15 +2480,11 @@ void sqlite3Fts5IndexWrite(
if( p->rc!=SQLITE_OK ) return;
/* Allocate hash tables if they have not already been allocated */
if( p->aHash==0 ){
if( p->apHash==0 ){
int nHash = pConfig->nPrefix + 1;
p->aHash = (Fts3Hash*)sqlite3_malloc(sizeof(Fts3Hash) * nHash);
if( p->aHash==0 ){
p->rc = SQLITE_NOMEM;
}else{
for(i=0; i<nHash; i++){
fts3HashInit(&p->aHash[i], FTS3_HASH_STRING, 0);
}
p->apHash = (Fts5Hash**)fts5IdxMalloc(p, sizeof(Fts5Hash*) * nHash);
for(i=0; p->rc==SQLITE_OK && i<nHash; i++){
p->rc = sqlite3Fts5HashNew(&p->apHash[i], &p->nPendingData);
}
}
@ -2635,89 +2529,6 @@ static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
return 0;
}
/*
** Merge two linked lists of pending-doclist objects, each already sorted
** by term, into a single sorted list. Elements are relinked in place via
** their pNext pointers. Terms are ordered by memcmp() over their common
** prefix, with the shorter term first when one is a prefix of the other.
** Parameter p is not used.
*/
static Fts5PendingDoclist *fts5PendingMerge(
  Fts5Index *p,
  Fts5PendingDoclist *pLeft,
  Fts5PendingDoclist *pRight
){
  Fts5PendingDoclist *pHead = 0;          /* First element of merged list */
  Fts5PendingDoclist **ppTail = &pHead;   /* Where to link next element */
  Fts5PendingDoclist *pA = pLeft;
  Fts5PendingDoclist *pB = pRight;

  while( pA && pB ){
    Fts5PendingDoclist *pPick;            /* Whichever of pA/pB is smaller */
    int nCommon = MIN(pA->nTerm, pB->nTerm);
    int cmp = memcmp(pA->pTerm, pB->pTerm, nCommon);
    if( cmp==0 ) cmp = pA->nTerm - pB->nTerm;

    if( cmp>0 ){
      pPick = pB;
      pB = pB->pNext;
    }else{
      pPick = pA;
      pA = pA->pNext;
    }
    *ppTail = pPick;
    ppTail = &pPick->pNext;
    *ppTail = 0;
  }

  /* At most one input still has elements. Append it as is. */
  *ppTail = (pA ? pA : pB);
  return pHead;
}
/*
** Extract all tokens from hash table iHash and link them into a list
** in sorted order. The hash table is cleared before returning. It is
** the responsibility of the caller to free the elements of the returned
** list.
**
** NULL is returned if the temporary merge array cannot be allocated, in
** which case the hash table is left untouched.
**
** NOTE(review): the merge array is used without being cleared here, so
** fts5IdxMalloc() is presumably expected to return zeroed memory and to
** record any failure in Fts5Index.rc - confirm against its definition.
*/
static Fts5PendingDoclist *fts5PendingList(Fts5Index *p, int iHash){
  const int nMergeSlot = 32;
  Fts3Hash *pHash;
  Fts3HashElem *pElem;                /* Iterator variable */
  Fts5PendingDoclist **aBucket;       /* Partially merged sub-lists */
  Fts5PendingDoclist *pSorted = 0;    /* Final sorted list */
  int iBucket;

  aBucket = fts5IdxMalloc(p, sizeof(Fts5PendingDoclist*) * nMergeSlot);
  if( aBucket==0 ) return 0;

  pHash = &p->aHash[iHash];
  pElem = fts3HashFirst(pHash);
  while( pElem ){
    Fts5PendingDoclist *pDoclist = (Fts5PendingDoclist*)fts3HashData(pElem);
    assert( pDoclist->pNext==0 );

    /* Merge with each occupied bucket in turn, emptying the buckets as
    ** they are consumed, then store the result in the first free bucket. */
    iBucket = 0;
    while( aBucket[iBucket] ){
      pDoclist = fts5PendingMerge(p, pDoclist, aBucket[iBucket]);
      aBucket[iBucket] = 0;
      iBucket++;
    }
    aBucket[iBucket] = pDoclist;

    pElem = fts3HashNext(pElem);
  }

  /* Combine whatever is left in the buckets into the final list */
  for(iBucket=0; iBucket<nMergeSlot; iBucket++){
    pSorted = fts5PendingMerge(p, pSorted, aBucket[iBucket]);
  }

  sqlite3_free(aBucket);
  fts3HashClear(pHash);
  return pSorted;
}
/*
** Discard all data currently cached in the hash-tables.
*/
@ -2725,13 +2536,7 @@ static void fts5IndexDiscardData(Fts5Index *p){
Fts5Config *pConfig = p->pConfig;
int i;
for(i=0; i<=pConfig->nPrefix; i++){
Fts3Hash *pHash = &p->aHash[i];
Fts3HashElem *pE; /* Iterator variable */
for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){
Fts5PendingDoclist *pDoclist = (Fts5PendingDoclist*)fts3HashData(pE);
fts5FreePendingDoclist(pDoclist);
}
fts3HashClear(pHash);
sqlite3Fts5HashClear(p->apHash[i]);
}
p->nPendingData = 0;
}
@ -3012,44 +2817,6 @@ static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){
fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0);
}
/*
** Write the contents of pending-doclist object pDoclist to writer pWriter:
** the term, then for each position list its rowid, its size in bytes and
** each of the varints it contains, and finally a doclist terminator.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5WritePendingDoclist(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Write to this writer object */
  Fts5PendingDoclist *pDoclist    /* Doclist to write to pWriter */
){
  Fts5PendingPoslist *pEntry;     /* Used to iterate through the doclist */

  /* Append the term */
  fts5WriteAppendTerm(p, pWriter, pDoclist->nTerm, pDoclist->pTerm);

  /* Append the position list for each rowid */
  for(pEntry=pDoclist->pPoslist; pEntry; pEntry=pEntry->pNext){
    int iOff;

    /* The rowid, followed by the size of the position list in bytes */
    fts5WriteAppendRowid(p, pWriter, pEntry->iRowid);
    fts5WriteAppendPoslistInt(p, pWriter, pEntry->buf.n);

    /* Copy the position list to the output segment, one varint at a time */
    for(iOff=0; iOff<pEntry->buf.n; ){
      int iVal;
      iOff += getVarint32(&pEntry->buf.p[iOff], iVal);
      fts5WriteAppendPoslistInt(p, pWriter, iVal);
    }
  }

  /* Write the doclist terminator */
  fts5WriteAppendZerobyte(p, pWriter);
}
/*
** Flush any data cached by the writer object to the database. Free any
** allocations associated with the writer.
@ -3386,6 +3153,53 @@ static void fts5IndexWork(
}
}
/*
** Context object handed to the fts5FlushXXX() callbacks by way of the
** pCtx argument of sqlite3Fts5HashIterate() while a hash table is being
** written out.
*/
typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
Fts5Index *pIdx; /* Index object; passed to each fts5WriteXXX() call */
Fts5SegWriter writer; /* Writer that receives the terms and entries */
};
/*
** Callback for sqlite3Fts5HashIterate(), invoked once at the start of each
** term. pCtx points to an Fts5FlushCtx. The term is appended to the
** segment writer.
**
** fts5WriteAppendTerm() returns void, so any failure is visible only via
** the Fts5Index.rc error code. That code is returned so that the iteration
** stops making callbacks once an error has been recorded.
*/
static int fts5FlushNewTerm(void *pCtx, const char *zTerm, int nTerm){
  Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
  fts5WriteAppendTerm(p->pIdx, &p->writer, nTerm, (const u8*)zTerm);
  return p->pIdx->rc;
}
/*
** Callback for sqlite3Fts5HashIterate(), invoked once after the last entry
** of each term. pCtx points to an Fts5FlushCtx. The doclist terminator is
** appended to the segment writer.
**
** fts5WriteAppendZerobyte() returns void, so any failure is visible only
** via the Fts5Index.rc error code. That code is returned so that the
** iteration stops making callbacks once an error has been recorded.
*/
static int fts5FlushTermDone(void *pCtx){
  Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
  /* Write the doclist terminator */
  fts5WriteAppendZerobyte(p->pIdx, &p->writer);
  return p->pIdx->rc;
}
/*
** Callback for sqlite3Fts5HashIterate(), invoked once for each rowid of
** the current term. pCtx points to an Fts5FlushCtx. The rowid, the size
** of the position list in bytes and then each varint of the nPoslist byte
** position list at aPoslist are appended to the segment writer.
**
** The fts5WriteXXX() helpers return void, so any failure is visible only
** via the Fts5Index.rc error code. That code is returned so that the
** iteration stops making callbacks once an error has been recorded.
*/
static int fts5FlushNewEntry(
  void *pCtx,
  i64 iRowid,
  const u8 *aPoslist,
  int nPoslist
){
  Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
  int i = 0;

  /* Append the rowid itself */
  fts5WriteAppendRowid(p->pIdx, &p->writer, iRowid);

  /* Append the size of the position list in bytes */
  fts5WriteAppendPoslistInt(p->pIdx, &p->writer, nPoslist);

  /* Copy the position list to the output segment */
  while( i<nPoslist ){
    int iVal;
    i += getVarint32(&aPoslist[i], iVal);
    fts5WriteAppendPoslistInt(p->pIdx, &p->writer, iVal);
  }

  return p->pIdx->rc;
}
/*
** Flush the contents of in-memory hash table iHash to a new level-0
** segment on disk. Also update the corresponding structure record.
@ -3404,24 +3218,19 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){
iSegid = fts5AllocateSegid(p, pStruct);
if( iSegid ){
Fts5SegWriter writer;
Fts5PendingDoclist *pList;
Fts5PendingDoclist *pIter;
Fts5PendingDoclist *pNext;
Fts5StructureSegment *pSeg; /* New segment within pStruct */
int nHeight; /* Height of new segment b-tree */
int rc;
Fts5FlushCtx ctx;
pList = fts5PendingList(p, iHash);
assert( pList!=0 || p->rc!=SQLITE_OK );
fts5WriteInit(p, &writer, iHash, iSegid);
fts5WriteInit(p, &ctx.writer, iHash, iSegid);
ctx.pIdx = p;
for(pIter=pList; pIter; pIter=pNext){
pNext = pIter->pNext;
fts5WritePendingDoclist(p, &writer, pIter);
fts5FreePendingDoclist(pIter);
}
fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);
rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx,
fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone
);
if( p->rc==SQLITE_OK ) p->rc = rc;
fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast);
/* Edit the Fts5Structure and write it back to the database. */
if( pStruct->nLevel==0 ){
@ -3452,7 +3261,7 @@ static void fts5IndexFlush(Fts5Index *p){
/* If an error has already occured this call is a no-op. */
if( p->rc!=SQLITE_OK || p->nPendingData==0 ) return;
assert( p->aHash );
assert( p->apHash );
/* Flush the terms and each prefix index to disk */
for(i=0; i<=pConfig->nPrefix; i++){
@ -3555,7 +3364,13 @@ int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy){
assert( p->pReader==0 );
sqlite3_finalize(p->pWriter);
sqlite3_finalize(p->pDeleter);
sqlite3_free(p->aHash);
if( p->apHash ){
int i;
for(i=0; i<=p->pConfig->nPrefix; i++){
sqlite3Fts5HashFree(p->apHash[i]);
}
sqlite3_free(p->apHash);
}
sqlite3_free(p->zDataTbl);
sqlite3_free(p);
return rc;
@ -4315,7 +4130,7 @@ static void fts5SetupPrefixIter(
if( aBuf && pStruct ){
Fts5DoclistIter *pDoclist;
int i;
i64 iLastRowid;
i64 iLastRowid = 0;
Fts5MultiSegIter *p1 = 0; /* Iterator used to gather data from index */
Fts5Buffer doclist;

View File

@ -77,6 +77,7 @@ LIBOBJ += fts5_aux.o
LIBOBJ += fts5_buffer.o
LIBOBJ += fts5_config.o
LIBOBJ += fts5_expr.o
LIBOBJ += fts5_hash.o
LIBOBJ += fts5_index.o
LIBOBJ += fts5_storage.o
LIBOBJ += fts5parse.o
@ -232,6 +233,7 @@ SRC += \
$(TOP)/ext/fts5/fts5.c \
$(TOP)/ext/fts5/fts5_config.c \
$(TOP)/ext/fts5/fts5_expr.c \
$(TOP)/ext/fts5/fts5_hash.c \
$(TOP)/ext/fts5/fts5_index.c \
fts5parse.c \
$(TOP)/ext/fts5/fts5_storage.c
@ -599,6 +601,9 @@ fts5_config.o: $(TOP)/ext/fts5/fts5_config.c $(HDR) $(EXTHDR)
fts5_expr.o: $(TOP)/ext/fts5/fts5_expr.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_expr.c
fts5_hash.o: $(TOP)/ext/fts5/fts5_hash.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_hash.c
fts5_index.o: $(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_index.c

View File

@ -1,5 +1,5 @@
C Fix\san\suninitialized\svariable\scausing\sa\sproblem\sduring\sfts5\stable\sinitialization.
D 2014-08-09T18:22:59.679
C Replace\sthe\shash\stable\sborrowed\sfrom\sfts3.
D 2014-08-11T19:44:52.686
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -105,12 +105,13 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368
F ext/fts5/fts5.c 15e585ed0194f94a1da360808f29184f9d44554c
F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a
F ext/fts5/fts5Int.h 410001da21bcc3d09b4290d4858352d0985ac7a6
F ext/fts5/fts5Int.h f17a25546d598fdc5cc47f576d38063fd9290963
F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e
F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00
F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710
F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24
F ext/fts5/fts5_index.c 75b2ebfa97ad6054bba98cb923cd2d3c6cc5b112
F ext/fts5/fts5_hash.c 2af412d00f65ad427f18acbe421c113413cdef06
F ext/fts5/fts5_index.c ccef8703b6228a39090b0a03b83f163e69627ff2
F ext/fts5/fts5_storage.c fa3c8fc4766d850a4977bf1d4b71c37e7b07ab8b
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
@ -156,7 +157,7 @@ F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x
F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8
F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60
F main.mk 8118631727a27fa88eb38a07ac3b86ecb86e9eb0
F main.mk c4fff232b880b91bf665cd2951465de61178e444
F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea
F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5
F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83
@ -1162,7 +1163,7 @@ F tool/genfkey.test 4196a8928b78f51d54ef58e99e99401ab2f0a7e5
F tool/getlock.c f4c39b651370156cae979501a7b156bdba50e7ce
F tool/lemon.c 3ff0fec22f92dfb54e62eeb48772eddffdbeb0d6
F tool/lempar.c 01ca97f87610d1dac6d8cd96ab109ab1130e76dc
F tool/loadfts.c 3bdd46090112c84df44a4fbae740af3836108b3f
F tool/loadfts.c b5b3206ddd58d89ec8d54038c784bcadd6195915
F tool/logest.c eef612f8adf4d0993dafed0416064cf50d5d33c6
F tool/mkautoconfamal.sh f8d8dbf7d62f409ebed5134998bf5b51d7266383
F tool/mkkeywordhash.c dfff09dbbfaf950e89af294f48f902181b144670
@ -1201,7 +1202,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 2821825f7a481755a333dcdcad780b3e24448f20
R c0a232bfa9626e6e9a9c306fc05ca763
P a14fa876f0eb66028e302b908967cc4a05ede9fc
R b81a5fabd4e838059b5d12635ffcd939
U dan
Z 9113dc9c4d427c4fad9a129f5cfa7a9b
Z 39c621bf94a400035f58731d1ee0f6cd

View File

@ -1 +1 @@
a14fa876f0eb66028e302b908967cc4a05ede9fc
617e2fac1c128212254f71b1a8fddaf0d1d90262

View File

@ -69,6 +69,7 @@ static void showHelp(const char *zArgv0){
" -fts [345] FTS version to use (default=5)\n"
" -idx [01] Create a mapping from filename to rowid (default=0)\n"
" -dir <path> Root of directory tree to load data from (default=.)\n"
" -trans <integer> Number of inserts per transaction (default=1)\n"
, zArgv0
);
exit(1);
@ -96,6 +97,7 @@ static void sqlite_error_out(const char *zText, sqlite3 *db){
*/
/*
** Context passed to visit_file() for each file found by traverse().
*/
typedef struct VisitContext VisitContext;
struct VisitContext {
int nRowPerTrans; /* Value of -trans option. If >0, visit_file() runs "COMMIT ; BEGIN" whenever the last insert rowid is a multiple of it */
sqlite3 *db; /* Database handle */
sqlite3_stmt *pInsert; /* INSERT INTO fts VALUES(readtext(:1)) */
};
@ -112,7 +114,13 @@ void visit_file(void *pCtx, const char *zPath){
sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC);
sqlite3_step(p->pInsert);
rc = sqlite3_reset(p->pInsert);
if( rc!=SQLITE_OK ) sqlite_error_out("insert", p->db);
if( rc!=SQLITE_OK ){
sqlite_error_out("insert", p->db);
}else if( p->nRowPerTrans>0
&& (sqlite3_last_insert_rowid(p->db) % p->nRowPerTrans)==0
){
sqlite3_exec(p->db, "COMMIT ; BEGIN", 0, 0, 0);
}
}
/*
@ -150,6 +158,7 @@ int main(int argc, char **argv){
const char *zDir = "."; /* Directory to scan */
int i;
int rc;
int nRowPerTrans = 0;
sqlite3 *db;
char *zSql;
VisitContext sCtx;
@ -163,6 +172,9 @@ int main(int argc, char **argv){
iFts = atoi(zArg);
if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]);
}
if( strcmp(zOpt, "-trans")==0 ){
nRowPerTrans = atoi(zArg);
}
else if( strcmp(zOpt, "-idx")==0 ){
bMap = atoi(zArg);
if( bMap!=0 && bMap!=1 ) showHelp(argv[0]);
@ -189,13 +201,16 @@ int main(int argc, char **argv){
/* Compile the INSERT statement to write data to the FTS table. */
memset(&sCtx, 0, sizeof(VisitContext));
sCtx.db = db;
sCtx.nRowPerTrans = nRowPerTrans;
rc = sqlite3_prepare_v2(db,
"INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0
);
if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db);
/* Load all files in the directory hierarchy into the FTS table. */
if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0);
traverse(zDir, (void*)&sCtx, visit_file);
if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0);
/* Clean up and exit. */
sqlite3_finalize(sCtx.pInsert);