mirror of https://github.com/sqlite/sqlite
Move to an O(NlogN) algorithm for the priority queue. An insertion sort
was way too slow. FossilOrigin-Name: 7958cbba736a599c1293b06602eec43dfe4fd7d1
This commit is contained in:
parent
f938a87cfd
commit
2a0e00b436
12
manifest
12
manifest
|
@ -1,5 +1,5 @@
|
|||
C Add\ssupport\sfor\srowid.
|
||||
D 2011-03-29T23:41:31.447
|
||||
C Move\sto\san\sO(NlogN)\salgorithm\sfor\sthe\spriority\squeue.\s\sAn\sinsertion\ssort\nwas\sway\stoo\sslow.
|
||||
D 2011-03-30T01:43:00.780
|
||||
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
||||
F Makefile.in 6c96e694f446500449f683070b906de9fce17b88
|
||||
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
||||
|
@ -202,7 +202,7 @@ F src/test_config.c 62f0f8f934b1d5c7e4cd4f506ae453a1117b47d7
|
|||
F src/test_demovfs.c 0aed671636735116fc872c5b03706fd5612488b5
|
||||
F src/test_devsym.c e7498904e72ba7491d142d5c83b476c4e76993bc
|
||||
F src/test_func.c cbdec5cededa0761daedde5baf06004a9bf416b5
|
||||
F src/test_fuzzer.c dcb1e78badcf6f469ae386ecbed0e287920699c6
|
||||
F src/test_fuzzer.c edc2aaa0f75ce49efef39bcd2df45138479b0992
|
||||
F src/test_hexio.c 1237f000ec7a491009b1233f5c626ea71bce1ea2
|
||||
F src/test_init.c 5d624ffd0409d424cf9adbfe1f056b200270077c
|
||||
F src/test_intarray.c d879bbf8e4ce085ab966d1f3c896a7c8b4f5fc99
|
||||
|
@ -918,7 +918,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
|
|||
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
|
||||
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
|
||||
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
|
||||
P dd41155bc7459cafc1a2d5c75233193abfbac05d
|
||||
R 580275be924edc1c171a4ef2518a45c4
|
||||
P 2cf4158ff051916717fc2c0f4b6332d5f6ea6e3d
|
||||
R 9250685bf3b3b21491f862d4fc8952d0
|
||||
U drh
|
||||
Z c835c92c0dd7f7913ad85caf0a4ef7ac
|
||||
Z 0b9911f371ac67382b47460caefb0a30
|
||||
|
|
|
@ -1 +1 @@
|
|||
2cf4158ff051916717fc2c0f4b6332d5f6ea6e3d
|
||||
7958cbba736a599c1293b06602eec43dfe4fd7d1
|
|
@ -66,6 +66,7 @@ struct fuzzer_stem {
|
|||
const fuzzer_rule *pRule; /* Current rule to apply */
|
||||
int n; /* Apply pRule at this character offset */
|
||||
fuzzer_cost rBaseCost; /* Base cost of getting to zBasis */
|
||||
fuzzer_cost rCostX; /* Precomputed rBaseCost + pRule->rCost */
|
||||
fuzzer_stem *pNext; /* Next stem in rCost order */
|
||||
fuzzer_stem *pHash; /* Next stem with same hash on zBasis */
|
||||
};
|
||||
|
@ -82,6 +83,7 @@ struct fuzzer_vtab {
|
|||
};
|
||||
|
||||
#define FUZZER_HASH 4001 /* Hash table size */
|
||||
#define FUZZER_NQUEUE 20 /* Number of slots on the stem queue */
|
||||
|
||||
/* A fuzzer cursor object */
|
||||
struct fuzzer_cursor {
|
||||
|
@ -89,10 +91,13 @@ struct fuzzer_cursor {
|
|||
sqlite3_int64 iRowid; /* The rowid of the current word */
|
||||
fuzzer_vtab *pVtab; /* The virtual table this cursor belongs to */
|
||||
fuzzer_cost rLimit; /* Maximum cost of any term */
|
||||
fuzzer_stem *pStem; /* Sorted list of stems for generating new terms */
|
||||
fuzzer_stem *pStem; /* Stem with smallest rCostX */
|
||||
fuzzer_stem *pDone; /* Stems already processed to completion */
|
||||
fuzzer_stem *aQueue[FUZZER_NQUEUE]; /* Queue of stems with higher rCostX */
|
||||
int mxQueue; /* Largest used index in aQueue[] */
|
||||
char *zBuf; /* Temporary use buffer */
|
||||
int nBuf; /* Bytes allocated for zBuf */
|
||||
int nStem; /* Number of stems allocated */
|
||||
fuzzer_rule nullRule; /* Null rule used first */
|
||||
fuzzer_stem *apHash[FUZZER_HASH]; /* Hash of previously generated terms */
|
||||
};
|
||||
|
@ -205,23 +210,35 @@ static int fuzzerOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
|
|||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Free all stems in a list.
|
||||
*/
|
||||
static void fuzzerClearStemList(fuzzer_stem *pStem){
|
||||
while( pStem ){
|
||||
fuzzer_stem *pNext = pStem->pNext;
|
||||
sqlite3_free(pStem);
|
||||
pStem = pNext;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Free up all the memory allocated by a cursor. Set it rLimit to 0
|
||||
** to indicate that it is at EOF.
|
||||
*/
|
||||
static void fuzzerClearCursor(fuzzer_cursor *pCur, int clearHash){
|
||||
if( pCur->pStem==0 && pCur->pDone==0 ) clearHash = 0;
|
||||
do{
|
||||
while( pCur->pStem ){
|
||||
fuzzer_stem *pStem = pCur->pStem;
|
||||
pCur->pStem = pStem->pNext;
|
||||
sqlite3_free(pStem);
|
||||
}
|
||||
pCur->pStem = pCur->pDone;
|
||||
pCur->pDone = 0;
|
||||
}while( pCur->pStem );
|
||||
int i;
|
||||
fuzzerClearStemList(pCur->pStem);
|
||||
fuzzerClearStemList(pCur->pDone);
|
||||
for(i=0; i<FUZZER_NQUEUE; i++) fuzzerClearStemList(pCur->aQueue[i]);
|
||||
pCur->rLimit = (fuzzer_cost)0;
|
||||
if( clearHash ) memset(pCur->apHash, 0, sizeof(pCur->apHash));
|
||||
if( clearHash && pCur->nStem ){
|
||||
pCur->mxQueue = 0;
|
||||
pCur->pStem = 0;
|
||||
pCur->pDone = 0;
|
||||
memset(pCur->aQueue, 0, sizeof(pCur->aQueue));
|
||||
memset(pCur->apHash, 0, sizeof(pCur->apHash));
|
||||
}
|
||||
pCur->nStem = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -280,7 +297,7 @@ static unsigned int fuzzerHash(const char *z){
|
|||
** Current cost of a stem
|
||||
*/
|
||||
static fuzzer_cost fuzzerCost(fuzzer_stem *pStem){
  /* Recompute the total cost (base cost of the stem plus the cost of the
  ** rule currently selected) and store it in pStem->rCostX so that code
  ** which orders stems can read the cached value directly.  The stored
  ** value is also the return value. */
  pStem->rCostX = pStem->rBaseCost + pStem->pRule->rCost;
  return pStem->rCostX;
}
|
||||
|
||||
#if 0
|
||||
|
@ -304,7 +321,7 @@ static void fuzzerStemPrint(
|
|||
if( fuzzerRender(pStem, &zBuf, &nBuf)!=SQLITE_OK ) return;
|
||||
fprintf(stderr, "%s[%s](%d)-->{%s}(%d)%s",
|
||||
zPrefix,
|
||||
pStem->zBasis, pStem->rBaseCost, zBuf, fuzzerCost(pStem),
|
||||
pStem->zBasis, pStem->rBaseCost, zBuf, pStem->,
|
||||
zSuffix
|
||||
);
|
||||
sqlite3_free(zBuf);
|
||||
|
@ -349,6 +366,7 @@ static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){
|
|||
int rc = fuzzerSeen(pCur, pStem);
|
||||
if( rc<0 ) return -1;
|
||||
if( rc==0 ){
|
||||
fuzzerCost(pStem);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
@ -361,31 +379,106 @@ static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){
|
|||
}
|
||||
|
||||
/*
|
||||
** Insert pNew into the list at pList. Return a pointer to the new
|
||||
** The two input stem lists are both sorted in order of increasing
|
||||
** rCostX. Merge them together into a single list, sorted by rCostX, and
|
||||
** return a pointer to the head of that new list.
|
||||
*/
|
||||
static fuzzer_stem *fuzzerMergeStems(fuzzer_stem *pA, fuzzer_stem *pB){
|
||||
fuzzer_stem head;
|
||||
fuzzer_stem *pTail;
|
||||
|
||||
pTail = &head;
|
||||
while( pA && pB ){
|
||||
if( pA->rCostX<=pB->rCostX ){
|
||||
pTail->pNext = pA;
|
||||
pTail = pA;
|
||||
pA = pA->pNext;
|
||||
}else{
|
||||
pTail->pNext = pB;
|
||||
pTail = pB;
|
||||
pB = pB->pNext;
|
||||
}
|
||||
}
|
||||
if( pA==0 ){
|
||||
pTail->pNext = pB;
|
||||
}else{
|
||||
pTail->pNext = pA;
|
||||
}
|
||||
return head.pNext;
|
||||
}
|
||||
|
||||
/*
|
||||
** Load pCur->pStem with the lowest-cost stem. Return a pointer
|
||||
** to the lowest-cost stem.
|
||||
*/
|
||||
static fuzzer_stem *fuzzerLowestCostStem(fuzzer_cursor *pCur){
|
||||
fuzzer_stem *pBest, *pX;
|
||||
int iBest;
|
||||
int i;
|
||||
|
||||
if( pCur->pStem==0 ){
|
||||
iBest = -1;
|
||||
pBest = 0;
|
||||
for(i=0; i<=pCur->mxQueue; i++){
|
||||
pX = pCur->aQueue[i];
|
||||
if( pX==0 ) continue;
|
||||
if( pBest==0 || pBest->rCostX>pX->rCostX ){
|
||||
pBest = pX;
|
||||
iBest = i;
|
||||
}
|
||||
}
|
||||
if( pBest ){
|
||||
pCur->aQueue[iBest] = pBest->pNext;
|
||||
pBest->pNext = 0;
|
||||
pCur->pStem = pBest;
|
||||
}
|
||||
}
|
||||
return pCur->pStem;
|
||||
}
|
||||
|
||||
/*
|
||||
** Insert pNew into queue of pending stems. Then find the stem
|
||||
** with the lowest rCostX and move it into pCur->pStem.
|
||||
** list. The insert is done such the pNew is in the correct order
|
||||
** according to fuzzer_stem.zBaseCost+fuzzer_stem.pRule->rCost.
|
||||
*/
|
||||
static fuzzer_stem *fuzzerInsert(fuzzer_stem *pList, fuzzer_stem *pNew){
|
||||
fuzzer_cost c1;
|
||||
static fuzzer_stem *fuzzerInsert(fuzzer_cursor *pCur, fuzzer_stem *pNew){
|
||||
fuzzer_stem *pX;
|
||||
int i;
|
||||
|
||||
if( pList==0 ){
|
||||
/* If pCur->pStem exists and is greater than pNew, then make pNew
|
||||
** the new pCur->pStem and insert the old pCur->pStem instead.
|
||||
*/
|
||||
if( (pX = pCur->pStem)!=0 && pX->rCostX>pNew->rCostX ){
|
||||
pNew->pNext = 0;
|
||||
return pNew;
|
||||
pCur->pStem = pNew;
|
||||
pNew = pX;
|
||||
}
|
||||
c1 = fuzzerCost(pNew);
|
||||
if( c1 <= fuzzerCost(pList) ){
|
||||
pNew->pNext = pList;
|
||||
return pNew;
|
||||
|
||||
/* Insert the new value */
|
||||
pNew->pNext = 0;
|
||||
pX = pNew;
|
||||
for(i=0; i<=pCur->mxQueue; i++){
|
||||
if( pCur->aQueue[i] ){
|
||||
pX = fuzzerMergeStems(pX, pCur->aQueue[i]);
|
||||
pCur->aQueue[i] = 0;
|
||||
}else{
|
||||
fuzzer_stem *pPrev;
|
||||
pPrev = pList;
|
||||
while( pPrev->pNext && fuzzerCost(pPrev->pNext)<c1 ){
|
||||
pPrev = pPrev->pNext;
|
||||
pCur->aQueue[i] = pX;
|
||||
break;
|
||||
}
|
||||
pNew->pNext = pPrev->pNext;
|
||||
pPrev->pNext = pNew;
|
||||
return pList;
|
||||
}
|
||||
if( i>pCur->mxQueue ){
|
||||
if( i<FUZZER_NQUEUE ){
|
||||
pCur->mxQueue = i;
|
||||
pCur->aQueue[i] = pX;
|
||||
}else{
|
||||
assert( pCur->mxQueue==FUZZER_NQUEUE-1 );
|
||||
pX = fuzzerMergeStems(pX, pCur->aQueue[FUZZER_NQUEUE-1]);
|
||||
pCur->aQueue[FUZZER_NQUEUE-1] = pX;
|
||||
}
|
||||
}
|
||||
|
||||
return fuzzerLowestCostStem(pCur);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -408,10 +501,11 @@ static fuzzer_stem *fuzzerNewStem(
|
|||
memcpy(pNew->zBasis, zWord, pNew->nBasis+1);
|
||||
pNew->pRule = pCur->pVtab->pRule;
|
||||
pNew->n = -1;
|
||||
pNew->rBaseCost = rBaseCost;
|
||||
pNew->rBaseCost = pNew->rCostX = rBaseCost;
|
||||
h = fuzzerHash(pNew->zBasis);
|
||||
pNew->pHash = pCur->apHash[h];
|
||||
pCur->apHash[h] = pNew;
|
||||
pCur->nStem++;
|
||||
return pNew;
|
||||
}
|
||||
|
||||
|
@ -430,17 +524,16 @@ static int fuzzerNext(sqlite3_vtab_cursor *cur){
|
|||
** a new stem and insert the new stem into the priority queue.
|
||||
*/
|
||||
pStem = pCur->pStem;
|
||||
if( fuzzerCost(pStem)>0 ){
|
||||
if( pStem->rCostX>0 ){
|
||||
rc = fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf);
|
||||
if( rc==SQLITE_NOMEM ) return SQLITE_NOMEM;
|
||||
pNew = fuzzerNewStem(pCur, pCur->zBuf, fuzzerCost(pStem));
|
||||
pNew = fuzzerNewStem(pCur, pCur->zBuf, pStem->rCostX);
|
||||
if( pNew ){
|
||||
if( fuzzerAdvance(pCur, pNew)==0 ){
|
||||
pNew->pNext = pCur->pDone;
|
||||
pCur->pDone = pNew;
|
||||
}else{
|
||||
pCur->pStem = fuzzerInsert(pStem, pNew);
|
||||
if( pCur->pStem==pNew ){
|
||||
if( fuzzerInsert(pCur, pNew)==pNew ){
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
|
@ -454,17 +547,18 @@ static int fuzzerNext(sqlite3_vtab_cursor *cur){
|
|||
*/
|
||||
while( (pStem = pCur->pStem)!=0 ){
|
||||
if( fuzzerAdvance(pCur, pStem) ){
|
||||
pCur->pStem = pStem = fuzzerInsert(pStem->pNext, pStem);
|
||||
pCur->pStem = 0;
|
||||
pStem = fuzzerInsert(pCur, pStem);
|
||||
if( (rc = fuzzerSeen(pCur, pStem))!=0 ){
|
||||
if( rc<0 ) return SQLITE_NOMEM;
|
||||
continue;
|
||||
}
|
||||
return SQLITE_OK; /* New word found */
|
||||
}
|
||||
pCur->pStem = pStem->pNext;
|
||||
pCur->pStem = 0;
|
||||
pStem->pNext = pCur->pDone;
|
||||
pCur->pDone = pStem;
|
||||
if( pCur->pStem ){
|
||||
if( fuzzerLowestCostStem(pCur) ){
|
||||
rc = fuzzerSeen(pCur, pCur->pStem);
|
||||
if( rc<0 ) return SQLITE_NOMEM;
|
||||
if( rc==0 ){
|
||||
|
@ -531,7 +625,7 @@ static int fuzzerColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){
|
|||
sqlite3_result_text(ctx, pCur->zBuf, -1, SQLITE_TRANSIENT);
|
||||
}else if( i==1 ){
|
||||
/* the "distance" column */
|
||||
sqlite3_result_int(ctx, fuzzerCost(pCur->pStem));
|
||||
sqlite3_result_int(ctx, pCur->pStem->rCostX);
|
||||
}else{
|
||||
/* All other columns are NULL */
|
||||
sqlite3_result_null(ctx);
|
||||
|
|
Loading…
Reference in New Issue