Enhance the fuzzer virtual table to support multiple rule sets.
FossilOrigin-Name: a82938731b21d6166d7d482994cb065c8b725083
This commit is contained in:
parent
99b18401ab
commit
5beafd6ab7
14
manifest
14
manifest
@ -1,5 +1,5 @@
|
||||
C Merge\sthe\snon-blocking\sROLLBACK\schanges\sinto\strunk.
|
||||
D 2012-02-13T21:24:03.262
|
||||
C Enhance\sthe\sfuzzer\svirtual\stable\sto\ssupport\smultiple\srule\ssets.
|
||||
D 2012-02-14T15:34:50.192
|
||||
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
||||
F Makefile.in 3f79a373e57c3b92dabf76f40b065e719d31ac34
|
||||
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
||||
@ -206,7 +206,7 @@ F src/test_config.c a036a69b550ebc477ab9ca2b37269201f888436e
|
||||
F src/test_demovfs.c 20a4975127993f4959890016ae9ce5535a880094
|
||||
F src/test_devsym.c e7498904e72ba7491d142d5c83b476c4e76993bc
|
||||
F src/test_func.c 6232d722a4ddb193035aa13a03796bf57d6c12fd
|
||||
F src/test_fuzzer.c f884f6f32e8513d34248d6e1ac8a32047fead254
|
||||
F src/test_fuzzer.c 0b11b466bd9c5dc8d882d29bb8c7e576963fd905
|
||||
F src/test_hexio.c c4773049603151704a6ab25ac5e936b5109caf5a
|
||||
F src/test_init.c 3cbad7ce525aec925f8fda2192d576d47f0d478a
|
||||
F src/test_intarray.c d879bbf8e4ce085ab966d1f3c896a7c8b4f5fc99
|
||||
@ -504,7 +504,7 @@ F test/fuzz2.test 207d0f9d06db3eaf47a6b7bfc835b8e2fc397167
|
||||
F test/fuzz3.test aec64345184d1662bd30e6a17851ff659d596dc5
|
||||
F test/fuzz_common.tcl a87dfbb88c2a6b08a38e9a070dabd129e617b45b
|
||||
F test/fuzz_malloc.test 328f70aaca63adf29b4c6f06505ed0cf57ca7c26
|
||||
F test/fuzzer1.test ddfb04f3bd5cfdda3b1aa15b78d3ad055c9cc50f
|
||||
F test/fuzzer1.test ff725a0eec070dfc2b2acc13b21a52a139382929
|
||||
F test/hook.test 5f3749de6462a6b87b4209b74adf7df5ac2df639
|
||||
F test/icu.test 70df4faca133254c042d02ae342c0a141f2663f4
|
||||
F test/in.test a7b8a0f43da81cd08645b7a710099ffe9ad1126b
|
||||
@ -989,7 +989,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06
|
||||
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
|
||||
F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a
|
||||
F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381
|
||||
P a8a042a751557d06bf04455ed7629cb29adcd87f 549f4fd00d8325c10099b100e5202b77ee1d83ad
|
||||
R f5be37b531ffc02ec5dfb73b01bae4f2
|
||||
P 9c572d424a20b0585bfac358a5d1ee5276dd05ba
|
||||
R 729d7ae625d9d3f1fd7b4a057f6fc25f
|
||||
U drh
|
||||
Z a17a96e51ae999984897d31a6e51ca34
|
||||
Z 5a28382a5967a16e17f8380cc4335d77
|
||||
|
@ -1 +1 @@
|
||||
9c572d424a20b0585bfac358a5d1ee5276dd05ba
|
||||
a82938731b21d6166d7d482994cb065c8b725083
|
@ -93,6 +93,31 @@
|
||||
**
|
||||
** This last query will show up to 50 words out of the vocabulary that
|
||||
** match or nearly match the $prefix.
|
||||
**
|
||||
** MULTIPLE RULE SETS
|
||||
**
|
||||
** An enhancement as of 2012-02-14 allows multiple rule sets to coexist in
|
||||
** the same fuzzer. This allows, for example, the fuzzer to operate in
|
||||
** multiple languages.
|
||||
**
|
||||
** A new column "ruleset" is added to the table. This column must have a
|
||||
** value between 0 and 50. The default value for the ruleset is 0. But
|
||||
** alternative values can be specified. For example:
|
||||
**
|
||||
** INSERT INTO f(ruleset,cFrom,cTo,Cost) VALUES(1,'qu','k',100);
|
||||
**
|
||||
** Only one ruleset will be used at a time. When running a MATCH query,
|
||||
** specify the desired ruleset using a "ruleset=N" term in the WHERE clause.
|
||||
** For example:
|
||||
**
|
||||
** SELECT vocabulary.w FROM f, vocabulary
|
||||
** WHERE f.word MATCH $word
|
||||
** AND f.distance<=200
|
||||
** AND f.word=vocabulary.w
|
||||
** AND f.ruleset=1 -- Specify the ruleset to use here
|
||||
** LIMIT 20
|
||||
**
|
||||
** If no ruleset is specified in the WHERE clause, ruleset 0 is used.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
#include <stdlib.h>
|
||||
@ -112,10 +137,24 @@ typedef struct fuzzer_seen fuzzer_seen;
|
||||
typedef struct fuzzer_stem fuzzer_stem;
|
||||
|
||||
/*
|
||||
** Type of the "cost" of an edit operation. Might be changed to
|
||||
** "float" or "double" or "sqlite3_int64" in the future.
|
||||
** Various types.
|
||||
**
|
||||
** fuzzer_cost is the "cost" of an edit operation.
|
||||
**
|
||||
** fuzzer_len is the length of a matching string.
|
||||
**
|
||||
** fuzzer_ruleid is a ruleset identifier.
|
||||
*/
|
||||
typedef int fuzzer_cost;
|
||||
typedef signed char fuzzer_len;
|
||||
typedef unsigned char fuzzer_ruleid;
|
||||
|
||||
/*
|
||||
** Limits
|
||||
*/
|
||||
#define FUZZER_MX_LENGTH 50 /* Maximum length of a search string */
|
||||
#define FUZZER_MX_RULEID 50 /* Maximum rule ID */
|
||||
#define FUZZER_MX_COST 1000 /* Maximum single-rule cost */
|
||||
|
||||
|
||||
/*
|
||||
@ -123,11 +162,12 @@ typedef int fuzzer_cost;
|
||||
** All rules are kept on a linked list sorted by rCost.
|
||||
*/
|
||||
struct fuzzer_rule {
|
||||
fuzzer_rule *pNext; /* Next rule in order of increasing rCost */
|
||||
fuzzer_cost rCost; /* Cost of this transformation */
|
||||
int nFrom, nTo; /* Length of the zFrom and zTo strings */
|
||||
char *zFrom; /* Transform from */
|
||||
char zTo[4]; /* Transform to (extra space appended) */
|
||||
fuzzer_rule *pNext; /* Next rule in order of increasing rCost */
|
||||
char *zFrom; /* Transform from */
|
||||
fuzzer_cost rCost; /* Cost of this transformation */
|
||||
fuzzer_len nFrom, nTo; /* Length of the zFrom and zTo strings */
|
||||
fuzzer_ruleid iRuleset; /* The rule set to which this rule belongs */
|
||||
char zTo[4]; /* Transform to (extra space appended) */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -143,13 +183,13 @@ struct fuzzer_rule {
|
||||
*/
|
||||
struct fuzzer_stem {
|
||||
char *zBasis; /* Word being fuzzed */
|
||||
int nBasis; /* Length of the zBasis string */
|
||||
const fuzzer_rule *pRule; /* Current rule to apply */
|
||||
int n; /* Apply pRule at this character offset */
|
||||
fuzzer_cost rBaseCost; /* Base cost of getting to zBasis */
|
||||
fuzzer_cost rCostX; /* Precomputed rBaseCost + pRule->rCost */
|
||||
fuzzer_stem *pNext; /* Next stem in rCost order */
|
||||
fuzzer_stem *pHash; /* Next stem with same hash on zBasis */
|
||||
fuzzer_cost rBaseCost; /* Base cost of getting to zBasis */
|
||||
fuzzer_cost rCostX; /* Precomputed rBaseCost + pRule->rCost */
|
||||
fuzzer_len nBasis; /* Length of the zBasis string */
|
||||
fuzzer_len n; /* Apply pRule at this character offset */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -179,6 +219,7 @@ struct fuzzer_cursor {
|
||||
char *zBuf; /* Temporary use buffer */
|
||||
int nBuf; /* Bytes allocated for zBuf */
|
||||
int nStem; /* Number of stems allocated */
|
||||
int iRuleset; /* Only process rules from this ruleset */
|
||||
fuzzer_rule nullRule; /* Null rule used first */
|
||||
fuzzer_stem *apHash[FUZZER_HASH]; /* Hash of previously generated terms */
|
||||
};
|
||||
@ -202,7 +243,8 @@ static int fuzzerConnect(
|
||||
if( pNew==0 ) return SQLITE_NOMEM;
|
||||
pNew->zClassName = (char*)&pNew[1];
|
||||
memcpy(pNew->zClassName, argv[0], n);
|
||||
sqlite3_declare_vtab(db, "CREATE TABLE x(word,distance,cFrom,cTo,cost)");
|
||||
sqlite3_declare_vtab(db,
|
||||
"CREATE TABLE x(word,distance,ruleset,cFrom,cTo,cost)");
|
||||
memset(pNew, 0, sizeof(*pNew));
|
||||
*ppVtab = &pNew->base;
|
||||
return SQLITE_OK;
|
||||
@ -424,7 +466,7 @@ static int fuzzerSeen(fuzzer_cursor *pCur, fuzzer_stem *pStem){
|
||||
}
|
||||
h = fuzzerHash(pCur->zBuf);
|
||||
pLookup = pCur->apHash[h];
|
||||
while( pLookup && strcmp(pLookup->zBasis, pCur->zBuf)!=0 ){
|
||||
while( pLookup && strcmp(pLookup->zBasis, pCur->zBuf)!=0 ){
|
||||
pLookup = pLookup->pHash;
|
||||
}
|
||||
return pLookup!=0;
|
||||
@ -453,8 +495,11 @@ static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){
|
||||
}
|
||||
}
|
||||
pStem->n = -1;
|
||||
pStem->pRule = pRule->pNext;
|
||||
if( pStem->pRule && fuzzerCost(pStem)>pCur->rLimit ) pStem->pRule = 0;
|
||||
do{
|
||||
pRule = pRule->pNext;
|
||||
}while( pRule && pRule->iRuleset!=pCur->iRuleset );
|
||||
pStem->pRule = pRule;
|
||||
if( pRule && fuzzerCost(pStem)>pCur->rLimit ) pStem->pRule = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -667,16 +712,22 @@ static int fuzzerFilter(
|
||||
fuzzer_cursor *pCur = (fuzzer_cursor *)pVtabCursor;
|
||||
const char *zWord = 0;
|
||||
fuzzer_stem *pStem;
|
||||
int idx;
|
||||
|
||||
fuzzerClearCursor(pCur, 1);
|
||||
pCur->rLimit = 2147483647;
|
||||
if( idxNum==1 ){
|
||||
idx = 0;
|
||||
if( idxNum & 1 ){
|
||||
zWord = (const char*)sqlite3_value_text(argv[0]);
|
||||
}else if( idxNum==2 ){
|
||||
pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[0]);
|
||||
}else if( idxNum==3 ){
|
||||
zWord = (const char*)sqlite3_value_text(argv[0]);
|
||||
pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[1]);
|
||||
idx++;
|
||||
}
|
||||
if( idxNum & 2 ){
|
||||
pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[idx]);
|
||||
idx++;
|
||||
}
|
||||
if( idxNum & 4 ){
|
||||
pCur->iRuleset = (fuzzer_cost)sqlite3_value_int(argv[idx]);
|
||||
idx++;
|
||||
}
|
||||
if( zWord==0 ) zWord = "";
|
||||
pCur->pStem = pStem = fuzzerNewStem(pCur, zWord, (fuzzer_cost)0);
|
||||
@ -735,22 +786,29 @@ static int fuzzerEof(sqlite3_vtab_cursor *cur){
|
||||
/*
|
||||
** Search for terms of these forms:
|
||||
**
|
||||
** word MATCH $str
|
||||
** distance < $value
|
||||
** distance <= $value
|
||||
** (A) word MATCH $str
|
||||
** (B1) distance < $value
|
||||
** (B2) distance <= $value
|
||||
** (C) ruleset == $ruleid
|
||||
**
|
||||
** The distance< and distance<= are both treated as distance<=.
|
||||
** The query plan number is as follows:
|
||||
** The query plan number is a bit vector:
|
||||
**
|
||||
** 0: None of the terms above are found
|
||||
** 1: There is a "word MATCH" term with $str in filter.argv[0].
|
||||
** 2: There is a "distance<" term with $value in filter.argv[0].
|
||||
** 3: Both "word MATCH" and "distance<" with $str in argv[0] and
|
||||
** $value in argv[1].
|
||||
** bit 1: Term of the form (A) found
|
||||
** bit 2: Term like (B1) or (B2) found
|
||||
** bit 3: Term like (C) found
|
||||
**
|
||||
** If bit-1 is set, $str is always in filter.argv[0]. If bit-2 is set
|
||||
** then $value is in filter.argv[0] if bit-1 is clear and is in
|
||||
** filter.argv[1] if bit-1 is set. If bit-3 is set, then $ruleid is
|
||||
** in filter.argv[0] if bit-1 and bit-2 are both zero, is in
|
||||
** filter.argv[1] if exactly one of bit-1 and bit-2 is set, and is in
|
||||
** filter.argv[2] if both bit-1 and bit-2 are set.
|
||||
*/
|
||||
static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
|
||||
int iPlan = 0;
|
||||
int iDistTerm = -1;
|
||||
int iRulesetTerm = -1;
|
||||
int i;
|
||||
const struct sqlite3_index_constraint *pConstraint;
|
||||
pConstraint = pIdxInfo->aConstraint;
|
||||
@ -772,11 +830,23 @@ static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
|
||||
iPlan |= 2;
|
||||
iDistTerm = i;
|
||||
}
|
||||
if( (iPlan & 4)==0
|
||||
&& pConstraint->iColumn==2
|
||||
&& pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
|
||||
){
|
||||
iPlan |= 4;
|
||||
pIdxInfo->aConstraintUsage[i].omit = 1;
|
||||
iRulesetTerm = i;
|
||||
}
|
||||
}
|
||||
if( iPlan==2 ){
|
||||
pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1;
|
||||
}else if( iPlan==3 ){
|
||||
pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 2;
|
||||
if( iPlan & 2 ){
|
||||
pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1+((iPlan&1)!=0);
|
||||
}
|
||||
if( iPlan & 4 ){
|
||||
int idx = 1;
|
||||
if( iPlan & 1 ) idx++;
|
||||
if( iPlan & 2 ) idx++;
|
||||
pIdxInfo->aConstraintUsage[iRulesetTerm].argvIndex = idx;
|
||||
}
|
||||
pIdxInfo->idxNum = iPlan;
|
||||
if( pIdxInfo->nOrderBy==1
|
||||
@ -811,7 +881,8 @@ static int fuzzerUpdate(
|
||||
const char *zTo;
|
||||
int nTo;
|
||||
fuzzer_cost rCost;
|
||||
if( argc!=7 ){
|
||||
int rulesetId;
|
||||
if( argc!=8 ){
|
||||
sqlite3_free(pVTab->zErrMsg);
|
||||
pVTab->zErrMsg = sqlite3_mprintf("cannot delete from a %s virtual table",
|
||||
p->zClassName);
|
||||
@ -823,22 +894,36 @@ static int fuzzerUpdate(
|
||||
p->zClassName);
|
||||
return SQLITE_CONSTRAINT;
|
||||
}
|
||||
zFrom = (char*)sqlite3_value_text(argv[4]);
|
||||
zFrom = (char*)sqlite3_value_text(argv[5]);
|
||||
if( zFrom==0 ) zFrom = "";
|
||||
zTo = (char*)sqlite3_value_text(argv[5]);
|
||||
zTo = (char*)sqlite3_value_text(argv[6]);
|
||||
if( zTo==0 ) zTo = "";
|
||||
if( strcmp(zFrom,zTo)==0 ){
|
||||
/* Silently ignore null transformations */
|
||||
return SQLITE_OK;
|
||||
}
|
||||
rCost = sqlite3_value_int(argv[6]);
|
||||
if( rCost<=0 ){
|
||||
rCost = sqlite3_value_int(argv[7]);
|
||||
if( rCost<=0 || rCost>FUZZER_MX_COST ){
|
||||
sqlite3_free(pVTab->zErrMsg);
|
||||
pVTab->zErrMsg = sqlite3_mprintf("cost must be positive");
|
||||
pVTab->zErrMsg = sqlite3_mprintf("cost must be between 1 and %d",
|
||||
FUZZER_MX_COST);
|
||||
return SQLITE_CONSTRAINT;
|
||||
}
|
||||
nFrom = strlen(zFrom);
|
||||
nTo = strlen(zTo);
|
||||
if( nFrom>FUZZER_MX_LENGTH || nTo>FUZZER_MX_LENGTH ){
|
||||
sqlite3_free(pVTab->zErrMsg);
|
||||
pVTab->zErrMsg = sqlite3_mprintf("maximum string length is %d",
|
||||
FUZZER_MX_LENGTH);
|
||||
return SQLITE_CONSTRAINT;
|
||||
}
|
||||
rulesetId = sqlite3_value_int(argv[4]);
|
||||
if( rulesetId<0 || rulesetId>FUZZER_MX_RULEID ){
|
||||
sqlite3_free(pVTab->zErrMsg);
|
||||
pVTab->zErrMsg = sqlite3_mprintf("rulesetid must be between 0 and %d",
|
||||
FUZZER_MX_RULEID);
|
||||
return SQLITE_CONSTRAINT;
|
||||
}
|
||||
pRule = sqlite3_malloc( sizeof(*pRule) + nFrom + nTo );
|
||||
if( pRule==0 ){
|
||||
return SQLITE_NOMEM;
|
||||
@ -850,6 +935,7 @@ static int fuzzerUpdate(
|
||||
pRule->nTo = nTo;
|
||||
pRule->rCost = rCost;
|
||||
pRule->pNext = p->pNewRule;
|
||||
pRule->iRuleset = rulesetId;
|
||||
p->pNewRule = pRule;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
@ -43,6 +43,64 @@ do_test fuzzer1-1.3 {
|
||||
}
|
||||
} {abcde 0 abcda 1 ebcde 10 ebcda 11 abcdo 100 ebcdo 110 obcde 110 obcda 111 obcdo 210}
|
||||
|
||||
do_test fuzzer1-1.4 {
|
||||
db eval {
|
||||
INSERT INTO f1(ruleset, cfrom, cto, cost) VALUES(1,'b','x',1);
|
||||
INSERT INTO f1(ruleset, cfrom, cto, cost) VALUES(1,'d','y',10);
|
||||
INSERT INTO f1(ruleset, cfrom, cto, cost) VALUES(1,'y','z',100);
|
||||
}
|
||||
} {}
|
||||
do_test fuzzer1-1.5 {
|
||||
db eval {
|
||||
SELECT word, distance FROM f1 WHERE word MATCH 'abcde'
|
||||
}
|
||||
} {abcde 0 abcda 1 ebcde 10 ebcda 11 abcdo 100 ebcdo 110 obcde 110 obcda 111 obcdo 210}
|
||||
do_test fuzzer1-1.6 {
|
||||
db eval {
|
||||
SELECT word, distance FROM f1 WHERE word MATCH 'abcde' AND ruleset=0
|
||||
}
|
||||
} {abcde 0 abcda 1 ebcde 10 ebcda 11 abcdo 100 ebcdo 110 obcde 110 obcda 111 obcdo 210}
|
||||
do_test fuzzer1-1.7 {
|
||||
db eval {
|
||||
SELECT word, distance FROM f1 WHERE word MATCH 'abcde' AND ruleset=1
|
||||
}
|
||||
} {abcde 0 axcde 1 axcda 2 abcye 10 abcya 11 axcye 11 axcya 12 abcze 110 abcza 111 axcze 111 axcza 112}
|
||||
do_test fuzzer1-1.8 {
|
||||
db eval {
|
||||
SELECT word, distance FROM f1 WHERE word MATCH 'abcde' AND distance<100
|
||||
}
|
||||
} {abcde 0 abcda 1 ebcde 10 ebcda 11}
|
||||
do_test fuzzer1-1.9 {
|
||||
db eval {
|
||||
SELECT word, distance FROM f1 WHERE word MATCH 'abcde' AND distance<=100
|
||||
}
|
||||
} {abcde 0 abcda 1 ebcde 10 ebcda 11 abcdo 100}
|
||||
do_test fuzzer1-1.10 {
|
||||
db eval {
|
||||
SELECT word, distance FROM f1
|
||||
WHERE word MATCH 'abcde' AND distance<100 AND ruleset=0
|
||||
}
|
||||
} {abcde 0 abcda 1 ebcde 10 ebcda 11}
|
||||
do_test fuzzer1-1.11 {
|
||||
db eval {
|
||||
SELECT word, distance FROM f1
|
||||
WHERE word MATCH 'abcde' AND distance<=100 AND ruleset=0
|
||||
}
|
||||
} {abcde 0 abcda 1 ebcde 10 ebcda 11 abcdo 100}
|
||||
do_test fuzzer1-1.12 {
|
||||
db eval {
|
||||
SELECT word, distance FROM f1
|
||||
WHERE word MATCH 'abcde' AND distance<12 AND ruleset=1
|
||||
}
|
||||
} {abcde 0 axcde 1 axcda 2 abcye 10 abcya 11 axcye 11}
|
||||
do_test fuzzer1-1.13 {
|
||||
db eval {
|
||||
SELECT word, distance FROM f1
|
||||
WHERE word MATCH 'abcde' AND distance<=12 AND ruleset=1
|
||||
}
|
||||
} {abcde 0 axcde 1 axcda 2 abcye 10 abcya 11 axcye 11 axcya 12}
|
||||
|
||||
|
||||
do_test fuzzer1-2.0 {
|
||||
execsql {
|
||||
CREATE VIRTUAL TABLE temp.f2 USING fuzzer;
|
||||
|
Loading…
x
Reference in New Issue
Block a user