Tighter compression of the keyword hash table. (CVS 3920)
FossilOrigin-Name: 681216767d7fabfccb0b12f6a81b18b6d1c252bf
This commit is contained in:
parent
9c61cd77a5
commit
744f2f4658
12
manifest
12
manifest
@ -1,5 +1,5 @@
|
||||
C Optional\sparameter\sin\sthe\sINCREMENTAL\sVACUUM\sstatement\sspecifies\show\smany\npages\sto\svacuum\sfrom\sthe\sdatabase.\s(CVS\s3919)
|
||||
D 2007-05-04T16:14:38
|
||||
C Tighter\scompression\sof\sthe\skeyword\shash\stable.\s(CVS\s3920)
|
||||
D 2007-05-04T17:07:53
|
||||
F Makefile.in 8cab54f7c9f5af8f22fd97ddf1ecfd1e1860de62
|
||||
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
|
||||
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
|
||||
@ -407,7 +407,7 @@ F tool/lempar.c 8f998bf8d08e2123149c2cc5d0597cd5d5d1abdd
|
||||
F tool/memleak.awk 4e7690a51bf3ed757e611273d43fe3f65b510133
|
||||
F tool/memleak2.awk 9cc20c8e8f3c675efac71ea0721ee6874a1566e8
|
||||
F tool/memleak3.tcl 7707006ee908cffff210c98158788d85bb3fcdbf
|
||||
F tool/mkkeywordhash.c e119bdc04305adcada8856d73ad7d837c4ec123c
|
||||
F tool/mkkeywordhash.c 39eb0ff0bc3faf8deea67b44fddcb64a29a27521
|
||||
F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e x
|
||||
F tool/mksqlite3c.tcl fa0429d32cb0756b23def531ec863bc0e216f375
|
||||
F tool/mksqlite3internalh.tcl a85bb0c812db1a060e6e6dfab4e4c817f53d194b
|
||||
@ -475,7 +475,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
|
||||
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
|
||||
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
|
||||
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
|
||||
P b94d39e4d903bc34a9d0bbedd971677abc0ddfc0
|
||||
R 33b57057d3649deaa5906674a92ab54c
|
||||
P ed713f9ccb5d0f306a79ab9931e43db2327fb435
|
||||
R 5a6e4b5dc3429e2f2a7bdff2ac75dece
|
||||
U drh
|
||||
Z 95bb62fd59bc4dc14d01347fa995fad6
|
||||
Z 72fb146ed224d9e10bb3659cbe4d7ad5
|
||||
|
@ -1 +1 @@
|
||||
ed713f9ccb5d0f306a79ab9931e43db2327fb435
|
||||
681216767d7fabfccb0b12f6a81b18b6d1c252bf
|
@ -15,7 +15,7 @@ static const char zHdr[] =
|
||||
"**\n"
|
||||
"** The code in this file has been automatically generated by\n"
|
||||
"**\n"
|
||||
"** $Header: /home/drh/sqlite/trans/cvs/sqlite/sqlite/tool/mkkeywordhash.c,v 1.28 2007/04/26 14:42:36 danielk1977 Exp $\n"
|
||||
"** $Header: /home/drh/sqlite/trans/cvs/sqlite/sqlite/tool/mkkeywordhash.c,v 1.29 2007/05/04 17:07:53 drh Exp $\n"
|
||||
"**\n"
|
||||
"** The code in this file implements a function that determines whether\n"
|
||||
"** or not a given identifier is really an SQL keyword. The same thing\n"
|
||||
@ -40,6 +40,7 @@ struct Keyword {
|
||||
int offset; /* Offset to start of name string */
|
||||
int len; /* Length of this keyword, not counting final \000 */
|
||||
int prefix; /* Number of characters in prefix */
|
||||
int longestSuffix; /* Longest suffix that is a prefix on another word */
|
||||
int iNext; /* Index in aKeywordTable[] of next with same hash */
|
||||
int substrId; /* Id to another keyword this keyword is embedded in */
|
||||
int substrOffset; /* Offset into substrId for start of this keyword */
|
||||
@ -260,7 +261,7 @@ static Keyword aKeywordTable[] = {
|
||||
};
|
||||
|
||||
/* Number of keywords */
|
||||
static int NKEYWORD = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0]));
|
||||
static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0]));
|
||||
|
||||
/* An array to map all upper-case characters into their corresponding
|
||||
** lower-case character.
|
||||
@ -299,7 +300,10 @@ static int keywordCompare1(const void *a, const void *b){
|
||||
static int keywordCompare2(const void *a, const void *b){
|
||||
const Keyword *pA = (Keyword*)a;
|
||||
const Keyword *pB = (Keyword*)b;
|
||||
int n = strcmp(pA->zName, pB->zName);
|
||||
int n = pB->longestSuffix - pA->longestSuffix;
|
||||
if( n==0 ){
|
||||
n = strcmp(pA->zName, pB->zName);
|
||||
}
|
||||
return n;
|
||||
}
|
||||
static int keywordCompare3(const void *a, const void *b){
|
||||
@ -314,7 +318,7 @@ static int keywordCompare3(const void *a, const void *b){
|
||||
*/
|
||||
static Keyword *findById(int id){
|
||||
int i;
|
||||
for(i=0; i<NKEYWORD; i++){
|
||||
for(i=0; i<nKeyword; i++){
|
||||
if( aKeywordTable[i].id==id ) break;
|
||||
}
|
||||
return &aKeywordTable[i];
|
||||
@ -329,34 +333,36 @@ int main(int argc, char **argv){
|
||||
int bestSize, bestCount;
|
||||
int count;
|
||||
int nChar;
|
||||
int aHash[1000]; /* 1000 is much bigger than NKEYWORD */
|
||||
int totalLen = 0;
|
||||
int aHash[1000]; /* 1000 is much bigger than nKeyword */
|
||||
|
||||
/* Remove entries from the list of keywords that have mask==0 */
|
||||
for(i=j=0; i<NKEYWORD; i++){
|
||||
for(i=j=0; i<nKeyword; i++){
|
||||
if( aKeywordTable[i].mask==0 ) continue;
|
||||
if( j<i ){
|
||||
aKeywordTable[j] = aKeywordTable[i];
|
||||
}
|
||||
j++;
|
||||
}
|
||||
NKEYWORD = j;
|
||||
nKeyword = j;
|
||||
|
||||
/* Fill in the lengths of strings and hashes for all entries. */
|
||||
for(i=0; i<NKEYWORD; i++){
|
||||
for(i=0; i<nKeyword; i++){
|
||||
Keyword *p = &aKeywordTable[i];
|
||||
p->len = strlen(p->zName);
|
||||
totalLen += p->len;
|
||||
p->hash = (UpperToLower[p->zName[0]]*4) ^
|
||||
(UpperToLower[p->zName[p->len-1]]*3) ^ p->len;
|
||||
p->id = i+1;
|
||||
}
|
||||
|
||||
/* Sort the table from shortest to longest keyword */
|
||||
qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare1);
|
||||
qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare1);
|
||||
|
||||
/* Look for short keywords embedded in longer keywords */
|
||||
for(i=NKEYWORD-2; i>=0; i--){
|
||||
for(i=nKeyword-2; i>=0; i--){
|
||||
Keyword *p = &aKeywordTable[i];
|
||||
for(j=NKEYWORD-1; j>i && p->substrId==0; j--){
|
||||
for(j=nKeyword-1; j>i && p->substrId==0; j--){
|
||||
Keyword *pOther = &aKeywordTable[j];
|
||||
if( pOther->substrId ) continue;
|
||||
if( pOther->len<=p->len ) continue;
|
||||
@ -370,18 +376,35 @@ int main(int argc, char **argv){
|
||||
}
|
||||
}
|
||||
|
||||
/* Sort the table into alphabetical order */
|
||||
qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare2);
|
||||
/* Compute the longestSuffix value for every word */
|
||||
for(i=0; i<nKeyword; i++){
|
||||
Keyword *p = &aKeywordTable[i];
|
||||
if( p->substrId ) continue;
|
||||
for(j=0; j<nKeyword; j++){
|
||||
Keyword *pOther;
|
||||
if( j==i ) continue;
|
||||
pOther = &aKeywordTable[j];
|
||||
if( pOther->substrId ) continue;
|
||||
for(k=p->longestSuffix+1; k<p->len && k<pOther->len; k++){
|
||||
if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
|
||||
p->longestSuffix = k;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Sort the table by decreasing longestSuffix; ties are broken by strcmp() on the keyword name */
|
||||
qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare2);
|
||||
|
||||
/* Fill in the offset for all entries */
|
||||
nChar = 0;
|
||||
for(i=0; i<NKEYWORD; i++){
|
||||
for(i=0; i<nKeyword; i++){
|
||||
Keyword *p = &aKeywordTable[i];
|
||||
if( p->offset>0 || p->substrId ) continue;
|
||||
p->offset = nChar;
|
||||
nChar += p->len;
|
||||
for(k=p->len-1; k>=1; k--){
|
||||
for(j=i+1; j<NKEYWORD; j++){
|
||||
for(j=i+1; j<nKeyword; j++){
|
||||
Keyword *pOther = &aKeywordTable[j];
|
||||
if( pOther->offset>0 || pOther->substrId ) continue;
|
||||
if( pOther->len<=k ) continue;
|
||||
@ -398,7 +421,7 @@ int main(int argc, char **argv){
|
||||
}
|
||||
}
|
||||
}
|
||||
for(i=0; i<NKEYWORD; i++){
|
||||
for(i=0; i<nKeyword; i++){
|
||||
Keyword *p = &aKeywordTable[i];
|
||||
if( p->substrId ){
|
||||
p->offset = findById(p->substrId)->offset + p->substrOffset;
|
||||
@ -406,15 +429,15 @@ int main(int argc, char **argv){
|
||||
}
|
||||
|
||||
/* Sort the table by offset */
|
||||
qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare3);
|
||||
qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare3);
|
||||
|
||||
/* Figure out how big to make the hash table in order to minimize the
|
||||
** number of collisions */
|
||||
bestSize = NKEYWORD;
|
||||
bestCount = NKEYWORD*NKEYWORD;
|
||||
for(i=NKEYWORD/2; i<=2*NKEYWORD; i++){
|
||||
bestSize = nKeyword;
|
||||
bestCount = nKeyword*nKeyword;
|
||||
for(i=nKeyword/2; i<=2*nKeyword; i++){
|
||||
for(j=0; j<i; j++) aHash[j] = 0;
|
||||
for(j=0; j<NKEYWORD; j++){
|
||||
for(j=0; j<nKeyword; j++){
|
||||
h = aKeywordTable[j].hash % i;
|
||||
aHash[h] *= 2;
|
||||
aHash[h]++;
|
||||
@ -428,7 +451,7 @@ int main(int argc, char **argv){
|
||||
|
||||
/* Compute the hash */
|
||||
for(i=0; i<bestSize; i++) aHash[i] = 0;
|
||||
for(i=0; i<NKEYWORD; i++){
|
||||
for(i=0; i<nKeyword; i++){
|
||||
h = aKeywordTable[i].hash % bestSize;
|
||||
aKeywordTable[i].iNext = aHash[h];
|
||||
aHash[h] = i+1;
|
||||
@ -438,9 +461,11 @@ int main(int argc, char **argv){
|
||||
printf("%s", zHdr);
|
||||
printf("/* Hash score: %d */\n", bestCount);
|
||||
printf("static int keywordCode(const char *z, int n){\n");
|
||||
printf(" /* zText[] encodes %d bytes of keywords in %d bytes */\n",
|
||||
totalLen + nKeyword, nChar+1 );
|
||||
|
||||
printf(" static const char zText[%d] =\n", nChar+1);
|
||||
for(i=j=0; i<NKEYWORD; i++){
|
||||
for(i=j=0; i<nKeyword; i++){
|
||||
Keyword *p = &aKeywordTable[i];
|
||||
if( p->substrId ) continue;
|
||||
if( j==0 ) printf(" \"");
|
||||
@ -465,8 +490,8 @@ int main(int argc, char **argv){
|
||||
}
|
||||
printf("%s };\n", j==0 ? "" : "\n");
|
||||
|
||||
printf(" static const unsigned char aNext[%d] = {\n", NKEYWORD);
|
||||
for(i=j=0; i<NKEYWORD; i++){
|
||||
printf(" static const unsigned char aNext[%d] = {\n", nKeyword);
|
||||
for(i=j=0; i<nKeyword; i++){
|
||||
if( j==0 ) printf(" ");
|
||||
printf(" %3d,", aKeywordTable[i].iNext);
|
||||
j++;
|
||||
@ -477,8 +502,8 @@ int main(int argc, char **argv){
|
||||
}
|
||||
printf("%s };\n", j==0 ? "" : "\n");
|
||||
|
||||
printf(" static const unsigned char aLen[%d] = {\n", NKEYWORD);
|
||||
for(i=j=0; i<NKEYWORD; i++){
|
||||
printf(" static const unsigned char aLen[%d] = {\n", nKeyword);
|
||||
for(i=j=0; i<nKeyword; i++){
|
||||
if( j==0 ) printf(" ");
|
||||
printf(" %3d,", aKeywordTable[i].len+aKeywordTable[i].prefix);
|
||||
j++;
|
||||
@ -489,8 +514,8 @@ int main(int argc, char **argv){
|
||||
}
|
||||
printf("%s };\n", j==0 ? "" : "\n");
|
||||
|
||||
printf(" static const unsigned short int aOffset[%d] = {\n", NKEYWORD);
|
||||
for(i=j=0; i<NKEYWORD; i++){
|
||||
printf(" static const unsigned short int aOffset[%d] = {\n", nKeyword);
|
||||
for(i=j=0; i<nKeyword; i++){
|
||||
if( j==0 ) printf(" ");
|
||||
printf(" %3d,", aKeywordTable[i].offset);
|
||||
j++;
|
||||
@ -501,8 +526,8 @@ int main(int argc, char **argv){
|
||||
}
|
||||
printf("%s };\n", j==0 ? "" : "\n");
|
||||
|
||||
printf(" static const unsigned char aCode[%d] = {\n", NKEYWORD);
|
||||
for(i=j=0; i<NKEYWORD; i++){
|
||||
printf(" static const unsigned char aCode[%d] = {\n", nKeyword);
|
||||
for(i=j=0; i<nKeyword; i++){
|
||||
char *zToken = aKeywordTable[i].zTokenType;
|
||||
if( j==0 ) printf(" ");
|
||||
printf("%s,%*s", zToken, (int)(14-strlen(zToken)), "");
|
||||
|
Loading…
Reference in New Issue
Block a user