Optimizations to the tokenizer. (CVS 2011)

FossilOrigin-Name: e5540ce047e0215904005bc9df4ff0d1d0a3c1d1
This commit is contained in:
drh 2004-10-07 19:03:01 +00:00
parent 74ad7fe980
commit 2090a0ed48
5 changed files with 416 additions and 160 deletions

View File

@ -1,5 +1,5 @@
C Additional\sparser\soptimizations.\s(CVS\s2010)
D 2004-10-07T03:06:29
C Optimizations\sto\sthe\stokenizer.\s(CVS\s2011)
D 2004-10-07T19:03:01
F Makefile.in 52c1cc106cad9148d4b7cb387b458e82dc86b339
F Makefile.linux-gcc a9e5a0d309fa7c38e7c14d3ecf7690879d3a5457
F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd
@ -70,7 +70,7 @@ F src/test2.c b11fa244fff02190707dd0879987c37c75e61fc8
F src/test3.c 5b5b0f3d11b097399c1054fff73d8f3711092301
F src/test4.c 7c6b9fc33dd1f3f93c7f1ee6e5e6d016afa6c1df
F src/test5.c b001fa7f1b9e2dc5c2331de62fc641b5ab2bd7a1
F src/tokenize.c 449843c85c8118b9718f3935f4b5d18586c8cee2
F src/tokenize.c a235e9a38ccf175f7badccdec68f58d79cb5abe5
F src/trigger.c b51a120d53e8b85359be11bf5e50854e5725fe3d
F src/update.c 174c3b593b8f4928e510a51ec309e8ce69d2371c
F src/utf.c f4f83acd73389090e32d6589d307fc55d794c7ed
@ -201,6 +201,7 @@ F tool/lempar.c 1e61d2b6cb9d8affa264a13336bc0c088498caa4
F tool/memleak.awk b744b6109566206c746d826f6ecdba34662216bc
F tool/memleak2.awk 9cc20c8e8f3c675efac71ea0721ee6874a1566e8
F tool/memleak3.tcl 336eb50b0849dbf99b1d5462d9c37291b01b2b43
F tool/mkkeywordhash.c 0cfab6691def993d5b2124398d25129b12b6b5fd
F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e x
F tool/opcodeDoc.awk b3a2a3d5d3075b8bd90b7afe24283efdd586659c
F tool/report1.txt 9eae07f26a8fc53889b45fc833a66a33daa22816
@ -235,7 +236,7 @@ F www/dynload.tcl 02eb8273aa78cfa9070dd4501dca937fb22b466c
F www/faq.tcl 8cf9f59fd93868c9954223a99db244c9975fa43b
F www/fileformat.tcl f71a06a0d533c7df408539c64113b4adeaf29764
F www/formatchng.tcl d1dfecedfb25e122ab513a1e0948b15cb4f0be46
F www/index.tcl 3bf50fdac2f5df49cf4f6f76a7f312b5fd4725b8
F www/index.tcl ae29ea8a5bdde5a8031a9ac0935ac16dd921b4ef
F www/lang.tcl dde78c1415ee2d19fc30360808ca6f0e2ea71c30
F www/lockingv3.tcl afcd22f0f063989cff2f4d57bbc38d719b4c6e75
F www/mingw.tcl d96b451568c5d28545fefe0c80bee3431c73f69c
@ -251,7 +252,7 @@ F www/tclsqlite.tcl 560ecd6a916b320e59f2917317398f3d59b7cc25
F www/vdbe.tcl 59288db1ac5c0616296b26dce071c36cb611dfe9
F www/version3.tcl 092a01f5ef430d2c4acc0ae558d74c4bb89638a0
F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4
P 1376a0bb8d864de755c614b2ecce4342155fd09b
R 7c281d56a2bfc9c05afabc9a79a1f3b5
P 618dee121e41d8e5c9b4d5c167475b11ddcda2d7
R c6f2c8afd2a59eadba90462f2b06f8bc
U drh
Z fe725e970bb1b0deb3b69c8bf4e69648
Z bc3b2c6d615a585d4fad51811788aa10

View File

@ -1 +1 @@
618dee121e41d8e5c9b4d5c167475b11ddcda2d7
e5540ce047e0215904005bc9df4ff0d1d0a3c1d1

View File

@ -15,173 +15,115 @@
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.90 2004/10/05 02:41:43 drh Exp $
** $Id: tokenize.c,v 1.91 2004/10/07 19:03:01 drh Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include <stdlib.h>
/*
** All the keywords of the SQL language are stored as in a hash
** table composed of instances of the following structure.
*/
typedef struct Keyword Keyword;
struct Keyword {
char *zName; /* The keyword name */
u8 tokenType; /* Token value for this keyword */
u8 len; /* Length of this keyword */
u8 iNext; /* Index in aKeywordTable[] of next with same hash */
};
/*
** These are the keywords
*/
static Keyword aKeywordTable[] = {
{ "ABORT", TK_ABORT, },
{ "AFTER", TK_AFTER, },
{ "ALL", TK_ALL, },
{ "AND", TK_AND, },
{ "AS", TK_AS, },
{ "ASC", TK_ASC, },
{ "ATTACH", TK_ATTACH, },
{ "BEFORE", TK_BEFORE, },
{ "BEGIN", TK_BEGIN, },
{ "BETWEEN", TK_BETWEEN, },
{ "BY", TK_BY, },
{ "CASCADE", TK_CASCADE, },
{ "CASE", TK_CASE, },
{ "CHECK", TK_CHECK, },
{ "COLLATE", TK_COLLATE, },
{ "COMMIT", TK_COMMIT, },
{ "CONFLICT", TK_CONFLICT, },
{ "CONSTRAINT", TK_CONSTRAINT, },
{ "CREATE", TK_CREATE, },
{ "CROSS", TK_JOIN_KW, },
{ "DATABASE", TK_DATABASE, },
{ "DEFAULT", TK_DEFAULT, },
{ "DEFERRED", TK_DEFERRED, },
{ "DEFERRABLE", TK_DEFERRABLE, },
{ "DELETE", TK_DELETE, },
{ "DESC", TK_DESC, },
{ "DETACH", TK_DETACH, },
{ "DISTINCT", TK_DISTINCT, },
{ "DROP", TK_DROP, },
{ "END", TK_END, },
{ "EACH", TK_EACH, },
{ "ELSE", TK_ELSE, },
{ "EXCEPT", TK_EXCEPT, },
{ "EXCLUSIVE", TK_EXCLUSIVE, },
{ "EXPLAIN", TK_EXPLAIN, },
{ "FAIL", TK_FAIL, },
{ "FOR", TK_FOR, },
{ "FOREIGN", TK_FOREIGN, },
{ "FROM", TK_FROM, },
{ "FULL", TK_JOIN_KW, },
{ "GLOB", TK_GLOB, },
{ "GROUP", TK_GROUP, },
{ "HAVING", TK_HAVING, },
{ "IGNORE", TK_IGNORE, },
{ "IMMEDIATE", TK_IMMEDIATE, },
{ "IN", TK_IN, },
{ "INDEX", TK_INDEX, },
{ "INITIALLY", TK_INITIALLY, },
{ "INNER", TK_JOIN_KW, },
{ "INSERT", TK_INSERT, },
{ "INSTEAD", TK_INSTEAD, },
{ "INTERSECT", TK_INTERSECT, },
{ "INTO", TK_INTO, },
{ "IS", TK_IS, },
{ "ISNULL", TK_ISNULL, },
{ "JOIN", TK_JOIN, },
{ "KEY", TK_KEY, },
{ "LEFT", TK_JOIN_KW, },
{ "LIKE", TK_LIKE, },
{ "LIMIT", TK_LIMIT, },
{ "MATCH", TK_MATCH, },
{ "NATURAL", TK_JOIN_KW, },
{ "NOT", TK_NOT, },
{ "NOTNULL", TK_NOTNULL, },
{ "NULL", TK_NULL, },
{ "OF", TK_OF, },
{ "OFFSET", TK_OFFSET, },
{ "ON", TK_ON, },
{ "OR", TK_OR, },
{ "ORDER", TK_ORDER, },
{ "OUTER", TK_JOIN_KW, },
{ "PRAGMA", TK_PRAGMA, },
{ "PRIMARY", TK_PRIMARY, },
{ "RAISE", TK_RAISE, },
{ "REFERENCES", TK_REFERENCES, },
{ "REPLACE", TK_REPLACE, },
{ "RESTRICT", TK_RESTRICT, },
{ "RIGHT", TK_JOIN_KW, },
{ "ROLLBACK", TK_ROLLBACK, },
{ "ROW", TK_ROW, },
{ "SELECT", TK_SELECT, },
{ "SET", TK_SET, },
{ "STATEMENT", TK_STATEMENT, },
{ "TABLE", TK_TABLE, },
{ "TEMP", TK_TEMP, },
{ "TEMPORARY", TK_TEMP, },
{ "THEN", TK_THEN, },
{ "TRANSACTION", TK_TRANSACTION, },
{ "TRIGGER", TK_TRIGGER, },
{ "UNION", TK_UNION, },
{ "UNIQUE", TK_UNIQUE, },
{ "UPDATE", TK_UPDATE, },
{ "USING", TK_USING, },
{ "VACUUM", TK_VACUUM, },
{ "VALUES", TK_VALUES, },
{ "VIEW", TK_VIEW, },
{ "WHEN", TK_WHEN, },
{ "WHERE", TK_WHERE, },
};
/*
** This is the hash table
*/
#define KEY_HASH_SIZE 101
static u8 aiHashTable[KEY_HASH_SIZE];
/*
** This function looks up an identifier to determine if it is a
** keyword. If it is a keyword, the token code of that keyword is
** returned. If the input is not a keyword, TK_ID is returned.
**
** The implementation of this routine was generated by a program,
** mkkeywordhash.c, located in the tool subdirectory of the distribution.
** The output of the mkkeywordhash.c program was manually cut and pasted
** into this file. When the set of keywords for SQLite changes, you
** must modify the mkkeywordhash.c program (to add or remove keywords from
** the data tables) then rerun that program to regenerate this function.
*/
int sqlite3KeywordCode(const char *z, int n){
static const char zText[519] =
"ABORTAFTERALLANDASCATTACHBEFOREBEGINBETWEENBYCASCADECASECHECK"
"COLLATECOMMITCONFLICTCONSTRAINTCREATECROSSDATABASEDEFAULTDEFERRABLE"
"DEFERREDDELETEDESCDETACHDISTINCTDROPEACHELSEENDEXCEPTEXCLUSIVE"
"EXPLAINFAILFOREIGNFROMFULLGLOBGROUPHAVINGIGNOREIMMEDIATEINDEX"
"INITIALLYINNERINSERTINSTEADINTERSECTINTOISNULLJOINKEYLEFTLIKE"
"LIMITMATCHNATURALNOTNULLNULLOFFSETONORDEROUTERPRAGMAPRIMARYRAISE"
"REFERENCESREPLACERESTRICTRIGHTROLLBACKROWSELECTSETSTATEMENTTABLE"
"TEMPORARYTHENTRANSACTIONTRIGGERUNIONUNIQUEUPDATEUSINGVACUUMVALUES"
"VIEWWHENWHERE";
static const unsigned char aHash[154] = {
0, 75, 82, 0, 0, 97, 80, 0, 83, 0, 0, 0, 0,
0, 0, 6, 0, 95, 4, 0, 0, 0, 0, 0, 0, 0,
0, 96, 86, 8, 0, 26, 13, 7, 19, 15, 0, 0, 32,
25, 0, 21, 31, 41, 0, 0, 0, 34, 27, 0, 0, 30,
0, 0, 0, 9, 0, 10, 0, 0, 0, 0, 51, 0, 44,
43, 0, 45, 40, 0, 29, 39, 35, 0, 0, 20, 0, 59,
0, 16, 0, 17, 0, 18, 0, 55, 42, 72, 0, 33, 0,
0, 61, 66, 56, 0, 0, 0, 0, 0, 0, 0, 54, 0,
0, 0, 0, 0, 74, 50, 76, 64, 52, 0, 0, 0, 0,
68, 84, 0, 47, 0, 58, 60, 92, 0, 0, 48, 0, 93,
0, 63, 71, 98, 0, 0, 0, 0, 0, 67, 0, 0, 0,
0, 87, 0, 0, 0, 0, 0, 90, 88, 0, 94,
};
static const unsigned char aNext[98] = {
0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
0, 12, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0,
0, 0, 0, 14, 3, 24, 0, 0, 0, 1, 22, 0, 0,
36, 23, 28, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0,
0, 49, 37, 0, 0, 0, 38, 0, 53, 0, 57, 62, 0,
0, 0, 0, 0, 0, 70, 46, 0, 65, 0, 0, 0, 0,
69, 73, 0, 77, 0, 0, 0, 0, 0, 0, 81, 85, 0,
91, 79, 78, 0, 0, 89, 0,
};
static const unsigned char aLen[98] = {
5, 5, 3, 3, 2, 3, 6, 6, 5, 7, 2, 7, 4,
5, 7, 6, 8, 10, 6, 5, 8, 7, 10, 8, 6, 4,
6, 8, 4, 4, 4, 3, 6, 9, 7, 4, 3, 7, 4,
4, 4, 5, 6, 6, 9, 2, 5, 9, 5, 6, 7, 9,
4, 2, 6, 4, 3, 4, 4, 5, 5, 7, 3, 7, 4,
2, 6, 2, 2, 5, 5, 6, 7, 5, 10, 7, 8, 5,
8, 3, 6, 3, 9, 5, 4, 9, 4, 11, 7, 5, 6,
6, 5, 6, 6, 4, 4, 5,
};
static const unsigned short int aOffset[98] = {
0, 5, 10, 13, 16, 16, 19, 25, 31, 36, 43, 45, 52,
56, 61, 68, 74, 82, 92, 98, 103, 111, 118, 128, 136, 142,
146, 152, 160, 164, 168, 172, 175, 181, 190, 197, 201, 201, 208,
212, 216, 220, 225, 231, 237, 246, 246, 251, 260, 265, 271, 278,
287, 291, 291, 297, 301, 304, 308, 312, 317, 322, 329, 329, 336,
340, 340, 346, 348, 348, 353, 358, 364, 371, 376, 386, 393, 401,
406, 414, 417, 423, 426, 435, 440, 440, 449, 453, 464, 471, 476,
482, 488, 493, 499, 505, 509, 513,
};
static const unsigned char aCode[98] = {
TK_ABORT, TK_AFTER, TK_ALL, TK_AND, TK_AS,
TK_ASC, TK_ATTACH, TK_BEFORE, TK_BEGIN, TK_BETWEEN,
TK_BY, TK_CASCADE, TK_CASE, TK_CHECK, TK_COLLATE,
TK_COMMIT, TK_CONFLICT, TK_CONSTRAINT, TK_CREATE, TK_JOIN_KW,
TK_DATABASE, TK_DEFAULT, TK_DEFERRABLE, TK_DEFERRED, TK_DELETE,
TK_DESC, TK_DETACH, TK_DISTINCT, TK_DROP, TK_EACH,
TK_ELSE, TK_END, TK_EXCEPT, TK_EXCLUSIVE, TK_EXPLAIN,
TK_FAIL, TK_FOR, TK_FOREIGN, TK_FROM, TK_JOIN_KW,
TK_GLOB, TK_GROUP, TK_HAVING, TK_IGNORE, TK_IMMEDIATE,
TK_IN, TK_INDEX, TK_INITIALLY, TK_JOIN_KW, TK_INSERT,
TK_INSTEAD, TK_INTERSECT, TK_INTO, TK_IS, TK_ISNULL,
TK_JOIN, TK_KEY, TK_JOIN_KW, TK_LIKE, TK_LIMIT,
TK_MATCH, TK_JOIN_KW, TK_NOT, TK_NOTNULL, TK_NULL,
TK_OF, TK_OFFSET, TK_ON, TK_OR, TK_ORDER,
TK_JOIN_KW, TK_PRAGMA, TK_PRIMARY, TK_RAISE, TK_REFERENCES,
TK_REPLACE, TK_RESTRICT, TK_JOIN_KW, TK_ROLLBACK, TK_ROW,
TK_SELECT, TK_SET, TK_STATEMENT, TK_TABLE, TK_TEMP,
TK_TEMP, TK_THEN, TK_TRANSACTION,TK_TRIGGER, TK_UNION,
TK_UNIQUE, TK_UPDATE, TK_USING, TK_VACUUM, TK_VALUES,
TK_VIEW, TK_WHEN, TK_WHERE,
};
int h, i;
Keyword *p;
static char needInit = 1;
if( needInit ){
/* Initialize the keyword hash table */
sqlite3OsEnterMutex();
if( needInit ){
int nk;
nk = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]);
for(i=0, p=aKeywordTable; i<nk; i++, p++){
const char *zName = p->zName;
int len = p->len = strlen(zName);
h = sqlite3HashNoCase(zName, len) % KEY_HASH_SIZE;
p->iNext = aiHashTable[h];
aiHashTable[h] = i+1;
}
needInit = 0;
}
sqlite3OsLeaveMutex();
}
h = sqlite3HashNoCase(z, n) % KEY_HASH_SIZE;
for(i=aiHashTable[h]; i; i=p->iNext){
p = &aKeywordTable[i-1];
if( p->len==n && sqlite3StrNICmp(p->zName, z, n)==0 ){
return p->tokenType;
if( n<2 ) return TK_ID;
h = (sqlite3UpperToLower[((unsigned char*)z)[0]]*5 +
sqlite3UpperToLower[((unsigned char*)z)[n-1]]*3 +
n) % 154;
for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){
if( aLen[i]==n && sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){
return aCode[i];
}
}
return TK_ID;
}
/*
** If X is a character that can be used in an identifier and
** X&0x80==0 then isIdChar[X] will be 1. If X&0x80==0x80 then

312
tool/mkkeywordhash.c Normal file
View File

@ -0,0 +1,312 @@
/*
** Compile and run this standalone program in order to generate code that
** implements a function that will translate alphabetic identifiers into
** parser token codes.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
** All the keywords of the SQL language are stored as in a hash
** table composed of instances of the following structure.
*/
typedef struct Keyword Keyword;
struct Keyword {
char *zName; /* The keyword name */
char *zTokenType; /* Token value for this keyword */
int hash; /* Hash on the keyword */
int offset; /* Offset to start of name string */
int len; /* Length of this keyword, not counting final \000 */
int iNext; /* Index in aKeywordTable[] of next with same hash */
};
/*
** These are the keywords
*/
static Keyword aKeywordTable[] = {
{ "ABORT", "TK_ABORT", },
{ "AFTER", "TK_AFTER", },
{ "ALL", "TK_ALL", },
{ "AND", "TK_AND", },
{ "AS", "TK_AS", },
{ "ASC", "TK_ASC", },
{ "ATTACH", "TK_ATTACH", },
{ "BEFORE", "TK_BEFORE", },
{ "BEGIN", "TK_BEGIN", },
{ "BETWEEN", "TK_BETWEEN", },
{ "BY", "TK_BY", },
{ "CASCADE", "TK_CASCADE", },
{ "CASE", "TK_CASE", },
{ "CHECK", "TK_CHECK", },
{ "COLLATE", "TK_COLLATE", },
{ "COMMIT", "TK_COMMIT", },
{ "CONFLICT", "TK_CONFLICT", },
{ "CONSTRAINT", "TK_CONSTRAINT", },
{ "CREATE", "TK_CREATE", },
{ "CROSS", "TK_JOIN_KW", },
{ "DATABASE", "TK_DATABASE", },
{ "DEFAULT", "TK_DEFAULT", },
{ "DEFERRED", "TK_DEFERRED", },
{ "DEFERRABLE", "TK_DEFERRABLE", },
{ "DELETE", "TK_DELETE", },
{ "DESC", "TK_DESC", },
{ "DETACH", "TK_DETACH", },
{ "DISTINCT", "TK_DISTINCT", },
{ "DROP", "TK_DROP", },
{ "END", "TK_END", },
{ "EACH", "TK_EACH", },
{ "ELSE", "TK_ELSE", },
{ "EXCEPT", "TK_EXCEPT", },
{ "EXCLUSIVE", "TK_EXCLUSIVE", },
{ "EXPLAIN", "TK_EXPLAIN", },
{ "FAIL", "TK_FAIL", },
{ "FOR", "TK_FOR", },
{ "FOREIGN", "TK_FOREIGN", },
{ "FROM", "TK_FROM", },
{ "FULL", "TK_JOIN_KW", },
{ "GLOB", "TK_GLOB", },
{ "GROUP", "TK_GROUP", },
{ "HAVING", "TK_HAVING", },
{ "IGNORE", "TK_IGNORE", },
{ "IMMEDIATE", "TK_IMMEDIATE", },
{ "IN", "TK_IN", },
{ "INDEX", "TK_INDEX", },
{ "INITIALLY", "TK_INITIALLY", },
{ "INNER", "TK_JOIN_KW", },
{ "INSERT", "TK_INSERT", },
{ "INSTEAD", "TK_INSTEAD", },
{ "INTERSECT", "TK_INTERSECT", },
{ "INTO", "TK_INTO", },
{ "IS", "TK_IS", },
{ "ISNULL", "TK_ISNULL", },
{ "JOIN", "TK_JOIN", },
{ "KEY", "TK_KEY", },
{ "LEFT", "TK_JOIN_KW", },
{ "LIKE", "TK_LIKE", },
{ "LIMIT", "TK_LIMIT", },
{ "MATCH", "TK_MATCH", },
{ "NATURAL", "TK_JOIN_KW", },
{ "NOT", "TK_NOT", },
{ "NOTNULL", "TK_NOTNULL", },
{ "NULL", "TK_NULL", },
{ "OF", "TK_OF", },
{ "OFFSET", "TK_OFFSET", },
{ "ON", "TK_ON", },
{ "OR", "TK_OR", },
{ "ORDER", "TK_ORDER", },
{ "OUTER", "TK_JOIN_KW", },
{ "PRAGMA", "TK_PRAGMA", },
{ "PRIMARY", "TK_PRIMARY", },
{ "RAISE", "TK_RAISE", },
{ "REFERENCES", "TK_REFERENCES", },
{ "REPLACE", "TK_REPLACE", },
{ "RESTRICT", "TK_RESTRICT", },
{ "RIGHT", "TK_JOIN_KW", },
{ "ROLLBACK", "TK_ROLLBACK", },
{ "ROW", "TK_ROW", },
{ "SELECT", "TK_SELECT", },
{ "SET", "TK_SET", },
{ "STATEMENT", "TK_STATEMENT", },
{ "TABLE", "TK_TABLE", },
{ "TEMP", "TK_TEMP", },
{ "TEMPORARY", "TK_TEMP", },
{ "THEN", "TK_THEN", },
{ "TRANSACTION", "TK_TRANSACTION", },
{ "TRIGGER", "TK_TRIGGER", },
{ "UNION", "TK_UNION", },
{ "UNIQUE", "TK_UNIQUE", },
{ "UPDATE", "TK_UPDATE", },
{ "USING", "TK_USING", },
{ "VACUUM", "TK_VACUUM", },
{ "VALUES", "TK_VALUES", },
{ "VIEW", "TK_VIEW", },
{ "WHEN", "TK_WHEN", },
{ "WHERE", "TK_WHERE", },
};
/* Number of keywords */
#define NKEYWORD (sizeof(aKeywordTable)/sizeof(aKeywordTable[0]))
/* An array to map all upper-case characters into their corresponding
** lower-case character.
*/
const unsigned char sqlite3UpperToLower[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107,
108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,
162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,
180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,
198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,
234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,
252,253,254,255
};
#define UpperToLower sqlite3UpperToLower
/*
** Comparision function for two Keyword records
*/
static int keywordCompare(const void *a, const void *b){
const Keyword *pA = (Keyword*)a;
const Keyword *pB = (Keyword*)b;
return strcmp(pA->zName, pB->zName);
}
/*
** This routine does the work. The generated code is printed on standard
** output.
*/
int main(int argc, char **argv){
int i, j, h;
int bestSize, bestCount;
int count;
int nChar;
int aHash[1000]; /* 1000 is much bigger than NKEYWORD */
/* Make sure the table is sorted */
qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare);
/* Fill in the hash value, length, and offset for all entries */
nChar = 0;
for(i=0; i<NKEYWORD; i++){
Keyword *p = &aKeywordTable[i];
p->len = strlen(p->zName);
/* p->hash = sqlite3HashNoCase(p->zName, p->len); */
p->hash = UpperToLower[p->zName[0]]*5 +
UpperToLower[p->zName[p->len-1]]*3 + p->len;
p->offset = nChar;
if( i<NKEYWORD-1 && strncmp(p->zName, aKeywordTable[i+1].zName,p->len)==0 ){
/* This entry is a prefix of the one that follows. Do not advance
** the offset */
}else{
nChar += p->len;
}
}
/* Figure out how big to make the hash table in order to minimize the
** number of collisions */
bestSize = NKEYWORD;
bestCount = NKEYWORD*NKEYWORD;
for(i=NKEYWORD/2; i<=2*NKEYWORD; i++){
for(j=0; j<i; j++) aHash[j] = 0;
for(j=0; j<NKEYWORD; j++){
h = aKeywordTable[j].hash % i;
aHash[h] *= 2;
aHash[h]++;
}
for(j=count=0; j<i; j++) count += aHash[j];
if( count<bestCount ){
bestCount = count;
bestSize = i;
}
}
/* Compute the hash */
for(i=0; i<bestSize; i++) aHash[i] = 0;
for(i=0; i<NKEYWORD; i++){
h = aKeywordTable[i].hash % bestSize;
aKeywordTable[i].iNext = aHash[h];
aHash[h] = i+1;
}
/* Begin generating code */
printf("int sqlite3KeywordCode(const char *z, int n){\n");
printf(" static const char zText[%d] =\n", nChar+1);
for(i=j=0; i<NKEYWORD; i++){
Keyword *p = &aKeywordTable[i];
if( i<NKEYWORD-1 && p->offset==aKeywordTable[i+1].offset ) continue;
if( j==0 ) printf(" \"");
printf("%s", p->zName);
j += p->len;
if( j>60 ){
printf("\"\n");
j = 0;
}
}
printf("%s;\n", j>0 ? "\"" : " ");
printf(" static const unsigned char aHash[%d] = {\n", bestSize);
for(i=j=0; i<bestSize; i++){
if( j==0 ) printf(" ");
printf(" %3d,", aHash[i]);
j++;
if( j>12 ){
printf("\n");
j = 0;
}
}
printf("%s };\n", j==0 ? "" : "\n");
printf(" static const unsigned char aNext[%d] = {\n", NKEYWORD);
for(i=j=0; i<NKEYWORD; i++){
if( j==0 ) printf(" ");
printf(" %3d,", aKeywordTable[i].iNext);
j++;
if( j>12 ){
printf("\n");
j = 0;
}
}
printf("%s };\n", j==0 ? "" : "\n");
printf(" static const unsigned char aLen[%d] = {\n", NKEYWORD);
for(i=j=0; i<NKEYWORD; i++){
if( j==0 ) printf(" ");
printf(" %3d,", aKeywordTable[i].len);
j++;
if( j>12 ){
printf("\n");
j = 0;
}
}
printf("%s };\n", j==0 ? "" : "\n");
printf(" static const unsigned short int aOffset[%d] = {\n", NKEYWORD);
for(i=j=0; i<NKEYWORD; i++){
if( j==0 ) printf(" ");
printf(" %3d,", aKeywordTable[i].offset);
j++;
if( j>12 ){
printf("\n");
j = 0;
}
}
printf("%s };\n", j==0 ? "" : "\n");
printf(" static const unsigned char aCode[%d] = {\n", NKEYWORD);
for(i=j=0; i<NKEYWORD; i++){
char *zToken = aKeywordTable[i].zTokenType;
if( j==0 ) printf(" ");
printf("%s,%*s", zToken, (int)(14-strlen(zToken)), "");
j++;
if( j>=5 ){
printf("\n");
j = 0;
}
}
printf("%s };\n", j==0 ? "" : "\n");
printf(" int h, i;\n");
printf(" if( n<2 ) return TK_ID;\n");
printf(" h = (sqlite3UpperToLower[((unsigned char*)z)[0]]*5 + \n"
" sqlite3UpperToLower[((unsigned char*)z)[n-1]]*3 +\n"
" n) %% %d;\n", bestSize);
printf(" for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){\n");
printf(" if( aLen[i]==n &&"
" sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){\n");
printf(" return aCode[i];\n");
printf(" }\n");
printf(" }\n");
printf(" return TK_ID;\n");
printf("}\n");
return 0;
}

View File

@ -22,7 +22,8 @@ Features include:
<li>A complete database is stored in a single disk file.</li>
<li>Database files can be freely shared between machines with
different byte orders.</li>
<li>Supports databases up to 2 terabytes (2<sup>41</sup> bytes) in size.</li>
<li>Supports databases up to 2 terabytes
(2<sup><small>41</small></sup> bytes) in size.</li>
<li>Sizes of strings and BLOBs limited only by available memory.</li>
<li>Small code footprint: less than 30K lines of C code,
less than 250KB code space (gcc on i486)</li>
@ -33,7 +34,7 @@ Features include:
Bindings for many other languages
<a href="http://www.sqlite.org/cvstrac/wiki?p=SqliteWrappers">
available separately.</a></li>
<li>Well-commented source code with over 90% test coverage.</li>
<li>Well-commented source code with over 95% test coverage.</li>
<li>Self-contained: no external dependencies.</li>
<li>Sources are in the <a href="copyright.html">public domain</a>.
Use for any purpose.</li>
@ -80,4 +81,4 @@ puts {
<p align="right"><a href="oldnews.html">Old news...</a></p>
</td></tr></table>
}
footer {$Id: index.tcl,v 1.97 2004/09/18 18:51:09 drh Exp $}
footer {$Id: index.tcl,v 1.98 2004/10/07 19:03:02 drh Exp $}