From 54bb56d82a04cb05b53af5475c2052a44756e879 Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 10 Nov 2015 03:30:51 +0000 Subject: [PATCH] Performance enhancement to the tokenizer. FossilOrigin-Name: 6ea2df86c95c226052f528424e9bee367a2e765a --- manifest | 14 +++++------ manifest.uuid | 2 +- src/tokenize.c | 4 ++-- tool/mkkeywordhash.c | 56 ++++++++++++++++---------------------------- 4 files changed, 30 insertions(+), 46 deletions(-) diff --git a/manifest b/manifest index a06ddf0f84..bef5b126ef 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Remove\san\sunused\snon-terminal\sfrom\sthe\sgrammar. -D 2015-11-10T00:02:49.412 +C Performance\senhancement\sto\sthe\stokenizer. +D 2015-11-10T03:30:51.926 F Makefile.in d828db6afa6c1fa060d01e33e4674408df1942a1 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc e928e68168df69b353300ac87c10105206653a03 @@ -395,7 +395,7 @@ F src/test_vfs.c 3b65d42e18b262805716bd96178c81da8f2d9283 F src/test_vfstrace.c bab9594adc976cbe696ff3970728830b4c5ed698 F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9 F src/threads.c bbfb74450643cb5372a43ad4f6cffd7e9dfcecb0 -F src/tokenize.c c1006aa773da5725ef55e8d48f69c11d7141d011 +F src/tokenize.c 5606871a377f390af7040ec3c12e0d183512d785 F src/treeview.c 78842e90c1f71269e7a73a1d4221b6fe360bab66 F src/trigger.c 322f23aad694e8f31d384dcfa386d52a48d3c52f F src/update.c 40e51cd0883cb5bfd6abb7d8a7cd8aa47fab2945 @@ -1360,7 +1360,7 @@ F tool/lempar.c 02cd882bd3144a5e25c86c652af49a6bc07baeae F tool/loadfts.c c3c64e4d5e90e8ba41159232c2189dba4be7b862 F tool/logest.c eef612f8adf4d0993dafed0416064cf50d5d33c6 F tool/mkautoconfamal.sh 4bdf61548a143e5977bd86ab93d68b694d10c8fa -F tool/mkkeywordhash.c dfff09dbbfaf950e89af294f48f902181b144670 +F tool/mkkeywordhash.c 37f9c2e62c31384b697ede8825e2d344e98db22c F tool/mkopcodec.tcl edde8adc42621b5e598127f8cdc6d52cfe21f52b F tool/mkopcodeh.tcl e04177031532b7aa9379ded50e820231ac4abd6e F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e @@ -1402,7 +1402,7 @@ F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P c4a7e93fca622fd11a6e16161fbd2f39c2575f00 -R 718dc1350ed033f1faf4636e4683af75 +P 3c37c522883ea9f2eec4f0ba5c5141912c003425 +R 40750d96ad92c103526c6bd35096918c U drh -Z ed4dab40ca0663dda7ca06a05db43ba5 +Z 8ead9b7802f9e64a677105e8e10b5cbd diff --git a/manifest.uuid b/manifest.uuid index 7b89cfcfaa..be66b8946e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3c37c522883ea9f2eec4f0ba5c5141912c003425 \ No newline at end of file +6ea2df86c95c226052f528424e9bee367a2e765a \ No newline at end of file diff --git a/src/tokenize.c b/src/tokenize.c index 81b98d593a..b85e35dc10 100644 --- a/src/tokenize.c +++ b/src/tokenize.c @@ -369,8 +369,8 @@ int sqlite3GetToken(const unsigned char *z, int *tokenType){ break; } for(i=1; IdChar(z[i]); i++){} - *tokenType = keywordCode((char*)z, i); - return i; + *tokenType = TK_ID; + return keywordCode((char*)z, i, tokenType); } } *tokenType = TK_ILLEGAL; diff --git a/tool/mkkeywordhash.c b/tool/mkkeywordhash.c index 721611f5a3..e4d393e3fa 100644 --- a/tool/mkkeywordhash.c +++ b/tool/mkkeywordhash.c @@ -277,27 +277,8 @@ static Keyword aKeywordTable[] = { /* Number of keywords */ static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0])); -/* An array to map all upper-case characters into their corresponding -** lower-case character. -*/ -const unsigned char sqlite3UpperToLower[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, - 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103, - 104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121, - 122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107, - 108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125, - 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161, - 162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179, - 180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197, - 198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215, - 216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233, - 234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251, - 252,253,254,255 -}; -#define UpperToLower sqlite3UpperToLower +/* Map all alphabetic characters into the same case */ +#define charMap(X) (0x20|(X)) /* ** Comparision function for two Keyword records @@ -347,7 +328,7 @@ static Keyword *findById(int id){ ** output. */ int main(int argc, char **argv){ - int i, j, k, h; + int i, j, k, h, m; int bestSize, bestCount; int count; int nChar; @@ -372,8 +353,8 @@ int main(int argc, char **argv){ assert( p->lenzOrigName) ); memcpy(p->zOrigName, p->zName, p->len+1); totalLen += p->len; - p->hash = (UpperToLower[(int)p->zName[0]]*4) ^ - (UpperToLower[(int)p->zName[p->len-1]]*3) ^ p->len; + p->hash = (charMap(p->zName[0])*4) ^ + (charMap(p->zName[p->len-1])*3) ^ (p->len*1); p->id = i+1; } @@ -481,7 +462,7 @@ int main(int argc, char **argv){ /* Begin generating code */ printf("%s", zHdr); printf("/* Hash score: %d */\n", bestCount); - printf("static int keywordCode(const char *z, int n){\n"); + printf("static int keywordCode(const char *z, int n, int *pType){\n"); printf(" /* zText[] encodes %d bytes of keywords in %d bytes */\n", totalLen + nKeyword, nChar+1 ); for(i=j=k=0; i=0; i=((int)aNext[i])-1){\n"); - printf(" if( aLen[i]==n &&" - " sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){\n"); + printf(" if( n>=2 ){\n"); + printf(" h = ((charMap(z[0])*4) ^ (charMap(z[n-1])*3) ^ n) %% %d;\n", + bestSize); + printf(" for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){\n"); + printf(" if( aLen[i]==n &&" + " sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){\n"); for(i=0; i