Improve the speed of the tokenizer by recognizing that tokens starting

with letters "_", "Y", or "Z" can never be SQL keywords and must be ordinary
identifiers.

FossilOrigin-Name: 16e281ed6219cc229dec7e3f1b40da2304dc270a74fd6ef78d04a088e30e7026
This commit is contained in:
drh 2020-11-27 20:56:16 +00:00
parent cad760d16e
commit d1032f952c
3 changed files with 38 additions and 33 deletions

View File

@ -1,5 +1,5 @@
C Fix\sa\scompiler\swarning\sin\sfts5_aux.c.
D 2020-11-27T19:40:13.829
C Improve\sthe\sspeed\sof\sthe\stokenizer\sby\srecognizing\sthat\stokens\sstarting\nwith\sletters\s"_",\s"Y",\sor\s"Z"\scan\snever\sbe\sSQL\skeywords\sand\smust\sbe\sordinary\nidentifiers.
D 2020-11-27T20:56:16.951
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -603,7 +603,7 @@ F src/test_windirent.h 90dfbe95442c9762357fe128dc7ae3dc199d006de93eb33ba3972e0a9
F src/test_window.c cdae419fdcea5bad6dcd9368c685abdad6deb59e9fc8b84b153de513d394ba3f
F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9
F src/threads.c 4ae07fa022a3dc7c5beb373cf744a85d3c5c6c3c
F src/tokenize.c 4dc01b267593537e2a0d0efe9f80dabe24c5b6f7627bc6971c487fa6a1dacbbf
F src/tokenize.c 01dba3023659dc6f6b1e054c14b35a0074bd35de10466b99454d33278191d97f
F src/treeview.c 4b92992176fb2caefbe06ba5bd06e0e0ebcde3d5564758da672631f17aa51cda
F src/trigger.c 515e79206d40d1d4149129318582e79a6e9db590a7b74e226fdb5b2a6c7e1b10
F src/update.c 9f126204a6acb96bbe47391ae48e0fc579105d8e76a6d9c4fab3271367476580
@ -1886,7 +1886,10 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 6ff9673847c0b4174d9435e93d19af0ee7406b1a12edeb6edec98697e1646824
R b9d507fc84194599f6126b12d3d5bf12
U dan
Z 59431772996995dfd9988b1c0c070b2d
P 8edb983bc87898eff2cd2e7e672a32a47c71b2be9d818513d339e95560d45b2b
R a0457978bc43ec4e3c0a4c4481e2d29f
T *branch * faster-tokenizer
T *sym-faster-tokenizer *
T -sym-trunk *
U drh
Z 7f6094958178f08bfb909f826361ecf4

View File

@ -1 +1 @@
8edb983bc87898eff2cd2e7e672a32a47c71b2be9d818513d339e95560d45b2b
16e281ed6219cc229dec7e3f1b40da2304dc270a74fd6ef78d04a088e30e7026

View File

@ -27,8 +27,8 @@
** all of them need to be used within the switch.
*/
#define CC_X 0 /* The letter 'x', or start of BLOB literal */
#define CC_KYWD 1 /* Alphabetics or '_'. Usable in a keyword */
#define CC_ID 2 /* unicode characters usable in IDs */
#define CC_KYWD0 1 /* First letter of a keyword */
#define CC_KYWD 2 /* Alphabetics or '_'. Usable in a keyword */
#define CC_DIGIT 3 /* Digits */
#define CC_DOLLAR 4 /* '$' */
#define CC_VARALPHA 5 /* '@', '#', ':'. Alphabetic SQL variables */
@ -53,20 +53,21 @@
#define CC_AND 24 /* '&' */
#define CC_TILDA 25 /* '~' */
#define CC_DOT 26 /* '.' */
#define CC_ILLEGAL 27 /* Illegal character */
#define CC_NUL 28 /* 0x00 */
#define CC_ID 27 /* unicode characters usable in IDs */
#define CC_ILLEGAL 28 /* Illegal character */
#define CC_NUL 29 /* 0x00 */
static const unsigned char aiClass[] = {
#ifdef SQLITE_ASCII
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */
/* 0x */ 28, 27, 27, 27, 27, 27, 27, 27, 27, 7, 7, 27, 7, 7, 27, 27,
/* 1x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* 0x */ 29, 28, 28, 28, 28, 28, 28, 28, 28, 7, 7, 28, 7, 7, 28, 28,
/* 1x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
/* 2x */ 7, 15, 8, 5, 4, 22, 24, 8, 17, 18, 21, 20, 23, 11, 26, 16,
/* 3x */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 19, 12, 14, 13, 6,
/* 4x */ 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 9, 27, 27, 27, 1,
/* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 9, 28, 28, 28, 2,
/* 6x */ 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 27, 10, 27, 25, 27,
/* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 28, 10, 28, 25, 28,
/* 8x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 9x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* Ax */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@ -78,22 +79,22 @@ static const unsigned char aiClass[] = {
#endif
#ifdef SQLITE_EBCDIC
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */
/* 0x */ 27, 27, 27, 27, 27, 7, 27, 27, 27, 27, 27, 27, 7, 7, 27, 27,
/* 1x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* 2x */ 27, 27, 27, 27, 27, 7, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* 3x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* 4x */ 7, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 26, 12, 17, 20, 10,
/* 5x */ 24, 27, 27, 27, 27, 27, 27, 27, 27, 27, 15, 4, 21, 18, 19, 27,
/* 6x */ 11, 16, 27, 27, 27, 27, 27, 27, 27, 27, 27, 23, 22, 1, 13, 6,
/* 7x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 8, 5, 5, 5, 8, 14, 8,
/* 8x */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27,
/* 9x */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27,
/* Ax */ 27, 25, 1, 1, 1, 1, 1, 0, 1, 1, 27, 27, 27, 27, 27, 27,
/* Bx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 9, 27, 27, 27, 27, 27,
/* Cx */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27,
/* Dx */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27,
/* Ex */ 27, 27, 1, 1, 1, 1, 1, 0, 1, 1, 27, 27, 27, 27, 27, 27,
/* Fx */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 27, 27, 27, 27, 27, 27,
/* 0x */ 29, 28, 28, 28, 28, 7, 28, 28, 28, 28, 28, 28, 7, 7, 28, 28,
/* 1x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
/* 2x */ 28, 28, 28, 28, 28, 7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
/* 3x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
/* 4x */ 7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 26, 12, 17, 20, 10,
/* 5x */ 24, 28, 28, 28, 28, 28, 28, 28, 28, 28, 15, 4, 21, 18, 19, 28,
/* 6x */ 11, 16, 28, 28, 28, 28, 28, 28, 28, 28, 28, 23, 22, 2, 13, 6,
/* 7x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 8, 5, 5, 5, 8, 14, 8,
/* 8x */ 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, 28,
/* 9x */ 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, 28,
/* Ax */ 28, 25, 1, 1, 1, 1, 1, 0, 2, 2, 28, 28, 28, 28, 28, 28,
/* Bx */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 9, 28, 28, 28, 28, 28,
/* Cx */ 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, 28,
/* Dx */ 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, 28,
/* Ex */ 28, 28, 1, 1, 1, 1, 1, 0, 2, 2, 28, 28, 28, 28, 28, 28,
/* Fx */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 28, 28, 28, 28, 28, 28,
#endif
};
@ -499,7 +500,7 @@ int sqlite3GetToken(const unsigned char *z, int *tokenType){
if( n==0 ) *tokenType = TK_ILLEGAL;
return i;
}
case CC_KYWD: {
case CC_KYWD0: {
for(i=1; aiClass[z[i]]<=CC_KYWD; i++){}
if( IdChar(z[i]) ){
/* This token started out using characters that can appear in keywords,
@ -529,6 +530,7 @@ int sqlite3GetToken(const unsigned char *z, int *tokenType){
** SQL keywords start with the letter 'x'. Fall through */
/* no break */ deliberate_fall_through
}
case CC_KYWD:
case CC_ID: {
i = 1;
break;