From d1032f952cad0b1710f6854684afc1f7831caf55 Mon Sep 17 00:00:00 2001 From: drh Date: Fri, 27 Nov 2020 20:56:16 +0000 Subject: [PATCH] Improve the speed of the tokenizer by recognizing that tokens starting with letters "_", "Y", or "Z" can never be SQL keywords and must be ordinary identifiers. FossilOrigin-Name: 16e281ed6219cc229dec7e3f1b40da2304dc270a74fd6ef78d04a088e30e7026 --- manifest | 17 ++++++++++------- manifest.uuid | 2 +- src/tokenize.c | 52 ++++++++++++++++++++++++++------------------------ 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/manifest b/manifest index ebcd99ac4e..d3f2807345 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\scompiler\swarning\sin\sfts5_aux.c. -D 2020-11-27T19:40:13.829 +C Improve\sthe\sspeed\sof\sthe\stokenizer\sby\srecognizing\sthat\stokens\sstarting\nwith\sletters\s"_",\s"Y",\sor\s"Z"\scan\snever\sbe\sSQL\skeywords\sand\smust\sbe\sordinary\nidentifiers. +D 2020-11-27T20:56:16.951 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -603,7 +603,7 @@ F src/test_windirent.h 90dfbe95442c9762357fe128dc7ae3dc199d006de93eb33ba3972e0a9 F src/test_window.c cdae419fdcea5bad6dcd9368c685abdad6deb59e9fc8b84b153de513d394ba3f F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9 F src/threads.c 4ae07fa022a3dc7c5beb373cf744a85d3c5c6c3c -F src/tokenize.c 4dc01b267593537e2a0d0efe9f80dabe24c5b6f7627bc6971c487fa6a1dacbbf +F src/tokenize.c 01dba3023659dc6f6b1e054c14b35a0074bd35de10466b99454d33278191d97f F src/treeview.c 4b92992176fb2caefbe06ba5bd06e0e0ebcde3d5564758da672631f17aa51cda F src/trigger.c 515e79206d40d1d4149129318582e79a6e9db590a7b74e226fdb5b2a6c7e1b10 F src/update.c 9f126204a6acb96bbe47391ae48e0fc579105d8e76a6d9c4fab3271367476580 @@ -1886,7 +1886,10 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 6ff9673847c0b4174d9435e93d19af0ee7406b1a12edeb6edec98697e1646824 -R b9d507fc84194599f6126b12d3d5bf12 -U dan -Z 59431772996995dfd9988b1c0c070b2d +P 8edb983bc87898eff2cd2e7e672a32a47c71b2be9d818513d339e95560d45b2b +R a0457978bc43ec4e3c0a4c4481e2d29f +T *branch * faster-tokenizer +T *sym-faster-tokenizer * +T -sym-trunk * +U drh +Z 7f6094958178f08bfb909f826361ecf4 diff --git a/manifest.uuid b/manifest.uuid index 67d18affcf..c33fe4a84a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -8edb983bc87898eff2cd2e7e672a32a47c71b2be9d818513d339e95560d45b2b \ No newline at end of file +16e281ed6219cc229dec7e3f1b40da2304dc270a74fd6ef78d04a088e30e7026 \ No newline at end of file diff --git a/src/tokenize.c b/src/tokenize.c index 70f0c63678..bafda0322b 100644 --- a/src/tokenize.c +++ b/src/tokenize.c @@ -27,8 +27,8 @@ ** all of them need to be used within the switch. */ #define CC_X 0 /* The letter 'x', or start of BLOB literal */ -#define CC_KYWD 1 /* Alphabetics or '_'. Usable in a keyword */ -#define CC_ID 2 /* unicode characters usable in IDs */ +#define CC_KYWD0 1 /* First letter of a keyword */ +#define CC_KYWD 2 /* Alphabetics or '_'. Usable in a keyword */ #define CC_DIGIT 3 /* Digits */ #define CC_DOLLAR 4 /* '$' */ #define CC_VARALPHA 5 /* '@', '#', ':'. Alphabetic SQL variables */ @@ -53,20 +53,21 @@ #define CC_AND 24 /* '&' */ #define CC_TILDA 25 /* '~' */ #define CC_DOT 26 /* '.' */ -#define CC_ILLEGAL 27 /* Illegal character */ -#define CC_NUL 28 /* 0x00 */ +#define CC_ID 27 /* unicode characters usable in IDs */ +#define CC_ILLEGAL 28 /* Illegal character */ +#define CC_NUL 29 /* 0x00 */ static const unsigned char aiClass[] = { #ifdef SQLITE_ASCII /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ -/* 0x */ 28, 27, 27, 27, 27, 27, 27, 27, 27, 7, 7, 27, 7, 7, 27, 27, -/* 1x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* 0x */ 29, 28, 28, 28, 28, 28, 28, 28, 28, 7, 7, 28, 7, 7, 28, 28, +/* 1x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, /* 2x */ 7, 15, 8, 5, 4, 22, 24, 8, 17, 18, 21, 20, 23, 11, 26, 16, /* 3x */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 19, 12, 14, 13, 6, /* 4x */ 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 9, 27, 27, 27, 1, +/* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 9, 28, 28, 28, 2, /* 6x */ 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 27, 10, 27, 25, 27, +/* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 28, 10, 28, 25, 28, /* 8x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 9x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* Ax */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -78,22 +79,22 @@ static const unsigned char aiClass[] = { #endif #ifdef SQLITE_EBCDIC /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ -/* 0x */ 27, 27, 27, 27, 27, 7, 27, 27, 27, 27, 27, 27, 7, 7, 27, 27, -/* 1x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, -/* 2x */ 27, 27, 27, 27, 27, 7, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, -/* 3x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, -/* 4x */ 7, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 26, 12, 17, 20, 10, -/* 5x */ 24, 27, 27, 27, 27, 27, 27, 27, 27, 27, 15, 4, 21, 18, 19, 27, -/* 6x */ 11, 16, 27, 27, 27, 27, 27, 27, 27, 27, 27, 23, 22, 1, 13, 6, -/* 7x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 8, 5, 5, 5, 8, 14, 8, -/* 8x */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27, -/* 9x */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27, -/* Ax */ 27, 25, 1, 1, 1, 1, 1, 0, 1, 1, 27, 27, 27, 27, 27, 27, -/* Bx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 9, 27, 27, 27, 27, 27, -/* Cx */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27, -/* Dx */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27, -/* Ex */ 27, 27, 1, 1, 1, 1, 1, 0, 1, 1, 27, 27, 27, 27, 27, 27, -/* Fx */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 27, 27, 27, 27, 27, 27, +/* 0x */ 29, 28, 28, 28, 28, 7, 28, 28, 28, 28, 28, 28, 7, 7, 28, 28, +/* 1x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +/* 2x */ 28, 28, 28, 28, 28, 7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +/* 3x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +/* 4x */ 7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 26, 12, 17, 20, 10, +/* 5x */ 24, 28, 28, 28, 28, 28, 28, 28, 28, 28, 15, 4, 21, 18, 19, 28, +/* 6x */ 11, 16, 28, 28, 28, 28, 28, 28, 28, 28, 28, 23, 22, 2, 13, 6, +/* 7x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 8, 5, 5, 5, 8, 14, 8, +/* 8x */ 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, 28, +/* 9x */ 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, 28, +/* Ax */ 28, 25, 1, 1, 1, 1, 1, 0, 2, 2, 28, 28, 28, 28, 28, 28, +/* Bx */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 9, 28, 28, 28, 28, 28, +/* Cx */ 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, 28, +/* Dx */ 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, 28, +/* Ex */ 28, 28, 1, 1, 1, 1, 1, 0, 2, 2, 28, 28, 28, 28, 28, 28, +/* Fx */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 28, 28, 28, 28, 28, 28, #endif }; @@ -499,7 +500,7 @@ int sqlite3GetToken(const unsigned char *z, int *tokenType){ if( n==0 ) *tokenType = TK_ILLEGAL; return i; } - case CC_KYWD: { + case CC_KYWD0: { for(i=1; aiClass[z[i]]<=CC_KYWD; i++){} if( IdChar(z[i]) ){ /* This token started out using characters that can appear in keywords, @@ -529,6 +530,7 @@ int sqlite3GetToken(const unsigned char *z, int *tokenType){ ** SQL keywords start with the letter 'x'. Fall through */ /* no break */ deliberate_fall_through } + case CC_KYWD: case CC_ID: { i = 1; break;