From 643d855da9625dd18f784674792dadd9697bb3ad Mon Sep 17 00:00:00 2001 From: drh Date: Mon, 10 Dec 2018 16:00:57 +0000 Subject: [PATCH] Refactor the sqlite3_normalized_sql() implementation. This is a work-in-progress. There are still issues. FossilOrigin-Name: a4c890b0af9786295e6df05022009d8946550adb873535c610be805c2b7a4083 --- manifest | 24 +++--- manifest.uuid | 2 +- src/prepare.c | 198 ------------------------------------------------ src/sqliteInt.h | 3 - src/tokenize.c | 197 +++++++++++++++++++++++++++++++---------------- src/vdbe.h | 2 +- src/vdbeaux.c | 16 +--- 7 files changed, 148 insertions(+), 294 deletions(-) diff --git a/manifest b/manifest index 544f8a6898..c0a5842691 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\ssupport\sfor\sthe\sVACUUM\sINTO\scommand. -D 2018-12-10T01:48:29.276 +C Refactor\sthe\ssqlite3_normalized_sql()\simplementation.\sThis\sis\sa\nwork-in-progress.\sThere\sare\sstill\sissues. +D 2018-12-10T16:00:57.538 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F Makefile.in 68d0ba0f0b533d5bc84c78c13a6ce84ee81183a67014caa47a969e67f028fa1c @@ -502,7 +502,7 @@ F src/pcache.h 4f87acd914cef5016fae3030343540d75f5b85a1877eed1a2a19b9f284248586 F src/pcache1.c ad0ffc5b35b0280d045ac569d34d4b842e3e6a4a118f6396b320987a0957afcc F src/pragma.c 96ce7dce4dc9cb2b7aa0e1b2ce7536870bdc00b10becc278245e775489447ea0 F src/pragma.h fdd03d78a7497f74a3f652909f945328480089189526841ae829ce7313d98d13 -F src/prepare.c be449edb106a16f1ad95f9b798bdc2337f8c3f83b96c284f417c0a26d43f0c1b +F src/prepare.c 0e8fc0deaf36da104e08d07ce7d97bc09ab57d078b399381532fec3fa1d3f2bb F src/printf.c 0f1177cf1dd4d7827bf64d840768514ec76409abecaca9e8b577dbd065150381 F src/random.c 80f5d666f23feb3e6665a6ce04c7197212a88384 F src/resolve.c 095d1d41d7a981ee9db8bfeae25ed0d6a8a5e5e3d66b0f4efd71877ed7b56132 @@ -512,7 +512,7 @@ F src/shell.c.in 
5f38bd0e127c2cc4e506b5c3565c10879ddfae6c2d867bb5972563e40717c19 F src/sqlite.h.in 908ec406feefc4c7e1486a2e3dc30a8bfb51c5a345a8e8130ac201962db171c4 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h 960f1b86c3610fa23cb6a267572a97dcf286e77aa0dd3b9b23292ffaa1ea8683 -F src/sqliteInt.h 70ce5e14c887554d3c51f2045f5a95b6e83de745d7f6448e79e49fdd8dfc2d5c +F src/sqliteInt.h 369d4774d97643e26085e0ea4cdee6afeadaa27670193b9eebaea8c95687fad0 F src/sqliteLimit.h 1513bfb7b20378aa0041e7022d04acb73525de35b80b252f1b83fedb4de6a76b F src/status.c 46e7aec11f79dad50965a5ca5fa9de009f7d6bde08be2156f1538a0a296d4d0e F src/table.c b46ad567748f24a326d9de40e5b9659f96ffff34 @@ -570,7 +570,7 @@ F src/test_windirent.h 90dfbe95442c9762357fe128dc7ae3dc199d006de93eb33ba3972e0a9 F src/test_window.c cdae419fdcea5bad6dcd9368c685abdad6deb59e9fc8b84b153de513d394ba3f F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9 F src/threads.c 4ae07fa022a3dc7c5beb373cf744a85d3c5c6c3c -F src/tokenize.c 9e781e1ca80eefe7b5d6a9e2cd5c678c847da55fd6f093781fad7950934d4c83 +F src/tokenize.c a43ecbafefceb475522f17d6f92d64193a755fe3f35615dfca13340c81ca3872 F src/treeview.c 7b12ac059de54c939b6eb0dbffc9410c29c80d2470cee5cbe07d5ff9ea2d9253 F src/trigger.c d3d78568f37fb2e6cdcc2d1e7b60156f15b0b600adec55b83c5d42f6cad250bd F src/update.c 1816d56c1bca1ba4e0ef98cac2f49be62858e9df1dc08844c7067eb41cc44274 @@ -579,10 +579,10 @@ F src/utf.c 810fbfebe12359f10bc2a011520a6e10879ab2a163bcb26c74768eab82ea62a5 F src/util.c d9eb0a6c4aae1b00a7369eadd7ca0bbe946cb4c953b6751aa20d357c2f482157 F src/vacuum.c 3ffe64ecfc94b7528c5d7bdb1c3a19d72fec63f2aa846e3b90f8de5dbbddf5aa F src/vdbe.c 55bafc424748d9ed505ab2680736e51d1bb05c01e9885cbb3b287b51dc8b47ec -F src/vdbe.h d82f323d581b36b8e147d650257ef34e0e93790039b6cbda45c321c275f7595e +F src/vdbe.h 8990d668a89890a33326b0a29b992c4014b72f3b6cdcd9ee0e190593c247f9b0 F src/vdbeInt.h 73f5051923f3f29779bfc374c0c68e23b8e5e3792def2e33e51b427edb890abd F src/vdbeapi.c 
9709452bee82963e1f7f1f5d0c71db823d553f8dbb2c47a911c4983d537a1947 -F src/vdbeaux.c 9a9617666124e18cbd6e936740f7469dcf0d82867b1abf9ed828694500930b64 +F src/vdbeaux.c c72fc6015e52c212d6c6db7cee04bfbfbd681106f551c9296812a73082fb09d6 F src/vdbeblob.c f5c70f973ea3a9e915d1693278a5f890dc78594300cf4d54e64f2b0917c94191 F src/vdbemem.c 7b3305bc4a5139f4536ac9b5f61da0f915e49d2e3fdfa87dfdfa9d7aba8bc1e9 F src/vdbesort.c 90aad5a92608f2dd771c96749beabdb562c9d881131a860a7a5bccf66dc3be7f @@ -1783,8 +1783,10 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P b7bf3c9832bde26b359f4d58ebcf7788c500586d311387d022192ed65a126252 9748d7995bc1dd632d66d2d326048d50e29c6f5e6993d5a6294d14421d2cb72f -R f78f1915a996819be59599569cf251be -T +closed 9748d7995bc1dd632d66d2d326048d50e29c6f5e6993d5a6294d14421d2cb72f +P 77f150b8b46761f4f62f9d8926c10a95a70589a4525393fc16b321bd98c083a7 +R d755f31eec9cdb1a3415665c2aacb5ab +T *branch * normalize-refactor +T *sym-normalize-refactor * +T -sym-trunk * U drh -Z ee3d9a1eeb178b5acf328ec54a590f24 +Z f90c6b85a4f2311f0e8b3d6a605ae025 diff --git a/manifest.uuid b/manifest.uuid index fb0635c7f2..5d73daf1c8 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -77f150b8b46761f4f62f9d8926c10a95a70589a4525393fc16b321bd98c083a7 \ No newline at end of file +a4c890b0af9786295e6df05022009d8946550adb873535c610be805c2b7a4083 \ No newline at end of file diff --git a/src/prepare.c b/src/prepare.c index 811ef95aea..e06b7cb6ad 100644 --- a/src/prepare.c +++ b/src/prepare.c @@ -709,204 +709,6 @@ static int sqlite3LockAndPrepare( return rc; } -#ifdef SQLITE_ENABLE_NORMALIZE - -/* -** Attempt to estimate the final output buffer size needed for the fully -** normalized version of the specified SQL string. 
This should take into -** account any potential expansion that could occur (e.g. via IN clauses -** being expanded, etc). This size returned is the total number of bytes -** including the NUL terminator. -*/ -static int estimateNormalizedSize( - const char *zSql, /* The original SQL string */ - int nSql /* Length of original SQL string */ -){ - int nOut = nSql + 4; - const char *z = zSql; - while( nOut0 ){ - zOut[j++] = '"'; - continue; - }else if( k==nToken-1 ){ - zOut[j++] = '"'; - continue; - } - } - if( bKeyword ){ - zOut[j++] = sqlite3Toupper(zSql[iIn+k]); - }else{ - zOut[j++] = sqlite3Tolower(zSql[iIn+k]); - } - } - *piOut = j; -} - -/* -** Compute a normalization of the SQL given by zSql[0..nSql-1]. Return -** the normalization in space obtained from sqlite3DbMalloc(). Or return -** NULL if anything goes wrong or if zSql is NULL. -*/ -char *sqlite3Normalize( - Vdbe *pVdbe, /* VM being reprepared */ - const char *zSql, /* The original SQL string */ - int nSql /* Size of the input string in bytes */ -){ - sqlite3 *db; /* Database handle. 
*/ - char *z; /* The output string */ - int nZ; /* Size of the output string in bytes */ - int i; /* Next character to read from zSql[] */ - int j; /* Next character to fill in on z[] */ - int tokenType = 0; /* Type of the next token */ - int prevTokenType = 0; /* Type of the previous token, except spaces */ - int n; /* Size of the next token */ - int nParen = 0; /* Nesting level of parenthesis */ - int iStartIN = 0; /* Start of RHS of IN operator in z[] */ - int nParenAtIN = 0; /* Value of nParent at start of RHS of IN operator */ - - db = sqlite3VdbeDb(pVdbe); - assert( db!=0 ); - if( zSql==0 ) return 0; - nZ = estimateNormalizedSize(zSql, nSql); - z = sqlite3DbMallocRawNN(db, nZ); - if( z==0 ) goto normalizeError; - for(i=j=0; i0 && nParen==nParenAtIN ){ - assert( iStartIN+6=0 ); - assert( nZ-1-j=0 ); - /* Fall through */ - } - case TK_MINUS: - case TK_SEMI: - case TK_PLUS: - case TK_STAR: - case TK_SLASH: - case TK_REM: - case TK_EQ: - case TK_LE: - case TK_NE: - case TK_LSHIFT: - case TK_LT: - case TK_RSHIFT: - case TK_GT: - case TK_GE: - case TK_BITOR: - case TK_CONCAT: - case TK_COMMA: - case TK_BITAND: - case TK_BITNOT: - case TK_DOT: - case TK_IN: - case TK_IS: - case TK_NOT: - case TK_NULL: - case TK_ID: { - if( tokenType==TK_NULL ){ - if( prevTokenType==TK_IS || prevTokenType==TK_NOT ){ - /* NULL is a keyword in this case, not a literal value */ - }else{ - /* Here the NULL is a literal value */ - z[j++] = '?'; - break; - } - } - if( j>0 && sqlite3IsIdChar(z[j-1]) && sqlite3IsIdChar(zSql[i]) ){ - z[j++] = ' '; - } - if( tokenType==TK_ID ){ - if( zSql[i]=='"' - && sqlite3VdbeUsesDoubleQuotedString(db,pVdbe,zSql+i,n) - ){ - z[j++] = '?'; - break; - } - if( nParen==nParenAtIN ) iStartIN = 0; - } - copyNormalizedToken(zSql, i, n, flags, z, &j); - break; - } - } - } - assert( j0 && z[j-1]==' ' ){ j--; } - if( j>0 && z[j-1]!=';' ){ z[j++] = ';'; } - z[j] = 0; - assert( jrc!=SQLITE_OK ); return nErr; } + + +#ifdef SQLITE_ENABLE_NORMALIZE +/* +** Insert a single 
space character into pStr if the current string +** ends with an identifier +*/ +static void addSpaceSeparator(sqlite3_str *pStr){ + if( pStr->nChar && sqlite3IsIdChar(pStr->zText[pStr->nChar-1]) ){ + sqlite3_str_append(pStr, " ", 1); + } +} + +/* +** Compute a normalization of the SQL given by zSql[0..nSql-1]. Return +** the normalization in space obtained from sqlite3DbMalloc(). Or return +** NULL if anything goes wrong or if zSql is NULL. +*/ +char *sqlite3Normalize( + Vdbe *pVdbe, /* VM being reprepared */ + const char *zSql, /* The original SQL string */ + int nSql /* Size of the input string in bytes */ +){ + sqlite3 *db; /* The database connection */ + int i; /* Next unread byte of zSql[] */ + int n; /* length of current token */ + int tokenType; /* type of current token */ + int prevType; /* Previous non-whitespace token */ + int nParen; /* Number of nested levels of parentheses */ + int iStartIN; /* Start of RHS of IN operator in z[] */ + int nParenAtIN; /* Value of nParent at start of RHS of IN operator */ + int j; /* Bytes of normalized SQL generated so far */ + sqlite3_str *pStr; /* The normalized SQL string under construction */ + + if( zSql==0 || nSql==0 ) return 0; + db = sqlite3VdbeDb(pVdbe); + tokenType = -1; + nParen = iStartIN = nParenAtIN = 0; + pStr = sqlite3_str_new(db); + for(i=0; i<nSql && pStr->accError==0; i+=n){ + if( tokenType!=TK_SPACE ){ + prevType = tokenType; + } + n = sqlite3GetToken((unsigned char*)zSql+i, &tokenType); + if( NEVER(n<=0) ) break; + switch( tokenType ){ + case TK_SPACE: { + break; + } + case TK_NULL: { + if( prevType==TK_IS || prevType==TK_NOT ){ + sqlite3_str_append(pStr, " NULL", 5); + break; + } + /* Fall through */ + } + case TK_STRING: + case TK_INTEGER: + case TK_FLOAT: + case TK_VARIABLE: + case TK_BLOB: { + sqlite3_str_append(pStr, "?", 1); + break; + } + case TK_LP: { + nParen++; + if( prevType==TK_IN ){ + iStartIN = pStr->nChar; + nParenAtIN = nParen; + } + sqlite3_str_append(pStr, "(", 1); + break; + } + case TK_RP: { + 
if( iStartIN>0 && nParen==nParenAtIN ){ + assert( pStr->nChar>=iStartIN ); + pStr->nChar = iStartIN+1; + sqlite3_str_append(pStr, "?,?,?", 5); + iStartIN = 0; + } + nParen--; + sqlite3_str_append(pStr, ")", 1); + break; + } + case TK_ID: { + j = pStr->nChar; + if( sqlite3Isquote(zSql[i]) ){ + char *zId = sqlite3DbStrNDup(db, zSql+i, n); + int nId; + int eType = 0; + if( zId==0 ) break; + sqlite3Dequote(zId); + if( zSql[i]=='"' && sqlite3VdbeUsesDoubleQuotedString(pVdbe, zId) ){ + sqlite3_str_append(pStr, "?", 1); + sqlite3DbFree(db, zId); + break; + } + nId = sqlite3Strlen30(zId); + if( sqlite3GetToken((u8*)zId, &eType)==nId && eType==TK_ID ){ + addSpaceSeparator(pStr); + sqlite3_str_append(pStr, zId, nId); + }else{ + sqlite3_str_appendf(pStr, "\"%w\"", zId); + } + sqlite3DbFree(db, zId); + }else{ + addSpaceSeparator(pStr); + sqlite3_str_append(pStr, zSql+i, n); + } + while( j<pStr->nChar ){ + pStr->zText[j] = sqlite3Tolower(pStr->zText[j]); + j++; + } + break; + } + default: { + if( sqlite3IsIdChar(zSql[i]) ) addSpaceSeparator(pStr); + j = pStr->nChar; + sqlite3_str_append(pStr, zSql+i, n); + while( j<pStr->nChar ){ + pStr->zText[j] = sqlite3Toupper(pStr->zText[j]); + j++; + } + break; + } + } + } + return sqlite3_str_finish(pStr); +} +#endif /* SQLITE_ENABLE_NORMALIZE */ diff --git a/src/vdbe.h b/src/vdbe.h index 1712b8b224..f4d360e49e 100644 --- a/src/vdbe.h +++ b/src/vdbe.h @@ -253,7 +253,7 @@ u8 sqlite3VdbePrepareFlags(Vdbe*); void sqlite3VdbeSetSql(Vdbe*, const char *z, int n, u8); #ifdef SQLITE_ENABLE_NORMALIZE void sqlite3VdbeAddDblquoteStr(sqlite3*,Vdbe*,const char*); -int sqlite3VdbeUsesDoubleQuotedString(sqlite3*,Vdbe*,const char*,int); +int sqlite3VdbeUsesDoubleQuotedString(Vdbe*,const char*); #endif void sqlite3VdbeSwap(Vdbe*,Vdbe*); VdbeOp *sqlite3VdbeTakeOpArray(Vdbe*, int*, int*); diff --git a/src/vdbeaux.c b/src/vdbeaux.c index 2f45c217f1..ac7bf6a65b 100644 --- a/src/vdbeaux.c +++ b/src/vdbeaux.c @@ -97,26 +97,16 @@ void sqlite3VdbeAddDblquoteStr(sqlite3 *db, 
Vdbe *p, const char *z){ ** that identifier is really used as a string literal. */ int sqlite3VdbeUsesDoubleQuotedString( - sqlite3 *db, /* Used for transient malloc */ Vdbe *pVdbe, /* The prepared statement */ - const char *zId, /* The double-quoted identifier */ - int nId /* Bytes in zId, which is not zero-terminated */ + const char *zId /* The double-quoted identifier, already dequoted */ ){ - char *z; DblquoteStr *pStr; assert( zId!=0 ); - assert( zId[0]=='"' ); - assert( nId>=2 ); - assert( zId[nId-1]=='"' ); if( pVdbe->pDblStr==0 ) return 0; - z = sqlite3DbStrNDup(db, zId, nId); - if( z==0 ) return 0; - sqlite3Dequote(z); for(pStr=pVdbe->pDblStr; pStr; pStr=pStr->pNextStr){ - if( strcmp(z, pStr->z)==0 ) break; + if( strcmp(zId, pStr->z)==0 ) return 1; } - sqlite3DbFree(db, z); - return pStr!=0; + return 0; } #endif