Refactor the sqlite3_normalized_sql() implementation. This is a

work-in-progress. There are still issues.

FossilOrigin-Name: a4c890b0af9786295e6df05022009d8946550adb873535c610be805c2b7a4083
This commit is contained in:
drh 2018-12-10 16:00:57 +00:00
parent fb34559eb1
commit 643d855da9
7 changed files with 148 additions and 294 deletions

View File

@ -1,5 +1,5 @@
C Add\ssupport\sfor\sthe\sVACUUM\sINTO\scommand.
D 2018-12-10T01:48:29.276
C Refactor\sthe\ssqlite3_normalized_sql()\simplementation.\sThis\sis\sa\nwork-in-progress.\sThere\sare\sstill\sissues.
D 2018-12-10T16:00:57.538
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F Makefile.in 68d0ba0f0b533d5bc84c78c13a6ce84ee81183a67014caa47a969e67f028fa1c
@ -502,7 +502,7 @@ F src/pcache.h 4f87acd914cef5016fae3030343540d75f5b85a1877eed1a2a19b9f284248586
F src/pcache1.c ad0ffc5b35b0280d045ac569d34d4b842e3e6a4a118f6396b320987a0957afcc
F src/pragma.c 96ce7dce4dc9cb2b7aa0e1b2ce7536870bdc00b10becc278245e775489447ea0
F src/pragma.h fdd03d78a7497f74a3f652909f945328480089189526841ae829ce7313d98d13
F src/prepare.c be449edb106a16f1ad95f9b798bdc2337f8c3f83b96c284f417c0a26d43f0c1b
F src/prepare.c 0e8fc0deaf36da104e08d07ce7d97bc09ab57d078b399381532fec3fa1d3f2bb
F src/printf.c 0f1177cf1dd4d7827bf64d840768514ec76409abecaca9e8b577dbd065150381
F src/random.c 80f5d666f23feb3e6665a6ce04c7197212a88384
F src/resolve.c 095d1d41d7a981ee9db8bfeae25ed0d6a8a5e5e3d66b0f4efd71877ed7b56132
@ -512,7 +512,7 @@ F src/shell.c.in 5f38bd0e127c2cc4e506b5c3565c10879ddfae6c2d867bb5972563e40717c19
F src/sqlite.h.in 908ec406feefc4c7e1486a2e3dc30a8bfb51c5a345a8e8130ac201962db171c4
F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8
F src/sqlite3ext.h 960f1b86c3610fa23cb6a267572a97dcf286e77aa0dd3b9b23292ffaa1ea8683
F src/sqliteInt.h 70ce5e14c887554d3c51f2045f5a95b6e83de745d7f6448e79e49fdd8dfc2d5c
F src/sqliteInt.h 369d4774d97643e26085e0ea4cdee6afeadaa27670193b9eebaea8c95687fad0
F src/sqliteLimit.h 1513bfb7b20378aa0041e7022d04acb73525de35b80b252f1b83fedb4de6a76b
F src/status.c 46e7aec11f79dad50965a5ca5fa9de009f7d6bde08be2156f1538a0a296d4d0e
F src/table.c b46ad567748f24a326d9de40e5b9659f96ffff34
@ -570,7 +570,7 @@ F src/test_windirent.h 90dfbe95442c9762357fe128dc7ae3dc199d006de93eb33ba3972e0a9
F src/test_window.c cdae419fdcea5bad6dcd9368c685abdad6deb59e9fc8b84b153de513d394ba3f
F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9
F src/threads.c 4ae07fa022a3dc7c5beb373cf744a85d3c5c6c3c
F src/tokenize.c 9e781e1ca80eefe7b5d6a9e2cd5c678c847da55fd6f093781fad7950934d4c83
F src/tokenize.c a43ecbafefceb475522f17d6f92d64193a755fe3f35615dfca13340c81ca3872
F src/treeview.c 7b12ac059de54c939b6eb0dbffc9410c29c80d2470cee5cbe07d5ff9ea2d9253
F src/trigger.c d3d78568f37fb2e6cdcc2d1e7b60156f15b0b600adec55b83c5d42f6cad250bd
F src/update.c 1816d56c1bca1ba4e0ef98cac2f49be62858e9df1dc08844c7067eb41cc44274
@ -579,10 +579,10 @@ F src/utf.c 810fbfebe12359f10bc2a011520a6e10879ab2a163bcb26c74768eab82ea62a5
F src/util.c d9eb0a6c4aae1b00a7369eadd7ca0bbe946cb4c953b6751aa20d357c2f482157
F src/vacuum.c 3ffe64ecfc94b7528c5d7bdb1c3a19d72fec63f2aa846e3b90f8de5dbbddf5aa
F src/vdbe.c 55bafc424748d9ed505ab2680736e51d1bb05c01e9885cbb3b287b51dc8b47ec
F src/vdbe.h d82f323d581b36b8e147d650257ef34e0e93790039b6cbda45c321c275f7595e
F src/vdbe.h 8990d668a89890a33326b0a29b992c4014b72f3b6cdcd9ee0e190593c247f9b0
F src/vdbeInt.h 73f5051923f3f29779bfc374c0c68e23b8e5e3792def2e33e51b427edb890abd
F src/vdbeapi.c 9709452bee82963e1f7f1f5d0c71db823d553f8dbb2c47a911c4983d537a1947
F src/vdbeaux.c 9a9617666124e18cbd6e936740f7469dcf0d82867b1abf9ed828694500930b64
F src/vdbeaux.c c72fc6015e52c212d6c6db7cee04bfbfbd681106f551c9296812a73082fb09d6
F src/vdbeblob.c f5c70f973ea3a9e915d1693278a5f890dc78594300cf4d54e64f2b0917c94191
F src/vdbemem.c 7b3305bc4a5139f4536ac9b5f61da0f915e49d2e3fdfa87dfdfa9d7aba8bc1e9
F src/vdbesort.c 90aad5a92608f2dd771c96749beabdb562c9d881131a860a7a5bccf66dc3be7f
@ -1783,8 +1783,10 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P b7bf3c9832bde26b359f4d58ebcf7788c500586d311387d022192ed65a126252 9748d7995bc1dd632d66d2d326048d50e29c6f5e6993d5a6294d14421d2cb72f
R f78f1915a996819be59599569cf251be
T +closed 9748d7995bc1dd632d66d2d326048d50e29c6f5e6993d5a6294d14421d2cb72f
P 77f150b8b46761f4f62f9d8926c10a95a70589a4525393fc16b321bd98c083a7
R d755f31eec9cdb1a3415665c2aacb5ab
T *branch * normalize-refactor
T *sym-normalize-refactor *
T -sym-trunk *
U drh
Z ee3d9a1eeb178b5acf328ec54a590f24
Z f90c6b85a4f2311f0e8b3d6a605ae025

View File

@ -1 +1 @@
77f150b8b46761f4f62f9d8926c10a95a70589a4525393fc16b321bd98c083a7
a4c890b0af9786295e6df05022009d8946550adb873535c610be805c2b7a4083

View File

@ -709,204 +709,6 @@ static int sqlite3LockAndPrepare(
return rc;
}
#ifdef SQLITE_ENABLE_NORMALIZE
/*
** Estimate the output buffer size needed for the fully normalized
** version of the SQL text zSql[0..nSql-1].  The estimate starts at
** nSql+4 and grows by 5 bytes for every occurrence of "IN", in either
** letter case and optionally followed by whitespace, that is followed
** by "(".  The 5 bytes leave room for the "?,?,?" text that may be
** written into the parenthesized right-hand side of an IN operator.
** Growth stops once the estimate reaches nSql*5.
**
** The size returned is the total number of bytes including the NUL
** terminator.
**
** The scan never reads at or beyond zSql[nSql] and also stops at the
** first NUL byte.  An 'I' that does not begin "IN (" is simply skipped
** so that later IN operators in the same statement are still counted.
*/
static int estimateNormalizedSize(
  const char *zSql,      /* The original SQL string */
  int nSql               /* Length of original SQL string */
){
  int nOut = nSql + 4;   /* Running size estimate */
  int i = 0;             /* Next byte of zSql[] to examine */

  while( nOut<nSql*5 ){
    /* Advance to the next 'I' or 'i', or to the end of the input */
    while( i<nSql && zSql[i]!=0 && zSql[i]!='I' && zSql[i]!='i' ){ i++; }
    if( i>=nSql || zSql[i]==0 ) break;
    i++;

    /* Not "IN": resume the search from here instead of giving up */
    if( i>=nSql || (zSql[i]!='N' && zSql[i]!='n') ) continue;
    i++;

    while( i<nSql && sqlite3Isspace(zSql[i]) ){ i++; }

    /* "IN" without a following "(": keep looking for another one */
    if( i>=nSql || zSql[i]!='(' ) continue;
    i++;

    nOut += 5; /* ?,?,? */
  }
  return nOut;
}
/*
** Append the nToken-byte token found at zSql[iIn] to zOut[], starting
** at index *piOut.  Exactly nToken bytes are written and *piOut is
** advanced by that amount.
**
** If tokenFlags contains SQLITE_TOKEN_KEYWORD the bytes are converted
** to upper case, otherwise they are converted to lower case.
**
** If tokenFlags contains SQLITE_TOKEN_QUOTED, the last byte of the
** token is replaced by a double-quote character, and so is the first
** byte provided the token does not begin at offset 0 of zSql.
*/
static void copyNormalizedToken(
  const char *zSql,      /* The original SQL string */
  int iIn,               /* Current index into the original SQL string */
  int nToken,            /* Number of bytes in the current token */
  int tokenFlags,        /* Flags returned by the tokenizer */
  char *zOut,            /* The output string */
  int *piOut             /* Pointer to target index into the output string */
){
  const int isQuoted = tokenFlags & SQLITE_TOKEN_QUOTED;
  const int isKeyword = tokenFlags & SQLITE_TOKEN_KEYWORD;
  const char *zTok = &zSql[iIn];     /* First byte of the token */
  char *zDest = &zOut[*piOut];       /* Next byte of output to write */
  int k;

  for(k=0; k<nToken; k++){
    int atOpenQuote = (k==0 && iIn>0);
    int atCloseQuote = (k==nToken-1);
    if( isQuoted && (atOpenQuote || atCloseQuote) ){
      *zDest++ = '"';
    }else if( isKeyword ){
      *zDest++ = sqlite3Toupper(zTok[k]);
    }else{
      *zDest++ = sqlite3Tolower(zTok[k]);
    }
  }
  *piOut = (int)(zDest - zOut);
}
/*
** Compute a normalization of the SQL given by zSql[0..nSql-1]. Return
** the normalization in space obtained from sqlite3DbMalloc(). Or return
** NULL if anything goes wrong or if zSql is NULL.
**
** The input is processed one token at a time using
** sqlite3GetTokenNormalized():
**
**   *  Whitespace tokens produce no output.
**   *  An illegal token aborts the normalization; the buffer is freed
**      and NULL is returned.
**   *  String, integer, float, variable and blob tokens are each
**      replaced by a single "?".
**   *  NULL is replaced by "?" unless the previous non-space token was
**      IS or NOT, in which case it is copied like any other keyword.
**   *  When a ")" closes the parenthesis that was opened directly after
**      IN, everything after that "(" is overwritten with "?,?,?".  The
**      rewrite is cancelled if a TK_ID token that is not treated as a
**      double-quoted string literal is seen at the same nesting level.
**   *  A double-quoted token for which
**      sqlite3VdbeUsesDoubleQuotedString() returns true is replaced
**      by "?".
**   *  All remaining tokens are copied by copyNormalizedToken(), with a
**      single space inserted first when the previous output byte and
**      the first byte of the token are both identifier characters.
**
** Trailing spaces are removed from the result and a ";" is appended
** unless the result is empty or already ends with one.
*/
char *sqlite3Normalize(
Vdbe *pVdbe, /* VM being reprepared */
const char *zSql, /* The original SQL string */
int nSql /* Size of the input string in bytes */
){
sqlite3 *db; /* Database handle. */
char *z; /* The output string */
int nZ; /* Size of the output string in bytes */
int i; /* Next character to read from zSql[] */
int j; /* Next character to fill in on z[] */
int tokenType = 0; /* Type of the next token */
int prevTokenType = 0; /* Type of the previous token, except spaces */
int n; /* Size of the next token */
int nParen = 0; /* Nesting level of parenthesis */
int iStartIN = 0; /* Start of RHS of IN operator in z[] */
int nParenAtIN = 0; /* Value of nParen at start of RHS of IN operator */
db = sqlite3VdbeDb(pVdbe);
assert( db!=0 );
if( zSql==0 ) return 0;
/* The output buffer is allocated once, up front, from the estimate */
nZ = estimateNormalizedSize(zSql, nSql);
z = sqlite3DbMallocRawNN(db, nZ);
if( z==0 ) goto normalizeError;
for(i=j=0; i<nSql && zSql[i]; i+=n){
int flags = 0;
/* Remember the most recent token that was not whitespace */
if( tokenType!=TK_SPACE ) prevTokenType = tokenType;
n = sqlite3GetTokenNormalized((unsigned char*)zSql+i, &tokenType, &flags);
switch( tokenType ){
case TK_SPACE: {
/* Whitespace is dropped */
break;
}
case TK_ILLEGAL: {
goto normalizeError;
}
case TK_STRING:
case TK_INTEGER:
case TK_FLOAT:
case TK_VARIABLE:
case TK_BLOB: {
/* Every literal value and bound parameter becomes "?" */
z[j++] = '?';
break;
}
case TK_LP:
case TK_RP: {
if( tokenType==TK_LP ){
nParen++;
if( prevTokenType==TK_IN ){
/* Remember where the "(" of the IN operator will land in z[] */
iStartIN = j;
nParenAtIN = nParen;
}
}else{
if( iStartIN>0 && nParen==nParenAtIN ){
/* Closing the IN list: rewind the output to just after its "("
** and write the fixed placeholder list in place of the contents */
assert( iStartIN+6<nZ );
memcpy(z+iStartIN+1, "?,?,?", 5);
j = iStartIN+6;
assert( nZ-1-j>=0 );
assert( nZ-1-j<nZ );
/* Zero the remainder of the buffer after the rewind */
memset(z+j, 0, nZ-1-j);
iStartIN = 0;
}
nParen--;
}
assert( nParen>=0 );
/* Fall through */
/* so that the parenthesis itself is copied to the output below */
}
case TK_MINUS:
case TK_SEMI:
case TK_PLUS:
case TK_STAR:
case TK_SLASH:
case TK_REM:
case TK_EQ:
case TK_LE:
case TK_NE:
case TK_LSHIFT:
case TK_LT:
case TK_RSHIFT:
case TK_GT:
case TK_GE:
case TK_BITOR:
case TK_CONCAT:
case TK_COMMA:
case TK_BITAND:
case TK_BITNOT:
case TK_DOT:
case TK_IN:
case TK_IS:
case TK_NOT:
case TK_NULL:
case TK_ID: {
if( tokenType==TK_NULL ){
if( prevTokenType==TK_IS || prevTokenType==TK_NOT ){
/* NULL is a keyword in this case, not a literal value */
}else{
/* Here the NULL is a literal value */
z[j++] = '?';
break;
}
}
/* Keep two adjacent words from running together in the output */
if( j>0 && sqlite3IsIdChar(z[j-1]) && sqlite3IsIdChar(zSql[i]) ){
z[j++] = ' ';
}
if( tokenType==TK_ID ){
if( zSql[i]=='"'
&& sqlite3VdbeUsesDoubleQuotedString(db,pVdbe,zSql+i,n)
){
/* A double-quoted token that is really a string literal */
z[j++] = '?';
break;
}
/* A TK_ID token at the nesting level of the IN list cancels the
** pending "?,?,?" rewrite of that list */
if( nParen==nParenAtIN ) iStartIN = 0;
}
copyNormalizedToken(zSql, i, n, flags, z, &j);
break;
}
}
}
assert( j<nZ && "one" );
/* Trim trailing spaces, then make sure the text ends with ";" */
while( j>0 && z[j-1]==' ' ){ j--; }
if( j>0 && z[j-1]!=';' ){ z[j++] = ';'; }
z[j] = 0;
assert( j<nZ && "two" );
return z;
normalizeError:
/* Reached on allocation failure or on an illegal token */
sqlite3DbFree(db, z);
return 0;
}
#endif /* SQLITE_ENABLE_NORMALIZE */
/*
** Rerun the compilation of a statement after a schema change.

View File

@ -4255,9 +4255,6 @@ void sqlite3AlterFunctions(void);
void sqlite3AlterRenameTable(Parse*, SrcList*, Token*);
void sqlite3AlterRenameColumn(Parse*, SrcList*, Token*, Token*);
int sqlite3GetToken(const unsigned char *, int *);
#ifdef SQLITE_ENABLE_NORMALIZE
int sqlite3GetTokenNormalized(const unsigned char *, int *, int *);
#endif
void sqlite3NestedParse(Parse*, const char*, ...);
void sqlite3ExpirePreparedStatements(sqlite3*, int);
int sqlite3CodeSubselect(Parse*, Expr *, int, int);

View File

@ -545,73 +545,6 @@ int sqlite3GetToken(const unsigned char *z, int *tokenType){
return i;
}
#ifdef SQLITE_ENABLE_NORMALIZE
/*
** Return the length (in bytes) of the token that begins at z[0].
** Store the token type in *tokenType before returning. If flags has
** SQLITE_TOKEN_NORMALIZE flag enabled, use the identifier token type
** for keywords. Add SQLITE_TOKEN_QUOTED to flags if the token was
** actually a quoted identifier. Add SQLITE_TOKEN_KEYWORD to flags
** if the token was recognized as a keyword; this is useful when the
** SQLITE_TOKEN_NORMALIZE flag is used, because it enables the caller
** to differentiate between a keyword being treated as an identifier
** (for normalization purposes) and an actual identifier.
*/
int sqlite3GetTokenNormalized(
const unsigned char *z,
int *tokenType,
int *flags
){
int n;
unsigned char iClass = aiClass[*z];
if( iClass==CC_KYWD ){
int i;
for(i=1; aiClass[z[i]]<=CC_KYWD; i++){}
if( IdChar(z[i]) ){
/* This token started out using characters that can appear in keywords,
** but z[i] is a character not allowed within keywords, so this must
** be an identifier instead */
i++;
while( IdChar(z[i]) ){ i++; }
*tokenType = TK_ID;
return i;
}
*tokenType = TK_ID;
n = keywordCode((char*)z, i, tokenType);
/* If the token is no longer considered to be an identifier, then it is a
** keyword of some kind. Make the token back into an identifier and then
** set the SQLITE_TOKEN_KEYWORD flag. Several non-identifier tokens are
** used verbatim, including IN, IS, NOT, and NULL. */
switch( *tokenType ){
case TK_ID: {
/* do nothing, handled by caller */
break;
}
case TK_IN:
case TK_IS:
case TK_NOT:
case TK_NULL: {
*flags |= SQLITE_TOKEN_KEYWORD;
break;
}
default: {
*tokenType = TK_ID;
*flags |= SQLITE_TOKEN_KEYWORD;
break;
}
}
}else{
n = sqlite3GetToken(z, tokenType);
/* If the token is considered to be an identifier and the character class
** of the first character is a quote, set the SQLITE_TOKEN_QUOTED flag. */
if( *tokenType==TK_ID && (iClass==CC_QUOTE || iClass==CC_QUOTE2) ){
*flags |= SQLITE_TOKEN_QUOTED;
}
}
return n;
}
#endif /* SQLITE_ENABLE_NORMALIZE */
/*
** Run the parser on the given SQL string. The parser structure is
** passed in. An SQLITE_ status code is returned. If an error occurs
@ -781,3 +714,133 @@ int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
assert( nErr==0 || pParse->rc!=SQLITE_OK );
return nErr;
}
#ifdef SQLITE_ENABLE_NORMALIZE
/*
** Append one space to pStr, but only when pStr is non-empty and its
** final character is an identifier character.  This keeps the next
** word that is appended from merging with the previous one.
*/
static void addSpaceSeparator(sqlite3_str *pStr){
  if( pStr->nChar==0 ) return;
  if( !sqlite3IsIdChar(pStr->zText[pStr->nChar-1]) ) return;
  sqlite3_str_append(pStr, " ", 1);
}
/*
** Compute a normalization of the SQL given by zSql[0..nSql-1].  The
** text is accumulated in an sqlite3_str object and the value returned
** is whatever sqlite3_str_finish() yields for that object.  NULL is
** returned immediately if zSql is NULL or nSql is zero.
**
** The input is processed one token at a time using sqlite3GetToken().
** Processing stops at nSql bytes, at the first NUL byte, or as soon as
** the string accumulator records an error.
**
**   *  Whitespace tokens produce no output.  A single space is inserted
**      only where two words would otherwise run together.
**   *  String, integer, float, variable and blob tokens are each
**      replaced by "?".
**   *  NULL is replaced by "?" unless the previous non-space token was
**      IS or NOT, in which case " NULL" is emitted.
**   *  When a ")" closes the parenthesis that was opened directly after
**      IN, the text generated since that "(" is discarded and replaced
**      by "?,?,?".  The rewrite is cancelled when an identifier, or a
**      word-like token other than IN, IS and NOT, is seen at the
**      nesting level of the list, so that a list holding a subquery or
**      column references is left intact.
**   *  Identifiers are converted to lower case.  A quoted identifier is
**      dequoted; if the dequoted text tokenizes as a single TK_ID it is
**      emitted bare, otherwise it is emitted inside double-quotes using
**      the %w format.  A double-quoted token for which
**      sqlite3VdbeUsesDoubleQuotedString() returns true is replaced
**      by "?".
**   *  Every other token is copied in upper case.
**
** A ";" is appended to a non-empty result that does not already end
** with one.
*/
char *sqlite3Normalize(
  Vdbe *pVdbe,           /* VM being reprepared */
  const char *zSql,      /* The original SQL string */
  int nSql               /* Size of the input string in bytes */
){
  sqlite3 *db;           /* The database connection */
  int i;                 /* Next unread byte of zSql[] */
  int n;                 /* length of current token */
  int tokenType;         /* type of current token */
  int prevType;          /* Previous non-whitespace token */
  int nParen;            /* Number of nested levels of parentheses */
  int iStartIN;          /* Start of RHS of IN operator in pStr */
  int nParenAtIN;        /* Value of nParen at start of RHS of IN operator */
  int j;                 /* Bytes of normalized SQL generated so far */
  sqlite3_str *pStr;     /* The normalized SQL string under construction */

  if( zSql==0 || nSql==0 ) return 0;
  db = sqlite3VdbeDb(pVdbe);
  tokenType = prevType = -1;
  nParen = iStartIN = nParenAtIN = 0;
  pStr = sqlite3_str_new(db);
  for(i=0; i<nSql && zSql[i]!=0 && pStr->accError==0; i+=n){
    if( tokenType!=TK_SPACE ){
      prevType = tokenType;
    }
    n = sqlite3GetToken((unsigned char*)zSql+i, &tokenType);
    if( NEVER(n<=0) ) break;
    switch( tokenType ){
      case TK_SPACE: {
        break;
      }
      case TK_NULL: {
        if( prevType==TK_IS || prevType==TK_NOT ){
          /* NULL is a keyword here, not a literal value */
          sqlite3_str_append(pStr, " NULL", 5);
          break;
        }
        /* Fall through */
      }
      case TK_STRING:
      case TK_INTEGER:
      case TK_FLOAT:
      case TK_VARIABLE:
      case TK_BLOB: {
        sqlite3_str_append(pStr, "?", 1);
        break;
      }
      case TK_LP: {
        nParen++;
        if( prevType==TK_IN ){
          /* Remember the offset at which this "(" is stored */
          iStartIN = pStr->nChar;
          nParenAtIN = nParen;
        }
        sqlite3_str_append(pStr, "(", 1);
        break;
      }
      case TK_RP: {
        if( iStartIN>0 && nParen==nParenAtIN ){
          /* Closing the IN list: truncate the output to just after its
          ** "(" and emit the fixed placeholder list instead */
          assert( pStr->nChar>=iStartIN );
          pStr->nChar = iStartIN+1;
          sqlite3_str_append(pStr, "?,?,?", 5);
          iStartIN = 0;
        }
        nParen--;
        sqlite3_str_append(pStr, ")", 1);
        break;
      }
      case TK_ID: {
        j = pStr->nChar;
        if( sqlite3Isquote(zSql[i]) ){
          char *zId = sqlite3DbStrNDup(db, zSql+i, n);
          int nId;
          int eType = 0;
          if( zId==0 ) break;
          sqlite3Dequote(zId);
          if( zSql[i]=='"' && sqlite3VdbeUsesDoubleQuotedString(pVdbe, zId) ){
            /* A double-quoted token that is really a string literal */
            sqlite3_str_append(pStr, "?", 1);
            sqlite3DbFree(db, zId);
            break;
          }
          /* A genuine identifier directly inside the IN list means the
          ** list is not made up of literals only: do not collapse it */
          if( nParen==nParenAtIN ) iStartIN = 0;
          nId = sqlite3Strlen30(zId);
          if( sqlite3GetToken((u8*)zId, &eType)==nId && eType==TK_ID ){
            /* The quotes were unnecessary: emit the bare identifier */
            addSpaceSeparator(pStr);
            sqlite3_str_append(pStr, zId, nId);
          }else{
            sqlite3_str_appendf(pStr, "\"%w\"", zId);
          }
          sqlite3DbFree(db, zId);
        }else{
          if( nParen==nParenAtIN ) iStartIN = 0;
          addSpaceSeparator(pStr);
          sqlite3_str_append(pStr, zSql+i, n);
        }
        /* Fold everything appended for this identifier to lower case */
        while( j<pStr->nChar ){
          pStr->zText[j] = sqlite3Tolower(pStr->zText[j]);
          j++;
        }
        break;
      }
      default: {
        if( sqlite3IsIdChar(zSql[i]) ){
          /* A word-like token, presumably a keyword.  Apart from IN, IS
          ** and NOT, such a token directly inside the IN list (SELECT,
          ** CASE, ...) cancels the pending "?,?,?" rewrite */
          if( nParen==nParenAtIN
           && tokenType!=TK_IN
           && tokenType!=TK_IS
           && tokenType!=TK_NOT
          ){
            iStartIN = 0;
          }
          addSpaceSeparator(pStr);
        }
        j = pStr->nChar;
        sqlite3_str_append(pStr, zSql+i, n);
        /* Fold the token just appended to upper case */
        while( j<pStr->nChar ){
          pStr->zText[j] = sqlite3Toupper(pStr->zText[j]);
          j++;
        }
        break;
      }
    }
  }
  /* Terminate a non-empty result with exactly one added ";" when the
  ** input did not already end with one */
  if( pStr->nChar>0 && pStr->zText[pStr->nChar-1]!=';' ){
    sqlite3_str_append(pStr, ";", 1);
  }
  return sqlite3_str_finish(pStr);
}
#endif /* SQLITE_ENABLE_NORMALIZE */

View File

@ -253,7 +253,7 @@ u8 sqlite3VdbePrepareFlags(Vdbe*);
void sqlite3VdbeSetSql(Vdbe*, const char *z, int n, u8);
#ifdef SQLITE_ENABLE_NORMALIZE
void sqlite3VdbeAddDblquoteStr(sqlite3*,Vdbe*,const char*);
int sqlite3VdbeUsesDoubleQuotedString(sqlite3*,Vdbe*,const char*,int);
int sqlite3VdbeUsesDoubleQuotedString(Vdbe*,const char*);
#endif
void sqlite3VdbeSwap(Vdbe*,Vdbe*);
VdbeOp *sqlite3VdbeTakeOpArray(Vdbe*, int*, int*);

View File

@ -97,26 +97,16 @@ void sqlite3VdbeAddDblquoteStr(sqlite3 *db, Vdbe *p, const char *z){
** that identifier is really used as a string literal.
*/
int sqlite3VdbeUsesDoubleQuotedString(
  Vdbe *pVdbe,           /* The prepared statement */
  const char *zId        /* The double-quoted identifier, already dequoted */
){
  DblquoteStr *pStr;     /* For walking the pVdbe->pDblStr list */

  assert( zId!=0 );
  if( pVdbe->pDblStr==0 ) return 0;

  /* zId has already been dequoted by the caller, so it can be compared
  ** directly against each entry on the list without making a copy.
  ** Return 1 on the first exact match and 0 if there is none. */
  for(pStr=pVdbe->pDblStr; pStr; pStr=pStr->pNextStr){
    if( strcmp(zId, pStr->z)==0 ) return 1;
  }
  return 0;
}
#endif