/*
** 91694dbdd2
** FossilOrigin-Name: 658f42257d56a3562dfa8e55023c6e497f55c565cd751d942de3a9e0cb2fa708
** 708 lines, 23 KiB, C
*/
/*
** 2018-01-08
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains code to implement the sqlite3_normalize() function.
**
**    char *sqlite3_normalize(const char *zSql);
**
** This function takes an SQL string as input and returns a "normalized"
** version of that string in memory obtained from sqlite3_malloc64().  The
** caller is responsible for ensuring that the returned memory is freed.
**
** If a memory allocation error occurs, this routine returns NULL.  NULL is
** also returned if the input contains a token that the tokenizer rejects
** as illegal.
**
** The normalization consists of the following transformations:
**
**   (1)  Convert every literal (string, blob literal, numeric constant,
**        or "NULL" constant) into a ?
**
**   (2)  Remove all superfluous whitespace, including comments.  Change
**        all required whitespace to a single space character.
**
**   (3)  Lowercase all ASCII characters.
**
**   (4)  If an IN or NOT IN operator is followed by a list of 1 or more
**        values, convert that list into "(?,?,?)".
**
** The purpose of normalization is two-fold:
**
**   (1)  Sanitize queries by removing potentially private or sensitive
**        information contained in literals.
**
**   (2)  Identify structurally identical queries by comparing their
**        normalized forms.
**
** Command-Line Utility
** --------------------
**
** This file also contains code for a command-line utility that converts
** SQL queries in text files into their normalized forms.  To build the
** command-line program, compile this file with -DSQLITE_NORMALIZE_CLI
** and link it against the SQLite library.
*/
|
#include <sqlite3.h>
#include <string.h>
|
/*
** Implementation note:
**
** Much of the tokenizer logic is copied out of the tokenize.c source file
** of SQLite.  That logic could be simplified for this particular application,
** but that would impose a risk of introducing subtle errors.  It is best to
** keep the code as close to the original as possible.
**
** The tokenize code is in sync with the SQLite core as of 2018-01-08.
** Any future changes to the core tokenizer might require corresponding
** adjustments to the tokenizer logic in this module.
*/
|
/* Character classes for tokenizing
**
** In the sqlite3GetToken() function, a switch() on aiClass[c] is implemented
** using a lookup table, whereas a switch() directly on c uses a binary search.
** The lookup table is much faster.  To maximize speed, and to ensure that
** a lookup table is used, all of the classes need to be small integers and
** all of them need to be used within the switch.
**
** The numeric values below are the same numbers that are stored in the
** aiClass[] table, so the two must be kept in agreement.
*/
#define CC_X          0    /* The letter 'x', or start of BLOB literal */
#define CC_KYWD       1    /* Alphabetics or '_'.  Usable in a keyword */
#define CC_ID         2    /* unicode characters usable in IDs */
#define CC_DIGIT      3    /* Digits */
#define CC_DOLLAR     4    /* '$' */
#define CC_VARALPHA   5    /* '@', '#', ':'.  Alphabetic SQL variables */
#define CC_VARNUM     6    /* '?'.  Numeric SQL variables */
#define CC_SPACE      7    /* Space characters */
#define CC_QUOTE      8    /* '"', '\'', or '`'.  String literals, quoted ids */
#define CC_QUOTE2     9    /* '['.   [...] style quoted ids */
#define CC_PIPE      10    /* '|'.   Bitwise OR or concatenate */
#define CC_MINUS     11    /* '-'.  Minus or SQL-style comment */
#define CC_LT        12    /* '<'.  Part of < or <= or <> */
#define CC_GT        13    /* '>'.  Part of > or >= */
#define CC_EQ        14    /* '='.  Part of = or == */
#define CC_BANG      15    /* '!'.  Part of != */
#define CC_SLASH     16    /* '/'.  / or c-style comment */
#define CC_LP        17    /* '(' */
#define CC_RP        18    /* ')' */
#define CC_SEMI      19    /* ';' */
#define CC_PLUS      20    /* '+' */
#define CC_STAR      21    /* '*' */
#define CC_PERCENT   22    /* '%' */
#define CC_COMMA     23    /* ',' */
#define CC_AND       24    /* '&' */
#define CC_TILDA     25    /* '~' */
#define CC_DOT       26    /* '.' */
#define CC_ILLEGAL   27    /* Illegal character */
|
/*
** Map each of the 256 possible byte values to its CC_* character class.
** sqlite3GetToken() indexes this table with the first byte of a token.
** The array bound is explicit because the table is indexed by an
** unsigned char and therefore must cover 0x00 through 0xff.
*/
static const unsigned char aiClass[256] = {
/*         x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf */
/* 0x */   27, 27, 27, 27, 27, 27, 27, 27, 27,  7,  7, 27,  7,  7, 27, 27,
/* 1x */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* 2x */    7, 15,  8,  5,  4, 22, 24,  8, 17, 18, 21, 20, 23, 11, 26, 16,
/* 3x */    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  5, 19, 12, 14, 13,  6,
/* 4x */    5,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
/* 5x */    1,  1,  1,  1,  1,  1,  1,  1,  0,  1,  1,  9, 27, 27, 27,  1,
/* 6x */    8,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
/* 7x */    1,  1,  1,  1,  1,  1,  1,  1,  0,  1,  1, 27, 10, 27, 25, 27,
/* 8x */    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
/* 9x */    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
/* Ax */    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
/* Bx */    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
/* Cx */    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
/* Dx */    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
/* Ex */    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
/* Fx */    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2
};
|
/* An array to map all upper-case characters into their corresponding
** lower-case character.
**
** SQLite only considers US-ASCII (or EBCDIC) characters.  We do not
** handle case conversions for the UTF character set since the tables
** involved are nearly as big or bigger than SQLite itself.
**
** In the table below, entries 65..90 ('A'..'Z') hold 97..122 ('a'..'z');
** every other entry maps to itself.
*/
static const unsigned char sqlite3UpperToLower[] = {
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
     18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
     36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
     54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
    104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
    122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107,
    108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
    126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
    144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,
    162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,
    180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,
    198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,
    216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,
    234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,
    252,253,254,255
};
|
/*
** The following 256 byte lookup table is used to support SQLite's built-in
** equivalents to the following standard library functions:
**
**   isspace()                        0x01
**   isalpha()                        0x02
**   isdigit()                        0x04
**   isalnum()                        0x06
**   isxdigit()                       0x08
**   toupper()                        0x20
**   SQLite identifier character      0x40
**   Quote character                  0x80
**
** Bit 0x20 is set if the mapped character requires translation to upper
** case. i.e. if the character is a lower-case ASCII character.
** If x is a lower-case ASCII character, then its upper-case equivalent
** is (x - 0x20). Therefore toupper() can be implemented as:
**
**   (x & ~(map[x]&0x20))
**
** The equivalent of tolower() is implemented using the sqlite3UpperToLower[]
** array. tolower() is used more often than toupper() by SQLite.
**
** Bit 0x40 is set if the character is non-alphanumeric and can be used in an
** SQLite identifier.  Identifiers are alphanumerics, "_", "$", and any
** non-ASCII UTF character. Hence the test for whether or not a character is
** part of an identifier is 0x46.
*/
static const unsigned char sqlite3CtypeMap[256] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 00..07    ........ */
  0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,  /* 08..0f    ........ */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 10..17    ........ */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 18..1f    ........ */
  0x01, 0x00, 0x80, 0x00, 0x40, 0x00, 0x00, 0x80,  /* 20..27     !"#$%&' */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 28..2f    ()*+,-./ */
  0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,  /* 30..37    01234567 */
  0x0c, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 38..3f    89:;<=>? */

  0x00, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x02,  /* 40..47    @ABCDEFG */
  0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  /* 48..4f    HIJKLMNO */
  0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  /* 50..57    PQRSTUVW */
  0x02, 0x02, 0x02, 0x80, 0x00, 0x00, 0x00, 0x40,  /* 58..5f    XYZ[\]^_ */
  0x80, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x22,  /* 60..67    `abcdefg */
  0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,  /* 68..6f    hijklmno */
  0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,  /* 70..77    pqrstuvw */
  0x22, 0x22, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 78..7f    xyz{|}~. */

  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* 80..87    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* 88..8f    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* 90..97    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* 98..9f    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* a0..a7    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* a8..af    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* b0..b7    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* b8..bf    ........ */

  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* c0..c7    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* c8..cf    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* d0..d7    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* d8..df    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* e0..e7    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* e8..ef    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* f0..f7    ........ */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40   /* f8..ff    ........ */
};

/*
** Character classification macros built on sqlite3CtypeMap[].  Each one
** casts its argument to unsigned char before indexing, so plain (possibly
** signed) char values are safe to pass.  The Is*() macros yield a non-zero
** bit mask rather than exactly 1 when the test succeeds.
*/
#define sqlite3Toupper(x)   ((x)&~(sqlite3CtypeMap[(unsigned char)(x)]&0x20))
#define sqlite3Isspace(x)   (sqlite3CtypeMap[(unsigned char)(x)]&0x01)
#define sqlite3Isalnum(x)   (sqlite3CtypeMap[(unsigned char)(x)]&0x06)
#define sqlite3Isalpha(x)   (sqlite3CtypeMap[(unsigned char)(x)]&0x02)
#define sqlite3Isdigit(x)   (sqlite3CtypeMap[(unsigned char)(x)]&0x04)
#define sqlite3Isxdigit(x)  (sqlite3CtypeMap[(unsigned char)(x)]&0x08)
#define sqlite3Tolower(x)   (sqlite3UpperToLower[(unsigned char)(x)])
#define sqlite3Isquote(x)   (sqlite3CtypeMap[(unsigned char)(x)]&0x80)
|
/*
** If X is a character that can be used in an identifier then
** IdChar(X) will be true.  Otherwise it is false.
**
** For ASCII, any character with the high-order bit set is
** allowed in an identifier.  For 7-bit characters,
** sqlite3IsIdChar[X] must be 1.
**
** For EBCDIC, the rules are more complex but have the same
** end result.
**
** Ticket #1066.  the SQL standard does not allow '$' in the
** middle of identifiers.  But many SQL implementations do.
** SQLite will allow '$' in identifiers for compatibility.
** But the feature is undocumented.
**
** In this file the test is implemented by checking bits 0x46 (alphabetic,
** digit, or identifier character) of sqlite3CtypeMap[].  The argument is
** parenthesized before the cast so that an expression argument is
** converted as a whole rather than only its first operand.
*/
#define IdChar(C)  ((sqlite3CtypeMap[(unsigned char)(C)]&0x46)!=0)
|
/*
** Ignore testcase() macros
**
** sqlite3GetToken() contains testcase(...) statements.  The macro expands
** to nothing here, so those statements compile away and the argument
** expression is never evaluated.
*/
#define testcase(X)
|
/*
** Token values
**
** The normalizer only distinguishes five kinds of token.  TK_SPACE covers
** whitespace and comments, TK_NAME identifiers and keywords, TK_LITERAL
** anything that is rewritten as '?', TK_PUNCT operators and punctuation,
** and TK_ERROR input that cannot be tokenized.
*/
#define TK_SPACE    0
#define TK_NAME     1
#define TK_LITERAL  2
#define TK_PUNCT    3
#define TK_ERROR    4

/*
** The token names used inside sqlite3GetToken() are aliased onto the five
** values above.
*/
#define TK_MINUS    TK_PUNCT
#define TK_LP       TK_PUNCT
#define TK_RP       TK_PUNCT
#define TK_SEMI     TK_PUNCT
#define TK_PLUS     TK_PUNCT
#define TK_STAR     TK_PUNCT
#define TK_SLASH    TK_PUNCT
#define TK_REM      TK_PUNCT
#define TK_EQ       TK_PUNCT
#define TK_LE       TK_PUNCT
#define TK_NE       TK_PUNCT
#define TK_LSHIFT   TK_PUNCT
#define TK_LT       TK_PUNCT
#define TK_GE       TK_PUNCT
#define TK_RSHIFT   TK_PUNCT
#define TK_GT       TK_PUNCT
#define TK_BITOR    TK_PUNCT
#define TK_CONCAT   TK_PUNCT
#define TK_COMMA    TK_PUNCT
#define TK_BITAND   TK_PUNCT
#define TK_BITNOT   TK_PUNCT
#define TK_STRING   TK_LITERAL
#define TK_ID       TK_NAME
#define TK_ILLEGAL  TK_ERROR
#define TK_DOT      TK_PUNCT
#define TK_INTEGER  TK_LITERAL
#define TK_FLOAT    TK_LITERAL
#define TK_VARIABLE TK_LITERAL
#define TK_BLOB     TK_LITERAL
|
/*
|
|
** Return the length (in bytes) of the token that begins at z[0].
|
|
** Store the token type in *tokenType before returning.
|
|
*/
|
|
static int sqlite3GetToken(const unsigned char *z, int *tokenType){
|
|
int i, c;
|
|
switch( aiClass[*z] ){ /* Switch on the character-class of the first byte
|
|
** of the token. See the comment on the CC_ defines
|
|
** above. */
|
|
case CC_SPACE: {
|
|
for(i=1; sqlite3Isspace(z[i]); i++){}
|
|
*tokenType = TK_SPACE;
|
|
return i;
|
|
}
|
|
case CC_MINUS: {
|
|
if( z[1]=='-' ){
|
|
for(i=2; (c=z[i])!=0 && c!='\n'; i++){}
|
|
*tokenType = TK_SPACE;
|
|
return i;
|
|
}
|
|
*tokenType = TK_MINUS;
|
|
return 1;
|
|
}
|
|
case CC_LP: {
|
|
*tokenType = TK_LP;
|
|
return 1;
|
|
}
|
|
case CC_RP: {
|
|
*tokenType = TK_RP;
|
|
return 1;
|
|
}
|
|
case CC_SEMI: {
|
|
*tokenType = TK_SEMI;
|
|
return 1;
|
|
}
|
|
case CC_PLUS: {
|
|
*tokenType = TK_PLUS;
|
|
return 1;
|
|
}
|
|
case CC_STAR: {
|
|
*tokenType = TK_STAR;
|
|
return 1;
|
|
}
|
|
case CC_SLASH: {
|
|
if( z[1]!='*' || z[2]==0 ){
|
|
*tokenType = TK_SLASH;
|
|
return 1;
|
|
}
|
|
for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){}
|
|
if( c ) i++;
|
|
*tokenType = TK_SPACE;
|
|
return i;
|
|
}
|
|
case CC_PERCENT: {
|
|
*tokenType = TK_REM;
|
|
return 1;
|
|
}
|
|
case CC_EQ: {
|
|
*tokenType = TK_EQ;
|
|
return 1 + (z[1]=='=');
|
|
}
|
|
case CC_LT: {
|
|
if( (c=z[1])=='=' ){
|
|
*tokenType = TK_LE;
|
|
return 2;
|
|
}else if( c=='>' ){
|
|
*tokenType = TK_NE;
|
|
return 2;
|
|
}else if( c=='<' ){
|
|
*tokenType = TK_LSHIFT;
|
|
return 2;
|
|
}else{
|
|
*tokenType = TK_LT;
|
|
return 1;
|
|
}
|
|
}
|
|
case CC_GT: {
|
|
if( (c=z[1])=='=' ){
|
|
*tokenType = TK_GE;
|
|
return 2;
|
|
}else if( c=='>' ){
|
|
*tokenType = TK_RSHIFT;
|
|
return 2;
|
|
}else{
|
|
*tokenType = TK_GT;
|
|
return 1;
|
|
}
|
|
}
|
|
case CC_BANG: {
|
|
if( z[1]!='=' ){
|
|
*tokenType = TK_ILLEGAL;
|
|
return 1;
|
|
}else{
|
|
*tokenType = TK_NE;
|
|
return 2;
|
|
}
|
|
}
|
|
case CC_PIPE: {
|
|
if( z[1]!='|' ){
|
|
*tokenType = TK_BITOR;
|
|
return 1;
|
|
}else{
|
|
*tokenType = TK_CONCAT;
|
|
return 2;
|
|
}
|
|
}
|
|
case CC_COMMA: {
|
|
*tokenType = TK_COMMA;
|
|
return 1;
|
|
}
|
|
case CC_AND: {
|
|
*tokenType = TK_BITAND;
|
|
return 1;
|
|
}
|
|
case CC_TILDA: {
|
|
*tokenType = TK_BITNOT;
|
|
return 1;
|
|
}
|
|
case CC_QUOTE: {
|
|
int delim = z[0];
|
|
testcase( delim=='`' );
|
|
testcase( delim=='\'' );
|
|
testcase( delim=='"' );
|
|
for(i=1; (c=z[i])!=0; i++){
|
|
if( c==delim ){
|
|
if( z[i+1]==delim ){
|
|
i++;
|
|
}else{
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if( c=='\'' ){
|
|
*tokenType = TK_STRING;
|
|
return i+1;
|
|
}else if( c!=0 ){
|
|
*tokenType = TK_ID;
|
|
return i+1;
|
|
}else{
|
|
*tokenType = TK_ILLEGAL;
|
|
return i;
|
|
}
|
|
}
|
|
case CC_DOT: {
|
|
if( !sqlite3Isdigit(z[1]) ){
|
|
*tokenType = TK_DOT;
|
|
return 1;
|
|
}
|
|
/* If the next character is a digit, this is a floating point
|
|
** number that begins with ".". Fall thru into the next case */
|
|
}
|
|
case CC_DIGIT: {
|
|
*tokenType = TK_INTEGER;
|
|
if( z[0]=='0' && (z[1]=='x' || z[1]=='X') && sqlite3Isxdigit(z[2]) ){
|
|
for(i=3; sqlite3Isxdigit(z[i]); i++){}
|
|
return i;
|
|
}
|
|
for(i=0; sqlite3Isdigit(z[i]); i++){}
|
|
if( z[i]=='.' ){
|
|
i++;
|
|
while( sqlite3Isdigit(z[i]) ){ i++; }
|
|
*tokenType = TK_FLOAT;
|
|
}
|
|
if( (z[i]=='e' || z[i]=='E') &&
|
|
( sqlite3Isdigit(z[i+1])
|
|
|| ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2]))
|
|
)
|
|
){
|
|
i += 2;
|
|
while( sqlite3Isdigit(z[i]) ){ i++; }
|
|
*tokenType = TK_FLOAT;
|
|
}
|
|
while( IdChar(z[i]) ){
|
|
*tokenType = TK_ILLEGAL;
|
|
i++;
|
|
}
|
|
return i;
|
|
}
|
|
case CC_QUOTE2: {
|
|
for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
|
|
*tokenType = c==']' ? TK_ID : TK_ILLEGAL;
|
|
return i;
|
|
}
|
|
case CC_VARNUM: {
|
|
*tokenType = TK_VARIABLE;
|
|
for(i=1; sqlite3Isdigit(z[i]); i++){}
|
|
return i;
|
|
}
|
|
case CC_DOLLAR:
|
|
case CC_VARALPHA: {
|
|
int n = 0;
|
|
testcase( z[0]=='$' ); testcase( z[0]=='@' );
|
|
testcase( z[0]==':' ); testcase( z[0]=='#' );
|
|
*tokenType = TK_VARIABLE;
|
|
for(i=1; (c=z[i])!=0; i++){
|
|
if( IdChar(c) ){
|
|
n++;
|
|
}else if( c=='(' && n>0 ){
|
|
do{
|
|
i++;
|
|
}while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' );
|
|
if( c==')' ){
|
|
i++;
|
|
}else{
|
|
*tokenType = TK_ILLEGAL;
|
|
}
|
|
break;
|
|
}else if( c==':' && z[i+1]==':' ){
|
|
i++;
|
|
}else{
|
|
break;
|
|
}
|
|
}
|
|
if( n==0 ) *tokenType = TK_ILLEGAL;
|
|
return i;
|
|
}
|
|
case CC_KYWD: {
|
|
for(i=1; aiClass[z[i]]<=CC_KYWD; i++){}
|
|
if( IdChar(z[i]) ){
|
|
/* This token started out using characters that can appear in keywords,
|
|
** but z[i] is a character not allowed within keywords, so this must
|
|
** be an identifier instead */
|
|
i++;
|
|
break;
|
|
}
|
|
*tokenType = TK_ID;
|
|
return i;
|
|
}
|
|
case CC_X: {
|
|
testcase( z[0]=='x' ); testcase( z[0]=='X' );
|
|
if( z[1]=='\'' ){
|
|
*tokenType = TK_BLOB;
|
|
for(i=2; sqlite3Isxdigit(z[i]); i++){}
|
|
if( z[i]!='\'' || i%2 ){
|
|
*tokenType = TK_ILLEGAL;
|
|
while( z[i] && z[i]!='\'' ){ i++; }
|
|
}
|
|
if( z[i] ) i++;
|
|
return i;
|
|
}
|
|
/* If it is not a BLOB literal, then it must be an ID, since no
|
|
** SQL keywords start with the letter 'x'. Fall through */
|
|
}
|
|
case CC_ID: {
|
|
i = 1;
|
|
break;
|
|
}
|
|
default: {
|
|
*tokenType = TK_ILLEGAL;
|
|
return 1;
|
|
}
|
|
}
|
|
while( IdChar(z[i]) ){ i++; }
|
|
*tokenType = TK_ID;
|
|
return i;
|
|
}
|
|
|
|
char *sqlite3_normalize(const char *zSql){
|
|
char *z; /* The output string */
|
|
sqlite3_int64 nZ; /* Size of the output string in bytes */
|
|
sqlite3_int64 nSql; /* Size of the input string in bytes */
|
|
int i; /* Next character to read from zSql[] */
|
|
int j; /* Next slot to fill in on z[] */
|
|
int tokenType; /* Type of the next token */
|
|
int n; /* Size of the next token */
|
|
int k; /* Loop counter */
|
|
|
|
nSql = strlen(zSql);
|
|
nZ = nSql;
|
|
z = sqlite3_malloc64( nZ+2 );
|
|
if( z==0 ) return 0;
|
|
for(i=j=0; zSql[i]; i += n){
|
|
n = sqlite3GetToken((unsigned char*)zSql+i, &tokenType);
|
|
switch( tokenType ){
|
|
case TK_SPACE: {
|
|
break;
|
|
}
|
|
case TK_ERROR: {
|
|
sqlite3_free(z);
|
|
return 0;
|
|
}
|
|
case TK_LITERAL: {
|
|
z[j++] = '?';
|
|
break;
|
|
}
|
|
case TK_PUNCT:
|
|
case TK_NAME: {
|
|
if( n==4 && sqlite3_strnicmp(zSql+i,"NULL",4)==0 ){
|
|
if( (j>=3 && strncmp(z+j-2,"is",2)==0 && !IdChar(z[j-3]))
|
|
|| (j>=4 && strncmp(z+j-3,"not",3)==0 && !IdChar(z[j-4]))
|
|
){
|
|
/* NULL is a keyword in this case, not a literal value */
|
|
}else{
|
|
/* Here the NULL is a literal value */
|
|
z[j++] = '?';
|
|
break;
|
|
}
|
|
}
|
|
if( j>0 && IdChar(z[j-1]) && IdChar(zSql[i]) ) z[j++] = ' ';
|
|
for(k=0; k<n; k++){
|
|
z[j++] = sqlite3Tolower(zSql[i+k]);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
while( j>0 && z[j-1]==' ' ){ j--; }
|
|
if( i>0 && z[j-1]!=';' ){ z[j++] = ';'; }
|
|
z[j] = 0;
|
|
|
|
/* Make a second pass converting "in(...)" where the "..." is not a
|
|
** SELECT statement into "in(?,?,?)" */
|
|
for(i=0; i<j; i=n){
|
|
char *zIn = strstr(z+i, "in(");
|
|
int nParen;
|
|
if( zIn==0 ) break;
|
|
n = (int)(zIn-z)+3; /* Index of first char past "in(" */
|
|
if( n && IdChar(zIn[-1]) ) continue;
|
|
if( strncmp(zIn, "in(select",9)==0 && !IdChar(zIn[9]) ) continue;
|
|
if( strncmp(zIn, "in(with",7)==0 && !IdChar(zIn[7]) ) continue;
|
|
for(nParen=1, k=0; z[n+k]; k++){
|
|
if( z[n+k]=='(' ) nParen++;
|
|
if( z[n+k]==')' ){
|
|
nParen--;
|
|
if( nParen==0 ) break;
|
|
}
|
|
}
|
|
/* k is the number of bytes in the "..." within "in(...)" */
|
|
if( k<5 ){
|
|
z = sqlite3_realloc64(z, j+(5-k)+1);
|
|
if( z==0 ) return 0;
|
|
memmove(z+n+5, z+n+k, j-(n+k));
|
|
}else if( k>5 ){
|
|
memmove(z+n+5, z+n+k, j-(n+k));
|
|
}
|
|
j = j-k+5;
|
|
z[j] = 0;
|
|
memcpy(z+n, "?,?,?", 5);
|
|
}
|
|
return z;
|
|
}
|
|
|
|
/*
** For testing purposes, or to build a stand-alone SQL normalizer program,
** compile this one source file with the -DSQLITE_NORMALIZE_CLI and link
** it against any SQLite library.  The resulting command-line program will
** run sqlite3_normalize() over the text of all files named on the command-
** line and show the result on standard output.
*/
#ifdef SQLITE_NORMALIZE_CLI
#include <stdio.h>
#include <stdlib.h>

/*
** Break zIn up into separate SQL statements and run sqlite3_normalize()
** on each one.  Print the result of each run.
**
** zIn is modified temporarily while scanning (a NUL is written after each
** candidate ';' and then restored).  Normalized statements go to standard
** output; a statement that sqlite3_normalize() rejects is reported on
** standard error.  Any text after the last complete statement is ignored.
*/
static void normalizeFile(char *zIn){
  int i;
  if( zIn==0 ) return;
  for(i=0; zIn[i]; i++){
    char cSaved;
    if( zIn[i]!=';' ) continue;
    /* Terminate the text just past this ';' and ask SQLite whether what
    ** has accumulated so far is a complete statement */
    cSaved = zIn[i+1];
    zIn[i+1] = 0;
    if( sqlite3_complete(zIn) ){
      char *zOut = sqlite3_normalize(zIn);
      if( zOut ){
        printf("%s\n", zOut);
        sqlite3_free(zOut);
      }else{
        fprintf(stderr, "ERROR: %s\n", zIn);
      }
      zIn[i+1] = cSaved;
      /* Restart the scan at the first byte after this statement */
      zIn += i+1;
      i = -1;
    }else{
      zIn[i+1] = cSaved;
    }
  }
}

/*
** The main routine for "sql_normalize".  Read files named on the
** command-line and run the text of each through sqlite3_normalize().
**
** A file that cannot be opened, sized, or read completely is reported on
** standard error and skipped.  The program exits with status 1 only when
** the read buffer cannot be allocated.
*/
int main(int argc, char **argv){
  int i;
  FILE *in;
  char *zBuf = 0;
  sqlite3_int64 sz, got;

  for(i=1; i<argc; i++){
    in = fopen(argv[i], "rb");
    if( in==0 ){
      fprintf(stderr, "cannot open \"%s\"\n", argv[i]);
      continue;
    }
    /* ftell() reports failure with a negative value, which must not be
    ** used as an allocation size */
    if( fseek(in, 0, SEEK_END)!=0 || (sz = ftell(in))<0 ){
      fprintf(stderr, "cannot determine the size of \"%s\"\n", argv[i]);
      fclose(in);
      continue;
    }
    rewind(in);
    zBuf = sqlite3_realloc64(zBuf, sz+1);
    if( zBuf==0 ){
      fprintf(stderr, "failed to malloc for %lld bytes\n", sz);
      exit(1);
    }
    got = fread(zBuf, 1, sz, in);
    fclose(in);
    if( got!=sz ){
      fprintf(stderr, "only able to read %lld of %lld bytes from \"%s\"\n",
              got, sz, argv[i]);
    }else{
      zBuf[got] = 0;
      normalizeFile(zBuf);
    }
  }
  sqlite3_free(zBuf);
  return 0;
}
#endif /* SQLITE_NORMALIZE_CLI */