From 98808babd3efdc2bb3b8b0884eda76abe6e32477 Mon Sep 17 00:00:00 2001 From: drh Date: Thu, 18 Oct 2001 12:34:46 +0000 Subject: [PATCH] Support for UTF-8 and ISO8859 characters in identifiers. Bug fix in the column name generator for selects (was coreing). (CVS 290) FossilOrigin-Name: 22948fc685299ca888907eea68edb8a6e87c3f49 --- manifest | 28 +++++++++++------------ manifest.uuid | 2 +- src/os.c | 2 +- src/pager.c | 4 ++-- src/select.c | 6 ++--- src/sqliteInt.h | 3 +-- src/tclsqlite.c | 26 ++++++++++++--------- src/tokenize.c | 57 +++++++++++++++++++++++++++++++++-------------- src/vdbe.c | 9 +++++--- test/select1.test | 12 ++++++++-- www/changes.tcl | 6 +++++ 11 files changed, 99 insertions(+), 56 deletions(-) diff --git a/manifest b/manifest index 2083be8a87..0be6cb473e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Version\s2.0.5\s(CVS\s465) -D 2001-10-15T00:45:00 +C Support\sfor\sUTF-8\sand\sISO8859\scharacters\sin\sidentifiers.\s\sBug\sfix\sin\nthe\scolumn\sname\sgenerator\sfor\sselects\s(was\scoreing).\s(CVS\s290) +D 2001-10-18T12:34:47 F Makefile.in 6801df952cb1df64aa32e4de85fed24511d28efd F Makefile.template 1fdb891f14083ee0b63cf7282f91529634438e7a F README 93d2977cc5c6595c448de16bdefc312b9d401533 @@ -29,27 +29,27 @@ F src/hash.h a5f5b3ce2d086a172c5879b0b06a27a82eac9fac F src/insert.c b65c1d4b848e45d41e9dcccd2b226ca335de67b6 F src/main.c 9a18e97290d41844e8c12e021fb7c42948a19dc9 F src/md5.c 52f677bfc590e09f71d07d7e327bd59da738d07c -F src/os.c 886bdd6c1dff71116f688e8b736d82e43e7ac9ed +F src/os.c 2a501026a66416292a30ab5b0988ec75783340ae F src/os.h bed702c9e3b768bc3cb1b12c90b83d099c1546be -F src/pager.c e2e189a15e230c60e811f5e2ab25e68ae41c90be +F src/pager.c 5e2877673e93ad2fa83e6d49fcd8d9590f8f38a5 F src/pager.h a0d4c5ae271914aa07b62aee0707997d6932b6ca F src/parse.y 148e4cd134d3cbd816dcb0df50e49e498faa6ba4 F src/printf.c b1e22a47be8cdf707815647239991e08e8cb69f9 F src/random.c 2a9cc2c9716d14815fd4c2accf89d87a1143e46b -F src/select.c 
75bb3ca7fd42f7c6d86fc565688e7834587a9f0d +F src/select.c 0e8089c5ae84fa3eb7e64b40350832983918e7a4 F src/shell.c cb8c41f1b2173efd212dab3f35f1fc6bf32ead76 F src/shell.tcl 27ecbd63dd88396ad16d81ab44f73e6c0ea9d20e F src/sqlite.h.in b95c161abf1d58bceb05290fa3f657d8f388fc11 -F src/sqliteInt.h 04bfa79fcf6ade1961f6e3b9dc679a63be25cbd7 +F src/sqliteInt.h acfd52eb2949abb847b1be93687e93e3663231b2 F src/table.c abd0adbe0fee39d995287b3bcccd908d174dfcac -F src/tclsqlite.c 765599686c19ed777ac379928d732c8bfc63ebac +F src/tclsqlite.c 0b947866c89fe5b683fc86e8419b7f8da3cebf7d F src/test1.c e4b31f62ea71963cbae44338acf477a04fc8fc49 F src/test2.c e9f99aa5ee73872819259d6612c11e55e1644321 F src/test3.c 4a0d7b882fdae731dbb759f512ad867122452f96 -F src/tokenize.c c3fcb76a41a22803b6060bddb5fbadc80bbe309c +F src/tokenize.c 2b95e67bcb5f68e7fe4a5a426fcbe0891d7aaa54 F src/update.c c916182c6bfbc8a6f20c24920c4560fece6c9569 F src/util.c 4da3be37d0fd3c640d2d3033503768afdc8e5387 -F src/vdbe.c 0f8ea6ca59f0899e9e0d71a81c0bf46110447cf6 +F src/vdbe.c 01617df84381c3ace10feb370b8d1f72f275c1ab F src/vdbe.h 86fc2ef42f48024c9a2e1b7fb01eda22b65a5295 F src/where.c 22fe910c7c8e2736eb37e9861343e90c0b513c86 F test/all.test a2320eb40b462f25bd3e33115b1cabf3791450dd @@ -74,7 +74,7 @@ F test/printf.test 3cb415073754cb8ff076f26173143c3cd293a9da F test/quick.test b6ec50f808efc06595fd324bf4f3fabadb9c7e9c F test/quote.test 286db944717afa9a9bf829dd85e59185c65d5435 F test/rowid.test 427bfbbe9684fe7a2f851aa05badaae6d4972ce8 -F test/select1.test 5f47445fa3a033e02e1b07e4fcd4f142e5a46403 +F test/select1.test 129c0188565383c3d22858161b96fefd3f7fa09f F test/select2.test f91c903e2bab0e9d45274855a981eebf846d5e32 F test/select3.test 5e1fe8e5a4e63fb2827ab3b89527e0fd4ae35259 F test/select4.test 29a2ffb187f3d8b6ca42a0a6b619e9cabe12e228 @@ -102,7 +102,7 @@ F www/arch.fig d5f9752a4dbf242e9cfffffd3f5762b6c63b3bcf F www/arch.png 82ef36db1143828a7abc88b1e308a5f55d4336f4 F www/arch.tcl 03b521d252575f93b9c52f7c8b0007011512fcfb F 
www/c_interface.tcl a59ee0835d1b33fcddab7d4fd65cf9e50f7d2dc7 -F www/changes.tcl 00cfa817042f33097616ff0de388e6503aab3968 +F www/changes.tcl 5ff43653f6387ce7b6e0a8aa384855b3f3b76550 F www/crosscompile.tcl c99efacb3aefaa550c6e80d91b240f55eb9fd33e F www/download.tcl 3e51c9ff1326b0a182846134987301310dff7d60 F www/dynload.tcl 02eb8273aa78cfa9070dd4501dca937fb22b466c @@ -114,7 +114,7 @@ F www/speed.tcl ab7d6d3bc898472bd94320a5d3c63de928d4804b F www/sqlite.tcl 6a21242a272e9c0939a04419a51c3d50cae33e3e F www/tclsqlite.tcl 13d50723f583888fc80ae1a38247c0ab415066fa F www/vdbe.tcl bb7d620995f0a987293e9d4fb6185a3b077e9b44 -P 747bf1b30b74cfd0e9c27e7c0bc5172637f35520 -R 27dbd8ccaf5ffe5bb0031ee011cf1c46 +P e2d84f71ed39cbb75884205521aa9e316ab3b56c +R 3132f2509f739a21413aa9b99beee504 U drh -Z 99c58d4e0e5ee15af3687db26a182ad1 +Z 7eb9951a3a4ed42d9ed165321f9fc396 diff --git a/manifest.uuid b/manifest.uuid index 8ea6ce4871..da1b629dd9 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e2d84f71ed39cbb75884205521aa9e316ab3b56c \ No newline at end of file +22948fc685299ca888907eea68edb8a6e87c3f49 \ No newline at end of file diff --git a/src/os.c b/src/os.c index 862eb278b6..07b2d1b661 100644 --- a/src/os.c +++ b/src/os.c @@ -577,7 +577,7 @@ int sqliteOsLock(OsFile id, int wrlock){ needSysLock = 1; } }else{ - if( id.pLock<0 ){ + if( id.pLock->cnt<0 ){ rc = SQLITE_BUSY; }else{ rc = SQLITE_OK; diff --git a/src/pager.c b/src/pager.c index 7134744e69..3beb7f4b65 100644 --- a/src/pager.c +++ b/src/pager.c @@ -18,7 +18,7 @@ ** file simultaneously, or one process from reading the database while ** another is writing. ** -** @(#) $Id: pager.c,v 1.27 2001/10/12 17:30:05 drh Exp $ +** @(#) $Id: pager.c,v 1.28 2001/10/18 12:34:47 drh Exp $ */ #include "sqliteInt.h" #include "pager.h" @@ -1076,7 +1076,7 @@ int sqlitepager_rollback(Pager *pPager){ } pPager->dbSize = -1; return rc; -}; +} /* ** Return TRUE if the database file is opened read-only. 
Return FALSE diff --git a/src/select.c b/src/select.c index 70e962cc4e..49e8f02cd1 100644 --- a/src/select.c +++ b/src/select.c @@ -12,7 +12,7 @@ ** This file contains C code routines that are called by the parser ** to handle SELECT statements in SQLite. ** -** $Id: select.c,v 1.40 2001/10/15 00:44:36 drh Exp $ +** $Id: select.c,v 1.41 2001/10/18 12:34:47 drh Exp $ */ #include "sqliteInt.h" @@ -256,10 +256,10 @@ void generateColumnNames(Parse *pParse, IdList *pTabList, ExprList *pEList){ }else if( p->op==TK_COLUMN && pTabList ){ if( pTabList->nId>1 || showFullNames ){ char *zName = 0; - Table *pTab = pTabList->a[p->iTable].pTab; + Table *pTab = pTabList->a[p->iTable - pParse->nTab].pTab; char *zTab; - zTab = pTabList->a[p->iTable].zAlias; + zTab = pTabList->a[p->iTable - pParse->nTab].zAlias; if( zTab==0 ) zTab = pTab->zName; sqliteSetString(&zName, zTab, ".", pTab->aCol[p->iColumn].zName, 0); sqliteVdbeAddOp(v, OP_ColumnName, i, 0); diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 72d63abca6..92d01724ba 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -11,7 +11,7 @@ ************************************************************************* ** Internal interface definitions for SQLite. 
** -** @(#) $Id: sqliteInt.h,v 1.62 2001/10/15 00:44:36 drh Exp $ +** @(#) $Id: sqliteInt.h,v 1.63 2001/10/18 12:34:47 drh Exp $ */ #include "sqlite.h" #include "hash.h" @@ -412,7 +412,6 @@ int sqliteSortCompare(const char *, const char *); char *sqliteStrDup(const char*); char *sqliteStrNDup(const char*, int); #endif -int sqliteGetToken(const char*, int *); void sqliteSetString(char **, const char *, ...); void sqliteSetNString(char **, ...); void sqliteDequote(char*); diff --git a/src/tclsqlite.c b/src/tclsqlite.c index bf713e8e4e..7893b73b5b 100644 --- a/src/tclsqlite.c +++ b/src/tclsqlite.c @@ -11,7 +11,7 @@ ************************************************************************* ** A TCL Interface to SQLite ** -** $Id: tclsqlite.c,v 1.24 2001/09/28 17:47:14 drh Exp $ +** $Id: tclsqlite.c,v 1.25 2001/10/18 12:34:47 drh Exp $ */ #ifndef NO_TCL /* Omit this whole file if TCL is unavailable */ @@ -20,6 +20,16 @@ #include #include +/* +** If TCL uses UTF-8 and SQLite is configured to use iso8859, then we +** have to do a translation when going between the two. Set the +** UTF_TRANSLATION_NEEDED macro to indicate that we need to do +** this translation. +*/ +#if defined(TCL_UTF_MAX) && !defined(SQLITE_UTF8) +# define UTF_TRANSLATION_NEEDED 1 +#endif + /* ** There is one instance of this structure for each SQLite database ** that has been opened by the SQLite TCL interface. @@ -42,17 +52,11 @@ struct CallbackData { Tcl_Obj *pCode; /* The code to execute for each row */ int once; /* Set only for the first invocation of callback */ int tcl_rc; /* Return code from TCL script */ -}; - -/* -** If TCL uses UTF-8 and SQLite is configured to use iso8859, then we -** have to do a translation when going between the two. Set the -** UTF_TRANSLATION_NEEDED macro to indicate that we need to do -** this translation. 
-*/ -#if defined(TCL_UTF_MAX) && !defined(SQLITE_UTF8) -# define UTF_TRANSLATION_NEEDED 1 +#ifdef UTF_TRANSLATION_NEEDED + int nColName; /* Number of entries in the azColName[] array */ + char **azColName; /* Column names translated to UTF-8 */ #endif +}; /* ** Called for each row of the result. diff --git a/src/tokenize.c b/src/tokenize.c index 52b23b210b..ba64ddb248 100644 --- a/src/tokenize.c +++ b/src/tokenize.c @@ -15,7 +15,7 @@ ** individual tokens and sends those tokens one-by-one over to the ** parser for analysis. ** -** $Id: tokenize.c,v 1.27 2001/10/13 02:59:09 drh Exp $ +** $Id: tokenize.c,v 1.28 2001/10/18 12:34:48 drh Exp $ */ #include "sqliteInt.h" #include "os.h" @@ -138,12 +138,44 @@ static int sqliteKeywordCode(const char *z, int n){ return TK_ID; } + +/* +** If X is a character that can be used in an identifier then +** isIdChar[X] will be 1. Otherwise isIdChar[X] will be 0. +** +** In this implementation, an identifier can be a string of +** alphabetic characters, digits, and "_" plus any character +** with the high-order bit set. The latter rule means that +** any sequence of UTF-8 characters or characters taken from +** an extended ISO8859 character set can form an identifier. 
+*/ +static const char isIdChar[] = { +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 8x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 9x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Ax */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Bx */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Cx */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Dx */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Ex */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Fx */ +}; + + /* ** Return the length of the token that begins at z[0]. Return ** -1 if the token is (or might be) incomplete. Store the token ** type in *tokenType before returning. 
*/ -int sqliteGetToken(const char *z, int *tokenType){ +static int sqliteGetToken(const unsigned char *z, int *tokenType){ int i; switch( *z ){ case ' ': case '\t': case '\n': case '\f': case '\r': { @@ -294,23 +326,14 @@ int sqliteGetToken(const char *z, int *tokenType){ } return i; } - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': case '_': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': { - for(i=1; z[i] && (isalnum(z[i]) || z[i]=='_'); i++){} + default: { + if( !isIdChar[*z] ){ + break; + } + for(i=1; isIdChar[z[i]]; i++){} *tokenType = sqliteKeywordCode(z, i); return i; } - default: { - break; - } } *tokenType = TK_ILLEGAL; return 1; @@ -350,7 +373,7 @@ int sqliteRunParser(Parse *pParse, char *zSql, char **pzErrMsg){ break; } pParse->sLastToken.z = &zSql[i]; - pParse->sLastToken.n = sqliteGetToken(&zSql[i], &tokenType); + pParse->sLastToken.n = sqliteGetToken((unsigned char*)&zSql[i], &tokenType); i += pParse->sLastToken.n; if( once ){ pParse->sFirstToken = pParse->sLastToken; diff --git a/src/vdbe.c b/src/vdbe.c index 29a45075ba..c1cdebf7a3 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -30,7 +30,7 @@ ** But other routines are also provided to help in building up ** a program instruction by instruction. 
** -** $Id: vdbe.c,v 1.85 2001/10/13 21:56:34 drh Exp $ +** $Id: vdbe.c,v 1.86 2001/10/18 12:34:48 drh Exp $ */ #include "sqliteInt.h" #include @@ -1661,7 +1661,8 @@ case OP_Like: { int c; VERIFY( if( nos<0 ) goto not_enough_stack; ) if( Stringify(p, tos) || Stringify(p, nos) ) goto no_mem; - c = sqliteLikeCompare(zStack[tos], zStack[nos]); + c = sqliteLikeCompare((unsigned char*)zStack[tos], + (unsigned char*)zStack[nos]); POPSTACK; POPSTACK; if( pOp->p1 ) c = !c; @@ -1693,7 +1694,8 @@ case OP_Glob: { int c; VERIFY( if( nos<0 ) goto not_enough_stack; ) if( Stringify(p, tos) || Stringify(p, nos) ) goto no_mem; - c = sqliteGlobCompare(zStack[tos], zStack[nos]); + c = sqliteGlobCompare((unsigned char*)zStack[tos], + (unsigned char*)zStack[nos]); POPSTACK; POPSTACK; if( pOp->p1 ) c = !c; @@ -3263,6 +3265,7 @@ case OP_SortMakeKey: { zNewKey[j++] = 0; } zNewKey[j] = 0; + VERIFY( jtos+1); ) p->tos++; diff --git a/test/select1.test b/test/select1.test index d9fcb3db44..adcee10496 100644 --- a/test/select1.test +++ b/test/select1.test @@ -11,7 +11,7 @@ # This file implements regression tests for SQLite library. The # focus of this file is testing the SELECT statement. 
# -# $Id: select1.test,v 1.12 2001/10/15 00:44:36 drh Exp $ +# $Id: select1.test,v 1.13 2001/10/18 12:34:48 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl @@ -249,13 +249,21 @@ do_test select1-6.1.2 { } {0 {f1 11 f1 33}} do_test select1-6.1.3 { set v [catch {execsql2 {SELECT * FROM test1 WHERE f1==11}} msg] - execsql {PRAGMA full_column_names=off} lappend v $msg } {0 {test1.f1 11 test1.f2 22}} do_test select1-6.1.4 { + set v [catch {execsql2 {SELECT DISTINCT * FROM test1 WHERE f1==11}} msg] + execsql {PRAGMA full_column_names=off} + lappend v $msg +} {0 {test1.f1 11 test1.f2 22}} +do_test select1-6.1.5 { set v [catch {execsql2 {SELECT * FROM test1 WHERE f1==11}} msg] lappend v $msg } {0 {f1 11 f2 22}} +do_test select1-6.1.6 { + set v [catch {execsql2 {SELECT DISTINCT * FROM test1 WHERE f1==11}} msg] + lappend v $msg +} {0 {f1 11 f2 22}} do_test select1-6.2 { set v [catch {execsql2 {SELECT f1 as xyzzy FROM test1 ORDER BY f2}} msg] lappend v $msg diff --git a/www/changes.tcl b/www/changes.tcl index 72285f6c38..fc60b3f6d3 100644 --- a/www/changes.tcl +++ b/www/changes.tcl @@ -17,6 +17,12 @@ proc chng {date desc} { puts "

    $desc

" } +chng {2001 Oct ?? (2.0.6)} { +
  • Support for UTF-8 and ISO8859 characters in column and table names.
  • +
  • Bug fix: Compute correct table names when the FULL_COLUMN_NAMES pragma + is turned on.
  • +} + chng {2001 Oct 14 (2.0.5)} {
  • Added the COUNT_CHANGES pragma.
  • Changes to the FULL_COLUMN_NAMES pragma to help out the ODBC driver.