From 98808babd3efdc2bb3b8b0884eda76abe6e32477 Mon Sep 17 00:00:00 2001
From: drh
Date: Thu, 18 Oct 2001 12:34:46 +0000
Subject: [PATCH] Support for UTF-8 and ISO8859 characters in identifiers. Bug
fix in the column name generator for selects (was coreing). (CVS 290)
FossilOrigin-Name: 22948fc685299ca888907eea68edb8a6e87c3f49
---
manifest | 28 +++++++++++------------
manifest.uuid | 2 +-
src/os.c | 2 +-
src/pager.c | 4 ++--
src/select.c | 6 ++---
src/sqliteInt.h | 3 +--
src/tclsqlite.c | 26 ++++++++++++---------
src/tokenize.c | 57 +++++++++++++++++++++++++++++++++--------------
src/vdbe.c | 9 +++++---
test/select1.test | 12 ++++++++--
www/changes.tcl | 6 +++++
11 files changed, 99 insertions(+), 56 deletions(-)
diff --git a/manifest b/manifest
index 2083be8a87..0be6cb473e 100644
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Version\s2.0.5\s(CVS\s465)
-D 2001-10-15T00:45:00
+C Support\sfor\sUTF-8\sand\sISO8859\scharacters\sin\sidentifiers.\s\sBug\sfix\sin\nthe\scolumn\sname\sgenerator\sfor\sselects\s(was\scoreing).\s(CVS\s290)
+D 2001-10-18T12:34:47
F Makefile.in 6801df952cb1df64aa32e4de85fed24511d28efd
F Makefile.template 1fdb891f14083ee0b63cf7282f91529634438e7a
F README 93d2977cc5c6595c448de16bdefc312b9d401533
@@ -29,27 +29,27 @@ F src/hash.h a5f5b3ce2d086a172c5879b0b06a27a82eac9fac
F src/insert.c b65c1d4b848e45d41e9dcccd2b226ca335de67b6
F src/main.c 9a18e97290d41844e8c12e021fb7c42948a19dc9
F src/md5.c 52f677bfc590e09f71d07d7e327bd59da738d07c
-F src/os.c 886bdd6c1dff71116f688e8b736d82e43e7ac9ed
+F src/os.c 2a501026a66416292a30ab5b0988ec75783340ae
F src/os.h bed702c9e3b768bc3cb1b12c90b83d099c1546be
-F src/pager.c e2e189a15e230c60e811f5e2ab25e68ae41c90be
+F src/pager.c 5e2877673e93ad2fa83e6d49fcd8d9590f8f38a5
F src/pager.h a0d4c5ae271914aa07b62aee0707997d6932b6ca
F src/parse.y 148e4cd134d3cbd816dcb0df50e49e498faa6ba4
F src/printf.c b1e22a47be8cdf707815647239991e08e8cb69f9
F src/random.c 2a9cc2c9716d14815fd4c2accf89d87a1143e46b
-F src/select.c 75bb3ca7fd42f7c6d86fc565688e7834587a9f0d
+F src/select.c 0e8089c5ae84fa3eb7e64b40350832983918e7a4
F src/shell.c cb8c41f1b2173efd212dab3f35f1fc6bf32ead76
F src/shell.tcl 27ecbd63dd88396ad16d81ab44f73e6c0ea9d20e
F src/sqlite.h.in b95c161abf1d58bceb05290fa3f657d8f388fc11
-F src/sqliteInt.h 04bfa79fcf6ade1961f6e3b9dc679a63be25cbd7
+F src/sqliteInt.h acfd52eb2949abb847b1be93687e93e3663231b2
F src/table.c abd0adbe0fee39d995287b3bcccd908d174dfcac
-F src/tclsqlite.c 765599686c19ed777ac379928d732c8bfc63ebac
+F src/tclsqlite.c 0b947866c89fe5b683fc86e8419b7f8da3cebf7d
F src/test1.c e4b31f62ea71963cbae44338acf477a04fc8fc49
F src/test2.c e9f99aa5ee73872819259d6612c11e55e1644321
F src/test3.c 4a0d7b882fdae731dbb759f512ad867122452f96
-F src/tokenize.c c3fcb76a41a22803b6060bddb5fbadc80bbe309c
+F src/tokenize.c 2b95e67bcb5f68e7fe4a5a426fcbe0891d7aaa54
F src/update.c c916182c6bfbc8a6f20c24920c4560fece6c9569
F src/util.c 4da3be37d0fd3c640d2d3033503768afdc8e5387
-F src/vdbe.c 0f8ea6ca59f0899e9e0d71a81c0bf46110447cf6
+F src/vdbe.c 01617df84381c3ace10feb370b8d1f72f275c1ab
F src/vdbe.h 86fc2ef42f48024c9a2e1b7fb01eda22b65a5295
F src/where.c 22fe910c7c8e2736eb37e9861343e90c0b513c86
F test/all.test a2320eb40b462f25bd3e33115b1cabf3791450dd
@@ -74,7 +74,7 @@ F test/printf.test 3cb415073754cb8ff076f26173143c3cd293a9da
F test/quick.test b6ec50f808efc06595fd324bf4f3fabadb9c7e9c
F test/quote.test 286db944717afa9a9bf829dd85e59185c65d5435
F test/rowid.test 427bfbbe9684fe7a2f851aa05badaae6d4972ce8
-F test/select1.test 5f47445fa3a033e02e1b07e4fcd4f142e5a46403
+F test/select1.test 129c0188565383c3d22858161b96fefd3f7fa09f
F test/select2.test f91c903e2bab0e9d45274855a981eebf846d5e32
F test/select3.test 5e1fe8e5a4e63fb2827ab3b89527e0fd4ae35259
F test/select4.test 29a2ffb187f3d8b6ca42a0a6b619e9cabe12e228
@@ -102,7 +102,7 @@ F www/arch.fig d5f9752a4dbf242e9cfffffd3f5762b6c63b3bcf
F www/arch.png 82ef36db1143828a7abc88b1e308a5f55d4336f4
F www/arch.tcl 03b521d252575f93b9c52f7c8b0007011512fcfb
F www/c_interface.tcl a59ee0835d1b33fcddab7d4fd65cf9e50f7d2dc7
-F www/changes.tcl 00cfa817042f33097616ff0de388e6503aab3968
+F www/changes.tcl 5ff43653f6387ce7b6e0a8aa384855b3f3b76550
F www/crosscompile.tcl c99efacb3aefaa550c6e80d91b240f55eb9fd33e
F www/download.tcl 3e51c9ff1326b0a182846134987301310dff7d60
F www/dynload.tcl 02eb8273aa78cfa9070dd4501dca937fb22b466c
@@ -114,7 +114,7 @@ F www/speed.tcl ab7d6d3bc898472bd94320a5d3c63de928d4804b
F www/sqlite.tcl 6a21242a272e9c0939a04419a51c3d50cae33e3e
F www/tclsqlite.tcl 13d50723f583888fc80ae1a38247c0ab415066fa
F www/vdbe.tcl bb7d620995f0a987293e9d4fb6185a3b077e9b44
-P 747bf1b30b74cfd0e9c27e7c0bc5172637f35520
-R 27dbd8ccaf5ffe5bb0031ee011cf1c46
+P e2d84f71ed39cbb75884205521aa9e316ab3b56c
+R 3132f2509f739a21413aa9b99beee504
U drh
-Z 99c58d4e0e5ee15af3687db26a182ad1
+Z 7eb9951a3a4ed42d9ed165321f9fc396
diff --git a/manifest.uuid b/manifest.uuid
index 8ea6ce4871..da1b629dd9 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-e2d84f71ed39cbb75884205521aa9e316ab3b56c
\ No newline at end of file
+22948fc685299ca888907eea68edb8a6e87c3f49
\ No newline at end of file
diff --git a/src/os.c b/src/os.c
index 862eb278b6..07b2d1b661 100644
--- a/src/os.c
+++ b/src/os.c
@@ -577,7 +577,7 @@ int sqliteOsLock(OsFile id, int wrlock){
needSysLock = 1;
}
}else{
- if( id.pLock<0 ){
+ if( id.pLock->cnt<0 ){
rc = SQLITE_BUSY;
}else{
rc = SQLITE_OK;
diff --git a/src/pager.c b/src/pager.c
index 7134744e69..3beb7f4b65 100644
--- a/src/pager.c
+++ b/src/pager.c
@@ -18,7 +18,7 @@
** file simultaneously, or one process from reading the database while
** another is writing.
**
-** @(#) $Id: pager.c,v 1.27 2001/10/12 17:30:05 drh Exp $
+** @(#) $Id: pager.c,v 1.28 2001/10/18 12:34:47 drh Exp $
*/
#include "sqliteInt.h"
#include "pager.h"
@@ -1076,7 +1076,7 @@ int sqlitepager_rollback(Pager *pPager){
}
pPager->dbSize = -1;
return rc;
-};
+}
/*
** Return TRUE if the database file is opened read-only. Return FALSE
diff --git a/src/select.c b/src/select.c
index 70e962cc4e..49e8f02cd1 100644
--- a/src/select.c
+++ b/src/select.c
@@ -12,7 +12,7 @@
** This file contains C code routines that are called by the parser
** to handle SELECT statements in SQLite.
**
-** $Id: select.c,v 1.40 2001/10/15 00:44:36 drh Exp $
+** $Id: select.c,v 1.41 2001/10/18 12:34:47 drh Exp $
*/
#include "sqliteInt.h"
@@ -256,10 +256,10 @@ void generateColumnNames(Parse *pParse, IdList *pTabList, ExprList *pEList){
}else if( p->op==TK_COLUMN && pTabList ){
if( pTabList->nId>1 || showFullNames ){
char *zName = 0;
- Table *pTab = pTabList->a[p->iTable].pTab;
+ Table *pTab = pTabList->a[p->iTable - pParse->nTab].pTab;
char *zTab;
- zTab = pTabList->a[p->iTable].zAlias;
+ zTab = pTabList->a[p->iTable - pParse->nTab].zAlias;
if( zTab==0 ) zTab = pTab->zName;
sqliteSetString(&zName, zTab, ".", pTab->aCol[p->iColumn].zName, 0);
sqliteVdbeAddOp(v, OP_ColumnName, i, 0);
diff --git a/src/sqliteInt.h b/src/sqliteInt.h
index 72d63abca6..92d01724ba 100644
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -11,7 +11,7 @@
*************************************************************************
** Internal interface definitions for SQLite.
**
-** @(#) $Id: sqliteInt.h,v 1.62 2001/10/15 00:44:36 drh Exp $
+** @(#) $Id: sqliteInt.h,v 1.63 2001/10/18 12:34:47 drh Exp $
*/
#include "sqlite.h"
#include "hash.h"
@@ -412,7 +412,6 @@ int sqliteSortCompare(const char *, const char *);
char *sqliteStrDup(const char*);
char *sqliteStrNDup(const char*, int);
#endif
-int sqliteGetToken(const char*, int *);
void sqliteSetString(char **, const char *, ...);
void sqliteSetNString(char **, ...);
void sqliteDequote(char*);
diff --git a/src/tclsqlite.c b/src/tclsqlite.c
index bf713e8e4e..7893b73b5b 100644
--- a/src/tclsqlite.c
+++ b/src/tclsqlite.c
@@ -11,7 +11,7 @@
*************************************************************************
** A TCL Interface to SQLite
**
-** $Id: tclsqlite.c,v 1.24 2001/09/28 17:47:14 drh Exp $
+** $Id: tclsqlite.c,v 1.25 2001/10/18 12:34:47 drh Exp $
*/
#ifndef NO_TCL /* Omit this whole file if TCL is unavailable */
@@ -20,6 +20,16 @@
#include <stdlib.h>
#include <string.h>
+/*
+** If TCL uses UTF-8 and SQLite is configured to use iso8859, then we
+** have to do a translation when going between the two. Set the
+** UTF_TRANSLATION_NEEDED macro to indicate that we need to do
+** this translation.
+*/
+#if defined(TCL_UTF_MAX) && !defined(SQLITE_UTF8)
+# define UTF_TRANSLATION_NEEDED 1
+#endif
+
/*
** There is one instance of this structure for each SQLite database
** that has been opened by the SQLite TCL interface.
@@ -42,17 +52,11 @@ struct CallbackData {
Tcl_Obj *pCode; /* The code to execute for each row */
int once; /* Set only for the first invocation of callback */
int tcl_rc; /* Return code from TCL script */
-};
-
-/*
-** If TCL uses UTF-8 and SQLite is configured to use iso8859, then we
-** have to do a translation when going between the two. Set the
-** UTF_TRANSLATION_NEEDED macro to indicate that we need to do
-** this translation.
-*/
-#if defined(TCL_UTF_MAX) && !defined(SQLITE_UTF8)
-# define UTF_TRANSLATION_NEEDED 1
+#ifdef UTF_TRANSLATION_NEEDED
+ int nColName; /* Number of entries in the azColName[] array */
+ char **azColName; /* Column names translated to UTF-8 */
#endif
+};
/*
** Called for each row of the result.
diff --git a/src/tokenize.c b/src/tokenize.c
index 52b23b210b..ba64ddb248 100644
--- a/src/tokenize.c
+++ b/src/tokenize.c
@@ -15,7 +15,7 @@
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
-** $Id: tokenize.c,v 1.27 2001/10/13 02:59:09 drh Exp $
+** $Id: tokenize.c,v 1.28 2001/10/18 12:34:48 drh Exp $
*/
#include "sqliteInt.h"
#include "os.h"
@@ -138,12 +138,44 @@ static int sqliteKeywordCode(const char *z, int n){
return TK_ID;
}
+
+/*
+** If X is a character that can be used in an identifier then
+** isIdChar[X] will be 1. Otherwise isIdChar[X] will be 0.
+**
+** In this implementation, an identifier can be a string of
+** alphabetic characters, digits, and "_" plus any character
+** with the high-order bit set. The latter rule means that
+** any sequence of UTF-8 characters or characters taken from
+** an extended ISO8859 character set can form an identifier.
+*/
+static const char isIdChar[] = {
+/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 8x */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 9x */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Ax */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Bx */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Cx */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Dx */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Ex */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Fx */
+};
+
+
/*
** Return the length of the token that begins at z[0]. Return
** -1 if the token is (or might be) incomplete. Store the token
** type in *tokenType before returning.
*/
-int sqliteGetToken(const char *z, int *tokenType){
+static int sqliteGetToken(const unsigned char *z, int *tokenType){
int i;
switch( *z ){
case ' ': case '\t': case '\n': case '\f': case '\r': {
@@ -294,23 +326,14 @@ int sqliteGetToken(const char *z, int *tokenType){
}
return i;
}
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z': case '_':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z': {
- for(i=1; z[i] && (isalnum(z[i]) || z[i]=='_'); i++){}
+ default: {
+ if( !isIdChar[*z] ){
+ break;
+ }
+ for(i=1; isIdChar[z[i]]; i++){}
*tokenType = sqliteKeywordCode(z, i);
return i;
}
- default: {
- break;
- }
}
*tokenType = TK_ILLEGAL;
return 1;
@@ -350,7 +373,7 @@ int sqliteRunParser(Parse *pParse, char *zSql, char **pzErrMsg){
break;
}
pParse->sLastToken.z = &zSql[i];
- pParse->sLastToken.n = sqliteGetToken(&zSql[i], &tokenType);
+ pParse->sLastToken.n = sqliteGetToken((unsigned char*)&zSql[i], &tokenType);
i += pParse->sLastToken.n;
if( once ){
pParse->sFirstToken = pParse->sLastToken;
diff --git a/src/vdbe.c b/src/vdbe.c
index 29a45075ba..c1cdebf7a3 100644
--- a/src/vdbe.c
+++ b/src/vdbe.c
@@ -30,7 +30,7 @@
** But other routines are also provided to help in building up
** a program instruction by instruction.
**
-** $Id: vdbe.c,v 1.85 2001/10/13 21:56:34 drh Exp $
+** $Id: vdbe.c,v 1.86 2001/10/18 12:34:48 drh Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
@@ -1661,7 +1661,8 @@ case OP_Like: {
int c;
VERIFY( if( nos<0 ) goto not_enough_stack; )
if( Stringify(p, tos) || Stringify(p, nos) ) goto no_mem;
- c = sqliteLikeCompare(zStack[tos], zStack[nos]);
+ c = sqliteLikeCompare((unsigned char*)zStack[tos],
+ (unsigned char*)zStack[nos]);
POPSTACK;
POPSTACK;
if( pOp->p1 ) c = !c;
@@ -1693,7 +1694,8 @@ case OP_Glob: {
int c;
VERIFY( if( nos<0 ) goto not_enough_stack; )
if( Stringify(p, tos) || Stringify(p, nos) ) goto no_mem;
- c = sqliteGlobCompare(zStack[tos], zStack[nos]);
+ c = sqliteGlobCompare((unsigned char*)zStack[tos],
+ (unsigned char*)zStack[nos]);
POPSTACK;
POPSTACK;
if( pOp->p1 ) c = !c;
@@ -3263,6 +3265,7 @@ case OP_SortMakeKey: {
zNewKey[j++] = 0;
}
zNewKey[j] = 0;
+ VERIFY( j<nByte; )
PopStack(p, nField);
VERIFY( NeedStack(p, p->tos+1); )
p->tos++;
diff --git a/test/select1.test b/test/select1.test
index d9fcb3db44..adcee10496 100644
--- a/test/select1.test
+++ b/test/select1.test
@@ -11,7 +11,7 @@
# This file implements regression tests for SQLite library. The
# focus of this file is testing the SELECT statement.
#
-# $Id: select1.test,v 1.12 2001/10/15 00:44:36 drh Exp $
+# $Id: select1.test,v 1.13 2001/10/18 12:34:48 drh Exp $
set testdir [file dirname $argv0]
source $testdir/tester.tcl
@@ -249,13 +249,21 @@ do_test select1-6.1.2 {
} {0 {f1 11 f1 33}}
do_test select1-6.1.3 {
set v [catch {execsql2 {SELECT * FROM test1 WHERE f1==11}} msg]
- execsql {PRAGMA full_column_names=off}
lappend v $msg
} {0 {test1.f1 11 test1.f2 22}}
do_test select1-6.1.4 {
+ set v [catch {execsql2 {SELECT DISTINCT * FROM test1 WHERE f1==11}} msg]
+ execsql {PRAGMA full_column_names=off}
+ lappend v $msg
+} {0 {test1.f1 11 test1.f2 22}}
+do_test select1-6.1.5 {
set v [catch {execsql2 {SELECT * FROM test1 WHERE f1==11}} msg]
lappend v $msg
} {0 {f1 11 f2 22}}
+do_test select1-6.1.6 {
+ set v [catch {execsql2 {SELECT DISTINCT * FROM test1 WHERE f1==11}} msg]
+ lappend v $msg
+} {0 {f1 11 f2 22}}
do_test select1-6.2 {
set v [catch {execsql2 {SELECT f1 as xyzzy FROM test1 ORDER BY f2}} msg]
lappend v $msg
diff --git a/www/changes.tcl b/www/changes.tcl
index 72285f6c38..fc60b3f6d3 100644
--- a/www/changes.tcl
+++ b/www/changes.tcl
@@ -17,6 +17,12 @@ proc chng {date desc} {
puts "
"
}
+chng {2001 Oct ?? (2.0.6)} {
+Support for UTF-8 and ISO8859 characters in column and table names.
+Bug fix: Compute correct table names when the FULL_COLUMN_NAMES pragma
+ is turned on.
+}
+
chng {2001 Oct 14 (2.0.5)} {
Added the COUNT_CHANGES pragma.
Changes to the FULL_COLUMN_NAMES pragma to help out the ODBC driver.