Correctly handle the situation where a collation sequence is available, but

not in the preferred encoding. (CVS 1565)

FossilOrigin-Name: 49ab4794e1b5be5cbb3b87a65477659762487cf8
This commit is contained in:
danielk1977 2004-06-10 14:01:08 +00:00
parent caec4e7a59
commit 4e6af1347c
8 changed files with 321 additions and 80 deletions

View File

@ -1,5 +1,5 @@
C Add\sthe\ssqlite3_collation_needed()\sAPI\sand\sfix\ssome\serror\shandling\scases\ninvolving\sunknown\scollation\ssequences.\s(CVS\s1564)
D 2004-06-10T10:51:53
C Correctly\shandle\sthe\ssituation\swhere\sa\scollation\ssequence\sis\savailable,\sbut\nnot\sin\sthe\spreferred\sencoding.\s(CVS\s1565)
D 2004-06-10T14:01:08
F Makefile.in ab7b0d5118e2da97bac66be8684a1034e3500f5a
F Makefile.linux-gcc a9e5a0d309fa7c38e7c14d3ecf7690879d3a5457
F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd
@ -27,7 +27,7 @@ F src/attach.c 93b8ecec4a8d7b4e9f2479e2327d90c9d01765e8
F src/auth.c 5c2f0bea4729c98c2be3b69d6b466fc51448fe79
F src/btree.c 281af87aa117de024f5b6c2728a2339cba9ef584
F src/btree.h 589427ac13bb544d298cd99726e2572a6fe4bdaa
F src/build.c 4b1a23d919fe01549702f7f1bfe7f8b656e77a17
F src/build.c b36b62f49aea7d258cb804999dcc8650e4d79464
F src/date.c 8e6fa3173386fb29fdef012ee08a853c1e9908b2
F src/delete.c 911221aadb35d610c84fadb32e71c52990827e58
F src/encode.c a876af473d1d636faa3dca51c7571f2e007eea37
@ -56,10 +56,10 @@ F src/random.c eff68e3f257e05e81eae6c4d50a51eb88beb4ff3
F src/select.c 6cb407796dde0e8f27450ead68856eb9f8188789
F src/shell.c ca519519dcbbc582f6d88f7d0e7583b857fd3469
F src/sqlite.h.in 2b6afe1de6935d3dfbd6042f46a62f1b7c3b3992
F src/sqliteInt.h 6be535d420f99c57f29f13c3c2d6a3497432b366
F src/sqliteInt.h e8e641bec4d7806023ce8192a64234d3599c5fc0
F src/table.c af14284fa36c8d41f6829e3f2819dce07d3e2de2
F src/tclsqlite.c e974c0b2479ed37334aeb268de331e0a1b21b5a8
F src/test1.c f78d6ac0675bc5db48dac9c5379c965bdadb9113
F src/test1.c 5f5c0773df1091cc02ddf6608a8f6e0c65940a56
F src/test2.c 05f810c90cf6262d5f352860e87d41a3f34207f9
F src/test3.c beafd0ccf7b9ae784744be1b1e66ffe8f64c25da
F src/test4.c a921a69821fd30209589228e64f94e9f715b6fe2
@ -73,9 +73,9 @@ F src/vacuum.c b921eb778842592e1fb48a9d4cef7e861103878f
F src/vdbe.c 90e0e6bdbdf9b77c66f2500374b5784d30c323fa
F src/vdbe.h 46f74444a213129bc4b5ce40124dd8ed613b0cde
F src/vdbeInt.h d41605853332bdbd600d7ecd60e1f54bbaea174e
F src/vdbeapi.c 4ac95766b0515538037a7aec172ed26142f97cf9
F src/vdbeapi.c bcf5821ed09070d586898374b905861c4dd73d0b
F src/vdbeaux.c 73764dadcdbf79aa2d948f863eae07b18589e663
F src/vdbemem.c 5b2fab8b5a830e5204413b808c4a2d8335189f21
F src/vdbemem.c b1599f5d24131107a21a54e618e372e1252de958
F src/where.c dda77afaa593cd54e5955ec433076de18faf62f6
F test/all.test 569a92a8ee88f5300c057cc4a8f50fbbc69a3242
F test/attach.test aed659e52635662bcd5069599aaca823533edf5a
@ -103,7 +103,7 @@ F test/crashtest1.c 09c1c7d728ccf4feb9e481671e29dda5669bbcc2
F test/date.test aed5030482ebc02bd8d386c6c86a29f694ab068d
F test/delete.test ac14bd6df5f1581d646eebc013d6b844a885dcf6
F test/enc.test a55481d45ff493804e8d88357feb4642fc50a6b2
F test/enc2.test 28b61a098dd571b06147fe9f857489edba4e405d
F test/enc2.test 1d469f58ee7f187bf06e11bd72a12bdea6362b2f
F test/expr.test 521588701dae8cf5aa2b8a18c5c897711f754332
F test/fkey1.test d65c824459916249bee501532d6154ddab0b5db7
F test/func.test 9816fbed0a5e87e00f4fc88b4cdcd638abc524c4
@ -219,7 +219,7 @@ F www/support.tcl 1801397edd271cc39a2aadd54e701184b5181248
F www/tclsqlite.tcl 19191cf2a1010eaeff74c51d83fd5f5a4d899075
F www/vdbe.tcl 59288db1ac5c0616296b26dce071c36cb611dfe9
F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4
P 518d82d3b1ab996d675f45c94d740c98578a04a6
R 9cc8eb0d8d516e56f0a005459d5bbc05
P 67500546ea24fd7a30348960c98cd257dbfa965f
R 393c5de920a400aa587d8f21dc793fbe
U danielk1977
Z 25a8aff272a87359157d326e3bffc5a6
Z 3447cf118fd13bbb0c99c8f654b072b2

View File

@ -1 +1 @@
67500546ea24fd7a30348960c98cd257dbfa965f
49ab4794e1b5be5cbb3b87a65477659762487cf8

View File

@ -23,7 +23,7 @@
** ROLLBACK
** PRAGMA
**
** $Id: build.c,v 1.215 2004/06/10 10:50:08 danielk1977 Exp $
** $Id: build.c,v 1.216 2004/06/10 14:01:08 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
@ -912,10 +912,10 @@ CollSeq *sqlite3FindCollSeq(
case TEXT_Utf8:
break;
case TEXT_Utf16le:
pColl = &pColl[2];
pColl = &pColl[1];
break;
case TEXT_Utf16be:
pColl = &pColl[1];
pColl = &pColl[2];
break;
default:
assert(!"Cannot happen");

View File

@ -11,7 +11,7 @@
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.280 2004/06/10 10:50:32 danielk1977 Exp $
** @(#) $Id: sqliteInt.h,v 1.281 2004/06/10 14:01:08 danielk1977 Exp $
*/
#include "config.h"
#include "sqlite3.h"
@ -1401,3 +1401,10 @@ CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char *zName, int nName);
CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr);
int sqlite3CheckCollSeq(Parse *, CollSeq *);
int sqlite3CheckIndexCollSeq(Parse *, Index *);
/*
** Internal (non-API) helpers for manipulating sqlite3_value objects.
*/
/* Return the value's content in encoding enc; NULL for an SQL NULL. */
const void *sqlite3ValueText(sqlite3_value*, u8);
/* Return the size in bytes of the value's blob, or of its text once
** converted to encoding enc; 0 if no text is available. */
int sqlite3ValueBytes(sqlite3_value*, u8);
/* Point the value at the string z (n bytes, encoding enc). The string is
** not copied. A negative n means the length is computed from z. */
void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8);
/* Release a value obtained from sqlite3ValueNew(). */
void sqlite3ValueFree(sqlite3_value*);
/* Allocate a new value holding SQL NULL; the result is NULL if the
** allocation fails. */
sqlite3_value *sqlite3ValueNew();

View File

@ -13,7 +13,7 @@
** is not included in the SQLite library. It is used for automated
** testing of the SQLite library.
**
** $Id: test1.c,v 1.74 2004/06/09 17:37:28 drh Exp $
** $Id: test1.c,v 1.75 2004/06/10 14:01:08 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
@ -860,6 +860,108 @@ static int test_bind(
return TCL_OK;
}
/*
** Usage: add_test_collate <db ptr> <utf8> <utf16le> <utf16be>
**
** This function is used to test that SQLite selects the correct collation
** sequence callback when multiple versions (for different text encodings)
** are available.
**
** Calling this routine registers the collation sequence "test_collate"
** with database handle <db>. The remaining three arguments must each be
** a boolean value. If the first is true, then a version of test_collate is
** registered for UTF-8; if the second is true, a version is registered for
** UTF-16le; if the third is true, a version is registered for UTF-16be.
** Previous versions of test_collate are deleted.
**
** The collation sequence test_collate is implemented by calling the
** following TCL script:
**
** "test_collate <enc> <lhs> <rhs>"
**
** The <lhs> and <rhs> are the two values being compared, encoded in UTF-8.
** The <enc> parameter is the encoding of the collation function that
** SQLite selected to call. The TCL test script implements the
** "test_collate" proc.
**
** Note that this will only work with one interpreter at a time, as the
** interp pointer to use when evaluating the TCL script is stored in
** pTestCollateInterp.
*/
/* Interpreter used to evaluate the "test_collate" script. It is set by
** test_collate() each time add_test_collate is invoked. */
static Tcl_Interp* pTestCollateInterp;

/*
** Collation callback registered by test_collate().
**
** pCtx carries the encoding constant (SQLITE_UTF8, SQLITE_UTF16LE or
** SQLITE_UTF16BE) that this particular registration was made for. The
** two strings (nA bytes at zA, nB bytes at zB) arrive in that encoding.
**
** The callback evaluates the TCL command
**
**     test_collate <enc> <lhs> <rhs>
**
** where <lhs> and <rhs> are the two strings converted to UTF-8, and
** returns the integer result of that command. If the value object cannot
** be allocated, the script fails, or its result is not an integer, the
** strings are reported as equal (0) rather than returning garbage.
*/
static int test_collate_func(
  void *pCtx,
  int nA, const void *zA,
  int nB, const void *zB
){
  Tcl_Interp *i = pTestCollateInterp;
  int encin = (int)pCtx;
  int res = 0;                 /* Defined result even if the script fails */
  const char *zText;
  sqlite3_value *pVal;
  Tcl_Obj *pX;

  /* Scratch value used to convert each operand to UTF-8. */
  pVal = sqlite3ValueNew();
  if( !pVal ){
    return 0;
  }

  pX = Tcl_NewStringObj("test_collate", -1);
  Tcl_IncrRefCount(pX);

  switch( encin ){
    case SQLITE_UTF8:
      Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-8",-1));
      break;
    case SQLITE_UTF16LE:
      Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-16LE",-1));
      break;
    case SQLITE_UTF16BE:
      Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-16BE",-1));
      break;
    default:
      assert(0);
  }

  /* Tcl_NewStringObj() with length -1 needs a real string, so fall back
  ** to "" if the conversion yields a NULL pointer. */
  sqlite3ValueSetStr(pVal, nA, zA, encin);
  zText = (const char *)sqlite3_value_text(pVal);
  Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(zText ? zText : "",-1));
  sqlite3ValueSetStr(pVal, nB, zB, encin);
  zText = (const char *)sqlite3_value_text(pVal);
  Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(zText ? zText : "",-1));
  sqlite3ValueFree(pVal);

  /* Both calls return zero (TCL_OK) on success. Any other status means
  ** the script failed or did not produce an integer. */
  if( Tcl_EvalObjEx(i, pX, 0)
   || Tcl_GetIntFromObj(i, Tcl_GetObjResult(i), &res)
  ){
    res = 0;
  }
  Tcl_DecrRefCount(pX);
  return res;
}
/*
** Implementation of the TCL command:
**
**     add_test_collate <DB> <utf8> <utf16le> <utf16be>
**
** objv[1] names the database handle. objv[2..4] are booleans. For each
** of UTF-8, UTF-16le and UTF-16be (in that order) the collation
** "test_collate" is registered with test_collate_func as its callback when
** the flag is true, or with a NULL callback when it is false. The encoding
** constant itself is passed as the callback's user-data pointer.
**
** Returns TCL_OK on success. Returns TCL_ERROR if the argument count is
** wrong, the database handle cannot be resolved, or a flag is not a valid
** boolean; registrations made before a bad flag was seen are left in place.
*/
static int test_collate(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  sqlite3 *db;
  int val;

  if( objc!=5 ) goto bad_args;

  /* test_collate_func() evaluates its script in this interpreter. */
  pTestCollateInterp = interp;
  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;

  if( TCL_OK!=Tcl_GetBooleanFromObj(interp, objv[2], &val) ) return TCL_ERROR;
  sqlite3_create_collation(db, "test_collate", SQLITE_UTF8,
      (void *)SQLITE_UTF8, val?test_collate_func:0);
  if( TCL_OK!=Tcl_GetBooleanFromObj(interp, objv[3], &val) ) return TCL_ERROR;
  sqlite3_create_collation(db, "test_collate", SQLITE_UTF16LE,
      (void *)SQLITE_UTF16LE, val?test_collate_func:0);
  if( TCL_OK!=Tcl_GetBooleanFromObj(interp, objv[4], &val) ) return TCL_ERROR;
  sqlite3_create_collation(db, "test_collate", SQLITE_UTF16BE,
      (void *)SQLITE_UTF16BE, val?test_collate_func:0);

  return TCL_OK;

bad_args:
  /* The variadic argument list must end with a null *pointer*; a bare
  ** int 0 is not guaranteed to be read back as one. */
  Tcl_AppendResult(interp, "wrong # args: should be \"",
      Tcl_GetStringFromObj(objv[0], 0), " <DB> <utf8> <utf16le> <utf16be>\"",
      (char *)0);
  return TCL_ERROR;
}
/*
** Usage: breakpoint
**
@ -1868,6 +1970,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp){
{ "sqlite3OsClose", test_sqlite3OsClose, 0 },
{ "sqlite3OsLock", test_sqlite3OsLock, 0 },
{ "sqlite3OsUnlock", test_sqlite3OsUnlock, 0 },
{ "add_test_collate", test_collate, 0 },
};
int i;

View File

@ -58,40 +58,10 @@ long long int sqlite3_value_int64(sqlite3_value *pVal){
return pVal->i;
}
const unsigned char *sqlite3_value_text(sqlite3_value *pVal){
if( pVal->flags&MEM_Null ){
/* For a NULL return a NULL Pointer */
return 0;
}
if( pVal->flags&MEM_Str ){
/* If there is already a string representation, make sure it is in
** encoded in UTF-8.
*/
sqlite3VdbeChangeEncoding(pVal, TEXT_Utf8);
}else if( !(pVal->flags&MEM_Blob) ){
/* Otherwise, unless this is a blob, convert it to a UTF-8 string */
sqlite3VdbeMemStringify(pVal, TEXT_Utf8);
}
return pVal->z;
return (const char *)sqlite3ValueText(pVal, TEXT_Utf8);
}
const void *sqlite3_value_text16(sqlite3_value* pVal){
if( pVal->flags&MEM_Null ){
/* For a NULL return a NULL Pointer */
return 0;
}
if( pVal->flags&MEM_Str ){
/* If there is already a string representation, make sure it is in
** encoded in UTF-16 machine byte order.
*/
sqlite3VdbeChangeEncoding(pVal, TEXT_Utf16);
}else if( !(pVal->flags&MEM_Blob) ){
/* Otherwise, unless this is a blob, convert it to a UTF-16 string */
sqlite3VdbeMemStringify(pVal, TEXT_Utf16);
}
return (const void *)(pVal->z);
return sqlite3ValueText(pVal, TEXT_Utf16);
}
int sqlite3_value_type(sqlite3_value* pVal){
return pVal->type;

View File

@ -47,12 +47,15 @@ int sqlite3VdbeChangeEncoding(Mem *pMem, int desiredEnc){
*/
char *z;
int n;
int rc = sqlite3utfTranslate(pMem->z, pMem->n, pMem->enc,
(void **)&z, &n, desiredEnc);
int rc;
rc = sqlite3utfTranslate(pMem->z, pMem->n, pMem->enc, &z, &n, desiredEnc);
if( rc!=SQLITE_OK ){
return rc;
}
if( pMem->flags&MEM_Dyn ){
sqliteFree(pMem->z);
}
/* Result of sqlite3utfTranslate is currently always dynamically
** allocated and nul terminated. This might be altered as a performance
** enhancement later.
@ -444,38 +447,20 @@ int sqlite3MemCompare(const Mem *pMem1, const Mem *pMem2, const CollSeq *pColl){
if( pMem1->enc==pColl->enc ){
return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z);
}else{
switch( pColl->enc ){
case SQLITE_UTF8:
return pColl->xCmp(
pColl->pUser,
sqlite3_value_bytes((sqlite3_value *)pMem1),
sqlite3_value_text((sqlite3_value *)pMem1),
sqlite3_value_bytes((sqlite3_value *)pMem2),
sqlite3_value_text((sqlite3_value *)pMem2)
);
case SQLITE_UTF16LE:
case SQLITE_UTF16BE:
/* FIX ME: Handle non-native UTF-16 properly instead of
** assuming it is always native. */
return pColl->xCmp(
pColl->pUser,
sqlite3_value_bytes16((sqlite3_value *)pMem1),
sqlite3_value_text16((sqlite3_value *)pMem1),
sqlite3_value_bytes16((sqlite3_value *)pMem2),
sqlite3_value_text16((sqlite3_value *)pMem2)
);
default:
assert(!"Cannot happen");
}
return pColl->xCmp(
pColl->pUser,
sqlite3ValueBytes((sqlite3_value*)pMem1, pColl->enc),
sqlite3ValueText((sqlite3_value*)pMem1, pColl->enc),
sqlite3ValueBytes((sqlite3_value*)pMem2, pColl->enc),
sqlite3ValueText((sqlite3_value*)pMem2, pColl->enc)
);
}
}
/* If a NULL pointer was passed as the collate function, fall through
** to the blob case and use memcmp().
*/
** to the blob case and use memcmp(). */
}
/* Both values must be blobs. Compare using memcmp().
*/
/* Both values must be blobs. Compare using memcmp(). */
rc = memcmp(pMem1->z, pMem2->z, (pMem1->n>pMem2->n)?pMem2->n:pMem1->n);
if( rc==0 ){
rc = pMem1->n - pMem2->n;
@ -588,3 +573,72 @@ void sqlite3VdbeMemSanity(Mem *pMem, u8 db_enc){
|| (pMem->flags&MEM_Null)==0 );
}
#endif
/* This function is only available internally, it is not part of the
** external API. It works in a similar way to sqlite3_value_text(),
** except the data returned is in the encoding specified by the second
** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or
** SQLITE_UTF8.
**
** A NULL pointer is returned if pVal is NULL, if pVal holds an SQL NULL,
** or if an existing string could not be translated to encoding enc.
** A blob is returned as-is, without any conversion.
*/
const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){
  assert( enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE || enc==SQLITE_UTF8);
  if( !pVal ){
    return 0;
  }
  if( pVal->flags&MEM_Null ){
    /* For a NULL return a NULL Pointer */
    return 0;
  }
  if( pVal->flags&MEM_Str ){
    /* If there is already a string representation, make sure it is
    ** encoded in the requested encoding. A non-zero return (anything
    ** other than SQLITE_OK) means the translation failed and the string
    ** is still in its old encoding, so do not hand it to the caller.
    */
    if( sqlite3VdbeChangeEncoding(pVal, enc) ){
      return 0;
    }
  }else if( !(pVal->flags&MEM_Blob) ){
    /* Otherwise, unless this is a blob, convert it to a string in the
    ** requested encoding. */
    sqlite3VdbeMemStringify(pVal, enc);
  }
  return (const void *)(pVal->z);
}
sqlite3_value* sqlite3ValueNew(){
Mem *p = sqliteMalloc(sizeof(*p));
if( p ){
p->flags = MEM_Null;
p->type = SQLITE_NULL;
}
return p;
}
/*
** Make value v refer to the string z, which is n bytes long and encoded
** as enc. The string is not copied: the value simply points at z and is
** flagged MEM_Static. Any dynamically allocated buffer the value
** previously owned (MEM_Dyn) is released first.
**
** If n is negative the length is computed from the string itself:
** strlen() for UTF-8, sqlite3utf16ByteLen() otherwise. A NULL z with a
** negative n is given length 0 instead of being handed to those routines.
*/
void sqlite3ValueSetStr(sqlite3_value *v, int n, const void *z, u8 enc){
  Mem *p = (Mem *)v;
  if( p->z && (p->flags&MEM_Dyn) ){
    sqliteFree(p->z);
  }
  p->z = (char *)z;
  p->n = n;
  p->enc = enc;
  p->type = SQLITE_TEXT;
  p->flags = (MEM_Str|MEM_Static);
  if( p->n<0 ){
    if( p->z==0 ){
      /* Nothing to measure; never pass a NULL pointer to strlen(). */
      p->n = 0;
    }else if( enc==SQLITE_UTF8 ){
      p->n = strlen(p->z);
    }else{
      p->n = sqlite3utf16ByteLen(p->z, -1);
    }
  }
}
/*
** Release a value allocated by sqlite3ValueNew(). Passing a NULL pointer
** is a harmless no-op, so callers need not special-case a failed
** allocation.
*/
void sqlite3ValueFree(sqlite3_value *v){
  if( !v ){
    return;
  }
  /* Resetting to an empty static string releases any dynamically
  ** allocated buffer the value still owns. */
  sqlite3ValueSetStr(v, 0, 0, SQLITE_UTF8);
  sqliteFree(v);
}
/*
** Return the number of bytes in pVal. For a blob this is the blob size.
** Otherwise the value is first converted to text in encoding enc and the
** size of that text is returned, or 0 if no text is available.
*/
int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){
  Mem *pMem = (Mem*)pVal;
  if( pMem->flags & MEM_Blob ){
    return pMem->n;
  }
  /* The conversion may change pMem->n, so read it only afterwards. */
  return sqlite3ValueText(pVal, enc) ? pMem->n : 0;
}

View File

@ -13,7 +13,7 @@
# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc2.test,v 1.7 2004/06/10 05:59:25 danielk1977 Exp $
# $Id: enc2.test,v 1.8 2004/06/10 14:01:08 danielk1977 Exp $
set testdir [file dirname $argv0]
source $testdir/tester.tcl
@ -143,5 +143,112 @@ do_test enc2-4.3 {
} {1 {attached databases must use the same text encoding as main database}}
db2 close
db close
# The following tests - enc2-5.* - test that SQLite selects the correct
# collation sequence when more than one is available.
# Reference ordering used by the test collation: a word sorts by its
# position in this list.
set ::values {one two three four five}
# Encoding name passed to the most recent test_collate call.
set ::test_collate_enc INVALID

# Collation script invoked by the C callback. Records which encoding
# variant was called, then compares lhs and rhs by their positions in
# ::values (a word not in the list has position -1).
proc test_collate {enc lhs rhs} {
  global values test_collate_enc
  set test_collate_enc $enc
  set lhsPos [lsearch -exact $values $lhs]
  set rhsPos [lsearch -exact $values $rhs]
  return [expr {$lhsPos - $rhsPos}]
}
# Start from a fresh database file.
# NOTE(review): no encoding pragma is issued here, so this database
# presumably uses the default (UTF-8) text encoding - confirm.
file delete -force test.db
set DB [sqlite db test.db]
# Populate t5 with the five words that test_collate knows how to order.
do_test enc2-5.0 {
execsql {
CREATE TABLE t5(a);
INSERT INTO t5 VALUES('one');
INSERT INTO t5 VALUES('two');
INSERT INTO t5 VALUES('five');
INSERT INTO t5 VALUES('three');
INSERT INTO t5 VALUES('four');
}
} {}
# The add_test_collate flags are <utf8> <utf16le> <utf16be>. With all three
# variants registered, the UTF-8 one is expected to be used.
do_test enc2-5.1 {
add_test_collate $DB 1 1 1
set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
lappend res $::test_collate_enc
} {one two three four five UTF-8}
# Only the UTF-16le variant is registered, so it must be used.
do_test enc2-5.2 {
add_test_collate $DB 0 1 0
set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
# NOTE(review): "breakpoint" looks like a leftover debugging hook - confirm
# whether it is still wanted here.
breakpoint
# Only the UTF-16be variant is registered, so it must be used.
do_test enc2-5.3 {
add_test_collate $DB 0 0 1
set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
# Repeat the collation-selection tests against a UTF-16le database.
file delete -force test.db
set DB [sqlite db test.db]
execsql {pragma encoding = 'UTF-16LE'}
# Populate t5 with the five words that test_collate knows how to order.
do_test enc2-5.4 {
execsql {
CREATE TABLE t5(a);
INSERT INTO t5 VALUES('one');
INSERT INTO t5 VALUES('two');
INSERT INTO t5 VALUES('five');
INSERT INTO t5 VALUES('three');
INSERT INTO t5 VALUES('four');
}
} {}
# All three variants registered: the UTF-16le one is expected to be used.
do_test enc2-5.5 {
add_test_collate $DB 1 1 1
set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
# UTF-16le variant missing: the UTF-16be variant is expected to be chosen
# ahead of the UTF-8 one.
do_test enc2-5.6 {
add_test_collate $DB 1 0 1
set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
# NOTE(review): "breakpoint" looks like a leftover debugging hook - confirm
# whether it is still wanted here.
breakpoint
# Only the UTF-8 variant is registered, so it must be used.
do_test enc2-5.7 {
add_test_collate $DB 1 0 0
set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
lappend res $::test_collate_enc
} {one two three four five UTF-8}
# Repeat the collation-selection tests against a UTF-16be database.
file delete -force test.db
set DB [sqlite db test.db]
execsql {pragma encoding = 'UTF-16BE'}
# Populate t5 with the five words that test_collate knows how to order.
do_test enc2-5.8 {
execsql {
CREATE TABLE t5(a);
INSERT INTO t5 VALUES('one');
INSERT INTO t5 VALUES('two');
INSERT INTO t5 VALUES('five');
INSERT INTO t5 VALUES('three');
INSERT INTO t5 VALUES('four');
}
} {}
# All three variants registered: the UTF-16be one is expected to be used.
do_test enc2-5.9 {
add_test_collate $DB 1 1 1
set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
# UTF-16be variant missing: the UTF-16le variant is expected to be chosen
# ahead of the UTF-8 one.
do_test enc2-5.10 {
add_test_collate $DB 1 1 0
set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
# NOTE(review): "breakpoint" looks like a leftover debugging hook - confirm
# whether it is still wanted here.
breakpoint
# Only the UTF-8 variant is registered, so it must be used.
do_test enc2-5.11 {
add_test_collate $DB 1 0 0
set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
lappend res $::test_collate_enc
} {one two three four five UTF-8}
finish_test