Add interface to configure SQLite to use ICU collation functions. (CVS 3936)

FossilOrigin-Name: b29a81b4fbb926fa09186340342848b9fe589033
This commit is contained in:
danielk1977 2007-05-07 11:53:13 +00:00
parent 6885390791
commit 2559136971
4 changed files with 233 additions and 12 deletions

View File

@ -21,6 +21,7 @@
#include <unicode/utypes.h>
#include <unicode/uregex.h>
#include <unicode/ustring.h>
#include <unicode/ucol.h>
#include <assert.h>
#include "sqlite3.h"
@ -51,6 +52,24 @@ static void xFree(void *p){
** http://unicode.org/reports/tr21/tr21-5.html#Caseless_Matching
*/
/*
** Report the failure of an ICU library call as the error result of the
** SQL scalar function that is currently being evaluated.
**
** pCtx is the context of that scalar function, zName is the name of the
** ICU function that failed and e is the error code it reported. The
** message stored in pCtx has the form:
**
**     ICU error: <zName>(): <name of error code e>
**
** The message is formatted into a fixed 128 byte buffer.
*/
static void icuFunctionError(
  sqlite3_context *pCtx,       /* SQLite scalar function context */
  const char *zName,           /* Name of ICU function that failed */
  UErrorCode e                 /* Error code returned by ICU function */
){
  char zMsg[128];
  const int nMsg = (int)sizeof(zMsg);

  sqlite3_snprintf(nMsg, zMsg, "ICU error: %s(): %s", zName, u_errorName(e));
  zMsg[nMsg-1] = '\0';         /* Belt and braces: force termination */
  sqlite3_result_error(pCtx, zMsg, -1);
}
/*
** Function to delete compiled regexp objects. Registered as
** a destructor function with sqlite3_set_auxdata().
@ -104,7 +123,7 @@ static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete);
}else{
assert(!pExpr);
sqlite3_result_error(p, "Error compiling regular expression", -1);
icuFunctionError(p, "uregex_open", status);
return;
}
}
@ -112,14 +131,14 @@ static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
/* Configure the text that the regular expression operates on. */
uregex_setText(pExpr, zString, -1, &status);
if( !U_SUCCESS(status) ){
sqlite3_result_error(p, "Error configuring regular expression", -1);
icuFunctionError(p, "uregex_setText", status);
return;
}
/* Attempt the match */
res = uregex_matches(pExpr, 0, &status);
if( !U_SUCCESS(status) ){
sqlite3_result_error(p, "Error matching regular expression", -1);
icuFunctionError(p, "uregex_matches", status);
return;
}
@ -190,13 +209,94 @@ static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
}
if( !U_SUCCESS(status) ){
sqlite3_result_error(p, "Error converting case", -1);
icuFunctionError(p, "u_strToLower()/u_strToUpper", status);
return;
}
sqlite3_result_text16(p, zOutput, -1, xFree);
}
/*
** Destructor for an ICU based collation sequence. pCtx is the UCollator
** handle that was allocated by ucol_open() when the collation sequence
** was created; it is released here with ucol_close().
*/
static void icuCollationDel(void *pCtx){
  ucol_close((UCollator *)pCtx);
}
/*
** Collation sequence comparison function. The pCtx argument points to
** a UCollator structure previously allocated using ucol_open().
*/
static int icuCollationColl(
void *pCtx,
int nLeft,
const void *zLeft,
int nRight,
const void *zRight
){
UCollationResult res;
UCollator *p = (UCollator *)pCtx;
res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2);
switch( res ){
case UCOL_LESS: return -1;
case UCOL_GREATER: return +1;
case UCOL_EQUAL: return 0;
}
assert(!"Bad return value from ucol_strcoll()");
return 0;
}
/*
** Implementation of the SQL scalar function icu_load_collation().
**
** The function registers a new collation sequence, backed by an ICU
** collator, with the database connection. Usage:
**
**     SELECT icu_load_collation(<locale>, <collation-name>);
**
** <locale> is an ICU locale identifier (for example "en_AU" or "tr_TR")
** and <collation-name> is the name under which the collation sequence
** becomes available to SQL.
**
** The database handle is taken from the user-data pointer of the
** function. If either argument yields a NULL text pointer the function
** returns without doing anything. A failure in ucol_open() or in the
** registration call is reported as the error result of the function.
*/
static void icuLoadCollation(
  sqlite3_context *p,
  int nArg,
  sqlite3_value **apArg
){
  sqlite3 *db = (sqlite3 *)sqlite3_user_data(p);
  UErrorCode status = U_ZERO_ERROR;
  const char *zLocale;      /* Locale identifier - (eg. "ja_JP") */
  const char *zName;        /* SQL Collation sequence name (eg. "japanese") */
  UCollator *pColl;         /* ICU library collation object */

  assert(nArg==2);
  zLocale = (const char *)sqlite3_value_text(apArg[0]);
  zName = (const char *)sqlite3_value_text(apArg[1]);
  if( zLocale==0 || zName==0 ){
    return;
  }

  pColl = ucol_open(zLocale, &status);
  if( !U_SUCCESS(status) ){
    icuFunctionError(p, "ucol_open", status);
    return;
  }
  assert(p);

  /* If registration fails the collator is closed here, because it was
  ** not successfully handed over together with icuCollationDel.
  */
  if( sqlite3_create_collation_x(db, zName, SQLITE_UTF16, (void *)pColl,
        icuCollationColl, icuCollationDel)!=SQLITE_OK
  ){
    ucol_close(pColl);
    sqlite3_result_error(p, "Error registering collation function", -1);
  }
}
/*
** Register the ICU extension functions with database db.
*/
@ -219,6 +319,8 @@ int sqlite3IcuInit(sqlite3 *db){
{"lower", 2, SQLITE_UTF8, 0, icuCaseFunc16},
{"upper", 1, SQLITE_UTF8, (void*)1, icuCaseFunc16},
{"upper", 2, SQLITE_UTF8, (void*)1, icuCaseFunc16},
{"icu_load_collation", 2, SQLITE_UTF8, (void*)db, icuLoadCollation},
};
int rc = SQLITE_OK;

View File

@ -1,5 +1,5 @@
C Change\ssqlite3_snprintf()\sso\sthat\sit\sdoes\snot\swrite\sa\szero-terminator\sif\nthe\sbuffer\ssize\sargument\sis\sless\sthan\s1.\s\sTicket\s#2341.\s\sAdded\sdocumentation\nabout\sthe\ssqlite3_snprintf()\sfunction.\s(CVS\s3935)
D 2007-05-07T11:24:30
C Add\sinterface\sto\sconfigure\sSQLite\sto\suse\sICU\scollation\sfunctions.\s(CVS\s3936)
D 2007-05-07T11:53:14
F Makefile.in ea8888bdcf53313d26576fcabcb6d0a10ecd35cd
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@ -43,7 +43,7 @@ F ext/fts2/fts2_porter.c 991a45463553c7318063fe7773368a6c0f39e35d
F ext/fts2/fts2_tokenizer.h 4c5ffe31d63622869eb6eec1503df7f6996fd1bd
F ext/fts2/fts2_tokenizer1.c 5c979fe8815f95396beb22b627571da895a025af
F ext/fts2/mkfts2amal.tcl 2a9ec76b0760fe7f3669dca5bc0d60728bc1c977
F ext/icu/icu.c a30999ba467749ed6232d02cc8c4b5a0e62cd727
F ext/icu/icu.c 509ac3d8afea8af6835edb9d96a52a80dd56c152
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
F ltmain.sh 56abb507100ed2d4261f6dd1653dec3cf4066387
F main.mk 09c19ae05ac9e5654d5fd866a980b21ad9df8f30
@ -246,6 +246,7 @@ F test/fts2m.test 4b30142ead6f3ed076e880a2a464064c5ad58c51
F test/fts2n.test a70357e72742681eaebfdbe9007b87ff3b771638
F test/func.test 6727c7729472ae52b5acd86e802f89aa350ba50f
F test/hook.test 7e7645fd9a033f79cce8fdff151e32715e7ec50a
F test/icu.test e6bfae7f625c88fd14df6f540fe835bdfc1e4329
F test/in.test 369cb2aa1eab02296b4ec470732fe8c131260b1d
F test/incrblob.test 7f82ae497364612aa17a37f77f12e01e2bee9f20
F test/incrblob_err.test 9dae0762ba4d73b516d176d091c6b2b16f625953
@ -481,7 +482,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
P ff49d48f2f025898a0f4ace1fc227e1d367ea89f
R d693f630962da031deefd4769c7a8268
U drh
Z ff527a494f455c458a9191e7c78f4220
P f3ae4ac5fe0bfa2f91e76a6def86c444e51fe80b
R 20cd23ff512d65479e7ed637ec43cb14
U danielk1977
Z 04087bd460c94ac099a97176e3a307b1

View File

@ -1 +1 @@
f3ae4ac5fe0bfa2f91e76a6def86c444e51fe80b
b29a81b4fbb926fa09186340342848b9fe589033

118
test/icu.test Normal file
View File

@ -0,0 +1,118 @@
# 2007 May 1
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# $Id: icu.test,v 1.1 2007/05/07 11:53:14 danielk1977 Exp $
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
ifcapable !icu {
finish_test
return
}
# Create a table to work with.
#
execsql {CREATE TABLE test1(i1 int, i2 int, r1 real, r2 real, t1 text, t2 text)}
execsql {INSERT INTO test1 VALUES(1,2,1.1,2.2,'hello','world')}
# Define one test case that evaluates an SQL expression against table test1.
#
#   name      Test case name handed to do_test.
#   settings  Assignment list substituted after "UPDATE test1 SET".
#   expr      SQL expression substituted after "SELECT".
#   result    Expected result handed to do_test.
#
# The UPDATE and SELECT run between BEGIN and ROLLBACK, so the change made
# by $settings is undone again at the end of each test.
proc test_expr {name settings expr result} {
  set script [format {
    db one {
      BEGIN;
      UPDATE test1 SET %s;
      SELECT %s FROM test1;
      ROLLBACK;
    }
  } $settings $expr]
  do_test $name $script $result
}
# Tests of the REGEXP operator.
#
test_expr icu-1.1 {i1='hello'} {i1 REGEXP 'hello'} 1
test_expr icu-1.2 {i1='hello'} {i1 REGEXP '.ello'} 1
test_expr icu-1.3 {i1='hello'} {i1 REGEXP '.ell'} 0
test_expr icu-1.4 {i1='hello'} {i1 REGEXP '.ell.*'} 1
test_expr icu-1.5 {i1=NULL} {i1 REGEXP '.ell.*'} {}
# Some non-ascii characters with defined case mappings
#
set ::EGRAVE "\xC8"
set ::egrave "\xE8"
set ::OGRAVE "\xD2"
set ::ograve "\xF2"
# The German sharp s ("eszett", U+00DF), which looks a bit like a B.
# Its upper-case form is "SS" (two characters).
#
set ::szlig "\xDF"
# Tests of the upper()/lower() functions.
#
# Each accented letter is checked starting from its lower-case form
# (icu-2.3 to icu-2.6) and from its upper-case form (icu-2.7 to icu-2.10).
# Every test case has a unique name so that a failure report identifies
# exactly one case.
#
test_expr icu-2.1 {i1='HellO WorlD'} {upper(i1)} {HELLO WORLD}
test_expr icu-2.2 {i1='HellO WorlD'} {lower(i1)} {hello world}
test_expr icu-2.3 {i1=$::egrave} {lower(i1)} $::egrave
test_expr icu-2.4 {i1=$::egrave} {upper(i1)} $::EGRAVE
test_expr icu-2.5 {i1=$::ograve} {lower(i1)} $::ograve
test_expr icu-2.6 {i1=$::ograve} {upper(i1)} $::OGRAVE
test_expr icu-2.7 {i1=$::EGRAVE} {lower(i1)} $::egrave
test_expr icu-2.8 {i1=$::EGRAVE} {upper(i1)} $::EGRAVE
test_expr icu-2.9 {i1=$::OGRAVE} {lower(i1)} $::ograve
test_expr icu-2.10 {i1=$::OGRAVE} {upper(i1)} $::OGRAVE
test_expr icu-2.11 {i1=$::szlig} {upper(i1)} "SS"
test_expr icu-2.12 {i1='SS'} {lower(i1)} "ss"
# In Turkish (locale "tr_TR"), the lower-case version of "I" is
# "small dotless i" (code point U+0131, decimal 305).
#
set ::small_dotless_i "\u0131"
test_expr icu-3.1 {i1='I'} {lower(i1)} "i"
test_expr icu-3.2 {i1='I'} {lower(i1, 'tr_tr')} $::small_dotless_i
test_expr icu-3.3 {i1='I'} {lower(i1, 'en_AU')} "i"
#--------------------------------------------------------------------
# Test the collation sequence function.
#
# icu-4.1 creates and populates the table that the remaining icu-4.*
# tests sort.
#
do_test icu-4.1 {
execsql {
CREATE TABLE fruit(name);
INSERT INTO fruit VALUES('plum');
INSERT INTO fruit VALUES('cherry');
INSERT INTO fruit VALUES('apricot');
INSERT INTO fruit VALUES('peach');
INSERT INTO fruit VALUES('chokecherry');
INSERT INTO fruit VALUES('yamot');
}
} {}
# icu-4.2 registers two collation sequences with icu_load_collation():
# "AmericanEnglish" for locale en_US and "Lithuanian" for locale lt_LT.
# It then checks the sort order produced by the AmericanEnglish one.
#
do_test icu-4.2 {
execsql {
SELECT icu_load_collation('en_US', 'AmericanEnglish');
SELECT icu_load_collation('lt_LT', 'Lithuanian');
}
execsql {
SELECT name FROM fruit ORDER BY name COLLATE AmericanEnglish ASC;
}
} {apricot cherry chokecherry peach plum yamot}
# Test collation using Lithuanian rules. In the Lithuanian
# alphabet, "y" comes right after "i", so "yamot" is expected to
# sort before "peach" and "plum" here. This test uses the
# "Lithuanian" collation registered by icu-4.2.
#
do_test icu-4.3 {
execsql {
SELECT name FROM fruit ORDER BY name COLLATE Lithuanian ASC;
}
} {apricot cherry chokecherry yamot peach plum}
finish_test