Add interface to configure SQLite to use ICU collation functions. (CVS 3936)
FossilOrigin-Name: b29a81b4fbb926fa09186340342848b9fe589033
This commit is contained in:
parent
6885390791
commit
2559136971
110
ext/icu/icu.c
110
ext/icu/icu.c
@ -21,6 +21,7 @@
|
||||
#include <unicode/utypes.h>
|
||||
#include <unicode/uregex.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/ucol.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include "sqlite3.h"
|
||||
@ -51,6 +52,24 @@ static void xFree(void *p){
|
||||
** http://unicode.org/reports/tr21/tr21-5.html#Caseless_Matching
|
||||
*/
|
||||
|
||||
/*
|
||||
** This function is called when an ICU function called from within
|
||||
** the implementation of an SQL scalar function returns an error.
|
||||
**
|
||||
** The scalar function context passed as the first argument is
|
||||
** loaded with an error message based on the following two args.
|
||||
*/
|
||||
static void icuFunctionError(
|
||||
sqlite3_context *pCtx, /* SQLite scalar function context */
|
||||
const char *zName, /* Name of ICU function that failed */
|
||||
UErrorCode e /* Error code returned by ICU function */
|
||||
){
|
||||
char zBuf[128];
|
||||
sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e));
|
||||
zBuf[127] = '\0';
|
||||
sqlite3_result_error(pCtx, zBuf, -1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Function to delete compiled regexp objects. Registered as
|
||||
** a destructor function with sqlite3_set_auxdata().
|
||||
@ -104,7 +123,7 @@ static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
|
||||
sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete);
|
||||
}else{
|
||||
assert(!pExpr);
|
||||
sqlite3_result_error(p, "Error compiling regular expression", -1);
|
||||
icuFunctionError(p, "uregex_open", status);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -112,14 +131,14 @@ static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
|
||||
/* Configure the text that the regular expression operates on. */
|
||||
uregex_setText(pExpr, zString, -1, &status);
|
||||
if( !U_SUCCESS(status) ){
|
||||
sqlite3_result_error(p, "Error configuring regular expression", -1);
|
||||
icuFunctionError(p, "uregex_setText", status);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Attempt the match */
|
||||
res = uregex_matches(pExpr, 0, &status);
|
||||
if( !U_SUCCESS(status) ){
|
||||
sqlite3_result_error(p, "Error matching regular expression", -1);
|
||||
icuFunctionError(p, "uregex_matches", status);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -190,13 +209,94 @@ static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
|
||||
}
|
||||
|
||||
if( !U_SUCCESS(status) ){
|
||||
sqlite3_result_error(p, "Error converting case", -1);
|
||||
icuFunctionError(p, "u_strToLower()/u_strToUpper", status);
|
||||
return;
|
||||
}
|
||||
|
||||
sqlite3_result_text16(p, zOutput, -1, xFree);
|
||||
}
|
||||
|
||||
/*
|
||||
** Collation sequence destructor function. The pCtx argument points to
|
||||
** a UCollator structure previously allocated using ucol_open().
|
||||
*/
|
||||
static void icuCollationDel(void *pCtx){
|
||||
UCollator *p = (UCollator *)pCtx;
|
||||
ucol_close(p);
|
||||
}
|
||||
|
||||
/*
|
||||
** Collation sequence comparison function. The pCtx argument points to
|
||||
** a UCollator structure previously allocated using ucol_open().
|
||||
*/
|
||||
static int icuCollationColl(
|
||||
void *pCtx,
|
||||
int nLeft,
|
||||
const void *zLeft,
|
||||
int nRight,
|
||||
const void *zRight
|
||||
){
|
||||
UCollationResult res;
|
||||
UCollator *p = (UCollator *)pCtx;
|
||||
res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2);
|
||||
switch( res ){
|
||||
case UCOL_LESS: return -1;
|
||||
case UCOL_GREATER: return +1;
|
||||
case UCOL_EQUAL: return 0;
|
||||
}
|
||||
assert(!"Bad return value from ucol_strcoll()");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Implementation of the scalar function icu_load_collation().
|
||||
**
|
||||
** This scalar function is used to add ICU collation based collation
|
||||
** types to an SQLite database connection. It is intended to be called
|
||||
** as follows:
|
||||
**
|
||||
** SELECT icu_load_collation(<locale>, <collation-name>);
|
||||
**
|
||||
** Where <locale> is a string containing an ICU locale identifier (i.e.
|
||||
** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
|
||||
** collation sequence to create.
|
||||
*/
|
||||
static void icuLoadCollation(
|
||||
sqlite3_context *p,
|
||||
int nArg,
|
||||
sqlite3_value **apArg
|
||||
){
|
||||
sqlite3 *db = (sqlite3 *)sqlite3_user_data(p);
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
const char *zLocale; /* Locale identifier - (eg. "jp_JP") */
|
||||
const char *zName; /* SQL Collation sequence name (eg. "japanese") */
|
||||
UCollator *pUCollator; /* ICU library collation object */
|
||||
int rc; /* Return code from sqlite3_create_collation_x() */
|
||||
|
||||
assert(nArg==2);
|
||||
zLocale = (const char *)sqlite3_value_text(apArg[0]);
|
||||
zName = (const char *)sqlite3_value_text(apArg[1]);
|
||||
|
||||
if( !zLocale || !zName ){
|
||||
return;
|
||||
}
|
||||
|
||||
pUCollator = ucol_open(zLocale, &status);
|
||||
if( !U_SUCCESS(status) ){
|
||||
icuFunctionError(p, "ucol_open", status);
|
||||
return;
|
||||
}
|
||||
assert(p);
|
||||
|
||||
rc = sqlite3_create_collation_x(db, zName, SQLITE_UTF16, (void *)pUCollator,
|
||||
icuCollationColl, icuCollationDel
|
||||
);
|
||||
if( rc!=SQLITE_OK ){
|
||||
ucol_close(pUCollator);
|
||||
sqlite3_result_error(p, "Error registering collation function", -1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Register the ICU extension functions with database db.
|
||||
*/
|
||||
@ -219,6 +319,8 @@ int sqlite3IcuInit(sqlite3 *db){
|
||||
{"lower", 2, SQLITE_UTF8, 0, icuCaseFunc16},
|
||||
{"upper", 1, SQLITE_UTF8, (void*)1, icuCaseFunc16},
|
||||
{"upper", 2, SQLITE_UTF8, (void*)1, icuCaseFunc16},
|
||||
|
||||
{"icu_load_collation", 2, SQLITE_UTF8, (void*)db, icuLoadCollation},
|
||||
};
|
||||
|
||||
int rc = SQLITE_OK;
|
||||
|
15
manifest
15
manifest
@ -1,5 +1,5 @@
|
||||
C Change\ssqlite3_snprintf()\sso\sthat\sit\sdoes\snot\swrite\sa\szero-terminator\sif\nthe\sbuffer\ssize\sargument\sis\sless\sthan\s1.\s\sTicket\s#2341.\s\sAdded\sdocumentation\nabout\sthe\ssqlite3_snprintf()\sfunction.\s(CVS\s3935)
|
||||
D 2007-05-07T11:24:30
|
||||
C Add\sinterface\sto\sconfigure\sSQLite\sto\suse\sICU\scollation\sfunctions.\s(CVS\s3936)
|
||||
D 2007-05-07T11:53:14
|
||||
F Makefile.in ea8888bdcf53313d26576fcabcb6d0a10ecd35cd
|
||||
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
|
||||
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
|
||||
@ -43,7 +43,7 @@ F ext/fts2/fts2_porter.c 991a45463553c7318063fe7773368a6c0f39e35d
|
||||
F ext/fts2/fts2_tokenizer.h 4c5ffe31d63622869eb6eec1503df7f6996fd1bd
|
||||
F ext/fts2/fts2_tokenizer1.c 5c979fe8815f95396beb22b627571da895a025af
|
||||
F ext/fts2/mkfts2amal.tcl 2a9ec76b0760fe7f3669dca5bc0d60728bc1c977
|
||||
F ext/icu/icu.c a30999ba467749ed6232d02cc8c4b5a0e62cd727
|
||||
F ext/icu/icu.c 509ac3d8afea8af6835edb9d96a52a80dd56c152
|
||||
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
|
||||
F ltmain.sh 56abb507100ed2d4261f6dd1653dec3cf4066387
|
||||
F main.mk 09c19ae05ac9e5654d5fd866a980b21ad9df8f30
|
||||
@ -246,6 +246,7 @@ F test/fts2m.test 4b30142ead6f3ed076e880a2a464064c5ad58c51
|
||||
F test/fts2n.test a70357e72742681eaebfdbe9007b87ff3b771638
|
||||
F test/func.test 6727c7729472ae52b5acd86e802f89aa350ba50f
|
||||
F test/hook.test 7e7645fd9a033f79cce8fdff151e32715e7ec50a
|
||||
F test/icu.test e6bfae7f625c88fd14df6f540fe835bdfc1e4329
|
||||
F test/in.test 369cb2aa1eab02296b4ec470732fe8c131260b1d
|
||||
F test/incrblob.test 7f82ae497364612aa17a37f77f12e01e2bee9f20
|
||||
F test/incrblob_err.test 9dae0762ba4d73b516d176d091c6b2b16f625953
|
||||
@ -481,7 +482,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
|
||||
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
|
||||
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
|
||||
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
|
||||
P ff49d48f2f025898a0f4ace1fc227e1d367ea89f
|
||||
R d693f630962da031deefd4769c7a8268
|
||||
U drh
|
||||
Z ff527a494f455c458a9191e7c78f4220
|
||||
P f3ae4ac5fe0bfa2f91e76a6def86c444e51fe80b
|
||||
R 20cd23ff512d65479e7ed637ec43cb14
|
||||
U danielk1977
|
||||
Z 04087bd460c94ac099a97176e3a307b1
|
||||
|
@ -1 +1 @@
|
||||
f3ae4ac5fe0bfa2f91e76a6def86c444e51fe80b
|
||||
b29a81b4fbb926fa09186340342848b9fe589033
|
118
test/icu.test
Normal file
118
test/icu.test
Normal file
@ -0,0 +1,118 @@
|
||||
# 2007 May 1
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#***********************************************************************
|
||||
#
|
||||
# $Id: icu.test,v 1.1 2007/05/07 11:53:14 danielk1977 Exp $
|
||||
#
|
||||
|
||||
set testdir [file dirname $argv0]
|
||||
source $testdir/tester.tcl
|
||||
|
||||
ifcapable !icu {
|
||||
finish_test
|
||||
return
|
||||
}
|
||||
|
||||
# Create a table to work with.
|
||||
#
|
||||
execsql {CREATE TABLE test1(i1 int, i2 int, r1 real, r2 real, t1 text, t2 text)}
|
||||
execsql {INSERT INTO test1 VALUES(1,2,1.1,2.2,'hello','world')}
|
||||
# Run one expression test called $name.
#
# Inside a transaction the script executes "UPDATE test1 SET $settings"
# followed by "SELECT $expr FROM test1", then rolls the transaction back
# so the UPDATE is not kept. The value returned by [db one] is compared
# with $result by do_test.
#
proc test_expr {name settings expr result} {
  set script [format {
    db one {
      BEGIN;
      UPDATE test1 SET %s;
      SELECT %s FROM test1;
      ROLLBACK;
    }
  } $settings $expr]
  do_test $name $script $result
}
|
||||
|
||||
# Tests of the REGEXP operator.
|
||||
#
|
||||
test_expr icu-1.1 {i1='hello'} {i1 REGEXP 'hello'} 1
|
||||
test_expr icu-1.2 {i1='hello'} {i1 REGEXP '.ello'} 1
|
||||
test_expr icu-1.3 {i1='hello'} {i1 REGEXP '.ell'} 0
|
||||
test_expr icu-1.4 {i1='hello'} {i1 REGEXP '.ell.*'} 1
|
||||
test_expr icu-1.5 {i1=NULL} {i1 REGEXP '.ell.*'} {}
|
||||
|
||||
# Some non-ascii characters with defined case mappings
|
||||
#
|
||||
set ::EGRAVE "\xC8"
|
||||
set ::egrave "\xE8"
|
||||
|
||||
set ::OGRAVE "\xD2"
|
||||
set ::ograve "\xF2"
|
||||
|
||||
# That German letter that looks a bit like a B. The
|
||||
# upper-case version of which is "SS" (two characters).
|
||||
#
|
||||
set ::szlig "\xDF"
|
||||
|
||||
# Tests of the upper()/lower() functions. Each test has a unique name so
# that a failure report identifies exactly one case.
#
test_expr icu-2.1 {i1='HellO WorlD'} {upper(i1)} {HELLO WORLD}
test_expr icu-2.2 {i1='HellO WorlD'} {lower(i1)} {hello world}

# Lower-case input.
test_expr icu-2.3 {i1=$::egrave} {lower(i1)} $::egrave
test_expr icu-2.4 {i1=$::egrave} {upper(i1)} $::EGRAVE
test_expr icu-2.5 {i1=$::ograve} {lower(i1)} $::ograve
test_expr icu-2.6 {i1=$::ograve} {upper(i1)} $::OGRAVE

# Upper-case input.
test_expr icu-2.7 {i1=$::EGRAVE} {lower(i1)} $::egrave
test_expr icu-2.8 {i1=$::EGRAVE} {upper(i1)} $::EGRAVE
test_expr icu-2.9 {i1=$::OGRAVE} {lower(i1)} $::ograve
test_expr icu-2.10 {i1=$::OGRAVE} {upper(i1)} $::OGRAVE

# Case mappings that change the number of characters.
test_expr icu-2.11 {i1=$::szlig} {upper(i1)} "SS"
test_expr icu-2.12 {i1='SS'} {lower(i1)} "ss"
|
||||
|
||||
# In Turkish (locale="tr_TR"), the lower case version of I
|
||||
# is "small dotless i" (code point 0x131 (decimal 305)).
|
||||
#
|
||||
set ::small_dotless_i "\u0131"
|
||||
test_expr icu-3.1 {i1='I'} {lower(i1)} "i"
|
||||
test_expr icu-3.2 {i1='I'} {lower(i1, 'tr_tr')} $::small_dotless_i
|
||||
test_expr icu-3.3 {i1='I'} {lower(i1, 'en_AU')} "i"
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Test the collation sequence function.
|
||||
#
|
||||
do_test icu-4.1 {
|
||||
execsql {
|
||||
CREATE TABLE fruit(name);
|
||||
INSERT INTO fruit VALUES('plum');
|
||||
INSERT INTO fruit VALUES('cherry');
|
||||
INSERT INTO fruit VALUES('apricot');
|
||||
INSERT INTO fruit VALUES('peach');
|
||||
INSERT INTO fruit VALUES('chokecherry');
|
||||
INSERT INTO fruit VALUES('yamot');
|
||||
}
|
||||
} {}
|
||||
do_test icu-4.2 {
|
||||
execsql {
|
||||
SELECT icu_load_collation('en_US', 'AmericanEnglish');
|
||||
SELECT icu_load_collation('lt_LT', 'Lithuanian');
|
||||
}
|
||||
execsql {
|
||||
SELECT name FROM fruit ORDER BY name COLLATE AmericanEnglish ASC;
|
||||
}
|
||||
} {apricot cherry chokecherry peach plum yamot}
|
||||
|
||||
|
||||
# Test collation using Lithuanian rules. In the Lithuanian
|
||||
# alphabet, "y" comes right after "i".
|
||||
#
|
||||
do_test icu-4.3 {
|
||||
execsql {
|
||||
SELECT name FROM fruit ORDER BY name COLLATE Lithuanian ASC;
|
||||
}
|
||||
} {apricot cherry chokecherry yamot peach plum}
|
||||
|
||||
finish_test
|
||||
|
Loading…
Reference in New Issue
Block a user