Add the start of the ICU extension. (CVS 3931)

FossilOrigin-Name: f473e8526770b6a332dfde3e1fd1ddf8df493e9a
This commit is contained in:
danielk1977 2007-05-06 16:04:11 +00:00
parent d230f648de
commit 83852acc44
6 changed files with 272 additions and 12 deletions

239
ext/icu/icu.c Normal file

@ -0,0 +1,239 @@
/*
** This file implements an integration between the ICU library
** ("International Components for Unicode", an open-source library
** for handling unicode data) and SQLite. The integration uses
** ICU to provide the following to SQLite:
**
** * Implementations of the SQL scalar upper() and lower()
** functions for case mapping,
**
** * Collation sequences
**
** * Implementation of the SQL regexp() function (and hence REGEXP
** operator) using the ICU uregex_XX() APIs.
**
** * LIKE
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
#include <unicode/utypes.h>
#include <unicode/uregex.h>
#include <unicode/ustring.h>
#include <assert.h>
#include "sqlite3.h"
#ifndef SQLITE_CORE
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#endif
/*
** Collation sequences:
**
** ucol_open()
** ucol_strcoll()
** ucol_close()
*/
/*
** Thin wrapper around sqlite3_free() with a real function address.
**
** It is handed to sqlite3_result_text16() as the destructor for buffers
** obtained from sqlite3_malloc(). A genuine function is required there
** because sqlite3_free itself may be a macro rather than a function
** (for example when this file is built against sqlite3ext.h), in which
** case its address cannot be taken.
*/
static void xFree(void *p){
  sqlite3_free(p);
}
/*
** Destructor for compiled ICU regular expressions.
**
** icuRegexpFunc() passes this routine to sqlite3_set_auxdata() so that
** the URegularExpression object cached against the pattern argument is
** released with uregex_close() when SQLite discards the auxiliary data.
** Argument p is the URegularExpression pointer that was cached.
*/
static void icuRegexpDelete(void *p){
  uregex_close((URegularExpression *)p);
}
/*
** Implementation of the SQLite REGEXP operator as a two-argument scalar
** function. The first argument is the regular expression pattern, the
** second is the string to test against it. If either argument is an
** SQL NULL, then NULL is returned. Otherwise the result is 1 if the
** string matches the pattern, or 0 if it does not.
**
** SQLite maps the REGEXP operator onto the regexp() function, so the
** following two expressions are equivalent:
**
**     zString REGEXP zPattern
**     regexp(zPattern, zString)
**
** The compiled pattern is cached as auxiliary data on argument 0, so a
** pattern is only compiled when no cached object is available.
**
** Uses the following ICU regexp APIs:
**
**     uregex_open()
**     uregex_setText()
**     uregex_matches()
**     uregex_close()
*/
static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
  UErrorCode rcIcu = U_ZERO_ERROR;        /* ICU status, shared by all calls */
  URegularExpression *pRegex;             /* Compiled pattern */
  UBool isMatch;                          /* Result of uregex_matches() */
  const UChar *zSubject = sqlite3_value_text16(apArg[1]);

  /* A NULL left-hand operand yields a NULL result: returning without
  ** setting any result leaves the function value as SQL NULL.
  */
  if( !zSubject ) return;

  /* Reuse the compiled pattern cached on argument 0 if there is one;
  ** otherwise compile the pattern now and cache it.
  */
  pRegex = sqlite3_get_auxdata(p, 0);
  if( !pRegex ){
    const UChar *zPattern = sqlite3_value_text16(apArg[0]);

    /* A NULL pattern also yields a NULL result. */
    if( !zPattern ) return;

    pRegex = uregex_open(zPattern, -1, 0, 0, &rcIcu);
    if( !U_SUCCESS(rcIcu) ){
      assert(!pRegex);
      sqlite3_result_error(p, "Error compiling regular expression", -1);
      return;
    }

    /* From here on SQLite owns pRegex and frees it via icuRegexpDelete(). */
    sqlite3_set_auxdata(p, 0, pRegex, icuRegexpDelete);
  }

  /* Point the regular expression at the subject text. */
  uregex_setText(pRegex, zSubject, -1, &rcIcu);
  if( !U_SUCCESS(rcIcu) ){
    sqlite3_result_error(p, "Error configuring regular expression", -1);
    return;
  }

  /* Run the match, starting at offset 0 of the subject. */
  isMatch = uregex_matches(pRegex, 0, &rcIcu);
  if( !U_SUCCESS(rcIcu) ){
    sqlite3_result_error(p, "Error matching regular expression", -1);
    return;
  }

  /* Detach the subject text again. This is not strictly required, but
  ** it avoids leaving the cached object configured with a pointer that
  ** is no longer valid once this function returns.
  */
  uregex_setText(pRegex, 0, 0, &rcIcu);

  /* Return 1 or 0. */
  sqlite3_result_int(p, isMatch ? 1 : 0);
}
/*
** Implementations of scalar functions for case mapping - upper() and
** lower(). Function upper() converts its input to upper-case (ABC).
** Function lower() converts to lower-case (abc).
**
** ICU provides two types of case mapping, "general" case mapping and
** "language specific". Refer to ICU documentation for the differences
** between the two.
**
** To utilise "general" case mapping, the upper() or lower() scalar
** functions are invoked with one argument:
**
**     upper('abc') -> 'ABC'
**     lower('ABC') -> 'abc'
**
** To access ICU "language specific" case mapping, upper() or lower()
** should be invoked with two arguments. The second argument is the name
** of the locale to use. Passing an empty string ("") or SQL NULL value
** as the second argument is the same as invoking the 1 argument version
** of upper() or lower().
**
**     lower('I', 'en_us') -> 'i'
**     lower('I', 'tr_tr') -> 'ı' (small dotless i)
**
** The same C function implements both SQL functions: a non-zero
** sqlite3_user_data() pointer selects upper(), a zero pointer selects
** lower().
**
** If the first argument is SQL NULL the result is SQL NULL. If memory
** cannot be allocated, or ICU reports a failure, an error is returned
** to SQLite and no memory is leaked.
*/
static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
  const UChar *zInput;            /* UTF-16 input text */
  UChar *zOutput;                 /* Buffer receiving the converted text */
  int nInput;                     /* Length of zInput in UChars */
  int nCap;                       /* Capacity of zOutput in UChars */
  int nResult;                    /* Length of converted text in UChars */
  int iTry;                       /* Conversion attempt counter */
  int bToUpper;                   /* True for upper(), false for lower() */
  UErrorCode status;              /* ICU status of the latest conversion */
  const char *zLocale = 0;        /* Locale name, or 0 if none was given */

  assert(nArg==1 || nArg==2);
  bToUpper = (sqlite3_user_data(p)!=0);
  if( nArg==2 ){
    zLocale = (const char *)sqlite3_value_text(apArg[1]);
  }

  /* ICU rejects a NULL source pointer, so handle SQL NULL here: leaving
  ** the result unset makes the function return SQL NULL.
  */
  zInput = sqlite3_value_text16(apArg[0]);
  if( !zInput ){
    return;
  }
  nInput = sqlite3_value_bytes16(apArg[0]) / (int)sizeof(UChar);

  /* Start with room for twice the input plus a terminator. Some case
  ** mappings expand further than that, in which case ICU reports the
  ** length it needs and the conversion is retried once with a buffer
  ** of exactly that size.
  */
  nCap = nInput*2 + 1;
  for(iTry=0; ; iTry++){
    zOutput = sqlite3_malloc(nCap * (int)sizeof(UChar));
    if( !zOutput ){
      sqlite3_result_error(p, "out of memory", -1);
      return;
    }

    status = U_ZERO_ERROR;
    if( bToUpper ){
      nResult = u_strToUpper(zOutput, nCap, zInput, nInput, zLocale, &status);
    }else{
      nResult = u_strToLower(zOutput, nCap, zInput, nInput, zLocale, &status);
    }
    if( U_SUCCESS(status) ){
      break;
    }

    /* The conversion failed, so the buffer is of no further use. */
    xFree(zOutput);

    /* A failure that reports a length larger than the buffer means the
    ** buffer was too small. Retry once with the reported size.
    */
    if( iTry==0 && nResult>nCap ){
      nCap = nResult + 1;
      continue;
    }

    sqlite3_result_error(p, "Error converting case", -1);
    return;
  }

  /* Pass the length explicitly (in bytes): ICU treats an unterminated
  ** but otherwise complete result as success, so the buffer cannot be
  ** assumed to be nul-terminated. SQLite frees zOutput using xFree().
  */
  sqlite3_result_text16(p, zOutput, nResult * (int)sizeof(UChar), xFree);
}
/*
** Register the ICU extension functions with database db.
**
** Each entry of the scalars[] table below is registered using
** sqlite3_create_function(). The pContext value becomes the function's
** user-data pointer; icuCaseFunc16() uses it to tell upper() (non-zero)
** from lower() (zero).
**
** Returns SQLITE_OK if every function was registered. Otherwise
** registration stops at the first failure and the error code returned
** by sqlite3_create_function() is passed back to the caller.
*/
int sqlite3IcuInit(sqlite3 *db){
  static const struct IcuScalar {
    const char *zName;                        /* SQL function name */
    int nArg;                                 /* Number of arguments */
    int enc;                                  /* Preferred text encoding */
    void *pContext;                           /* sqlite3_user_data() value */
    void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
  } scalars[] = {
    {"regexp", 2, SQLITE_ANY,   0,        icuRegexpFunc},

    {"lower",  1, SQLITE_UTF16, 0,        icuCaseFunc16},
    {"lower",  2, SQLITE_UTF16, 0,        icuCaseFunc16},
    {"upper",  1, SQLITE_UTF16, (void*)1, icuCaseFunc16},
    {"upper",  2, SQLITE_UTF16, (void*)1, icuCaseFunc16},

    {"lower",  1, SQLITE_UTF8,  0,        icuCaseFunc16},
    {"lower",  2, SQLITE_UTF8,  0,        icuCaseFunc16},
    {"upper",  1, SQLITE_UTF8,  (void*)1, icuCaseFunc16},
    {"upper",  2, SQLITE_UTF8,  (void*)1, icuCaseFunc16},
  };
  const int nScalar = (int)(sizeof(scalars)/sizeof(scalars[0]));
  int rc = SQLITE_OK;
  int ii;

  /* Stop at the first failure so that the error is reported rather
  ** than being overwritten by a later, successful registration.
  */
  for(ii=0; rc==SQLITE_OK && ii<nScalar; ii++){
    const struct IcuScalar *p = &scalars[ii];
    rc = sqlite3_create_function(
        db, p->zName, p->nArg, p->enc, p->pContext, p->xFunc, 0, 0
    );
  }

  return rc;
}
#ifndef SQLITE_CORE
/*
** Entry point used when this file is built as a loadable extension
** rather than as part of the SQLite core. The guard is the same
** "#ifndef SQLITE_CORE" test that controls the inclusion of sqlite3ext.h
** and SQLITE_EXTENSION_INIT1 near the top of this file, so the entry
** point is compiled exactly when those declarations are available.
**
** Parameters:
**   db        Database connection to register the ICU functions with.
**   pzErrMsg  Not used by this extension.
**   pApi      API routine table, handed to SQLITE_EXTENSION_INIT2().
**
** Returns the result of sqlite3IcuInit().
*/
int sqlite3_extension_init(
  sqlite3 *db,
  char **pzErrMsg,
  const sqlite3_api_routines *pApi
){
  SQLITE_EXTENSION_INIT2(pApi)
  (void)pzErrMsg;                 /* Unused parameter */
  return sqlite3IcuInit(db);
}
#endif
#endif

@ -65,6 +65,8 @@ LIBOBJ+= alter.o analyze.o attach.o auth.o btree.o build.o \
vdbe.o vdbeapi.o vdbeaux.o vdbeblob.o vdbefifo.o vdbemem.o \
where.o utf.o legacy.o vtab.o
LIBOBJ += icu.o
# All of the source code files.
#
SRC = \
@ -141,6 +143,8 @@ SRC += \
$(TOP)/ext/fts2/fts2_porter.c \
$(TOP)/ext/fts2/fts2_tokenizer.h \
$(TOP)/ext/fts2/fts2_tokenizer1.c
SRC += \
$(TOP)/ext/icu/icu.c
# Generated source code files
#
@ -311,6 +315,9 @@ func.o: $(TOP)/src/func.c $(HDR)
hash.o: $(TOP)/src/hash.c $(HDR)
$(TCCX) -c $(TOP)/src/hash.c
icu.o: $(TOP)/ext/icu/icu.c $(HDR)
$(TCCX) -c $(TOP)/ext/icu/icu.c
insert.o: $(TOP)/src/insert.c $(HDR)
$(TCCX) -c $(TOP)/src/insert.c

@ -1,5 +1,5 @@
C Add\s"Powered\sBy\sSQLite"\slogo\scontributed\sby\sAlberto\sSimoes.\s(CVS\s3930)
D 2007-05-05T22:45:01
C Add\sthe\sstart\sof\sthe\sICU\sextension.\s(CVS\s3931)
D 2007-05-06T16:04:12
F Makefile.in ea8888bdcf53313d26576fcabcb6d0a10ecd35cd
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@ -43,9 +43,10 @@ F ext/fts2/fts2_porter.c 991a45463553c7318063fe7773368a6c0f39e35d
F ext/fts2/fts2_tokenizer.h 4c5ffe31d63622869eb6eec1503df7f6996fd1bd
F ext/fts2/fts2_tokenizer1.c 5c979fe8815f95396beb22b627571da895a025af
F ext/fts2/mkfts2amal.tcl 2a9ec76b0760fe7f3669dca5bc0d60728bc1c977
F ext/icu/icu.c 17ad2387c27133b5fcf7037687d5ff346f4a1da7
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
F ltmain.sh 56abb507100ed2d4261f6dd1653dec3cf4066387
F main.mk a7522a769f2fe70c9bebd6bd8a36bed79be0e5cb
F main.mk 6298bd3fcbc590fe05354f7d962c57ac202d7bcd
F mkdll.sh ed62756baf44babf562a7843588790c02fee2106
F mkopcodec.awk bd46ad001c98dfbab07b1713cb8e692fa0e5415d
F mkopcodeh.awk cde995d269aa06c94adbf6455bea0acedb913fa5
@ -76,7 +77,7 @@ F src/hash.h 1b3f7e2609141fd571f62199fc38687d262e9564
F src/insert.c e595ca26805dfb3a9ebaabc28e7947c479f3b14d
F src/legacy.c 388c71ad7fbcd898ba1bcbfc98a3ac954bfa5d01
F src/loadext.c afe4f4755dc49c36ef505748bbdddecb9f1d02a2
F src/main.c e6eb036c3580ba9116fedfe4a8b58ed63d5abb37
F src/main.c 2e11ec21dd11008a339b1d6d61aeb9b51992b2cf
F src/malloc.c b89e31258a85158d15795bf87ae3ba007e56329b
F src/md5.c c5fdfa5c2593eaee2e32a5ce6c6927c986eaf217
F src/os.c 4650e98aadd27abfe1698ff58edf6893c58d4881
@ -105,7 +106,7 @@ F src/sqlite3ext.h 7d0d363ea7327e817ef0dfe1b7eee1f171b72890
F src/sqliteInt.h 3ffe2f9c801575e315451e7d2831c4a90a165aa8
F src/table.c a8de75bcedf84d4060d804264b067ab3b1a3561d
F src/tclsqlite.c f3414b2d6bc37e6760b49c9abd3504ff69f4441b
F src/test1.c 515a91ed7ee95a6fa5b40873aa4f5aa9c858080e
F src/test1.c 17e8402b4b567317b4e49923658d1f5874eaee5f
F src/test2.c 24458b17ab2f3c90cbc1c8446bd7ffe69be62f88
F src/test3.c 946ea9d1a8c928656e3c70f0a2fcb8e733a15e86
F src/test4.c 8b784cd82de158a2317cb4ac4bc86f91ad315e25
@ -479,7 +480,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
P bece7a6bae3a52da03c25b577e15a648eb91805a
R e9cef941050626d3869bdb65d6d0f240
U drh
Z 860115faaebbc6372d09fd94f2e033c9
P 25cfa7740a0899511d9200d99c2617c45ff00f32
R 404e1ba474e37ae2a0e69793ca2f91cc
U danielk1977
Z 18a7c2474aed82907ffca52bb141507e

@ -1 +1 @@
25cfa7740a0899511d9200d99c2617c45ff00f32
f473e8526770b6a332dfde3e1fd1ddf8df493e9a

@ -14,7 +14,7 @@
** other files are for internal use by SQLite and should not be
** accessed by users of the library.
**
** $Id: main.c,v 1.370 2007/04/18 14:24:33 danielk1977 Exp $
** $Id: main.c,v 1.371 2007/05/06 16:04:12 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
@ -985,6 +985,13 @@ static int openDatabase(
}
#endif
#ifdef SQLITE_ENABLE_ICU
{
extern int sqlite3IcuInit(sqlite3*);
sqlite3IcuInit(db);
}
#endif
/* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking
** mode. -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking
** mode. Doing nothing at all also makes NORMAL the default.

@ -13,7 +13,7 @@
** is not included in the SQLite library. It is used for automated
** testing of the SQLite library.
**
** $Id: test1.c,v 1.246 2007/05/05 18:39:25 drh Exp $
** $Id: test1.c,v 1.247 2007/05/06 16:04:12 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
@ -4266,6 +4266,12 @@ static void set_options(Tcl_Interp *interp){
Tcl_SetVar2(interp, "sqlite_options", "globalrecover", "1", TCL_GLOBAL_ONLY);
#endif
#ifdef SQLITE_ENABLE_ICU
Tcl_SetVar2(interp, "sqlite_options", "icu", "1", TCL_GLOBAL_ONLY);
#else
Tcl_SetVar2(interp, "sqlite_options", "icu", "0", TCL_GLOBAL_ONLY);
#endif
#ifdef SQLITE_OMIT_INCRBLOB
Tcl_SetVar2(interp, "sqlite_options", "incrblob", "0", TCL_GLOBAL_ONLY);
#else