Merge the unicode61 tokenizer and the shared-cache-memory database changes

into the sessions branch.

FossilOrigin-Name: df817e70afc3f41e680d8f84dfa5772d5b3ae4d9
This commit is contained in:
drh 2012-05-28 18:22:41 +00:00
commit 14bcd7d25e
29 changed files with 27253 additions and 68 deletions

View File

@ -165,8 +165,9 @@ LIBOBJS0 = alter.lo analyze.lo attach.lo auth.lo \
backup.lo bitvec.lo btmutex.lo btree.lo build.lo \
callback.lo complete.lo ctime.lo date.lo delete.lo \
expr.lo fault.lo fkey.lo \
fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo fts3_porter.lo \
fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo fts3_write.lo \
fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo \
fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \
fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \
func.lo global.lo hash.lo \
icu.lo insert.lo journal.lo legacy.lo loadext.lo \
main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \
@ -317,6 +318,8 @@ SRC += \
$(TOP)/ext/fts3/fts3_tokenizer.h \
$(TOP)/ext/fts3/fts3_tokenizer.c \
$(TOP)/ext/fts3/fts3_tokenizer1.c \
$(TOP)/ext/fts3/fts3_unicode.c \
$(TOP)/ext/fts3/fts3_unicode2.c \
$(TOP)/ext/fts3/fts3_write.c
SRC += \
$(TOP)/ext/icu/sqliteicu.h \

View File

@ -1 +1 @@
3.7.12.1
3.7.13

18
configure vendored
View File

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.62 for sqlite 3.7.12.1.
# Generated by GNU Autoconf 2.62 for sqlite 3.7.13.
#
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
# 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
@ -743,8 +743,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='sqlite'
PACKAGE_TARNAME='sqlite'
PACKAGE_VERSION='3.7.12.1'
PACKAGE_STRING='sqlite 3.7.12.1'
PACKAGE_VERSION='3.7.13'
PACKAGE_STRING='sqlite 3.7.13'
PACKAGE_BUGREPORT=''
# Factoring default headers for most tests.
@ -1485,7 +1485,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures sqlite 3.7.12.1 to adapt to many kinds of systems.
\`configure' configures sqlite 3.7.13 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1550,7 +1550,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of sqlite 3.7.12.1:";;
short | recursive ) echo "Configuration of sqlite 3.7.13:";;
esac
cat <<\_ACEOF
@ -1666,7 +1666,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
sqlite configure 3.7.12.1
sqlite configure 3.7.13
generated by GNU Autoconf 2.62
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@ -1680,7 +1680,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by sqlite $as_me 3.7.12.1, which was
It was created by sqlite $as_me 3.7.13, which was
generated by GNU Autoconf 2.62. Invocation command line was
$ $0 $@
@ -14032,7 +14032,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by sqlite $as_me 3.7.12.1, which was
This file was extended by sqlite $as_me 3.7.13, which was
generated by GNU Autoconf 2.62. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -14085,7 +14085,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
sqlite config.status 3.7.12.1
sqlite config.status 3.7.13
configured by $0, generated by GNU Autoconf 2.62,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"

View File

@ -11,7 +11,7 @@
);
The built-in tokenizers (valid values to pass as <tokenizer name>) are
"simple" and "porter".
"simple", "porter" and "unicode".
<tokenizer-args> should consist of zero or more white-space separated
arguments to pass to the selected tokenizer implementation. The

View File

@ -3554,6 +3554,9 @@ static void hashDestroy(void *p){
*/
void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#ifndef SQLITE_DISABLE_FTS3_UNICODE
void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);
#endif
#ifdef SQLITE_ENABLE_ICU
void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#endif
@ -3569,12 +3572,19 @@ int sqlite3Fts3Init(sqlite3 *db){
Fts3Hash *pHash = 0;
const sqlite3_tokenizer_module *pSimple = 0;
const sqlite3_tokenizer_module *pPorter = 0;
#ifndef SQLITE_DISABLE_FTS3_UNICODE
const sqlite3_tokenizer_module *pUnicode = 0;
#endif
#ifdef SQLITE_ENABLE_ICU
const sqlite3_tokenizer_module *pIcu = 0;
sqlite3Fts3IcuTokenizerModule(&pIcu);
#endif
#ifndef SQLITE_DISABLE_FTS3_UNICODE
sqlite3Fts3UnicodeTokenizer(&pUnicode);
#endif
#ifdef SQLITE_TEST
rc = sqlite3Fts3InitTerm(db);
if( rc!=SQLITE_OK ) return rc;
@ -3598,6 +3608,10 @@ int sqlite3Fts3Init(sqlite3 *db){
if( rc==SQLITE_OK ){
if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
|| sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter)
#ifndef SQLITE_DISABLE_FTS3_UNICODE
|| sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode)
#endif
#ifdef SQLITE_ENABLE_ICU
|| (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu))
#endif

View File

@ -541,5 +541,9 @@ int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
/* fts3_unicode2.c (functions generated by parsing unicode text files) */
int sqlite3FtsUnicodeTolower(int);
int sqlite3FtsUnicodeIsalnum(int);
#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
#endif /* _FTSINT_H */

246
ext/fts3/fts3_unicode.c Normal file
View File

@ -0,0 +1,246 @@
/*
** 2012 May 24
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** Implementation of the "unicode" full-text-search tokenizer.
*/
#ifndef SQLITE_DISABLE_FTS3_UNICODE
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "fts3_tokenizer.h"
/*
** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
** from the sqlite3 source file utf.c. If this file is compiled as part
** of the amalgamation, they are not required.
*/
#ifndef SQLITE_AMALGAMATION
static const unsigned char sqlite3Utf8Trans1[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};
#define READ_UTF8(zIn, zTerm, c) \
c = *(zIn++); \
if( c>=0xc0 ){ \
c = sqlite3Utf8Trans1[c-0xc0]; \
while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \
c = (c<<6) + (0x3f & *(zIn++)); \
} \
if( c<0x80 \
|| (c&0xFFFFF800)==0xD800 \
|| (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
}
#define WRITE_UTF8(zOut, c) { \
if( c<0x00080 ){ \
*zOut++ = (u8)(c&0xFF); \
} \
else if( c<0x00800 ){ \
*zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \
*zOut++ = 0x80 + (u8)(c & 0x3F); \
} \
else if( c<0x10000 ){ \
*zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \
*zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \
*zOut++ = 0x80 + (u8)(c & 0x3F); \
}else{ \
*zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \
*zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \
*zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \
*zOut++ = 0x80 + (u8)(c & 0x3F); \
} \
}
#endif /* ifndef SQLITE_AMALGAMATION */
typedef struct unicode_tokenizer unicode_tokenizer;
typedef struct unicode_cursor unicode_cursor;
struct unicode_tokenizer {
sqlite3_tokenizer base;
};
struct unicode_cursor {
sqlite3_tokenizer_cursor base;
const unsigned char *aInput; /* Input text being tokenized */
int nInput; /* Size of aInput[] in bytes */
int iOff; /* Current offset within aInput[] */
int iToken; /* Index of next token to be returned */
char *zToken; /* storage for current token */
int nAlloc; /* space allocated at zToken */
};
/*
** Create a new tokenizer instance.
*/
static int unicodeCreate(
int nArg, /* Size of array argv[] */
const char * const *azArg, /* Tokenizer creation arguments */
sqlite3_tokenizer **pp /* OUT: New tokenizer handle */
){
unicode_tokenizer *pNew; /* New tokenizer object */
pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer));
if( pNew==NULL ){
return SQLITE_NOMEM;
}
memset(pNew, 0, sizeof(unicode_tokenizer));
*pp = &pNew->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer allocated by unicodeCreate().
*/
static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){
sqlite3_free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int unicodeOpen(
sqlite3_tokenizer *p, /* The tokenizer */
const char *aInput, /* Input string */
int nInput, /* Size of string aInput in bytes */
sqlite3_tokenizer_cursor **pp /* OUT: New cursor object */
){
unicode_cursor *pCsr;
pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor));
if( pCsr==0 ){
return SQLITE_NOMEM;
}
memset(pCsr, 0, sizeof(unicode_cursor));
pCsr->aInput = (const unsigned char *)aInput;
if( aInput==0 ){
pCsr->nInput = 0;
}else if( nInput<0 ){
pCsr->nInput = (int)strlen(aInput);
}else{
pCsr->nInput = nInput;
}
*pp = &pCsr->base;
UNUSED_PARAMETER(p);
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** simpleOpen() above.
*/
static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){
unicode_cursor *pCsr = (unicode_cursor *) pCursor;
sqlite3_free(pCsr->zToken);
sqlite3_free(pCsr);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to simpleOpen().
*/
static int unicodeNext(
sqlite3_tokenizer_cursor *p, /* Cursor returned by simpleOpen */
const char **paToken, /* OUT: Token text */
int *pnToken, /* OUT: Number of bytes at *paToken */
int *piStart, /* OUT: Starting offset of token */
int *piEnd, /* OUT: Ending offset of token */
int *piPos /* OUT: Position integer of token */
){
unicode_cursor *pCsr = (unicode_cursor *)p;
int iCode;
char *zOut;
const unsigned char *z = &pCsr->aInput[pCsr->iOff];
const unsigned char *zStart = z;
const unsigned char *zEnd;
const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput];
/* Scan past any delimiter characters before the start of the next token.
** Return SQLITE_DONE early if this takes us all the way to the end of
** the input. */
while( z<zTerm ){
READ_UTF8(z, zTerm, iCode);
if( sqlite3FtsUnicodeIsalnum(iCode) ) break;
zStart = z;
}
if( zStart>=zTerm ) return SQLITE_DONE;
zOut = pCsr->zToken;
do {
/* Grow the output buffer if required. */
if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){
char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64);
if( !zNew ) return SQLITE_NOMEM;
zOut = &zNew[zOut - pCsr->zToken];
pCsr->zToken = zNew;
pCsr->nAlloc += 64;
}
/* Write the folded case of the last character read to the output */
zEnd = z;
WRITE_UTF8(zOut, sqlite3FtsUnicodeTolower(iCode));
/* If the cursor is not at EOF, read the next character */
if( z>=zTerm ) break;
READ_UTF8(z, zTerm, iCode);
}while( sqlite3FtsUnicodeIsalnum(iCode) );
/* Set the output variables and return. */
pCsr->iOff = (z - pCsr->aInput);
*paToken = pCsr->zToken;
*pnToken = zOut - pCsr->zToken;
*piStart = (zStart - pCsr->aInput);
*piEnd = (zEnd - pCsr->aInput);
*piPos = pCsr->iToken++;
return SQLITE_OK;
}
/*
** Set *ppModule to a pointer to the sqlite3_tokenizer_module
** structure for the unicode tokenizer.
*/
void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){
static const sqlite3_tokenizer_module module = {
0,
unicodeCreate,
unicodeDestroy,
unicodeOpen,
unicodeClose,
unicodeNext,
0,
};
*ppModule = &module;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */

296
ext/fts3/fts3_unicode2.c Normal file
View File

@ -0,0 +1,296 @@
/*
** 2012 May 25
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
*/
/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/
#if !defined(SQLITE_DISABLE_FTS3_UNICODE)
#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
#include <assert.h>
/*
** Return true if the argument corresponds to a unicode codepoint
** classified as either a letter or a number. Otherwise false.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3FtsUnicodeIsalnum(int c){
/* Each unsigned integer in the following array corresponds to a contiguous
** range of unicode codepoints that are not either letters or numbers (i.e.
** codepoints for which this function should return 0).
**
** The most significant 22 bits in each 32-bit value contain the first
** codepoint in the range. The least significant 10 bits are used to store
** the size of the range (always at least 1). In other words, the value
** ((C<<22) + N) represents a range of N codepoints starting with codepoint
** C. It is not possible to represent a range larger than 1023 codepoints
** using this format.
*/
const static unsigned int aEntry[] = {
0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
0x037FFC02, 0x03E3FC01, 0x03EC7801, 0x03ECA401, 0x03EEC810,
0x03F4F802, 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023,
0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807,
0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405,
0x04040003, 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E,
0x040E7C01, 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01,
0x04280403, 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01,
0x04294009, 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016,
0x04420003, 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004,
0x04460003, 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004,
0x05BD442E, 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5,
0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01,
0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401,
0x075EA401, 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064,
0x07C2800F, 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F,
0x07C4C03C, 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009,
0x07C94002, 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014,
0x07CE8025, 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001,
0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018,
0x07D7EC46, 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401,
0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001,
0x43FFF401,
};
static const unsigned int aAscii[4] = {
0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
};
if( c<128 ){
return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
}else if( c<(1<<22) ){
unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
int iRes;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
if( key >= aEntry[iTest] ){
iRes = iTest;
iLo = iTest+1;
}else{
iHi = iTest-1;
}
}
assert( aEntry[0]<key );
assert( key>=aEntry[iRes] );
return (c >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
}
return 1;
}
/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3FtsUnicodeTolower(int c){
/* Each entry in the following array defines a rule for folding a range
** of codepoints to lower case. The rule applies to a range of nRange
** codepoints starting at codepoint iCode.
**
** If the least significant bit in flags is clear, then the rule applies
** to all nRange codepoints (i.e. all nRange codepoints are upper case and
** need to be folded). Or, if it is set, then the rule only applies to
** every second codepoint in the range, starting with codepoint C.
**
** The 7 most significant bits in flags are an index into the aiOff[]
** array. If a specific codepoint C does require folding, then its lower
** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
**
** The contents of this array are generated by parsing the CaseFolding.txt
** file distributed as part of the "Unicode Character Database". See
** http://www.unicode.org for details.
*/
static const struct TableEntry {
unsigned short iCode;
unsigned char flags;
unsigned char nRange;
} aEntry[] = {
{65, 14, 26}, {181, 64, 1}, {192, 14, 23},
{216, 14, 7}, {256, 1, 48}, {306, 1, 6},
{313, 1, 16}, {330, 1, 46}, {376, 116, 1},
{377, 1, 6}, {383, 104, 1}, {385, 50, 1},
{386, 1, 4}, {390, 44, 1}, {391, 0, 1},
{393, 42, 2}, {395, 0, 1}, {398, 32, 1},
{399, 38, 1}, {400, 40, 1}, {401, 0, 1},
{403, 42, 1}, {404, 46, 1}, {406, 52, 1},
{407, 48, 1}, {408, 0, 1}, {412, 52, 1},
{413, 54, 1}, {415, 56, 1}, {416, 1, 6},
{422, 60, 1}, {423, 0, 1}, {425, 60, 1},
{428, 0, 1}, {430, 60, 1}, {431, 0, 1},
{433, 58, 2}, {435, 1, 4}, {439, 62, 1},
{440, 0, 1}, {444, 0, 1}, {452, 2, 1},
{453, 0, 1}, {455, 2, 1}, {456, 0, 1},
{458, 2, 1}, {459, 1, 18}, {478, 1, 18},
{497, 2, 1}, {498, 1, 4}, {502, 122, 1},
{503, 134, 1}, {504, 1, 40}, {544, 110, 1},
{546, 1, 18}, {570, 70, 1}, {571, 0, 1},
{573, 108, 1}, {574, 68, 1}, {577, 0, 1},
{579, 106, 1}, {580, 28, 1}, {581, 30, 1},
{582, 1, 10}, {837, 36, 1}, {880, 1, 4},
{886, 0, 1}, {902, 18, 1}, {904, 16, 3},
{908, 26, 1}, {910, 24, 2}, {913, 14, 17},
{931, 14, 9}, {962, 0, 1}, {975, 4, 1},
{976, 140, 1}, {977, 142, 1}, {981, 146, 1},
{982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
{1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
{1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
{1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
{1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
{1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
{4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
{7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
{7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
{7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
{8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
{8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
{8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
{8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
{8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
{8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
{8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
{8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
{8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
{11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
{11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
{11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
{11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
{11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
{42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
{42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
{42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
{42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
{65313, 14, 26},
};
static const unsigned short aiOff[] = {
1, 2, 8, 15, 16, 26, 28, 32,
37, 38, 40, 48, 63, 64, 69, 71,
79, 80, 116, 202, 203, 205, 206, 207,
209, 210, 211, 213, 214, 217, 218, 219,
775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
65514, 65521, 65527, 65528, 65529,
};
int ret = c;
assert( c>=0 );
assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
if( c<128 ){
if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
}else if( c<65536 ){
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
int iRes = -1;
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
int cmp = (c - aEntry[iTest].iCode);
if( cmp>=0 ){
iRes = iTest;
iLo = iTest+1;
}else{
iHi = iTest-1;
}
}
assert( iRes<0 || c>=aEntry[iRes].iCode );
if( iRes>=0 ){
const struct TableEntry *p = &aEntry[iRes];
if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
assert( ret>0 );
}
}
}
else if( c>=66560 && c<66600 ){
ret = c + 40;
}
return ret;
}
#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */

View File

@ -3174,7 +3174,12 @@ static void fts3UpdateDocTotals(
}else{
memset(a, 0, sizeof(u32)*(nStat) );
}
sqlite3_reset(pStmt);
rc = sqlite3_reset(pStmt);
if( rc!=SQLITE_OK ){
sqlite3_free(a);
*pRC = rc;
return;
}
if( nChng<0 && a[0]<(u32)(-nChng) ){
a[0] = 0;
}else{

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,574 @@
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of codepoints (integers). The list
# contains all codepoints in the UnicodeData.txt assigned to any "General
# Category" that is not a "Letter" or "Number".
#
proc an_load_unicodedata_text {zName} {
set fd [open $zName]
set lField {
code
character_name
general_category
canonical_combining_classes
bidirectional_category
character_decomposition_mapping
decimal_digit_value
digit_value
numeric_value
mirrored
unicode_1_name
iso10646_comment_field
uppercase_mapping
lowercase_mapping
titlecase_mapping
}
set lRet [list]
while { ![eof $fd] } {
set line [gets $fd]
if {$line == ""} continue
set fields [split $line ";"]
if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
foreach $lField $fields {}
set iCode [expr "0x$code"]
set bAlnum [expr {[lsearch {L N} [string range $general_category 0 0]]>=0}]
if { !$bAlnum } { lappend lRet $iCode }
}
close $fd
set lRet
}
proc an_load_separator_ranges {} {
global unicodedata.txt
set lSep [an_load_unicodedata_text ${unicodedata.txt}]
unset -nocomplain iFirst
unset -nocomplain nRange
set lRange [list]
foreach sep $lSep {
if {0==[info exists iFirst]} {
set iFirst $sep
set nRange 1
} elseif { $sep == ($iFirst+$nRange) } {
incr nRange
} else {
lappend lRange [list $iFirst $nRange]
set iFirst $sep
set nRange 1
}
}
lappend lRange [list $iFirst $nRange]
set lRange
}
proc an_print_range_array {lRange} {
set iFirstMax 0
set nRangeMax 0
foreach range $lRange {
foreach {iFirst nRange} $range {}
if {$iFirst > $iFirstMax} {set iFirstMax $iFirst}
if {$nRange > $nRangeMax} {set nRangeMax $nRange}
}
if {$iFirstMax >= (1<<22)} {error "first-max is too large for format"}
if {$nRangeMax >= (1<<10)} {error "range-max is too large for format"}
puts -nonewline " "
puts [string trim {
/* Each unsigned integer in the following array corresponds to a contiguous
** range of unicode codepoints that are not either letters or numbers (i.e.
** codepoints for which this function should return 0).
**
** The most significant 22 bits in each 32-bit value contain the first
** codepoint in the range. The least significant 10 bits are used to store
** the size of the range (always at least 1). In other words, the value
** ((C<<22) + N) represents a range of N codepoints starting with codepoint
** C. It is not possible to represent a range larger than 1023 codepoints
** using this format.
*/
}]
puts -nonewline " const static unsigned int aEntry\[\] = \{"
set i 0
foreach range $lRange {
foreach {iFirst nRange} $range {}
set u32 [format "0x%08X" [expr ($iFirst<<10) + $nRange]]
if {($i % 5)==0} {puts "" ; puts -nonewline " "}
puts -nonewline " $u32,"
incr i
}
puts ""
puts " \};"
}
proc an_print_ascii_bitmap {lRange} {
foreach range $lRange {
foreach {iFirst nRange} $range {}
for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} {
if {$i<=127} { set a($i) 1 }
}
}
set aAscii [list 0 0 0 0]
foreach key [array names a] {
set idx [expr $key >> 5]
lset aAscii $idx [expr [lindex $aAscii $idx] | (1 << ($key&0x001F))]
}
puts " static const unsigned int aAscii\[4\] = \{"
puts -nonewline " "
foreach v $aAscii { puts -nonewline [format " 0x%08X," $v] }
puts ""
puts " \};"
}
proc print_isalnum {zFunc lRange} {
puts "/*"
puts "** Return true if the argument corresponds to a unicode codepoint"
puts "** classified as either a letter or a number. Otherwise false."
puts "**"
puts "** The results are undefined if the value passed to this function"
puts "** is less than zero."
puts "*/"
puts "int ${zFunc}\(int c)\{"
an_print_range_array $lRange
an_print_ascii_bitmap $lRange
puts {
if( c<128 ){
return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
}else if( c<(1<<22) ){
unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
int iRes;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
if( key >= aEntry[iTest] ){
iRes = iTest;
iLo = iTest+1;
}else{
iHi = iTest-1;
}
}
assert( aEntry[0]<key );
assert( key>=aEntry[iRes] );
return (c >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
}
return 1;}
puts "\}"
}
proc print_test_isalnum {zFunc lRange} {
foreach range $lRange {
foreach {iFirst nRange} $range {}
for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} { set a($i) 1 }
}
puts "static int isalnum_test(int *piCode)\{"
puts -nonewline " unsigned char aAlnum\[\] = \{"
for {set i 0} {$i < 70000} {incr i} {
if {($i % 32)==0} { puts "" ; puts -nonewline " " }
set bFlag [expr ![info exists a($i)]]
puts -nonewline "${bFlag},"
}
puts ""
puts " \};"
puts -nonewline " int aLargeSep\[\] = \{"
set i 0
foreach iSep [lsort -integer [array names a]] {
if {$iSep<70000} continue
if {($i % 8)==0} { puts "" ; puts -nonewline " " }
puts -nonewline " $iSep,"
incr i
}
puts ""
puts " \};"
puts -nonewline " int aLargeOther\[\] = \{"
set i 0
foreach iSep [lsort -integer [array names a]] {
if {$iSep<70000} continue
if {[info exists a([expr $iSep-1])]==0} {
if {($i % 8)==0} { puts "" ; puts -nonewline " " }
puts -nonewline " [expr $iSep-1],"
incr i
}
if {[info exists a([expr $iSep+1])]==0} {
if {($i % 8)==0} { puts "" ; puts -nonewline " " }
puts -nonewline " [expr $iSep+1],"
incr i
}
}
puts ""
puts " \};"
puts [subst -nocommands {
int i;
for(i=0; i<sizeof(aAlnum)/sizeof(aAlnum[0]); i++){
if( ${zFunc}(i)!=aAlnum[i] ){
*piCode = i;
return 1;
}
}
for(i=0; i<sizeof(aLargeSep)/sizeof(aLargeSep[0]); i++){
if( ${zFunc}(aLargeSep[i])!=0 ){
*piCode = aLargeSep[i];
return 1;
}
}
for(i=0; i<sizeof(aLargeOther)/sizeof(aLargeOther[0]); i++){
if( ${zFunc}(aLargeOther[i])!=1 ){
*piCode = aLargeOther[i];
return 1;
}
}
}]
puts " return 0;"
puts "\}"
}
#-------------------------------------------------------------------------
proc tl_load_casefolding_txt {zName} {
global tl_lookup_table
set fd [open $zName]
while { ![eof $fd] } {
set line [gets $fd]
if {[string range $line 0 0] == "#"} continue
if {$line == ""} continue
foreach x {a b c d} {unset -nocomplain $x}
foreach {a b c d} [split $line ";"] {}
set a2 [list]
set c2 [list]
foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
set b [string trim $b]
set d [string trim $d]
if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
}
}
proc tl_create_records {} {
global tl_lookup_table
set iFirst ""
set nOff 0
set nRange 0
set nIncr 0
set lRecord [list]
foreach code [lsort -integer [array names tl_lookup_table]] {
set mapping $tl_lookup_table($code)
if {$iFirst == ""} {
set iFirst $code
set nOff [expr $mapping - $code]
set nRange 1
set nIncr 1
} else {
set diff [expr $code - ($iFirst + ($nIncr * ($nRange - 1)))]
if { $nRange==1 && ($diff==1 || $diff==2) } {
set nIncr $diff
}
if {$diff != $nIncr || ($mapping - $code)!=$nOff} {
if { $nRange==1 } {set nIncr 1}
lappend lRecord [list $iFirst $nIncr $nRange $nOff]
set iFirst $code
set nOff [expr $mapping - $code]
set nRange 1
set nIncr 1
} else {
incr nRange
}
}
}
lappend lRecord [list $iFirst $nIncr $nRange $nOff]
set lRecord
}
proc tl_print_table_header {} {
puts -nonewline " "
puts [string trim {
/* Each entry in the following array defines a rule for folding a range
** of codepoints to lower case. The rule applies to a range of nRange
** codepoints starting at codepoint iCode.
**
** If the least significant bit in flags is clear, then the rule applies
** to all nRange codepoints (i.e. all nRange codepoints are upper case and
** need to be folded). Or, if it is set, then the rule only applies to
** every second codepoint in the range, starting with codepoint C.
**
** The 7 most significant bits in flags are an index into the aiOff[]
** array. If a specific codepoint C does require folding, then its lower
** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
**
** The contents of this array are generated by parsing the CaseFolding.txt
** file distributed as part of the "Unicode Character Database". See
** http://www.unicode.org for details.
*/
}]
puts " static const struct TableEntry \{"
puts " unsigned short iCode;"
puts " unsigned char flags;"
puts " unsigned char nRange;"
puts " \} aEntry\[\] = \{"
}
proc tl_print_table_entry {togglevar entry liOff} {
upvar $togglevar t
foreach {iFirst nIncr nRange nOff} $entry {}
if {$iFirst > (1<<16)} { return 1 }
if {[info exists t]==0} {set t 0}
if {$t==0} { puts -nonewline " " }
set flags 0
if {$nIncr==2} { set flags 1 ; set nRange [expr $nRange * 2]}
if {$nOff<0} { incr nOff [expr (1<<16)] }
set idx [lsearch $liOff $nOff]
if {$idx<0} {error "malfunction generating aiOff"}
set flags [expr $flags + $idx*2]
set txt "{$iFirst, $flags, $nRange},"
if {$t==2} {
puts $txt
} else {
puts -nonewline [format "% -23s" $txt]
}
set t [expr ($t+1)%3]
return 0
}
proc tl_print_table_footer {togglevar} {
upvar $togglevar t
if {$t!=0} {puts ""}
puts " \};"
}
proc tl_print_if_entry {entry} {
foreach {iFirst nIncr nRange nOff} $entry {}
if {$nIncr==2} {error "tl_print_if_entry needs improvement!"}
puts " else if( c>=$iFirst && c<[expr $iFirst+$nRange] )\{"
puts " ret = c + $nOff;"
puts " \}"
}
proc tl_generate_ioff_table {lRecord} {
foreach entry $lRecord {
foreach {iFirst nIncr nRange iOff} $entry {}
if {$iOff<0} { incr iOff [expr (1<<16)] }
if {[info exists a($iOff)]} continue
set a($iOff) 1
}
set liOff [lsort -integer [array names a]]
if {[llength $liOff]>128} { error "Too many distinct ioffs" }
return $liOff
}
proc tl_print_ioff_table {liOff} {
puts -nonewline " static const unsigned short aiOff\[\] = \{"
set i 0
foreach off $liOff {
if {($i % 8)==0} {puts "" ; puts -nonewline " "}
puts -nonewline [format "% -7s" "$off,"]
incr i
}
puts ""
puts " \};"
}
proc print_tolower {zFunc} {
set lRecord [tl_create_records]
set lHigh [list]
puts "/*"
puts "** Interpret the argument as a unicode codepoint. If the codepoint"
puts "** is an upper case character that has a lower case equivalent,"
puts "** return the codepoint corresponding to the lower case version."
puts "** Otherwise, return a copy of the argument."
puts "**"
puts "** The results are undefined if the value passed to this function"
puts "** is less than zero."
puts "*/"
puts "int ${zFunc}\(int c)\{"
set liOff [tl_generate_ioff_table $lRecord]
tl_print_table_header
foreach entry $lRecord {
if {[tl_print_table_entry toggle $entry $liOff]} {
lappend lHigh $entry
}
}
tl_print_table_footer toggle
tl_print_ioff_table $liOff
puts {
int ret = c;
assert( c>=0 );
assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
if( c<128 ){
if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
}else if( c<65536 ){
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
int iRes = -1;
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
int cmp = (c - aEntry[iTest].iCode);
if( cmp>=0 ){
iRes = iTest;
iLo = iTest+1;
}else{
iHi = iTest-1;
}
}
assert( iRes<0 || c>=aEntry[iRes].iCode );
if( iRes>=0 ){
const struct TableEntry *p = &aEntry[iRes];
if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
assert( ret>0 );
}
}
}
}
foreach entry $lHigh {
tl_print_if_entry $entry
}
puts ""
puts " return ret;"
puts "\}"
}
proc print_tolower_test {zFunc} {
global tl_lookup_table
puts "static int tolower_test(int *piCode)\{"
puts -nonewline " static int aLookup\[\] = \{"
for {set i 0} {$i < 70000} {incr i} {
set expected $i
catch { set expected $tl_lookup_table($i) }
if {($i % 8)==0} { puts "" ; puts -nonewline " " }
puts -nonewline "$expected, "
}
puts " \};"
puts " int i;"
puts " for(i=0; i<sizeof(aLookup)/sizeof(aLookup\[0\]); i++)\{"
puts " if( ${zFunc}\(i)!=aLookup\[i\] )\{"
puts " *piCode = i;"
puts " return 1;"
puts " \}"
puts " \}"
puts " return 0;"
puts "\}"
}
proc print_fileheader {} {
puts [string trim {
/*
** 2012 May 25
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
*/
/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/
}]
puts ""
puts "#if !defined(SQLITE_DISABLE_FTS3_UNICODE)"
puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
puts ""
puts "#include <assert.h>"
puts ""
}
proc print_test_main {} {
puts ""
puts "#include <stdio.h>"
puts ""
puts "int main(int argc, char **argv)\{"
puts " int r1, r2;"
puts " int code;"
puts " r1 = isalnum_test(&code);"
puts " if( r1 ) printf(\"isalnum(): Problem with code %d\\n\",code);"
puts " else printf(\"isalnum(): test passed\\n\");"
puts " r2 = tolower_test(&code);"
puts " if( r2 ) printf(\"tolower(): Problem with code %d\\n\",code);"
puts " else printf(\"tolower(): test passed\\n\");"
puts " return (r1 || r2);"
puts "\}"
}
# Proces the command line arguments. Exit early if they are not to
# our liking.
#
proc usage {} {
puts -nonewline stderr "Usage: $::argv0 ?-test? "
puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>"
exit 1
}
if {[llength $argv]!=2 && [llength $argv]!=3} usage
if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage
set unicodedata.txt [lindex $argv end]
set casefolding.txt [lindex $argv end-1]
set generate_test_code [expr {[llength $argv]==3}]
# Print the isalnum() function to stdout.
#
print_fileheader
set lRange [an_load_separator_ranges]
print_isalnum sqlite3FtsUnicodeIsalnum $lRange
# Leave a gap between the two generated C functions.
#
puts ""
puts ""
# Print the tolower() function to stdout.
#
tl_load_casefolding_txt ${casefolding.txt}
print_tolower sqlite3FtsUnicodeTolower
# Print the test routines and main() function to stdout, if -test
# was specified.
#
if {$::generate_test_code} {
print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange
print_tolower_test sqlite3FtsUnicodeTolower
print_test_main
}
puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"

View File

@ -56,6 +56,7 @@ LIBOBJ+= alter.o analyze.o attach.o auth.o \
callback.o complete.o ctime.o date.o delete.o expr.o fault.o fkey.o \
fts3.o fts3_aux.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \
fts3_snippet.o fts3_tokenizer.o fts3_tokenizer1.o \
fts3_unicode.o fts3_unicode2.o \
fts3_write.o func.o global.o hash.o \
icu.o insert.o journal.o legacy.o loadext.o \
main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \
@ -199,6 +200,8 @@ SRC += \
$(TOP)/ext/fts3/fts3_tokenizer.h \
$(TOP)/ext/fts3/fts3_tokenizer.c \
$(TOP)/ext/fts3/fts3_tokenizer1.c \
$(TOP)/ext/fts3/fts3_unicode.c \
$(TOP)/ext/fts3/fts3_unicode2.c \
$(TOP)/ext/fts3/fts3_write.c
SRC += \
$(TOP)/ext/icu/sqliteicu.h \
@ -511,6 +514,12 @@ fts3_tokenizer.o: $(TOP)/ext/fts3/fts3_tokenizer.c $(HDR) $(EXTHDR)
fts3_tokenizer1.o: $(TOP)/ext/fts3/fts3_tokenizer1.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_tokenizer1.c
fts3_unicode.o: $(TOP)/ext/fts3/fts3_unicode.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_unicode.c
fts3_unicode2.o: $(TOP)/ext/fts3/fts3_unicode2.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_unicode2.c
fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c

View File

@ -1,12 +1,12 @@
C Version\s3.7.12.1
D 2012-05-22T13:01:39.761
C Merge\sthe\sunicode61\stokenizer\sand\sthe\sshared-cache-memory\sdatabase\schanges\ninto\sthe\ssessions\sbranch.
D 2012-05-28T18:22:41.850
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 2f37e468503dbe79d35c9f6dffcf3fae1ae9ec20
F Makefile.in 4f37eb61be9d38643cdd839a74b8e3bad724cfcf
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F Makefile.msc efc373edae0f99cb3b04672ccefea8162664d40b
F Makefile.vxworks 3b7fe7a0571fdadc61363ebc1b23732d2d6363ca
F README cd04a36fbc7ea56932a4052d7d0b7f09f27c33d6
F VERSION 1e25ebddd2ed5811c10bdabe914cd46d2dc38af8
F VERSION 3e857b9b826e818eec9411eafe2c3fa22c1dbb8a
F aclocal.m4 a5c22d164aff7ed549d53a90fa56d56955281f50
F addopcodes.awk 17dc593f791f874d2c23a0f9360850ded0286531
F art/sqlite370.eps aa97a671332b432a54e1d74ff5e8775be34200c2
@ -15,7 +15,7 @@ F art/sqlite370.jpg d512473dae7e378a67e28ff96a34da7cb331def2
F config.guess 226d9a188c6196f3033ffc651cbc9dcee1a42977
F config.h.in 0921066a13130082764ab4ab6456f7b5bebe56de
F config.sub 9ebe4c3b3dab6431ece34f16828b594fb420da55
F configure faa04198b719ec9cb2166aa0b163558e9b8ddd77 x
F configure 79405675c313ce4a5e94afac6ec880bb3e27b4f1 x
F configure.ac 9ee886c21c095b3272137b1553ae416c8b8c8557
F contrib/sqlitecon.tcl 210a913ad63f9f991070821e599d600bd913e0ad
F doc/lemon.html 3091574143dd3415669b6745843ff8d011d33549
@ -53,11 +53,11 @@ F ext/fts2/fts2_tokenizer1.c 0123d21078e053bd98fd6186c5c6dc6d67969f2e
F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.tokenizers e0a8b81383ea60d0334d274fadf305ea14a8c314
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c a7adf6747d1fdd627ecd421c1709996741ca6693
F ext/fts3/fts3.c 41824d0db7d244ca335ce98162df1244863a05c4
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h aca752b99c15ee738f5bcf0910eafb9e4aeb1b97
F ext/fts3/fts3Int.h 7b163fa22e7a625c404c424f2779a4d7b14c14ad
F ext/fts3/fts3_aux.c 5205182bd8f372782597888156404766edf5781e
F ext/fts3/fts3_expr.c dbc7ba4c3a6061adde0f38ed8e9b349568299551
F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914
@ -70,10 +70,15 @@ F ext/fts3/fts3_test.c 348f7d08cae05285794e23dc4fe8b8fdf66e264a
F ext/fts3/fts3_tokenizer.c 3da7254a9881f7e270ab28e2004e0d22b3212bce
F ext/fts3/fts3_tokenizer.h 66dec98e365854b6cd2d54f1a96bb6d428fc5a68
F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004
F ext/fts3/fts3_write.c cd4af00b3b0512b4d76177a267fcaafab44cbce4
F ext/fts3/fts3_unicode.c 76b6f6fe6e86acd75b08272502fae74a13cef310
F ext/fts3/fts3_unicode2.c 3ddf1728a396a03b5a73ff0f11ecfd2009de117d
F ext/fts3/fts3_write.c 6a6391d6b01114f885e24e1f66bbc11ffba0e9e2
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197
F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl 2029991cc2cd0bf71df12768578a29c852bf54d1
F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9
F ext/icu/icu.c eb9ae1d79046bd7871aa97ee6da51eb770134b5a
F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37
@ -110,7 +115,7 @@ F ext/session/sqlite3session.h f374c9c4c96e08f67ac418871c29d423245c7673
F ext/session/test_session.c ea4dc9b4a1895c8e6bddcbfe3838d7eb57df2d99
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x
F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8
F main.mk a7a6511a621e89c25b59deb9614cf83611d724e1
F main.mk d1663c1ef1e9352b86138f40363f172f7a7f7728
F mkdll.sh 7d09b23c05d56532e9d44a50868eb4b12ff4f74a
F mkextu.sh 416f9b7089d80e5590a29692c9d9280a10dbad9f
F mkextw.sh 4123480947681d9b434a5e7b1ee08135abe409ac
@ -130,7 +135,7 @@ F src/auth.c 523da7fb4979469955d822ff9298352d6b31de34
F src/backup.c 6be23a344d3301ae38e92fddb3a33b91c309fce4
F src/bitvec.c af50f1c8c0ff54d6bdb7a80e2fceca5a93670bef
F src/btmutex.c 976f45a12e37293e32cae0281b15a21d48a8aaa7
F src/btree.c df800f10896bc2ddaa1125c532d6e7a7b9efc532
F src/btree.c f0b71054103cb77eb5e782088c16998ec4f06624
F src/btree.h 48a013f8964f12d944d90e4700df47b72dd6d923
F src/btreeInt.h 38a639c0542c29fe8331a221c4aed0cb8686249e
F src/build.c 2bb2163bb1e69f59e9f36a9413079ead42fa1d2c
@ -152,7 +157,7 @@ F src/journal.c 552839e54d1bf76fb8f7abe51868b66acacf6a0e
F src/legacy.c a199d7683d60cef73089e892409113e69c23a99f
F src/lempar.c 0ee69fca0be54cd93939df98d2aca4ca46f44416
F src/loadext.c f20382fbaeec832438a1ba7797bee3d3c8a6d51d
F src/main.c acab639c93295650b9ef056689e8092cfcf49f2a
F src/main.c bc2d30cc532dca475a2a4a18cb67e1b0ec389f43
F src/malloc.c fe085aa851b666b7c375c1ff957643dc20a04bf6
F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645
F src/mem1.c b3677415e69603d6a0e7c5410a1b3731d55beda1
@ -173,8 +178,8 @@ F src/os_common.h 92815ed65f805560b66166e3583470ff94478f04
F src/os_os2.c 4a75888ba3dfc820ad5e8177025972d74d7f2440
F src/os_unix.c 424d46e0edab969293c2223f09923b2178171f47
F src/os_win.c 412d6434133c7c81dc48b7702f3ea5e61c309e5c
F src/pager.c bb5635dde0b152797836d1c72275284724bb563c
F src/pager.h ef1eaf8593e78f73885c1dfac27ad83bee23bdc5
F src/pager.c 9d4d6406512002d9a243ec27b9c01e93fda43e36
F src/pager.h 8b8c9bc065a3c66769df8724dfdf492ee1aab3c5
F src/parse.y f29df90bd3adc64b33114ab1de9fb7768fcf2099
F src/pcache.c f8043b433a57aba85384a531e3937a804432a346
F src/pcache.h 1b5dcc3dc8103d03e625b177023ee67764fa6b7c
@ -187,7 +192,7 @@ F src/resolve.c b3c70ab28cac60de33684c9aa9e5138dcf71d6dd
F src/rowset.c f6a49f3e9579428024662f6e2931832511f831a1
F src/select.c f6c4833c4d8e94714761d99013d74f381e084f1d
F src/shell.c c16f72e34f611f060546709564c121a67cb2b31b
F src/sqlite.h.in e8b2634aa58853522d559e0291dd3e9ab9e622b2
F src/sqlite.h.in 45a846045ddb8c4318f2919f3a70f011df5ca783
F src/sqlite3ext.h 6904f4aadf976f95241311fbffb00823075d9477
F src/sqliteInt.h d13f48a1067fc637f86f77ee6b4ad9f98c7c10c6
F src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d
@ -206,8 +211,8 @@ F src/test9.c bea1e8cf52aa93695487badedd6e1886c321ea60
F src/test_async.c 0612a752896fad42d55c3999a5122af10dcf22ad
F src/test_autoext.c 30e7bd98ab6d70a62bb9ba572e4c7df347fe645e
F src/test_backup.c c129c91127e9b46e335715ae2e75756e25ba27de
F src/test_btree.c 47cd771250f09cdc6e12dda5bc71bc0b3abc96e2
F src/test_config.c 52aa8cab6e20e9de078a0fb4a6f45a0466fbae2a
F src/test_btree.c 5b89601dcb42a33ba8b820a6b763cc9cb48bac16
F src/test_config.c 31ad0c013a7c1cfb3c12dc8cd33c7fbe92be0050
F src/test_demovfs.c 20a4975127993f4959890016ae9ce5535a880094
F src/test_devsym.c e7498904e72ba7491d142d5c83b476c4e76993bc
F src/test_func.c 090f2c3339e85c2c964435f99aed6f3da9d59525
@ -246,7 +251,7 @@ F src/update.c 89de085a0bf4da448472029d0420a2b1cf1824ee
F src/utf.c 890c67dcfcc7a74623c95baac7535aadfe265e84
F src/util.c 4f6cfad661b2e3454b0cdd5b1b9d39a54942d0e3
F src/vacuum.c bfd53f9bd20a8fdb70b0fa8e77182b866875c0d8
F src/vdbe.c b79d6de881aeec7d22e944238fa0d9bd5a3ef5ae
F src/vdbe.c 15bcec4d80f6471daf6e03ec9d6d32f9f9df4cbf
F src/vdbe.h 87b8ff40de3f55dbcdc33029416862f517c37a2f
F src/vdbeInt.h f1956902b06b4f05ce965aafab6fe220a5477f9c
F src/vdbeapi.c 2fc381f651738feb2495cb001cf2114dea596cc3
@ -396,7 +401,7 @@ F test/e_resolve.test dcce9308fb13b934ce29591105d031d3e14fbba6
F test/e_select.test f5d4b81205701deacfae42051ae200969c41d2c0
F test/e_select2.test 5c3d3da19c7b3e90ae444579db2b70098599ab92
F test/e_update.test 161d5dc6a3ed9dd08f5264d13e20735d7a89f00c
F test/e_uri.test e8b894474fdfe7b18b0c9cb2d911270de2ad64ce
F test/e_uri.test cd2ddb4494c7ebf30b6e3539645bb4e54c0104b9
F test/e_vacuum.test 331da289ae186656cf5f2eb27f577a89c0c221af
F test/enc.test e54531cd6bf941ee6760be041dff19a104c7acea
F test/enc2.test 796c59832e2b9a52842f382ffda8f3e989db03ad
@ -489,7 +494,7 @@ F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851
F test/fts3expr.test 5e745b2b6348499d9ef8d59015de3182072c564c
F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a
F test/fts3fault.test cb72dccb0a3b9f730f16c5240f3fcb9303eb1660
F test/fts3fault2.test b62a2bc843c20414405f80e5eeb78e39bc68fe53
F test/fts3fault2.test 3198eef2804deea7cac8403e771d9cbcb752d887
F test/fts3first.test dbdedd20914c8d539aa3206c9b34a23775644641
F test/fts3malloc.test b86ea33db9e8c58c0c2f8027a9fcadaf6a1568be
F test/fts3matchinfo.test 6507fe1c342e542300d65ea637d4110eccf894e6
@ -508,6 +513,7 @@ F test/fts4langid.test 24a6e41063b416bbdf371ff6b4476fa41c194aa7
F test/fts4merge.test c424309743fdd203f8e56a1f1cd7872cd66cc0ee
F test/fts4merge2.test 5faa558d1b672f82b847d2a337465fa745e46891
F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7
F test/fts4unicode.test c812e9cf843e26ba633f58b36a2629f878af20fd
F test/func.test 9809b7622d721904a8cc33c1ffb87f46d506ed01
F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f
F test/func3.test 001021e5b88bd02a3b365a5c5fd8f6f49d39744a
@ -648,7 +654,7 @@ F test/pageropt.test 9191867ed19a2b3db6c42d1b36b6fbc657cd1ab0
F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0
F test/pcache.test 065aa286e722ab24f2e51792c1f093bf60656b16
F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025
F test/permutations.test 684feefb2c75ecdd2f976e51c673de9e1ca2f93d
F test/permutations.test 7f1f9e22593cf13d548d3cce7f7eddc72872ae2f
F test/pragma.test c51c148defe32bf4a419a522f95d26838d5cf677
F test/pragma2.test 3a55f82b954242c642f8342b17dffc8b47472947
F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552
@ -696,7 +702,7 @@ F test/selectB.test 954e4e49cf1f896d61794e440669e03a27ceea25
F test/selectC.test 871fb55d884d3de5943c4057ebd22c2459e71977
F test/server1.test 46803bd3fe8b99b30dbc5ff38ffc756f5c13a118
F test/session.test c1a17c11ef7d01c24fe2b9f7871190d949a8e718
F test/shared.test 34945a516532b11182c3eb26e31247eee3c9ae48
F test/shared.test 64fe647f17b2de0622437829a9e9823c20439fce
F test/shared2.test 03eb4a8d372e290107d34b6ce1809919a698e879
F test/shared3.test ebf77f023f4bdaa8f74f65822b559e86ce5c6257
F test/shared4.test 72d90821e8d2fc918a08f16d32880868d8ee8e9d
@ -985,7 +991,7 @@ F tool/mkkeywordhash.c bb52064aa614e1426445e4b2b9b00eeecd23cc79
F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e
F tool/mkspeedsql.tcl a1a334d288f7adfe6e996f2e712becf076745c97
F tool/mksqlite3c-noext.tcl 105023aa86f696a74b1d6a4929d1e1c3baf9471c
F tool/mksqlite3c.tcl 9fbac513cd9d5ac95ad55630f49bb16c5347ab75
F tool/mksqlite3c.tcl f289ba51f74f45c71a80c13e6c74a6dd92763253
F tool/mksqlite3h.tcl 78013ad79a5e492e5f764f3c7a8ef834255061f8
F tool/mksqlite3internalh.tcl 7b43894e21bcb1bb39e11547ce7e38a063357e87
F tool/offsets.c fe4262fdfa378e8f5499a42136d17bf3b98f6091
@ -1011,7 +1017,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
F tool/warnings-clang.sh a8a0a3babda96dfb1ff51adda3cbbf3dfb7266c2
F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381
P be71d2f6678c5dd8a165a67ef6d3f64678227260 6d326d44fd1d626aae0e8456e5fa2049f1ce0789
R 8ca88d845e5923388befe35b00d807d4
P d07b7b67d1b3bf65cfe8d96d45a7f1d387bea7ce bcc72d413e8db5fe8b32147ac22d406e2cd6bb60
R b36c04fadd1ffa2ea62f85f8b70ce2d4
U drh
Z aad7c1b5110d40f9689e7d5474bdb3b3
Z 8ba551aeea4cedaaff2626e9d4283953

View File

@ -1 +1 @@
d07b7b67d1b3bf65cfe8d96d45a7f1d387bea7ce
df817e70afc3f41e680d8f84dfa5772d5b3ae4d9

View File

@ -1721,7 +1721,8 @@ int sqlite3BtreeOpen(
const int isMemdb = 0;
#else
const int isMemdb = (zFilename && strcmp(zFilename, ":memory:")==0)
|| (isTempDb && sqlite3TempInMemory(db));
|| (isTempDb && sqlite3TempInMemory(db))
|| (vfsFlags & SQLITE_OPEN_MEMORY)!=0;
#endif
assert( db!=0 );
@ -1757,7 +1758,7 @@ int sqlite3BtreeOpen(
** If this Btree is a candidate for shared cache, try to find an
** existing BtShared object that we can share with
*/
if( isMemdb==0 && isTempDb==0 ){
if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){
if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){
int nFullPathname = pVfs->mxPathname+1;
char *zFullPathname = sqlite3Malloc(nFullPathname);
@ -1767,11 +1768,16 @@ int sqlite3BtreeOpen(
sqlite3_free(p);
return SQLITE_NOMEM;
}
rc = sqlite3OsFullPathname(pVfs, zFilename, nFullPathname, zFullPathname);
if( rc ){
sqlite3_free(zFullPathname);
sqlite3_free(p);
return rc;
if( isMemdb ){
memcpy(zFullPathname, zFilename, sqlite3Strlen30(zFilename)+1);
}else{
rc = sqlite3OsFullPathname(pVfs, zFilename,
nFullPathname, zFullPathname);
if( rc ){
sqlite3_free(zFullPathname);
sqlite3_free(p);
return rc;
}
}
#if SQLITE_THREADSAFE
mutexOpen = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_OPEN);
@ -1781,7 +1787,7 @@ int sqlite3BtreeOpen(
#endif
for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){
assert( pBt->nRef>0 );
if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager))
if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager, 0))
&& sqlite3PagerVfs(pBt->pPager)==pVfs ){
int iDb;
for(iDb=db->nDb-1; iDb>=0; iDb--){
@ -8046,14 +8052,15 @@ char *sqlite3BtreeIntegrityCheck(
#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
/*
** Return the full pathname of the underlying database file.
** Return the full pathname of the underlying database file. Return
** an empty string if the database is in-memory or a TEMP database.
**
** The pager filename is invariant as long as the pager is
** open so it is safe to access without the BtShared mutex.
*/
const char *sqlite3BtreeGetFilename(Btree *p){
assert( p->pBt->pPager!=0 );
return sqlite3PagerFilename(p->pBt->pPager);
return sqlite3PagerFilename(p->pBt->pPager, 1);
}
/*

View File

@ -2033,10 +2033,14 @@ int sqlite3ParseUri(
{ "ro", SQLITE_OPEN_READONLY },
{ "rw", SQLITE_OPEN_READWRITE },
{ "rwc", SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE },
{ "memory",
SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE
| SQLITE_OPEN_MEMORY },
{ 0, 0 }
};
mask = SQLITE_OPEN_READONLY|SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE;
mask = SQLITE_OPEN_READONLY | SQLITE_OPEN_READWRITE
| SQLITE_OPEN_CREATE | SQLITE_OPEN_MEMORY;
aMode = aOpenMode;
limit = mask & flags;
zModeType = "access";
@ -2057,7 +2061,7 @@ int sqlite3ParseUri(
rc = SQLITE_ERROR;
goto parse_uri_out;
}
if( mode>limit ){
if( (mode & ~SQLITE_OPEN_MEMORY)>limit ){
*pzErrMsg = sqlite3_mprintf("%s mode not allowed: %s",
zModeType, zVal);
rc = SQLITE_PERM;
@ -2076,6 +2080,7 @@ int sqlite3ParseUri(
memcpy(zFile, zUri, nUri);
zFile[nUri] = '\0';
zFile[nUri+1] = '\0';
flags &= ~SQLITE_OPEN_URI;
}
*ppVfs = sqlite3_vfs_find(zVfs);

View File

@ -4360,7 +4360,12 @@ int sqlite3PagerOpen(
#ifndef SQLITE_OMIT_MEMORYDB
if( flags & PAGER_MEMORY ){
memDb = 1;
zFilename = 0;
if( zFilename && zFilename[0] ){
zPathname = sqlite3DbStrDup(0, zFilename);
if( zPathname==0 ) return SQLITE_NOMEM;
nPathname = sqlite3Strlen30(zPathname);
zFilename = 0;
}
}
#endif
@ -6296,9 +6301,16 @@ int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){
/*
** Return the full pathname of the database file.
**
** Except, if the pager is in-memory only, then return an empty string if
** nullIfMemDb is true. This routine is called with nullIfMemDb==1 when
** used to report the filename to the user, for compatibility with legacy
** behavior. But when the Btree needs to know the filename for matching to
** shared cache, it uses nullIfMemDb==0 so that in-memory databases can
** participate in shared-cache.
*/
const char *sqlite3PagerFilename(Pager *pPager){
return pPager->zFilename;
const char *sqlite3PagerFilename(Pager *pPager, int nullIfMemDb){
return (nullIfMemDb && pPager->memDb) ? "" : pPager->zFilename;
}
/*

View File

@ -151,7 +151,7 @@ int sqlite3PagerCloseWal(Pager *pPager);
u8 sqlite3PagerIsreadonly(Pager*);
int sqlite3PagerRefcount(Pager*);
int sqlite3PagerMemUsed(Pager*);
const char *sqlite3PagerFilename(Pager*);
const char *sqlite3PagerFilename(Pager*, int);
const sqlite3_vfs *sqlite3PagerVfs(Pager*);
sqlite3_file *sqlite3PagerFile(Pager*);
const char *sqlite3PagerJournalname(Pager*);

View File

@ -473,6 +473,7 @@ int sqlite3_exec(
#define SQLITE_OPEN_EXCLUSIVE 0x00000010 /* VFS only */
#define SQLITE_OPEN_AUTOPROXY 0x00000020 /* VFS only */
#define SQLITE_OPEN_URI 0x00000040 /* Ok for sqlite3_open_v2() */
#define SQLITE_OPEN_MEMORY 0x00000080 /* Ok for sqlite3_open_v2() */
#define SQLITE_OPEN_MAIN_DB 0x00000100 /* VFS only */
#define SQLITE_OPEN_TEMP_DB 0x00000200 /* VFS only */
#define SQLITE_OPEN_TRANSIENT_DB 0x00000400 /* VFS only */
@ -2570,18 +2571,20 @@ void sqlite3_progress_handler(sqlite3*, int, int(*)(void*), void*);
** present, then the VFS specified by the option takes precedence over
** the value passed as the fourth parameter to sqlite3_open_v2().
**
** <li> <b>mode</b>: ^(The mode parameter may be set to either "ro", "rw" or
** "rwc". Attempting to set it to any other value is an error)^.
** <li> <b>mode</b>: ^(The mode parameter may be set to either "ro", "rw",
** "rwc", or "memory". Attempting to set it to any other value is
** an error)^.
** ^If "ro" is specified, then the database is opened for read-only
** access, just as if the [SQLITE_OPEN_READONLY] flag had been set in the
** third argument to sqlite3_prepare_v2(). ^If the mode option is set to
** "rw", then the database is opened for read-write (but not create)
** access, as if SQLITE_OPEN_READWRITE (but not SQLITE_OPEN_CREATE) had
** been set. ^Value "rwc" is equivalent to setting both
** SQLITE_OPEN_READWRITE and SQLITE_OPEN_CREATE. ^If sqlite3_open_v2() is
** used, it is an error to specify a value for the mode parameter that is
** less restrictive than that specified by the flags passed as the third
** parameter.
** SQLITE_OPEN_READWRITE and SQLITE_OPEN_CREATE. ^If the mode option is
** set to "memory" then a pure [in-memory database] that never reads or
** or writes from disk is used. ^It is an error to specify a value for
** the mode parameter that is less restrictive than that specified by
** the flags passed in the third parameter to sqlite3_open_v2().
**
** <li> <b>cache</b>: ^The cache parameter may be set to either "shared" or
** "private". ^Setting it to "shared" is equivalent to setting the
@ -4622,7 +4625,6 @@ void *sqlite3_update_hook(
/*
** CAPI3REF: Enable Or Disable Shared Pager Cache
** KEYWORDS: {shared cache}
**
** ^(This routine enables or disables the sharing of the database cache
** and schema data structures between [database connection | connections]

View File

@ -33,7 +33,7 @@ int sqlite3BtreeSharedCacheReport(
BtShared *pBt;
Tcl_Obj *pRet = Tcl_NewObj();
for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){
const char *zFile = sqlite3PagerFilename(pBt->pPager);
const char *zFile = sqlite3PagerFilename(pBt->pPager, 1);
Tcl_ListObjAppendElement(interp, pRet, Tcl_NewStringObj(zFile, -1));
Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(pBt->nRef));
}

View File

@ -313,6 +313,12 @@ static void set_options(Tcl_Interp *interp){
Tcl_SetVar2(interp, "sqlite_options", "fts3", "0", TCL_GLOBAL_ONLY);
#endif
#if !defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_DISABLE_FTS3_UNICODE)
Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "0", TCL_GLOBAL_ONLY);
#else
Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "1", TCL_GLOBAL_ONLY);
#endif
#ifdef SQLITE_OMIT_GET_TABLE
Tcl_SetVar2(interp, "sqlite_options", "gettable", "0", TCL_GLOBAL_ONLY);
#else

View File

@ -5563,7 +5563,7 @@ case OP_JournalMode: { /* out2-prerelease */
if( !sqlite3PagerOkToChangeJournalMode(pPager) ) eNew = eOld;
#ifndef SQLITE_OMIT_WAL
zFilename = sqlite3PagerFilename(pPager);
zFilename = sqlite3PagerFilename(pPager, 1);
/* Do not allow a transition to journal_mode=WAL for a database
** in temporary storage or if the VFS does not support shared memory

View File

@ -254,6 +254,8 @@ foreach {tn uri error} "
4 {file:test.db?mode=Ro} {no such access mode: Ro}
5 {file:test.db?mode=Rw} {no such access mode: Rw}
6 {file:test.db?mode=Rwc} {no such access mode: Rwc}
7 {file:test.db?mode=memory} {not an error}
8 {file:test.db?mode=MEMORY} {no such access mode: MEMORY}
" {
do_test 7.$tn { open_uri_error $uri } $error
}

View File

@ -131,4 +131,28 @@ do_faultsim_test 4.1 -prep {
faultsim_test_result {0 {}}
}
ifcapable fts3_unicode {
do_test 5.0 {
faultsim_delete_and_reopen
execsql {
CREATE VIRTUAL TABLE ft USING fts4(a, tokenize=unicode61);
}
faultsim_save_and_close
} {}
do_faultsim_test 5.1 -faults oom* -prep {
faultsim_restore_and_reopen
db eval {SELECT * FROM sqlite_master}
} -body {
execsql { INSERT INTO ft VALUES('the quick brown fox'); }
execsql { INSERT INTO ft VALUES(
'theunusuallylongtokenthatjustdragsonandonandonandthendragsonsomemoreeof'
);
}
execsql { SELECT docid FROM ft WHERE ft MATCH 'th*' }
} -test {
faultsim_test_result {0 {1 2}}
}
}
finish_test

227
test/fts4unicode.test Normal file
View File

@ -0,0 +1,227 @@
# 2012 May 25
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# The tests in this file focus on testing the "unicode" FTS tokenizer.
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
ifcapable !fts3_unicode { finish_test ; return }
set ::testprefix fts4unicode
proc do_unicode_token_test {tn input res} {
set input [string map {' ''} $input]
uplevel [list do_execsql_test $tn "
SELECT fts3_tokenizer_test('unicode61', '$input');
" [list [list {*}$res]]]
}
do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}
do_unicode_token_test 1.1 {Ä Ö Ü} {0 ä Ä 1 ö Ö 2 ü Ü}
do_unicode_token_test 1.2 {xÄx xÖx xÜx} {0 xäx xÄx 1 xöx xÖx 2 xüx xÜx}
# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "0 ß \u1E9E"
do_unicode_token_test 1.5 "\u1E9E" "0 \uDF \u1E9E"
do_unicode_token_test 1.6 "The quick brown fox" {
0 the The 1 quick quick 2 brown brown 3 fox fox
}
do_unicode_token_test 1.7 "The\u00bfquick\u224ebrown\u2263fox" {
0 the The 1 quick quick 2 brown brown 3 fox fox
}
#-------------------------------------------------------------------------
#
set docs [list {
Enhance the INSERT syntax to allow multiple rows to be inserted via the
VALUES clause.
} {
Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
} {
Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
} {
Added the sqlite3_db_readonly() interface.
} {
Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
ability to add new PRAGMA statements or to override built-in PRAGMAs.
} {
Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
the same row that contains the maximum x value.
} {
Added support for the FTS4 languageid option.
} {
Documented support for the FTS4 content option. This feature has actually
been in the code since version 3.7.9 but is only now considered to be
officially supported.
} {
Pending statements no longer block ROLLBACK. Instead, the pending statement
will return SQLITE_ABORT upon next access after the ROLLBACK.
} {
Improvements to the handling of CSV inputs in the command-line shell
} {
Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
incorrectly converted into an INNER JOIN if the WHERE clause indexable terms
connected by OR.
}]
set map(a) [list "\u00C4" "\u00E4"] ; # LATIN LETTER A WITH DIAERESIS
set map(e) [list "\u00CB" "\u00EB"] ; # LATIN LETTER E WITH DIAERESIS
set map(i) [list "\u00CF" "\u00EF"] ; # LATIN LETTER I WITH DIAERESIS
set map(o) [list "\u00D6" "\u00F6"] ; # LATIN LETTER O WITH DIAERESIS
set map(u) [list "\u00DC" "\u00FC"] ; # LATIN LETTER U WITH DIAERESIS
set map(y) [list "\u0178" "\u00FF"] ; # LATIN LETTER Y WITH DIAERESIS
set map(h) [list "\u1E26" "\u1E27"] ; # LATIN LETTER H WITH DIAERESIS
set map(w) [list "\u1E84" "\u1E85"] ; # LATIN LETTER W WITH DIAERESIS
set map(x) [list "\u1E8C" "\u1E8D"] ; # LATIN LETTER X WITH DIAERESIS
foreach k [array names map] {
lappend mappings [string toupper $k] [lindex $map($k) 0]
lappend mappings $k [lindex $map($k) 1]
}
proc mapdoc {doc} {
set doc [regsub -all {[[:space:]]+} $doc " "]
string map $::mappings [string trim $doc]
}
do_test 2.0 {
execsql { CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61, x); }
foreach doc $docs {
set d [mapdoc $doc]
execsql { INSERT INTO t2 VALUES($d) }
}
} {}
do_test 2.1 {
set q [mapdoc "row"]
execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
} [list [mapdoc {
Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
the same row that contains the maximum x value.
}]]
foreach {tn query snippet} {
2 "row" {
...returns the value of y on the same [row] that contains
the maximum x value.
}
3 "ROW" {
...returns the value of y on the same [row] that contains
the maximum x value.
}
4 "rollback" {
...[ROLLBACK]. Instead, the pending statement
will return SQLITE_ABORT upon next access after the [ROLLBACK].
}
5 "rOllback" {
...[ROLLBACK]. Instead, the pending statement
will return SQLITE_ABORT upon next access after the [ROLLBACK].
}
6 "lang*" {
Added support for the FTS4 [languageid] option.
}
} {
do_test 2.$tn {
set q [mapdoc $query]
execsql { SELECT snippet(t2, '[', ']', '...') FROM t2 WHERE t2 MATCH $q }
} [list [mapdoc $snippet]]
}
#-------------------------------------------------------------------------
# Make sure the unicode61 tokenizer does not crash if it is passed a
# NULL pointer.
reset_db
do_execsql_test 3.1 {
CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61, x, y);
INSERT INTO t1 VALUES(NULL, 'a b c');
}
do_execsql_test 3.2 {
SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH 'b'
} {{a [b] c}}
do_execsql_test 3.3 {
BEGIN;
DELETE FROM t1;
INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 VALUES('a b c', NULL);
INSERT INTO t1 VALUES('a x c', NULL);
COMMIT;
}
do_execsql_test 3.4 {
SELECT * FROM t1 WHERE t1 MATCH 'a b';
} {{a b c} {}}
#-------------------------------------------------------------------------
#
reset_db
do_test 4.1 {
set a "abc\uFFFEdef"
set b "abc\uD800def"
set c "\uFFFEdef"
set d "\uD800def"
execsql {
CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61, x);
INSERT INTO t1 VALUES($a);
INSERT INTO t1 VALUES($b);
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
}
} {}
do_test 4.2 {
set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
execsql {
INSERT INTO t1 VALUES($a);
INSERT INTO t1 VALUES($b);
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
}
} {}
do_test 4.3 {
set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]
set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]
set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]
set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]
execsql {
INSERT INTO t1 VALUES($a);
INSERT INTO t1 VALUES($b);
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
}
} {}
finish_test

View File

@ -188,7 +188,7 @@ test_suite "fts3" -prefix "" -description {
fts4aa.test fts4content.test
fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test
fts4check.test
fts4check.test fts4unicode.test
}

View File

@ -1056,7 +1056,96 @@ do_test shared-$av-15.2 {
db close
db2 close
}
# Shared cache on a :memory: database. This only works for URI filenames.
#
do_test shared-$av-16.1 {
sqlite3 db1 file::memory: -uri 1
sqlite3 db2 file::memory: -uri 1
db1 eval {
CREATE TABLE t1(x); INSERT INTO t1 VALUES(1),(2),(3);
}
db2 eval {
SELECT x FROM t1 ORDER BY x;
}
} {1 2 3}
do_test shared-$av-16.2 {
db2 eval {
INSERT INTO t1 VALUES(99);
DELETE FROM t1 WHERE x=2;
}
db1 eval {
SELECT x FROM t1 ORDER BY x;
}
} {1 3 99}
# Verify that there is no cache sharing ordinary (non-URI) filenames are
# used.
#
do_test shared-$av-16.3 {
db1 close
db2 close
sqlite3 db1 :memory:
sqlite3 db2 :memory:
db1 eval {
CREATE TABLE t1(x); INSERT INTO t1 VALUES(4),(5),(6);
}
catchsql {
SELECT * FROM t1;
} db2
} {1 {no such table: t1}}
# Shared cache on named memory databases.
#
do_test shared-$av-16.4 {
db1 close
db2 close
forcedelete test.db test.db-wal test.db-journal
sqlite3 db1 file:test.db?mode=memory -uri 1
sqlite3 db2 file:test.db?mode=memory -uri 1
db1 eval {
CREATE TABLE t1(x); INSERT INTO t1 VALUES(1),(2),(3);
}
db2 eval {
SELECT x FROM t1 ORDER BY x;
}
} {1 2 3}
do_test shared-$av-16.5 {
db2 eval {
INSERT INTO t1 VALUES(99);
DELETE FROM t1 WHERE x=2;
}
db1 eval {
SELECT x FROM t1 ORDER BY x;
}
} {1 3 99}
do_test shared-$av-16.6 {
file exists test.db
} {0} ;# Verify that the database is in-memory
# Shared cache on named memory databases with different names.
#
do_test shared-$av-16.7 {
db1 close
db2 close
forcedelete test1.db test2.db
sqlite3 db1 file:test1.db?mode=memory -uri 1
sqlite3 db2 file:test2.db?mode=memory -uri 1
db1 eval {
CREATE TABLE t1(x); INSERT INTO t1 VALUES(1),(2),(3);
}
catchsql {
SELECT x FROM t1 ORDER BY x;
} db2
} {1 {no such table: t1}}
do_test shared-$av-16.8 {
file exists test1.db
} {0} ;# Verify that the database is in-memory
db1 close
db2 close
} ;# end of autovacuum on/off loop
sqlite3_enable_shared_cache $::enable_shared_cache
finish_test

View File

@ -316,6 +316,8 @@ foreach file {
fts3_tokenizer1.c
fts3_write.c
fts3_snippet.c
fts3_unicode.c
fts3_unicode2.c
rtree.c
icu.c