diff --git a/manifest b/manifest index 26a47b7b7d..d81ead2f6c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Added\ssqlite3OsLock\sfor\swin32.\s\sAssertion\sfault\sin\sattach.test.\s(CVS\s1533) -D 2004-06-06T00:42:26 +C Enhance\suser\sfunction\sAPI\sto\ssupport\sassociation\sof\smeta-data\swith\sconstant\narguments\sand\sthe\sspecification\sof\stext\sencoding\spreference.\sThe\sLIKE\noperator\stakes\sadvantage\sof\sboth.\s(CVS\s1534) +D 2004-06-06T09:44:04 F Makefile.in ab7b0d5118e2da97bac66be8684a1034e3500f5a F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906 F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd @@ -31,13 +31,13 @@ F src/build.c e12e602f06e37a0fbcb49af17cba68ad85e101b6 F src/date.c 8e6fa3173386fb29fdef012ee08a853c1e9908b2 F src/delete.c b30f08250c9ed53a25a13c7c04599c1e8753992d F src/encode.c a876af473d1d636faa3dca51c7571f2e007eea37 -F src/expr.c c55461f27a29c593d5b670a77583e44a0d80af0e -F src/func.c 3b87e2e8b9aaa3a6d36b2c9616e7f404be38a667 +F src/expr.c f05a5594679fe5297d41578d2e0ffff827772906 +F src/func.c e873366ba5ad97926336fd05c3af22ca54ad3e74 F src/hash.c 440c2f8cb373ee1b4e13a0988489c7cd95d55b6f F src/hash.h 762d95f1e567664d1eafc1687de755626be962fb F src/insert.c 4268d9e3959cc845ea243fb4ec7507269404dad9 F src/legacy.c ad23746f15f67e34577621b1875f639c94839e1f -F src/main.c 4e8e5c96e5a9460e71b97c83cb30cb3ad44db259 +F src/main.c 2e4d37f0f8f31694b79823a530ea4b52c8e1a7fd F src/md5.c 4302e84ae516c616bb079c4e6d038c0addb33481 F src/os.h 4e480eb92737ebcdd1e1136bdbf5cd22223bd1b4 F src/os_common.h 744286a27de55c52f1b18921e8d17abbf7fafc0f @@ -55,8 +55,8 @@ F src/printf.c ef750e8e2398ca7e8b58be991075f08c6a7f0e53 F src/random.c eff68e3f257e05e81eae6c4d50a51eb88beb4ff3 F src/select.c 02d711160100ef3a730060f7cfb5bc85fde06d72 F src/shell.c 79af86d39b2149c7f16219fcbe636e7c2da9df8e -F src/sqlite.h.in dae6a7b4f0ff7310b2ae3f0952feaa9caae94f12 -F src/sqliteInt.h 99f2b4ff4ed28123890a0c71359fec3d2c5901c9 +F src/sqlite.h.in 
4705697dd7213f322d59ffc69b48b8ac32b23373 +F src/sqliteInt.h 67b1265bb461899409de0d56a2a0c335f119ff36 F src/table.c af14284fa36c8d41f6829e3f2819dce07d3e2de2 F src/tclsqlite.c 3db6b868bd844bfb71720c8e573f4c9b0d536bd5 F src/test1.c 4a3cc1b628a29f24c0a43227a035d0f2a96eb634 @@ -67,14 +67,14 @@ F src/test5.c 44178ce85c3afd2004ab4eeb5cfd7487116ce366 F src/tokenize.c 183c5d7da11affab5d70d903d33409c8c0ce6c5b F src/trigger.c 04b2c310d0d056b213609cab6df5fff03d5eaf88 F src/update.c 259f06e7b22c684b2d3dda54a18185892d6e9573 -F src/utf.c c8be20ecdcb10659e23c43e35d835460e964d248 -F src/util.c d3d2f62ec94160db3cb2b092267405ba99122152 +F src/utf.c 0e83deb064da62c202c1765e6194e938ca16d20f +F src/util.c 026035fcb4d34cce0b541c4b8b0b058d93cb1da6 F src/vacuum.c b921eb778842592e1fb48a9d4cef7e861103878f -F src/vdbe.c 7f270f9a882bd51f7156cb87e24c5805b192be45 +F src/vdbe.c 392c6b02c525ea12dff403ba4ceb42b0afcb42f5 F src/vdbe.h 46f74444a213129bc4b5ce40124dd8ed613b0cde F src/vdbeInt.h ab592f23ed5a1913f9a506bd7b76c5e39377942a -F src/vdbeapi.c b3d8e559eb4a6cd6b49db5b2650426a54324adc5 -F src/vdbeaux.c 185f5ad1269d92684565be3a9bdb330bbda4f597 +F src/vdbeapi.c 4ac95766b0515538037a7aec172ed26142f97cf9 +F src/vdbeaux.c c47fd5433a965b5f06de2498a56401861ce5ecbe F src/vdbemem.c 5d029d83bc60eaf9c45837fcbc0b03348ec95d7a F src/where.c 444a7c3a8b1eb7bba072e489af628555d21d92a4 F test/all.test 569a92a8ee88f5300c057cc4a8f50fbbc69a3242 @@ -215,7 +215,7 @@ F www/support.tcl 1801397edd271cc39a2aadd54e701184b5181248 F www/tclsqlite.tcl 19191cf2a1010eaeff74c51d83fd5f5a4d899075 F www/vdbe.tcl 59288db1ac5c0616296b26dce071c36cb611dfe9 F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4 -P c2899b437366d879258ab4f6ae47868441010eca -R 2034d14c2145fcc5ef775d02c0479e42 -U drh -Z 2f61e1823db0bf2026ec8a2b5fdc4a80 +P 9e6cd9ec75f726ef85e60f593aaa895791315071 +R f098c036151f30238f65c2c50a0dd90e +U danielk1977 +Z 5b48605b31bccaed46d70ecdd5c38ae2 diff --git a/manifest.uuid b/manifest.uuid index b74ebe0a82..5ebff6bb9d 
100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9e6cd9ec75f726ef85e60f593aaa895791315071 \ No newline at end of file +92337d8f79b9754cd61c73e7db2e792a1f482f50 \ No newline at end of file diff --git a/src/expr.c b/src/expr.c index b3ba9f47f2..dd2a8cef34 100644 --- a/src/expr.c +++ b/src/expr.c @@ -12,7 +12,7 @@ ** This file contains routines used for analyzing expressions and ** for generating VDBE code that evaluates expressions in SQLite. ** -** $Id: expr.c,v 1.134 2004/06/05 10:22:17 danielk1977 Exp $ +** $Id: expr.c,v 1.135 2004/06/06 09:44:04 danielk1977 Exp $ */ #include "sqliteInt.h" #include @@ -968,11 +968,12 @@ int sqlite3ExprCheck(Parse *pParse, Expr *pExpr, int allowAgg, int *pIsAgg){ int nId; /* Number of characters in function name */ const char *zId; /* The function name. */ FuncDef *pDef; + int iPrefEnc = (pParse->db->enc==TEXT_Utf8)?0:1; getFunctionName(pExpr, &zId, &nId); - pDef = sqlite3FindFunction(pParse->db, zId, nId, n, 0); + pDef = sqlite3FindFunction(pParse->db, zId, nId, n, iPrefEnc, 0); if( pDef==0 ){ - pDef = sqlite3FindFunction(pParse->db, zId, nId, -1, 0); + pDef = sqlite3FindFunction(pParse->db, zId, nId, -1, iPrefEnc, 0); if( pDef==0 ){ no_such_func = 1; }else{ @@ -1233,12 +1234,15 @@ void sqlite3ExprCode(Parse *pParse, Expr *pExpr){ const char *zId; int p2 = 0; int i; + int iPrefEnc = (pParse->db->enc==TEXT_Utf8)?0:1; getFunctionName(pExpr, &zId, &nId); - pDef = sqlite3FindFunction(pParse->db, zId, nId, nExpr, 0); + pDef = sqlite3FindFunction(pParse->db, zId, nId, nExpr, iPrefEnc, 0); assert( pDef!=0 ); nExpr = sqlite3ExprCodeExprList(pParse, pList); for(i=0; ia[i].pExpr) ){ + p2 |= (1<=pParse->nAgg ){ + int iPrefEnc = (pParse->db->enc==TEXT_Utf8)?0:1; i = appendAggInfo(pParse); if( i<0 ) return 1; pParse->aAgg[i].isAgg = 1; pParse->aAgg[i].pExpr = pExpr; pParse->aAgg[i].pFunc = sqlite3FindFunction(pParse->db, pExpr->token.z, pExpr->token.n, - pExpr->pList ? pExpr->pList->nExpr : 0, 0); + pExpr->pList ? 
pExpr->pList->nExpr : 0, iPrefEnc, 0); } pExpr->iAgg = i; break; @@ -1677,9 +1682,10 @@ int sqlite3ExprAnalyzeAggregates(Parse *pParse, Expr *pExpr){ } /* -** Locate a user function given a name and a number of arguments. -** Return a pointer to the FuncDef structure that defines that -** function, or return NULL if the function does not exist. +** Locate a user function given a name, a number of arguments and a flag +** indicating whether the function prefers UTF-16 over UTF-8. Return a +** pointer to the FuncDef structure that defines that function, or return +** NULL if the function does not exist. ** ** If the createFlag argument is true, then a new (blank) FuncDef ** structure is created and liked into the "db" structure if a @@ -1690,39 +1696,70 @@ int sqlite3ExprAnalyzeAggregates(Parse *pParse, Expr *pExpr){ ** If createFlag is false and nArg is -1, then the first valid ** function found is returned. A function is valid if either xFunc ** or xStep is non-zero. +** +** If createFlag is false, then a function with the required name and +** number of arguments may be returned even if the eTextRep flag does not +** match that requested. */ FuncDef *sqlite3FindFunction( sqlite *db, /* An open database */ const char *zName, /* Name of the function. Not null-terminated */ int nName, /* Number of characters in the name */ int nArg, /* Number of arguments. -1 means any number */ + int eTextRep, /* True to retrieve UTF-16 versions. */ int createFlag /* Create new entry if true and does not otherwise exist */ ){ - FuncDef *pFirst, *p, *pMaybe; - pFirst = p = (FuncDef*)sqlite3HashFind(&db->aFunc, zName, nName); - if( p && !createFlag && nArg<0 ){ - while( p && p->xFunc==0 && p->xStep==0 ){ p = p->pNext; } - return p; + FuncDef *p; /* Iterator variable */ + FuncDef *pFirst; /* First function with this name */ + FuncDef *pBest = 0; /* Best match found so far */ + int matchqual = 0; + + /* Normalize argument values to simplify comparisons below. 
*/ + if( eTextRep ) eTextRep = 1; + if( nArg<-1 ) nArg = -1; + + pFirst = (FuncDef*)sqlite3HashFind(&db->aFunc, zName, nName); + for(p=pFirst; p; p=p->pNext){ + if( 1 || p->xFunc || p->xStep ){ + if( p->nArg==nArg && p->iPrefEnc==eTextRep ){ + /* A perfect match. */ + pBest = p; + matchqual = 4; + break; + } + if( p->nArg==nArg ){ + /* Number of arguments matches, but not the text encoding */ + pBest = p; + matchqual = 3; + } + else if( (p->nArg<0) || (nArg<0) ){ + if( matchqual<2 && p->iPrefEnc==eTextRep ){ + /* Matched a varargs function with correct text encoding */ + pBest = p; + matchqual = 2; + } + if( matchqual<1 ){ + /* Matched a varargs function with incorrect text encoding */ + pBest = p; + matchqual = 1; + } + } + } } - pMaybe = 0; - while( p && p->nArg!=nArg ){ - if( p->nArg<0 && !createFlag && (p->xFunc || p->xStep) ) pMaybe = p; - p = p->pNext; + + if( createFlag && matchqual<4 && + (pBest = sqliteMalloc(sizeof(*pBest)+nName+1)) ){ + pBest->nArg = nArg; + pBest->pNext = pFirst; + pBest->zName = (char*)&pBest[1]; + memcpy(pBest->zName, zName, nName); + pBest->zName[nName] = 0; + sqlite3HashInsert(&db->aFunc, pBest->zName, nName, (void*)pBest); } - if( p && !createFlag && p->xFunc==0 && p->xStep==0 ){ - return 0; + + if( pBest && (pBest->xStep || pBest->xFunc || createFlag) ){ + return pBest; } - if( p==0 && pMaybe ){ - assert( createFlag==0 ); - return pMaybe; - } - if( p==0 && createFlag && (p = sqliteMalloc(sizeof(*p)+nName+1))!=0 ){ - p->nArg = nArg; - p->pNext = pFirst; - p->zName = (char*)&p[1]; - memcpy(p->zName, zName, nName); - p->zName[nName] = 0; - sqlite3HashInsert(&db->aFunc, p->zName, nName, (void*)p); - } - return p; + return 0; } + diff --git a/src/func.c b/src/func.c index 706bfcf750..e6cd5d0f70 100644 --- a/src/func.c +++ b/src/func.c @@ -16,7 +16,7 @@ ** sqliteRegisterBuildinFunctions() found at the bottom of the file. ** All other code has file scope. 
** -** $Id: func.c,v 1.62 2004/05/31 18:51:58 drh Exp $ +** $Id: func.c,v 1.63 2004/06/06 09:44:04 danielk1977 Exp $ */ #include #include @@ -292,24 +292,237 @@ static void last_statement_change_count( sqlite3_result_int(context, sqlite3_last_statement_changes(db)); } +/* +** A LIKE pattern compiles to an instance of the following structure. Refer +** to the comment for compileLike() function for details. +*/ +struct LikePattern { + int nState; + struct LikeState { + int val; /* Unicode codepoint or -1 for any char i.e. '_' */ + int failstate; /* State to jump to if next char is not val */ + } aState[0]; +}; +typedef struct LikePattern LikePattern; + +void deleteLike(void *pLike){ + sqliteFree(pLike); +} + + +/* #define TRACE_LIKE */ + +#if defined(TRACE_LIKE) && !defined(NDEBUG) +char *dumpLike(LikePattern *pLike){ + int i; + int k = 0; + char *zBuf = (char *)sqliteMalloc(pLike->nState*40); + + k += sprintf(&zBuf[k], "%d states - ", pLike->nState); + for(i=0; inState; i++){ + k += sprintf(&zBuf[k], " %d:(%d, %d)", i, pLike->aState[i].val, + pLike->aState[i].failstate); + } + return zBuf; +} +#endif + +/* +** This function compiles an SQL 'LIKE' pattern into a state machine, +** represented by a LikePattern structure. +** +** Each state of the state-machine has two attributes, 'val' and +** 'failstate'. The val attribute is either the value of a unicode +** codepoint, or -1, indicating a '_' wildcard (match any single +** character). The failstate is either the number of another state +** or -1, indicating jump to 'no match'. +** +** To see if a string matches a pattern the pattern is +** compiled to a state machine that is executed according to the algorithm +** below. The string is assumed to be terminated by a 'NUL' character +** (unicode codepoint 0). 
+** +** 1 S = 0 +** 2 DO +** 3 C = <Next character from input string> +** 4 IF( C matches <State S val> ) +** 5 S = S+1 +** 6 ELSE IF( S != <State S failstate> ) +** 7 S = <State S failstate> +** 8 <Rewind input string 1 character> +** 9 WHILE( (C != NUL) AND (S != FAILED) ) +** 10 +** 11 IF( S == <Number of states> ) +** 12 RETURN MATCH +** 13 ELSE +** 14 RETURN NO-MATCH +** +** In practice there is a small optimization to avoid the <Rewind> +** operation in line 8 of the description above. +** +** For example, the following pattern, 'X%ABabc%_Y' is compiled to +** the state machine below. +** +** State Val FailState +** ------------------------------- +** 0 120 (x) -1 (NO MATCH) +** 1 97 (a) 1 +** 2 98 (b) 1 +** 3 97 (a) 1 +** 4 98 (b) 2 +** 5 99 (c) 3 +** 6 -1 (_) 6 +** 7 121 (y) 7 +** 8 0 (NUL) 7 +** +** The algorithms implemented to compile and execute the state machine were +** first presented in "Fast pattern matching in strings", Knuth, Morris and +** Pratt, 1977. +** Returns NULL if the state array cannot be allocated. +*/ +LikePattern *compileLike(sqlite3_value *pPattern, u8 enc){ + LikePattern *pLike; + struct LikeState *aState; + int pc_state = -1; /* State number of previous '%' wild card */ + int n = 0; + int c; + + int offset = 0; + const char *zLike; + + if( enc==TEXT_Utf8 ){ + zLike = sqlite3_value_text(pPattern); + n = sqlite3_value_bytes(pPattern) + 1; + }else{ + zLike = sqlite3_value_text16(pPattern); + n = sqlite3_value_bytes16(pPattern)/2 + 1; + } + + pLike = (LikePattern *) + sqliteMalloc(sizeof(LikePattern)+n*sizeof(struct LikeState)); + if( !pLike ) return 0; aState = pLike->aState; /* bail out before touching a failed allocation */ + + n = 0; + do { + c = sqlite3ReadUniChar(zLike, &offset, &enc, 1); + if( c==95 ){ /* A '_' wildcard */ + aState[n].val = -1; + n++; + }else if( c==37 ){ /* A '%' wildcard */ + aState[n].failstate = n; + pc_state = n; + }else{ /* A regular character */ + aState[n].val = c; + + assert( pc_state<=n ); + if( pc_state<0 ){ + aState[n].failstate = -1; + }else if( pc_state==n ){ + aState[n].failstate = pc_state; + }else{ + int k = pLike->aState[n-1].failstate; + while( k>pc_state && aState[k+1].val!=-1 && aState[k+1].val!=c ){ + k = aState[k].failstate; + } + if( k!=pc_state && 
aState[k+1].val==c ){ + assert( k==pc_state ); + k++; + } + aState[n].failstate = k; + } + n++; + } + }while( c ); + pLike->nState = n; +#if defined(TRACE_LIKE) && !defined(NDEBUG) + { + char *zCompiled = dumpLike(pLike); + printf("Pattern=\"%s\" Compiled=\"%s\"\n", zPattern, zCompiled); + sqliteFree(zCompiled); + } +#endif + return pLike; +} + /* ** Implementation of the like() SQL function. This function implements ** the build-in LIKE operator. The first argument to the function is the -** string and the second argument is the pattern. So, the SQL statements: +** pattern and the second argument is the string. So, the SQL statements: ** ** A LIKE B ** -** is implemented as like(A,B). +** is implemented as like(B,A). +** +** If the pointer retrieved by via a call to sqlite3_user_data() is +** not NULL, then this function uses UTF-16. Otherwise UTF-8. */ static void likeFunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ - const unsigned char *zA = sqlite3_value_text(argv[0]); - const unsigned char *zB = sqlite3_value_text(argv[1]); - if( zA && zB ){ - sqlite3_result_int(context, sqlite3LikeCompare(zA, zB)); + int s; + int c; + int nc; + u8 enc; + int offset = 0; + const unsigned char *zString; + LikePattern *pLike = sqlite3_get_auxdata(context, 0); + + /* If either argument is NULL, the result is NULL */ + if( sqlite3_value_type(argv[1])==SQLITE_NULL || + sqlite3_value_type(argv[0])==SQLITE_NULL ){ + return; + } + + /* If the user-data pointer is NULL, use UTF-8. Otherwise UTF-16. */ + if( sqlite3_user_data(context) ){ + enc = TEXT_Utf16; + zString = (const unsigned char *)sqlite3_value_text16(argv[1]); + }else{ + enc = TEXT_Utf8; + zString = sqlite3_value_text(argv[1]); + } + + /* If the LIKE pattern has not been compiled, compile it now. 
*/ + if( !pLike ){ + pLike = compileLike(argv[0], enc); + if( !pLike ){ + sqlite3_result_error(context, "out of memory", -1); + return; + } + sqlite3_set_auxdata(context, 0, pLike, deleteLike); + } + + s = 0; + nc = 1; + do { + int val = pLike->aState[s].val; + if( nc ) c = sqlite3ReadUniChar(zString, &offset, &enc, 1); + +#if defined(TRACE_LIKE) && !defined(NDEBUG) + printf("State=%d:(%d, %d) Input=%d\n", + s, pLike->aState[s].val, + pLike->aState[s].failstate, c); +#endif + + if( val==-1 || val==c ){ + s++; + nc = 1; + }else{ + if( pLike->aState[s].failstate==s ){ + nc = 1; + }else{ + nc = 0; + s = pLike->aState[s].failstate; + } + } + }while( c && s>=0 ); + + if( s==pLike->nState ){ + sqlite3_result_int(context, 1); + }else{ + sqlite3_result_int(context, 0); } } @@ -642,39 +855,40 @@ void sqlite3RegisterBuiltinFunctions(sqlite *db){ char *zName; signed char nArg; u8 argType; /* 0: none. 1: db 2: (-1) */ + u8 eTextRep; /* 1: UTF-16. 0: UTF-8 */ void (*xFunc)(sqlite3_context*,int,sqlite3_value **); } aFuncs[] = { - { "min", -1, 0, minmaxFunc }, - { "min", 0, 0, 0 }, - { "max", -1, 2, minmaxFunc }, - { "max", 0, 2, 0 }, - { "typeof", 1, 0, typeofFunc }, - { "classof", 1, 0, typeofFunc }, /* FIX ME: hack */ - { "length", 1, 0, lengthFunc }, - { "substr", 3, 0, substrFunc }, - { "abs", 1, 0, absFunc }, - { "round", 1, 0, roundFunc }, - { "round", 2, 0, roundFunc }, - { "upper", 1, 0, upperFunc }, - { "lower", 1, 0, lowerFunc }, - { "coalesce", -1, 0, ifnullFunc }, - { "coalesce", 0, 0, 0 }, - { "coalesce", 1, 0, 0 }, - { "ifnull", 2, 0, ifnullFunc }, - { "random", -1, 0, randomFunc }, - { "like", 2, 0, likeFunc }, - { "glob", 2, 0, globFunc }, - { "nullif", 2, 0, nullifFunc }, - { "sqlite_version", 0, 0, versionFunc}, - { "quote", 1, 0, quoteFunc }, - { "last_insert_rowid", 0, 1, last_insert_rowid }, - { "change_count", 0, 1, change_count }, - { "last_statement_change_count", 0, 1, last_statement_change_count }, + { "min", -1, 0, 0, minmaxFunc }, + { "min", 0, 0, 0, 
0 }, + { "max", -1, 2, 0, minmaxFunc }, + { "max", 0, 2, 0, 0 }, + { "typeof", 1, 0, 0, typeofFunc }, + { "length", 1, 0, 0, lengthFunc }, + { "substr", 3, 0, 0, substrFunc }, + { "abs", 1, 0, 0, absFunc }, + { "round", 1, 0, 0, roundFunc }, + { "round", 2, 0, 0, roundFunc }, + { "upper", 1, 0, 0, upperFunc }, + { "lower", 1, 0, 0, lowerFunc }, + { "coalesce", -1, 0, 0, ifnullFunc }, + { "coalesce", 0, 0, 0, 0 }, + { "coalesce", 1, 0, 0, 0 }, + { "ifnull", 2, 0, 0, ifnullFunc }, + { "random", -1, 0, 0, randomFunc }, + { "like", 2, 0, 0, likeFunc }, /* UTF-8 */ + { "like", 2, 2, 1, likeFunc }, /* UTF-16 */ + { "glob", 2, 0, 0, globFunc }, + { "nullif", 2, 0, 0, nullifFunc }, + { "sqlite_version", 0, 0, 0, versionFunc}, + { "quote", 1, 0, 0, quoteFunc }, + { "last_insert_rowid", 0, 1, 0, last_insert_rowid }, + { "change_count", 0, 1, 0, change_count }, + { "last_statement_change_count", 0, 1, 0, last_statement_change_count }, #ifdef SQLITE_SOUNDEX - { "soundex", 1, 0, soundexFunc}, + { "soundex", 1, 0, 0, soundexFunc}, #endif #ifdef SQLITE_TEST - { "randstr", 2, 0, randStr }, + { "randstr", 2, 0, 0, randStr }, #endif }; static struct { diff --git a/src/main.c b/src/main.c index ce93f2eff2..c0dada0a64 100644 --- a/src/main.c +++ b/src/main.c @@ -14,7 +14,7 @@ ** other files are for internal use by SQLite and should not be ** accessed by users of the library. 
** -** $Id: main.c,v 1.204 2004/06/04 06:22:01 danielk1977 Exp $ +** $Id: main.c,v 1.205 2004/06/06 09:44:04 danielk1977 Exp $ */ #include "sqliteInt.h" #include "os.h" @@ -656,7 +656,7 @@ int sqlite3_create_function( return SQLITE_ERROR; } - p = sqlite3FindFunction(db, zFunctionName, nName, nArg, 1); + p = sqlite3FindFunction(db, zFunctionName, nName, nArg, eTextRep, 1); if( p==0 ) return 1; p->xFunc = xFunc; p->xStep = xStep; diff --git a/src/sqlite.h.in b/src/sqlite.h.in index e710ada5d1..0b6cd99dc1 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -12,7 +12,7 @@ ** This header file defines the interface that the SQLite library ** presents to client programs. ** -** @(#) $Id: sqlite.h.in,v 1.92 2004/06/05 10:22:18 danielk1977 Exp $ +** @(#) $Id: sqlite.h.in,v 1.93 2004/06/06 09:44:04 danielk1977 Exp $ */ #ifndef _SQLITE_H_ #define _SQLITE_H_ @@ -835,6 +835,13 @@ int sqlite3_reset(sqlite3_stmt *pStmt); ** aggregate takes. If this parameter is negative, then the function or ** aggregate may take any number of arguments. ** +** If the fourth parameter is non-zero, this indicates that the function is +** more likely to handle text in UTF-16 encoding than UTF-8. This does not +** change the behaviour of the programming interface. However, if two +** versions of the same function are registered, one with eTextRep non-zero +** and the other zero, SQLite invokes the version likely to minimize +** conversions between unicode encodings. +** ** The seventh, eighth and ninth parameters, xFunc, xStep and xFinal, are ** pointers to user implemented C functions that implement the user ** function or aggregate. A scalar function requires an implementation of diff --git a/src/sqliteInt.h b/src/sqliteInt.h index aa95275db5..8d9815f6c4 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -11,7 +11,7 @@ ************************************************************************* ** Internal interface definitions for SQLite. 
** -** @(#) $Id: sqliteInt.h,v 1.268 2004/06/04 06:22:02 danielk1977 Exp $ +** @(#) $Id: sqliteInt.h,v 1.269 2004/06/06 09:44:05 danielk1977 Exp $ */ #include "config.h" #include "sqlite3.h" @@ -1265,7 +1265,6 @@ void sqlite3UnlinkAndDeleteIndex(sqlite*,Index*); void sqlite3Vacuum(Parse*, Token*); int sqlite3RunVacuum(char**, sqlite*); int sqlite3GlobCompare(const unsigned char*,const unsigned char*); -int sqlite3LikeCompare(const unsigned char*,const unsigned char*); char *sqlite3TableNameFromToken(Token*); int sqlite3ExprCheck(Parse*, Expr*, int, int*); int sqlite3ExprType(Expr*); @@ -1297,7 +1296,7 @@ ExprList *sqlite3ExprListDup(ExprList*); SrcList *sqlite3SrcListDup(SrcList*); IdList *sqlite3IdListDup(IdList*); Select *sqlite3SelectDup(Select*); -FuncDef *sqlite3FindFunction(sqlite*,const char*,int,int,int); +FuncDef *sqlite3FindFunction(sqlite*,const char*,int,int,int,int); void sqlite3RegisterBuiltinFunctions(sqlite*); void sqlite3RegisterDateTimeFunctions(sqlite*); int sqlite3SafetyOn(sqlite*); @@ -1373,3 +1372,4 @@ u8 sqlite3UtfReadBom(const void *zData, int nData); void *sqlite3HexToBlob(const char *z); int sqlite3TwoPartName(Parse *, Token *, Token *, Token **); const char *sqlite3ErrStr(int); +int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold); diff --git a/src/utf.c b/src/utf.c index 65dd05e4a3..4da418b6ad 100644 --- a/src/utf.c +++ b/src/utf.c @@ -12,7 +12,7 @@ ** This file contains routines used to translate between UTF-8, ** UTF-16, UTF-16BE, and UTF-16LE. ** -** $Id: utf.c,v 1.16 2004/06/02 00:29:24 danielk1977 Exp $ +** $Id: utf.c,v 1.17 2004/06/06 09:44:05 danielk1977 Exp $ ** ** Notes on UTF-8: ** @@ -74,6 +74,138 @@ struct UtfString { */ #define READ_16(pZ,big_endian) (big_endian?BE16(pZ):LE16(pZ)) +/* +** The following macro, LOWERCASE(x), takes an integer representing a +** unicode code point. The value returned is the same code point folded to +** lower case, if applicable. 
SQLite currently understands the upper/lower +** case relationship between the 26 characters used in the English +** language only. +** +** This means that characters with umlauts etc. will not be folded +** correctly (unless they are encoded as composite characters, which would +** doubtless cause much trouble). +*/ +#define LOWERCASE(x) ((x)<91?(int)(UpperToLower[(x)]):(x)) /* fully parenthesised, no trailing ';', so it is usable inside any expression */ +static unsigned char UpperToLower[91] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103, + 104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121, + 122, +}; + +/* +** The first parameter, zStr, points at a unicode string. This routine +** reads a single character from the string and returns the codepoint value +** of the character read. +** +** The value of *pEnc is the string encoding. If *pEnc is TEXT_Utf16le or +** TEXT_Utf16be, and the first character read is a byte-order-mark, then +** the value of *pEnc is modified if necessary. In this case the next +** character is read and its code-point value returned. +** +** The value of *pOffset is the byte-offset in zStr from which to begin +** reading. It is incremented by the number of bytes read by this function. +** +** If the fourth parameter, fold, is non-zero, then codepoint values are +** folded to lower-case before being returned. See comments for macro +** LOWERCASE(x) for details. 
+*/ +int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold){ + int ret = 0; + + switch( *pEnc ){ + case TEXT_Utf8: { + struct Utf8TblRow { + u8 b1_mask; + u8 b1_masked_val; + u8 b1_value_mask; + int trailing_bytes; + }; + static const struct Utf8TblRow utf8tbl[] = { + { 0x80, 0x00, 0x7F, 0 }, + { 0xE0, 0xC0, 0x1F, 1 }, + { 0xF0, 0xE0, 0x0F, 2 }, + { 0xF8, 0xF0, 0x07, 3 }, /* lead byte 11110xxx carries three payload bits */ + { 0, 0, 0, 0} + }; + + u8 b1; /* First byte of the potentially multi-byte utf-8 character */ + int ii; + struct Utf8TblRow const *pRow; + + pRow = &(utf8tbl[0]); + + b1 = zStr[(*pOffset)++]; + while( pRow->b1_mask && (b1&pRow->b1_mask)!=pRow->b1_masked_val ){ + pRow++; + } + if( !pRow->b1_mask ){ + return (int)0xFFFD; + } + + ret = (u32)(b1&pRow->b1_value_mask); + for( ii=0; ii<pRow->trailing_bytes; ii++ ){ + u8 b = zStr[*pOffset]; /* peek only: a bad byte (possibly the NUL terminator) stays unread */ + if( (b&0xC0)!=0x80 ){ + return (int)0xFFFD; + } + ret = (ret<<6) + (u32)(b&0x3F); (*pOffset)++; + } + + break; + } + + case TEXT_Utf16le: + case TEXT_Utf16be: { + u32 code_point; /* the first code-point in the character */ + u32 code_point2; /* the second code-point in the character, if any */ + + code_point = READ_16(&zStr[*pOffset], (*pEnc==TEXT_Utf16be)); + *pOffset += 2; + + /* If this is a non-surrogate code-point, just cast it to an int and + ** this is the code-point value. + */ + if( code_point<0xD800 || code_point>0xDFFF ){ + ret = code_point; + break; + } + + /* If this is a trailing surrogate code-point, then the string is + ** malformed; return the replacement character. + */ + if( code_point>0xDBFF ){ + return (int)0xFFFD; + } + + /* The code-point just read is a leading surrogate code-point. If there + ** is not enough data left or the next code-point is not a trailing + ** surrogate, return the replacement character. 
+ */ + code_point2 = READ_16(&zStr[*pOffset], (*pEnc==TEXT_Utf16be)); + *pOffset += 2; + if( code_point2<0xDC00 || code_point2>0xDFFF ){ + return (int)0xFFFD; + } + + ret = ( + (((code_point&0x03C0)+0x0040)<<10) + /* uuuuu */ + ((code_point&0x003F)<<10) + /* xxxxxx */ + (code_point2&0x03FF) /* yy yyyyyyyy */ + ); break; /* must not fall through into default: assert(0) */ + } + default: + assert(0); + } + + if( fold ){ + return LOWERCASE(ret); + } + return ret; +} + /* ** Read the BOM from the start of *pStr, if one is present. Return zero ** for little-endian, non-zero for big-endian. If no BOM is present, return @@ -133,47 +265,8 @@ u8 sqlite3UtfReadBom(const void *zData, int nData){ ** strings, the unicode replacement character U+FFFD may be returned. */ static u32 readUtf8(UtfString *pStr){ - struct Utf8TblRow { - u8 b1_mask; - u8 b1_masked_val; - u8 b1_value_mask; - int trailing_bytes; - }; - static const struct Utf8TblRow utf8tbl[] = { - { 0x80, 0x00, 0x7F, 0 }, - { 0xE0, 0xC0, 0x1F, 1 }, - { 0xF0, 0xE0, 0x0F, 2 }, - { 0xF8, 0xF0, 0x0E, 3 }, - { 0, 0, 0, 0} - }; - - u8 b1; /* First byte of the potentially multi-byte utf-8 character */ - u32 ret = 0; /* Return value */ - int ii; - struct Utf8TblRow const *pRow; - - pRow = &(utf8tbl[0]); - - b1 = pStr->pZ[pStr->c]; - pStr->c++; - while( pRow->b1_mask && (b1&pRow->b1_mask)!=pRow->b1_masked_val ){ - pRow++; - } - if( !pRow->b1_mask ){ - return 0xFFFD; - } - - ret = (u32)(b1&pRow->b1_value_mask); - for( ii=0; ii<pRow->trailing_bytes; ii++ ){ - u8 b = pStr->pZ[pStr->c+ii]; - if( (b&0xC0)!=0x80 ){ - return 0xFFFD; - } - ret = (ret<<6) + (u32)(b&0x3F); - } - - pStr->c += pRow->trailing_bytes; - return ret; + u8 enc = TEXT_Utf8; + return sqlite3ReadUniChar(pStr->pZ, &pStr->c, &enc, 0); } /* diff --git a/src/util.c b/src/util.c index 00f20f6dc2..637782d38d 100644 --- a/src/util.c +++ b/src/util.c @@ -14,7 +14,7 @@ ** This file contains functions for allocating memory, comparing ** strings, and stuff like that. 
** -** $Id: util.c,v 1.96 2004/06/02 00:41:10 drh Exp $ +** $Id: util.c,v 1.97 2004/06/06 09:44:05 danielk1977 Exp $ */ #include "sqliteInt.h" #include @@ -1055,57 +1055,6 @@ sqlite3GlobCompare(const unsigned char *zPattern, const unsigned char *zString){ return *zString==0; } -/* -** Compare two UTF-8 strings for equality using the "LIKE" operator of -** SQL. The '%' character matches any sequence of 0 or more -** characters and '_' matches any single character. Case is -** not significant. -** -** This routine is just an adaptation of the sqlite3GlobCompare() -** routine above. -*/ -int -sqlite3LikeCompare(const unsigned char *zPattern, const unsigned char *zString){ - register int c; - int c2; - - while( (c = UpperToLower[*zPattern])!=0 ){ - switch( c ){ - case '%': { - while( (c=zPattern[1]) == '%' || c == '_' ){ - if( c=='_' ){ - if( *zString==0 ) return 0; - sqliteNextChar(zString); - } - zPattern++; - } - if( c==0 ) return 1; - c = UpperToLower[c]; - while( (c2=UpperToLower[*zString])!=0 ){ - while( c2 != 0 && c2 != c ){ c2 = UpperToLower[*++zString]; } - if( c2==0 ) return 0; - if( sqlite3LikeCompare(&zPattern[1],zString) ) return 1; - sqliteNextChar(zString); - } - return 0; - } - case '_': { - if( *zString==0 ) return 0; - sqliteNextChar(zString); - zPattern++; - break; - } - default: { - if( c != UpperToLower[*zString] ) return 0; - zPattern++; - zString++; - break; - } - } - } - return *zString==0; -} - /* ** Change the sqlite.magic from SQLITE_MAGIC_OPEN to SQLITE_MAGIC_BUSY. ** Return an error (non-zero) if the magic was not SQLITE_MAGIC_OPEN diff --git a/src/vdbe.c b/src/vdbe.c index 29ac27cff6..848d7cccf8 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -43,7 +43,7 @@ ** in this file for details. If in doubt, do not deviate from existing ** commenting and indentation practices when changing or adding code. 
** -** $Id: vdbe.c,v 1.358 2004/06/05 10:22:18 danielk1977 Exp $ +** $Id: vdbe.c,v 1.359 2004/06/06 09:44:05 danielk1977 Exp $ */ #include "sqliteInt.h" #include "os.h" @@ -1273,7 +1273,7 @@ case OP_Function: { */ if( ctx.pVdbeFunc ){ int mask = pOp->p2; - for(i=0; i<n; i++){ + for(i=0; i<ctx.pVdbeFunc->nAux; i++){ struct AuxData *pAux = &ctx.pVdbeFunc->apAux[i]; if( (i>31 || !(mask&(1<<i))) && pAux->pAux ){ pAux->xDelete(pAux->pAux); diff --git a/src/vdbeapi.c b/src/vdbeapi.c index 1e40fad751..ac7d976bbf 100644 --- a/src/vdbeapi.c +++ b/src/vdbeapi.c @@ -248,6 +248,7 @@ void sqlite3_set_auxdata( pCtx->pVdbeFunc = sqliteRealloc(pCtx->pVdbeFunc, nMalloc); if( !pCtx->pVdbeFunc ) return; pCtx->pVdbeFunc->nAux = iArg+1; + pCtx->pVdbeFunc->pFunc = pCtx->pFunc; } pAuxData = &pCtx->pVdbeFunc->apAux[iArg]; diff --git a/src/vdbeaux.c b/src/vdbeaux.c index d3a982ad63..e17a1cbcc1 100644 --- a/src/vdbeaux.c +++ b/src/vdbeaux.c @@ -1235,9 +1235,10 @@ void sqlite3VdbeDelete(Vdbe *p){ sqliteFree(pOp->p3); } if( pOp->p3type==P3_VDBEFUNC ){ + int j; VdbeFunc *pVdbeFunc = (VdbeFunc *)pOp->p3; - for(i=0; i<pVdbeFunc->nAux; i++){ - struct AuxData *pAuxData = &pVdbeFunc->apAux[i].pAux; + for(j=0; j<pVdbeFunc->nAux; j++){ + struct AuxData *pAuxData = &pVdbeFunc->apAux[j]; /* address of the AuxData slot itself, not of its pAux member */ if( pAuxData->pAux && pAuxData->xDelete ){ pAuxData->xDelete(pAuxData->pAux); }