From a5d14fe7c5285cfac5c9a9aa90b7e61664b7be3c Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 4 May 2004 15:00:46 +0000 Subject: [PATCH] Added template for the utf.c file containing conversion routines. (CVS 1313) FossilOrigin-Name: 89b42c468f437003f74a1785370e75b2585fa9e2 --- manifest | 11 ++++---- manifest.uuid | 2 +- src/utf.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 6 deletions(-) create mode 100644 src/utf.c diff --git a/manifest b/manifest index b575a89a88..a17867b8c7 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Incremental\sbtree.c\schanges.\s(CVS\s1312) -D 2004-05-03T19:49:33 +C Added\stemplate\sfor\sthe\sutf.c\sfile\scontaining\sconversion\sroutines.\s(CVS\s1313) +D 2004-05-04T15:00:47 F Makefile.in ab7b0d5118e2da97bac66be8684a1034e3500f5a F Makefile.linux-gcc b86a99c493a5bfb402d1d9178dcdc4bd4b32f906 F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd @@ -59,6 +59,7 @@ F src/test4.c 6e3e31acfaf21d66420fc35fda5b17dc0000cc8d F src/tokenize.c 6676b946fd8825b67ab52140af4fdc57a70bda48 F src/trigger.c a9927b57c865b6f3df3fb5e40c9824d722660ded F src/update.c 4c50328ebc127852bde8e2950eb8933234802c21 +F src/utf.c 8d74ddbfffdc1f2e87bfc11b8c1e2a806313a715 F src/util.c b2287b07ddf55ef7aaa8888a9473123995a69f40 F src/vacuum.c a4e8464c9f6d60659c5343e9d62c742463227820 F src/vdbe.c 7c33f761fdc799633468766fb53eda4301daa6b3 @@ -188,7 +189,7 @@ F www/sqlite.tcl 3c83b08cf9f18aa2d69453ff441a36c40e431604 F www/tclsqlite.tcl b9271d44dcf147a93c98f8ecf28c927307abd6da F www/vdbe.tcl 9b9095d4495f37697fd1935d10e14c6015e80aa1 F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4 -P 0eee3b5cd400e9548437632ec1dfe625a3fca9cf -R 4e56ff31d4947da66d5a46115140d57d +P fdc629dbbf974024215969e0bd3def4597258812 +R 862d1dd5fd20c7467c3bdf6faef3043d U drh -Z 57bf0fd86b068e3d5bb631ec9f2832d0 +Z b159d386ddf509db1096e77212da2220 diff --git a/manifest.uuid b/manifest.uuid index ebafdbf789..e74662e46c 100644 --- a/manifest.uuid +++ 
b/manifest.uuid @@ -1 +1 @@ -fdc629dbbf974024215969e0bd3def4597258812 \ No newline at end of file +89b42c468f437003f74a1785370e75b2585fa9e2 \ No newline at end of file diff --git a/src/utf.c b/src/utf.c new file mode 100644 index 0000000000..6990553e0f --- /dev/null +++ b/src/utf.c @@ -0,0 +1,77 @@ +/* +** 2004 April 13 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains routines used to translate between UTF-8, +** UTF-16, UTF-16BE, and UTF-16LE. +** +** $Id: utf.c,v 1.1 2004/05/04 15:00:47 drh Exp $ +** +** Notes on UTF-8: +** +** Byte-0 Byte-1 Byte-2 Byte-3 Value +** 0xxxxxxx 00000000 00000000 0xxxxxxx +** 110yyyyy 10xxxxxx 00000000 00000yyy yyxxxxxx +** 1110zzzz 10yyyyyy 10xxxxxx 00000000 zzzzyyyy yyxxxxxx +** 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx 000uuuuu zzzzyyyy yyxxxxxx +** +** +** Notes on UTF-16: (with wwww+1==uuuuu) +** +** Word-0 Word-1 Value +** 110110wwwwxxxxxx 110111yyyyyyyyyy 000uuuuu xxxxxxyy yyyyyyyy +** xxxxxxxxyyyyyyyy 00000000 xxxxxxxx yyyyyyyy +** +** BOM or Byte Order Mark: +** 0xff 0xfe little-endian utf-16 follows +** 0xfe 0xff big-endian utf-16 follows +*/ + +/* +** Convert a string in UTF-16 native byte order (or with a Byte-order-mark or +** "BOM") into a UTF-8 string. The UTF-8 string is written into space +** obtained from sqlite3Malloc() and must be released by the calling function. +** +** The parameter N is the number of bytes in the UTF-16 string. If N is +** negative, the entire string up to the first \u0000 character is translated. +** +** The returned UTF-8 string is always \000 terminated. 
+*/ +unsigned char *sqlite3utf16to8(const void *pData, int N){ + unsigned char *in = (unsigned char *)pData; /* TODO: template stub - "in" is not used yet and the function ends without a return statement */ +} + +/* +** Convert a string in UTF-16 native byte order or with a BOM into a UTF-16LE +** string. The conversion occurs in-place. The output overwrites the +** input. N bytes are converted. If N is negative everything is converted +** up to the first \u0000 character. +** +** If the native byte order is little-endian and there is no BOM, then +** this routine is a no-op. If there is a BOM at the start of the string, +** it is removed. (sqlite3utf16to16be() is presumably the UTF-16BE counterpart.) +*/ +void sqlite3utf16to16le(void *pData, int N){ /* TODO: empty stub - no conversion is performed yet */ +} +void sqlite3utf16to16be(void *pData, int N){ /* TODO: empty stub - no conversion is performed yet */ +} + +/* +** Translation from UTF-16LE to UTF-16BE and back again is accomplished +** using the library function swab(). +*/ + +/* +** Translate UTF-8 to UTF-16BE or UTF-16LE. N is presumably the byte length of pIn (TODO confirm). +*/ +void *sqlite3utf8to16be(const unsigned char *pIn, int N){ /* TODO: empty stub - declared void* but ends without a return statement */ +} +void *sqlite3utf8to16le(const unsigned char *pIn, int N){ /* TODO: empty stub - declared void* but ends without a return statement */ +}