diff --git a/ext/fts3/fts3_unicode2.c b/ext/fts3/fts3_unicode2.c index 3c24569026..fba688ff9c 100644 --- a/ext/fts3/fts3_unicode2.c +++ b/ext/fts3/fts3_unicode2.c @@ -101,28 +101,27 @@ int sqlite3FtsUnicodeIsalnum(int c){ 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803, 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07, 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02, - 0x037FFC02, 0x03E3FC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, - 0x03F4F802, 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, - 0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, - 0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, - 0x04040003, 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, - 0x040E7C01, 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, - 0x04280403, 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, - 0x04294009, 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, - 0x04420003, 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, - 0x04460003, 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, - 0x05BD442E, 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, - 0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, - 0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, - 0x075EA401, 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, - 0x07C2800F, 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, - 0x07C4C03C, 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, - 0x07C94002, 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, - 0x07CE8025, 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, - 0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, - 0x07D7EC46, 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, - 0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001, - 0x43FFF401, + 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802, + 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013, + 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06, + 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003, + 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01, + 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403, + 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009, + 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003, + 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003, + 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E, + 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046, + 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401, + 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401, + 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, + 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C, + 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002, + 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, + 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, + 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, + 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, + 0x380400F0, }; static const unsigned int aAscii[4] = { 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl index 0d58e8aa5c..2da17c51a5 100644 --- a/ext/fts3/unicode/mkunicode.tcl +++ b/ext/fts3/unicode/mkunicode.tcl @@ -239,7 +239,10 @@ proc an_load_unicodedata_text {zName} { foreach $lField $fields {} set iCode [expr "0x$code"] - set bAlnum [expr {[lsearch {L N} [string range $general_category 0 0]]>=0}] + set bAlnum [expr { + [lsearch {L N} [string range $general_category 0 0]] >= 0 + || $general_category=="Co" + }] if { !$bAlnum } { lappend lRet $iCode } } @@ -360,7 +363,7 @@ proc print_isalnum {zFunc lRange} { } assert( aEntry[0]=aEntry[iRes] ); - return (c >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); + return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); } return 1;} puts "\}" @@ -729,7 +732,7 @@ proc print_fileheader {} { */ }] puts "" - puts "#if !defined(SQLITE_DISABLE_FTS3_UNICODE)" + puts "#if defined(SQLITE_ENABLE_FTS4_UNICODE61)" puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)" puts "" puts "#include " @@ -805,4 +808,4 @@ if {$::generate_test_code} { } puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */" -puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */" +puts "#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */" diff --git a/manifest b/manifest index 4c2a451fd1..600d211c42 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\stypo\sin\sa\scollating\sfunction\sinside\sthe\se_reindex.test\sscript. -D 2013-06-03T20:39:15.752 +C Up\suntil\snow\sthe\sfts4\s"unicode61"\stokenizer\shas\streated\sall\sprivate\suse\scodepoints\sexcept\sthe\sfirst\sand\slast\sof\seach\sof\sthe\sthree\sranges\sas\salphanumeric\s(eligible\sto\sbe\spart\sof\stokens).\sThis\scommit\sfixes\sthis\sso\sthat\sall\sprivate\suse\scodepoints\sare\sconsidered\salphanumeric.\sIn\sother\swords,\sit\sfixes\sthe\shandling\sof\scodepoints\s0xE000,\s0xF8FF,\s0xF0000,\s0xFFFFD,\s0x100000\sand\s0x10FFFD. +D 2013-06-05T16:17:21.916 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5e41da95d92656a5004b03d3576e8b226858a28e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -95,14 +95,14 @@ F ext/fts3/fts3_tokenizer.c bbdc731bc91338050675c6d1da9ab82147391e16 F ext/fts3/fts3_tokenizer.h 64c6ef6c5272c51ebe60fc607a896e84288fcbc3 F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004 F ext/fts3/fts3_unicode.c 92391b4b4fb043564c6539ea9b8661e3bcba47b9 -F ext/fts3/fts3_unicode2.c a863f05f758af36777dffc2facc898bc73fec896 +F ext/fts3/fts3_unicode2.c 0113d3acf13429e6dc38e0647d1bc71211c31a4d F ext/fts3/fts3_write.c 6a1fc0e922e76b68e594bf7bc33bac72af9dc47b F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 -F ext/fts3/unicode/mkunicode.tcl 7a9bc018e2962abb79563c5a39fe581fcbf2f675 +F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c 7538f98eab2854cf17fa5f7797bffa6c76e3863b F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -550,7 +550,7 @@ F test/fts4merge.test c424309743fdd203f8e56a1f1cd7872cd66cc0ee F test/fts4merge2.test 5faa558d1b672f82b847d2a337465fa745e46891 F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test c19c85ca1faa7b6d536832b49c12e1867235f584 -F test/fts4unicode.test 25ccad45896f8e50f6a694cff738a35f798cdb40 +F test/fts4unicode.test c8ac44217bf6c17812b03eaafa6c06995ad304c2 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test b0fc34fdc36897769651975a2b0a606312753643 F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f @@ -1093,7 +1093,7 @@ F tool/vdbe-compress.tcl f12c884766bd14277f4fcedcae07078011717381 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 F tool/win/sqlite.vsix 97894c2790eda7b5bce3cc79cb2a8ec2fde9b3ac -P 3bd5ad095b23102dd3379cb62997cbf23cc67b7a -R 0bab77d0f95310ae1c21cfea10915144 -U drh -Z 1023ee14390bd42e471d5323a67fa234 +P 4d74fccf02134a998a84097b021ba9d501e34ff0 +R 659aea33cb10f326783eda2b62f9d699 +U dan +Z 74ecc7396dceda2a9a9f04f8bd9d8ced diff --git a/manifest.uuid b/manifest.uuid index e3db3db774..0ba0d6eb30 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4d74fccf02134a998a84097b021ba9d501e34ff0 \ No newline at end of file +6cfd9af5250029c0d275be027b4208c48954a8a1 \ No newline at end of file diff --git a/test/fts4unicode.test b/test/fts4unicode.test index 8bd83f6d9e..aee7f05b1b 100644 --- a/test/fts4unicode.test +++ b/test/fts4unicode.test @@ -384,5 +384,23 @@ foreach T $tokenizers { do_isspace_test 6.$T.19 $T {8287 12288} } +#------------------------------------------------------------------------- +# Test that the private use ranges are treated as alphanumeric. +# +breakpoint +foreach {tn1 c} { + 1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff +} { + foreach {tn2 config res} { + 1 "" "0 hello*world hello*world" + 2 "separators=*" "0 hello hello 1 world world" + } { + set config [string map [list * $c] $config] + set input [string map [list * $c] "hello*world"] + set output [string map [list * $c] $res] + do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output + } +} + finish_test