From 1a0e5b37b242b640c00d876a085ed657fe7b8012 Mon Sep 17 00:00:00 2001 From: dan <dan@noemail.net> Date: Wed, 24 Jan 2018 18:28:39 +0000 Subject: [PATCH] Reorganize spellfix.c to make it easier to edit automatically (e.g. using a script). No changes to functionality. FossilOrigin-Name: 090a64faaac579c6ed1cddb02d14e7089468a4aeaeec11e78d461790bcb6eb0a --- ext/misc/spellfix.c | 832 ++++++++++++++++++++++---------------------- manifest | 14 +- manifest.uuid | 2 +- 3 files changed, 432 insertions(+), 416 deletions(-) diff --git a/ext/misc/spellfix.c b/ext/misc/spellfix.c index 4f17b88e1d..2d7ef1b02b 100644 --- a/ext/misc/spellfix.c +++ b/ext/misc/spellfix.c @@ -1200,404 +1200,412 @@ static int utf8Charlen(const char *zIn, int nIn){ return nChar; } +typedef struct Transliteration Transliteration; +struct Transliteration { + unsigned short int cFrom; + unsigned char cTo0, cTo1, cTo2, cTo3; +}; + /* ** Table of translations from unicode characters into ASCII. */ -static const struct { - unsigned short int cFrom; - unsigned char cTo0, cTo1; -} translit[] = { - { 0x00A0, 0x20, 0x00 }, /* to */ - { 0x00B5, 0x75, 0x00 }, /* µ to u */ - { 0x00C0, 0x41, 0x00 }, /* À to A */ - { 0x00C1, 0x41, 0x00 }, /* Á to A */ - { 0x00C2, 0x41, 0x00 }, /*  to A */ - { 0x00C3, 0x41, 0x00 }, /* à to A */ - { 0x00C4, 0x41, 0x65 }, /* Ä to Ae */ - { 0x00C5, 0x41, 0x61 }, /* Å to Aa */ - { 0x00C6, 0x41, 0x45 }, /* Æ to AE */ - { 0x00C7, 0x43, 0x00 }, /* Ç to C */ - { 0x00C8, 0x45, 0x00 }, /* È to E */ - { 0x00C9, 0x45, 0x00 }, /* É to E */ - { 0x00CA, 0x45, 0x00 }, /* Ê to E */ - { 0x00CB, 0x45, 0x00 }, /* Ë to E */ - { 0x00CC, 0x49, 0x00 }, /* Ì to I */ - { 0x00CD, 0x49, 0x00 }, /* Í to I */ - { 0x00CE, 0x49, 0x00 }, /* Î to I */ - { 0x00CF, 0x49, 0x00 }, /* Ï to I */ - { 0x00D0, 0x44, 0x00 }, /* Ð to D */ - { 0x00D1, 0x4E, 0x00 }, /* Ñ to N */ - { 0x00D2, 0x4F, 0x00 }, /* Ò to O */ - { 0x00D3, 0x4F, 0x00 }, /* Ó to O */ - { 0x00D4, 0x4F, 0x00 }, /* Ô to O */ - { 0x00D5, 0x4F, 0x00 }, /* Õ to O */ - { 0x00D6, 0x4F, 0x65 }, /* Ö to Oe */ - { 0x00D7, 0x78, 0x00 }, /* × to x */ - { 0x00D8, 0x4F, 0x00 }, /* Ø to O */ - { 0x00D9, 0x55, 0x00 }, /* Ù to U */ - { 0x00DA, 0x55, 0x00 }, /* Ú to U */ - { 0x00DB, 0x55, 0x00 }, /* Û to U */ - { 0x00DC, 0x55, 0x65 }, /* Ü to Ue */ - { 0x00DD, 0x59, 0x00 }, /* Ý to Y */ - { 0x00DE, 0x54, 0x68 }, /* Þ to Th */ - { 0x00DF, 0x73, 0x73 }, /* ß to ss */ - { 0x00E0, 0x61, 0x00 }, /* à to a */ - { 0x00E1, 0x61, 0x00 }, /* á to a */ - { 0x00E2, 0x61, 0x00 }, /* â to a */ - { 0x00E3, 0x61, 0x00 }, /* ã to a */ - { 0x00E4, 0x61, 0x65 }, /* ä to ae */ - { 0x00E5, 0x61, 0x61 }, /* å to aa */ - { 0x00E6, 0x61, 0x65 }, /* æ to ae */ - { 0x00E7, 0x63, 0x00 }, /* ç to c */ - { 0x00E8, 0x65, 0x00 }, /* è to e */ - { 0x00E9, 0x65, 0x00 }, /* é to e */ - { 0x00EA, 0x65, 0x00 }, /* ê to e */ - { 0x00EB, 0x65, 0x00 }, /* ë to e */ - { 0x00EC, 0x69, 0x00 }, /* ì to i */ - { 0x00ED, 0x69, 0x00 }, /* í to i */ - { 0x00EE, 0x69, 0x00 }, /* î to i */ - { 0x00EF, 0x69, 0x00 }, /* ï to i */ - { 0x00F0, 0x64, 0x00 }, /* ð to d */ - { 0x00F1, 0x6E, 0x00 }, /* ñ to n */ - { 0x00F2, 0x6F, 0x00 }, /* ò to o */ - { 0x00F3, 0x6F, 0x00 }, /* ó to o */ - { 0x00F4, 0x6F, 0x00 }, /* ô to o */ - { 0x00F5, 0x6F, 0x00 }, /* õ to o */ - { 0x00F6, 0x6F, 0x65 }, /* ö to oe */ - { 0x00F7, 0x3A, 0x00 }, /* ÷ to : */ - { 0x00F8, 0x6F, 0x00 }, /* ø to o */ - { 0x00F9, 0x75, 0x00 }, /* ù to u */ - { 0x00FA, 0x75, 0x00 }, /* ú to u */ - { 0x00FB, 0x75, 0x00 }, /* û to u */ - { 0x00FC, 0x75, 0x65 }, /* ü to ue */ - { 0x00FD, 0x79, 0x00 }, /* ý to y */ - { 0x00FE, 0x74, 0x68 }, /* þ to th */ - { 0x00FF, 0x79, 0x00 }, /* ÿ to y */ - { 0x0100, 0x41, 0x00 }, /* Ā to A */ - { 0x0101, 0x61, 0x00 }, /* ā to a */ - { 0x0102, 0x41, 0x00 }, /* Ă to A */ - { 0x0103, 0x61, 0x00 }, /* ă to a */ - { 0x0104, 0x41, 0x00 }, /* Ą to A */ - { 0x0105, 0x61, 0x00 }, /* ą to a */ - { 0x0106, 0x43, 0x00 }, /* Ć to C */ - { 0x0107, 0x63, 0x00 }, /* ć to c */ - { 0x0108, 0x43, 0x68 }, /* Ĉ to Ch */ - { 0x0109, 0x63, 0x68 }, /* ĉ to ch */ - { 0x010A, 0x43, 0x00 }, /* Ċ to C */ - { 0x010B, 0x63, 0x00 }, /* ċ to c */ - { 0x010C, 0x43, 0x00 }, /* Č to C */ - { 0x010D, 0x63, 0x00 }, /* č to c */ - { 0x010E, 0x44, 0x00 }, /* Ď to D */ - { 0x010F, 0x64, 0x00 }, /* ď to d */ - { 0x0110, 0x44, 0x00 }, /* Đ to D */ - { 0x0111, 0x64, 0x00 }, /* đ to d */ - { 0x0112, 0x45, 0x00 }, /* Ē to E */ - { 0x0113, 0x65, 0x00 }, /* ē to e */ - { 0x0114, 0x45, 0x00 }, /* Ĕ to E */ - { 0x0115, 0x65, 0x00 }, /* ĕ to e */ - { 0x0116, 0x45, 0x00 }, /* Ė to E */ - { 0x0117, 0x65, 0x00 }, /* ė to e */ - { 0x0118, 0x45, 0x00 }, /* Ę to E */ - { 0x0119, 0x65, 0x00 }, /* ę to e */ - { 0x011A, 0x45, 0x00 }, /* Ě to E */ - { 0x011B, 0x65, 0x00 }, /* ě to e */ - { 0x011C, 0x47, 0x68 }, /* Ĝ to Gh */ - { 0x011D, 0x67, 0x68 }, /* ĝ to gh */ - { 0x011E, 0x47, 0x00 }, /* Ğ to G */ - { 0x011F, 0x67, 0x00 }, /* ğ to g */ - { 0x0120, 0x47, 0x00 }, /* Ġ to G */ - { 0x0121, 0x67, 0x00 }, /* ġ to g */ - { 0x0122, 0x47, 0x00 }, /* Ģ to G */ - { 0x0123, 0x67, 0x00 }, /* ģ to g */ - { 0x0124, 0x48, 0x68 }, /* Ĥ to Hh */ - { 0x0125, 0x68, 0x68 }, /* ĥ to hh */ - { 0x0126, 0x48, 0x00 }, /* Ħ to H */ - { 0x0127, 0x68, 0x00 }, /* ħ to h */ - { 0x0128, 0x49, 0x00 }, /* Ĩ to I */ - { 0x0129, 0x69, 0x00 }, /* ĩ to i */ - { 0x012A, 0x49, 0x00 }, /* Ī to I */ - { 0x012B, 0x69, 0x00 }, /* ī to i */ - { 0x012C, 0x49, 0x00 }, /* Ĭ to I */ - { 0x012D, 0x69, 0x00 }, /* ĭ to i */ - { 0x012E, 0x49, 0x00 }, /* Į to I */ - { 0x012F, 0x69, 0x00 }, /* į to i */ - { 0x0130, 0x49, 0x00 }, /* İ to I */ - { 0x0131, 0x69, 0x00 }, /* ı to i */ - { 0x0132, 0x49, 0x4A }, /* IJ to IJ */ - { 0x0133, 0x69, 0x6A }, /* ij to ij */ - { 0x0134, 0x4A, 0x68 }, /* Ĵ to Jh */ - { 0x0135, 0x6A, 0x68 }, /* ĵ to jh */ - { 0x0136, 0x4B, 0x00 }, /* Ķ to K */ - { 0x0137, 0x6B, 0x00 }, /* ķ to k */ - { 0x0138, 0x6B, 0x00 }, /* ĸ to k */ - { 0x0139, 0x4C, 0x00 }, /* Ĺ to L */ - { 0x013A, 0x6C, 0x00 }, /* ĺ to l */ - { 0x013B, 0x4C, 0x00 }, /* Ļ to L */ - { 0x013C, 0x6C, 0x00 }, /* ļ to l */ - { 0x013D, 0x4C, 0x00 }, /* Ľ to L */ - { 0x013E, 0x6C, 0x00 }, /* ľ to l */ - { 0x013F, 0x4C, 0x2E }, /* Ŀ to L. */ - { 0x0140, 0x6C, 0x2E }, /* ŀ to l. */ - { 0x0141, 0x4C, 0x00 }, /* Ł to L */ - { 0x0142, 0x6C, 0x00 }, /* ł to l */ - { 0x0143, 0x4E, 0x00 }, /* Ń to N */ - { 0x0144, 0x6E, 0x00 }, /* ń to n */ - { 0x0145, 0x4E, 0x00 }, /* Ņ to N */ - { 0x0146, 0x6E, 0x00 }, /* ņ to n */ - { 0x0147, 0x4E, 0x00 }, /* Ň to N */ - { 0x0148, 0x6E, 0x00 }, /* ň to n */ - { 0x0149, 0x27, 0x6E }, /* ʼn to 'n */ - { 0x014A, 0x4E, 0x47 }, /* Ŋ to NG */ - { 0x014B, 0x6E, 0x67 }, /* ŋ to ng */ - { 0x014C, 0x4F, 0x00 }, /* Ō to O */ - { 0x014D, 0x6F, 0x00 }, /* ō to o */ - { 0x014E, 0x4F, 0x00 }, /* Ŏ to O */ - { 0x014F, 0x6F, 0x00 }, /* ŏ to o */ - { 0x0150, 0x4F, 0x00 }, /* Ő to O */ - { 0x0151, 0x6F, 0x00 }, /* ő to o */ - { 0x0152, 0x4F, 0x45 }, /* Œ to OE */ - { 0x0153, 0x6F, 0x65 }, /* œ to oe */ - { 0x0154, 0x52, 0x00 }, /* Ŕ to R */ - { 0x0155, 0x72, 0x00 }, /* ŕ to r */ - { 0x0156, 0x52, 0x00 }, /* Ŗ to R */ - { 0x0157, 0x72, 0x00 }, /* ŗ to r */ - { 0x0158, 0x52, 0x00 }, /* Ř to R */ - { 0x0159, 0x72, 0x00 }, /* ř to r */ - { 0x015A, 0x53, 0x00 }, /* Ś to S */ - { 0x015B, 0x73, 0x00 }, /* ś to s */ - { 0x015C, 0x53, 0x68 }, /* Ŝ to Sh */ - { 0x015D, 0x73, 0x68 }, /* ŝ to sh */ - { 0x015E, 0x53, 0x00 }, /* Ş to S */ - { 0x015F, 0x73, 0x00 }, /* ş to s */ - { 0x0160, 0x53, 0x00 }, /* Š to S */ - { 0x0161, 0x73, 0x00 }, /* š to s */ - { 0x0162, 0x54, 0x00 }, /* Ţ to T */ - { 0x0163, 0x74, 0x00 }, /* ţ to t */ - { 0x0164, 0x54, 0x00 }, /* Ť to T */ - { 0x0165, 0x74, 0x00 }, /* ť to t */ - { 0x0166, 0x54, 0x00 }, /* Ŧ to T */ - { 0x0167, 0x74, 0x00 }, /* ŧ to t */ - { 0x0168, 0x55, 0x00 }, /* Ũ to U */ - { 0x0169, 0x75, 0x00 }, /* ũ to u */ - { 0x016A, 0x55, 0x00 }, /* Ū to U */ - { 0x016B, 0x75, 0x00 }, /* ū to u */ - { 0x016C, 0x55, 0x00 }, /* Ŭ to U */ - { 0x016D, 0x75, 0x00 }, /* ŭ to u */ - { 0x016E, 0x55, 0x00 }, /* Ů to U */ - { 0x016F, 0x75, 0x00 }, /* ů to u */ - { 0x0170, 0x55, 0x00 }, /* Ű to U */ - { 0x0171, 0x75, 0x00 }, /* ű to u */ - { 0x0172, 0x55, 0x00 }, /* Ų to U */ - { 0x0173, 0x75, 0x00 }, /* ų to u */ - { 0x0174, 0x57, 0x00 }, /* Ŵ to W */ - { 0x0175, 0x77, 0x00 }, /* ŵ to w */ - { 0x0176, 0x59, 0x00 }, /* Ŷ to Y */ - { 0x0177, 0x79, 0x00 }, /* ŷ to y */ - { 0x0178, 0x59, 0x00 }, /* Ÿ to Y */ - { 0x0179, 0x5A, 0x00 }, /* Ź to Z */ - { 0x017A, 0x7A, 0x00 }, /* ź to z */ - { 0x017B, 0x5A, 0x00 }, /* Ż to Z */ - { 0x017C, 0x7A, 0x00 }, /* ż to z */ - { 0x017D, 0x5A, 0x00 }, /* Ž to Z */ - { 0x017E, 0x7A, 0x00 }, /* ž to z */ - { 0x017F, 0x73, 0x00 }, /* ſ to s */ - { 0x0192, 0x66, 0x00 }, /* ƒ to f */ - { 0x0218, 0x53, 0x00 }, /* Ș to S */ - { 0x0219, 0x73, 0x00 }, /* ș to s */ - { 0x021A, 0x54, 0x00 }, /* Ț to T */ - { 0x021B, 0x74, 0x00 }, /* ț to t */ - { 0x0386, 0x41, 0x00 }, /* Ά to A */ - { 0x0388, 0x45, 0x00 }, /* Έ to E */ - { 0x0389, 0x49, 0x00 }, /* Ή to I */ - { 0x038A, 0x49, 0x00 }, /* Ί to I */ - { 0x038C, 0x4f, 0x00 }, /* Ό to O */ - { 0x038E, 0x59, 0x00 }, /* Ύ to Y */ - { 0x038F, 0x4f, 0x00 }, /* Ώ to O */ - { 0x0390, 0x69, 0x00 }, /* ΐ to i */ - { 0x0391, 0x41, 0x00 }, /* Α to A */ - { 0x0392, 0x42, 0x00 }, /* Β to B */ - { 0x0393, 0x47, 0x00 }, /* Γ to G */ - { 0x0394, 0x44, 0x00 }, /* Δ to D */ - { 0x0395, 0x45, 0x00 }, /* Ε to E */ - { 0x0396, 0x5a, 0x00 }, /* Ζ to Z */ - { 0x0397, 0x49, 0x00 }, /* Η to I */ - { 0x0398, 0x54, 0x68 }, /* Θ to Th */ - { 0x0399, 0x49, 0x00 }, /* Ι to I */ - { 0x039A, 0x4b, 0x00 }, /* Κ to K */ - { 0x039B, 0x4c, 0x00 }, /* Λ to L */ - { 0x039C, 0x4d, 0x00 }, /* Μ to M */ - { 0x039D, 0x4e, 0x00 }, /* Ν to N */ - { 0x039E, 0x58, 0x00 }, /* Ξ to X */ - { 0x039F, 0x4f, 0x00 }, /* Ο to O */ - { 0x03A0, 0x50, 0x00 }, /* Π to P */ - { 0x03A1, 0x52, 0x00 }, /* Ρ to R */ - { 0x03A3, 0x53, 0x00 }, /* Σ to S */ - { 0x03A4, 0x54, 0x00 }, /* Τ to T */ - { 0x03A5, 0x59, 0x00 }, /* Υ to Y */ - { 0x03A6, 0x46, 0x00 }, /* Φ to F */ - { 0x03A7, 0x43, 0x68 }, /* Χ to Ch */ - { 0x03A8, 0x50, 0x73 }, /* Ψ to Ps */ - { 0x03A9, 0x4f, 0x00 }, /* Ω to O */ - { 0x03AA, 0x49, 0x00 }, /* Ϊ to I */ - { 0x03AB, 0x59, 0x00 }, /* Ϋ to Y */ - { 0x03AC, 0x61, 0x00 }, /* ά to a */ - { 0x03AD, 0x65, 0x00 }, /* έ to e */ - { 0x03AE, 0x69, 0x00 }, /* ή to i */ - { 0x03AF, 0x69, 0x00 }, /* ί to i */ - { 0x03B1, 0x61, 0x00 }, /* α to a */ - { 0x03B2, 0x62, 0x00 }, /* β to b */ - { 0x03B3, 0x67, 0x00 }, /* γ to g */ - { 0x03B4, 0x64, 0x00 }, /* δ to d */ - { 0x03B5, 0x65, 0x00 }, /* ε to e */ - { 0x03B6, 0x7a, 0x00 }, /* ζ to z */ - { 0x03B7, 0x69, 0x00 }, /* η to i */ - { 0x03B8, 0x74, 0x68 }, /* θ to th */ - { 0x03B9, 0x69, 0x00 }, /* ι to i */ - { 0x03BA, 0x6b, 0x00 }, /* κ to k */ - { 0x03BB, 0x6c, 0x00 }, /* λ to l */ - { 0x03BC, 0x6d, 0x00 }, /* μ to m */ - { 0x03BD, 0x6e, 0x00 }, /* ν to n */ - { 0x03BE, 0x78, 0x00 }, /* ξ to x */ - { 0x03BF, 0x6f, 0x00 }, /* ο to o */ - { 0x03C0, 0x70, 0x00 }, /* π to p */ - { 0x03C1, 0x72, 0x00 }, /* ρ to r */ - { 0x03C3, 0x73, 0x00 }, /* σ to s */ - { 0x03C4, 0x74, 0x00 }, /* τ to t */ - { 0x03C5, 0x79, 0x00 }, /* υ to y */ - { 0x03C6, 0x66, 0x00 }, /* φ to f */ - { 0x03C7, 0x63, 0x68 }, /* χ to ch */ - { 0x03C8, 0x70, 0x73 }, /* ψ to ps */ - { 0x03C9, 0x6f, 0x00 }, /* ω to o */ - { 0x03CA, 0x69, 0x00 }, /* ϊ to i */ - { 0x03CB, 0x79, 0x00 }, /* ϋ to y */ - { 0x03CC, 0x6f, 0x00 }, /* ό to o */ - { 0x03CD, 0x79, 0x00 }, /* ύ to y */ - { 0x03CE, 0x69, 0x00 }, /* ώ to i */ - { 0x0400, 0x45, 0x00 }, /* Ѐ to E */ - { 0x0401, 0x45, 0x00 }, /* Ё to E */ - { 0x0402, 0x44, 0x00 }, /* Ђ to D */ - { 0x0403, 0x47, 0x00 }, /* Ѓ to G */ - { 0x0404, 0x45, 0x00 }, /* Є to E */ - { 0x0405, 0x5a, 0x00 }, /* Ѕ to Z */ - { 0x0406, 0x49, 0x00 }, /* І to I */ - { 0x0407, 0x49, 0x00 }, /* Ї to I */ - { 0x0408, 0x4a, 0x00 }, /* Ј to J */ - { 0x0409, 0x49, 0x00 }, /* Љ to I */ - { 0x040A, 0x4e, 0x00 }, /* Њ to N */ - { 0x040B, 0x44, 0x00 }, /* Ћ to D */ - { 0x040C, 0x4b, 0x00 }, /* Ќ to K */ - { 0x040D, 0x49, 0x00 }, /* Ѝ to I */ - { 0x040E, 0x55, 0x00 }, /* Ў to U */ - { 0x040F, 0x44, 0x00 }, /* Џ to D */ - { 0x0410, 0x41, 0x00 }, /* А to A */ - { 0x0411, 0x42, 0x00 }, /* Б to B */ - { 0x0412, 0x56, 0x00 }, /* В to V */ - { 0x0413, 0x47, 0x00 }, /* Г to G */ - { 0x0414, 0x44, 0x00 }, /* Д to D */ - { 0x0415, 0x45, 0x00 }, /* Е to E */ - { 0x0416, 0x5a, 0x68 }, /* Ж to Zh */ - { 0x0417, 0x5a, 0x00 }, /* З to Z */ - { 0x0418, 0x49, 0x00 }, /* И to I */ - { 0x0419, 0x49, 0x00 }, /* Й to I */ - { 0x041A, 0x4b, 0x00 }, /* К to K */ - { 0x041B, 0x4c, 0x00 }, /* Л to L */ - { 0x041C, 0x4d, 0x00 }, /* М to M */ - { 0x041D, 0x4e, 0x00 }, /* Н to N */ - { 0x041E, 0x4f, 0x00 }, /* О to O */ - { 0x041F, 0x50, 0x00 }, /* П to P */ - { 0x0420, 0x52, 0x00 }, /* Р to R */ - { 0x0421, 0x53, 0x00 }, /* С to S */ - { 0x0422, 0x54, 0x00 }, /* Т to T */ - { 0x0423, 0x55, 0x00 }, /* У to U */ - { 0x0424, 0x46, 0x00 }, /* Ф to F */ - { 0x0425, 0x4b, 0x68 }, /* Х to Kh */ - { 0x0426, 0x54, 0x63 }, /* Ц to Tc */ - { 0x0427, 0x43, 0x68 }, /* Ч to Ch */ - { 0x0428, 0x53, 0x68 }, /* Ш to Sh */ - { 0x0429, 0x53, 0x68 }, /* Щ to Shch */ - { 0x042A, 0x61, 0x00 }, /* to A */ - { 0x042B, 0x59, 0x00 }, /* Ы to Y */ - { 0x042C, 0x59, 0x00 }, /* to Y */ - { 0x042D, 0x45, 0x00 }, /* Э to E */ - { 0x042E, 0x49, 0x75 }, /* Ю to Iu */ - { 0x042F, 0x49, 0x61 }, /* Я to Ia */ - { 0x0430, 0x61, 0x00 }, /* а to a */ - { 0x0431, 0x62, 0x00 }, /* б to b */ - { 0x0432, 0x76, 0x00 }, /* в to v */ - { 0x0433, 0x67, 0x00 }, /* г to g */ - { 0x0434, 0x64, 0x00 }, /* д to d */ - { 0x0435, 0x65, 0x00 }, /* е to e */ - { 0x0436, 0x7a, 0x68 }, /* ж to zh */ - { 0x0437, 0x7a, 0x00 }, /* з to z */ - { 0x0438, 0x69, 0x00 }, /* и to i */ - { 0x0439, 0x69, 0x00 }, /* й to i */ - { 0x043A, 0x6b, 0x00 }, /* к to k */ - { 0x043B, 0x6c, 0x00 }, /* л to l */ - { 0x043C, 0x6d, 0x00 }, /* м to m */ - { 0x043D, 0x6e, 0x00 }, /* н to n */ - { 0x043E, 0x6f, 0x00 }, /* о to o */ - { 0x043F, 0x70, 0x00 }, /* п to p */ - { 0x0440, 0x72, 0x00 }, /* р to r */ - { 0x0441, 0x73, 0x00 }, /* с to s */ - { 0x0442, 0x74, 0x00 }, /* т to t */ - { 0x0443, 0x75, 0x00 }, /* у to u */ - { 0x0444, 0x66, 0x00 }, /* ф to f */ - { 0x0445, 0x6b, 0x68 }, /* х to kh */ - { 0x0446, 0x74, 0x63 }, /* ц to tc */ - { 0x0447, 0x63, 0x68 }, /* ч to ch */ - { 0x0448, 0x73, 0x68 }, /* ш to sh */ - { 0x0449, 0x73, 0x68 }, /* щ to shch */ - { 0x044A, 0x61, 0x00 }, /* to a */ - { 0x044B, 0x79, 0x00 }, /* ы to y */ - { 0x044C, 0x79, 0x00 }, /* to y */ - { 0x044D, 0x65, 0x00 }, /* э to e */ - { 0x044E, 0x69, 0x75 }, /* ю to iu */ - { 0x044F, 0x69, 0x61 }, /* я to ia */ - { 0x0450, 0x65, 0x00 }, /* ѐ to e */ - { 0x0451, 0x65, 0x00 }, /* ё to e */ - { 0x0452, 0x64, 0x00 }, /* ђ to d */ - { 0x0453, 0x67, 0x00 }, /* ѓ to g */ - { 0x0454, 0x65, 0x00 }, /* є to e */ - { 0x0455, 0x7a, 0x00 }, /* ѕ to z */ - { 0x0456, 0x69, 0x00 }, /* і to i */ - { 0x0457, 0x69, 0x00 }, /* ї to i */ - { 0x0458, 0x6a, 0x00 }, /* ј to j */ - { 0x0459, 0x69, 0x00 }, /* љ to i */ - { 0x045A, 0x6e, 0x00 }, /* њ to n */ - { 0x045B, 0x64, 0x00 }, /* ћ to d */ - { 0x045C, 0x6b, 0x00 }, /* ќ to k */ - { 0x045D, 0x69, 0x00 }, /* ѝ to i */ - { 0x045E, 0x75, 0x00 }, /* ў to u */ - { 0x045F, 0x64, 0x00 }, /* џ to d */ - { 0x1E02, 0x42, 0x00 }, /* Ḃ to B */ - { 0x1E03, 0x62, 0x00 }, /* ḃ to b */ - { 0x1E0A, 0x44, 0x00 }, /* Ḋ to D */ - { 0x1E0B, 0x64, 0x00 }, /* ḋ to d */ - { 0x1E1E, 0x46, 0x00 }, /* Ḟ to F */ - { 0x1E1F, 0x66, 0x00 }, /* ḟ to f */ - { 0x1E40, 0x4D, 0x00 }, /* Ṁ to M */ - { 0x1E41, 0x6D, 0x00 }, /* ṁ to m */ - { 0x1E56, 0x50, 0x00 }, /* Ṗ to P */ - { 0x1E57, 0x70, 0x00 }, /* ṗ to p */ - { 0x1E60, 0x53, 0x00 }, /* Ṡ to S */ - { 0x1E61, 0x73, 0x00 }, /* ṡ to s */ - { 0x1E6A, 0x54, 0x00 }, /* Ṫ to T */ - { 0x1E6B, 0x74, 0x00 }, /* ṫ to t */ - { 0x1E80, 0x57, 0x00 }, /* Ẁ to W */ - { 0x1E81, 0x77, 0x00 }, /* ẁ to w */ - { 0x1E82, 0x57, 0x00 }, /* Ẃ to W */ - { 0x1E83, 0x77, 0x00 }, /* ẃ to w */ - { 0x1E84, 0x57, 0x00 }, /* Ẅ to W */ - { 0x1E85, 0x77, 0x00 }, /* ẅ to w */ - { 0x1EF2, 0x59, 0x00 }, /* Ỳ to Y */ - { 0x1EF3, 0x79, 0x00 }, /* ỳ to y */ - { 0xFB00, 0x66, 0x66 }, /* ff to ff */ - { 0xFB01, 0x66, 0x69 }, /* fi to fi */ - { 0xFB02, 0x66, 0x6C }, /* fl to fl */ - { 0xFB05, 0x73, 0x74 }, /* ſt to st */ - { 0xFB06, 0x73, 0x74 }, /* st to st */ +static const Transliteration translit[] = { + { 0x00A0, 0x20, 0x00, 0x00, 0x00 }, /* to */ + { 0x00B5, 0x75, 0x00, 0x00, 0x00 }, /* µ to u */ + { 0x00C0, 0x41, 0x00, 0x00, 0x00 }, /* À to A */ + { 0x00C1, 0x41, 0x00, 0x00, 0x00 }, /* Á to A */ + { 0x00C2, 0x41, 0x00, 0x00, 0x00 }, /*  to A */ + { 0x00C3, 0x41, 0x00, 0x00, 0x00 }, /* à to A */ + { 0x00C4, 0x41, 0x65, 0x00, 0x00 }, /* Ä to Ae */ + { 0x00C5, 0x41, 0x61, 0x00, 0x00 }, /* Å to Aa */ + { 0x00C6, 0x41, 0x45, 0x00, 0x00 }, /* Æ to AE */ + { 0x00C7, 0x43, 0x00, 0x00, 0x00 }, /* Ç to C */ + { 0x00C8, 0x45, 0x00, 0x00, 0x00 }, /* È to E */ + { 0x00C9, 0x45, 0x00, 0x00, 0x00 }, /* É to E */ + { 0x00CA, 0x45, 0x00, 0x00, 0x00 }, /* Ê to E */ + { 0x00CB, 0x45, 0x00, 0x00, 0x00 }, /* Ë to E */ + { 0x00CC, 0x49, 0x00, 0x00, 0x00 }, /* Ì to I */ + { 0x00CD, 0x49, 0x00, 0x00, 0x00 }, /* Í to I */ + { 0x00CE, 0x49, 0x00, 0x00, 0x00 }, /* Î to I */ + { 0x00CF, 0x49, 0x00, 0x00, 0x00 }, /* Ï to I */ + { 0x00D0, 0x44, 0x00, 0x00, 0x00 }, /* Ð to D */ + { 0x00D1, 0x4E, 0x00, 0x00, 0x00 }, /* Ñ to N */ + { 0x00D2, 0x4F, 0x00, 0x00, 0x00 }, /* Ò to O */ + { 0x00D3, 0x4F, 0x00, 0x00, 0x00 }, /* Ó to O */ + { 0x00D4, 0x4F, 0x00, 0x00, 0x00 }, /* Ô to O */ + { 0x00D5, 0x4F, 0x00, 0x00, 0x00 }, /* Õ to O */ + { 0x00D6, 0x4F, 0x65, 0x00, 0x00 }, /* Ö to Oe */ + { 0x00D7, 0x78, 0x00, 0x00, 0x00 }, /* × to x */ + { 0x00D8, 0x4F, 0x00, 0x00, 0x00 }, /* Ø to O */ + { 0x00D9, 0x55, 0x00, 0x00, 0x00 }, /* Ù to U */ + { 0x00DA, 0x55, 0x00, 0x00, 0x00 }, /* Ú to U */ + { 0x00DB, 0x55, 0x00, 0x00, 0x00 }, /* Û to U */ + { 0x00DC, 0x55, 0x65, 0x00, 0x00 }, /* Ü to Ue */ + { 0x00DD, 0x59, 0x00, 0x00, 0x00 }, /* Ý to Y */ + { 0x00DE, 0x54, 0x68, 0x00, 0x00 }, /* Þ to Th */ + { 0x00DF, 0x73, 0x73, 0x00, 0x00 }, /* ß to ss */ + { 0x00E0, 0x61, 0x00, 0x00, 0x00 }, /* à to a */ + { 0x00E1, 0x61, 0x00, 0x00, 0x00 }, /* á to a */ + { 0x00E2, 0x61, 0x00, 0x00, 0x00 }, /* â to a */ + { 0x00E3, 0x61, 0x00, 0x00, 0x00 }, /* ã to a */ + { 0x00E4, 0x61, 0x65, 0x00, 0x00 }, /* ä to ae */ + { 0x00E5, 0x61, 0x61, 0x00, 0x00 }, /* å to aa */ + { 0x00E6, 0x61, 0x65, 0x00, 0x00 }, /* æ to ae */ + { 0x00E7, 0x63, 0x00, 0x00, 0x00 }, /* ç to c */ + { 0x00E8, 0x65, 0x00, 0x00, 0x00 }, /* è to e */ + { 0x00E9, 0x65, 0x00, 0x00, 0x00 }, /* é to e */ + { 0x00EA, 0x65, 0x00, 0x00, 0x00 }, /* ê to e */ + { 0x00EB, 0x65, 0x00, 0x00, 0x00 }, /* ë to e */ + { 0x00EC, 0x69, 0x00, 0x00, 0x00 }, /* ì to i */ + { 0x00ED, 0x69, 0x00, 0x00, 0x00 }, /* í to i */ + { 0x00EE, 0x69, 0x00, 0x00, 0x00 }, /* î to i */ + { 0x00EF, 0x69, 0x00, 0x00, 0x00 }, /* ï to i */ + { 0x00F0, 0x64, 0x00, 0x00, 0x00 }, /* ð to d */ + { 0x00F1, 0x6E, 0x00, 0x00, 0x00 }, /* ñ to n */ + { 0x00F2, 0x6F, 0x00, 0x00, 0x00 }, /* ò to o */ + { 0x00F3, 0x6F, 0x00, 0x00, 0x00 }, /* ó to o */ + { 0x00F4, 0x6F, 0x00, 0x00, 0x00 }, /* ô to o */ + { 0x00F5, 0x6F, 0x00, 0x00, 0x00 }, /* õ to o */ + { 0x00F6, 0x6F, 0x65, 0x00, 0x00 }, /* ö to oe */ + { 0x00F7, 0x3A, 0x00, 0x00, 0x00 }, /* ÷ to : */ + { 0x00F8, 0x6F, 0x00, 0x00, 0x00 }, /* ø to o */ + { 0x00F9, 0x75, 0x00, 0x00, 0x00 }, /* ù to u */ + { 0x00FA, 0x75, 0x00, 0x00, 0x00 }, /* ú to u */ + { 0x00FB, 0x75, 0x00, 0x00, 0x00 }, /* û to u */ + { 0x00FC, 0x75, 0x65, 0x00, 0x00 }, /* ü to ue */ + { 0x00FD, 0x79, 0x00, 0x00, 0x00 }, /* ý to y */ + { 0x00FE, 0x74, 0x68, 0x00, 0x00 }, /* þ to th */ + { 0x00FF, 0x79, 0x00, 0x00, 0x00 }, /* ÿ to y */ + { 0x0100, 0x41, 0x00, 0x00, 0x00 }, /* Ā to A */ + { 0x0101, 0x61, 0x00, 0x00, 0x00 }, /* ā to a */ + { 0x0102, 0x41, 0x00, 0x00, 0x00 }, /* Ă to A */ + { 0x0103, 0x61, 0x00, 0x00, 0x00 }, /* ă to a */ + { 0x0104, 0x41, 0x00, 0x00, 0x00 }, /* Ą to A */ + { 0x0105, 0x61, 0x00, 0x00, 0x00 }, /* ą to a */ + { 0x0106, 0x43, 0x00, 0x00, 0x00 }, /* Ć to C */ + { 0x0107, 0x63, 0x00, 0x00, 0x00 }, /* ć to c */ + { 0x0108, 0x43, 0x68, 0x00, 0x00 }, /* Ĉ to Ch */ + { 0x0109, 0x63, 0x68, 0x00, 0x00 }, /* ĉ to ch */ + { 0x010A, 0x43, 0x00, 0x00, 0x00 }, /* Ċ to C */ + { 0x010B, 0x63, 0x00, 0x00, 0x00 }, /* ċ to c */ + { 0x010C, 0x43, 0x00, 0x00, 0x00 }, /* Č to C */ + { 0x010D, 0x63, 0x00, 0x00, 0x00 }, /* č to c */ + { 0x010E, 0x44, 0x00, 0x00, 0x00 }, /* Ď to D */ + { 0x010F, 0x64, 0x00, 0x00, 0x00 }, /* ď to d */ + { 0x0110, 0x44, 0x00, 0x00, 0x00 }, /* Đ to D */ + { 0x0111, 0x64, 0x00, 0x00, 0x00 }, /* đ to d */ + { 0x0112, 0x45, 0x00, 0x00, 0x00 }, /* Ē to E */ + { 0x0113, 0x65, 0x00, 0x00, 0x00 }, /* ē to e */ + { 0x0114, 0x45, 0x00, 0x00, 0x00 }, /* Ĕ to E */ + { 0x0115, 0x65, 0x00, 0x00, 0x00 }, /* ĕ to e */ + { 0x0116, 0x45, 0x00, 0x00, 0x00 }, /* Ė to E */ + { 0x0117, 0x65, 0x00, 0x00, 0x00 }, /* ė to e */ + { 0x0118, 0x45, 0x00, 0x00, 0x00 }, /* Ę to E */ + { 0x0119, 0x65, 0x00, 0x00, 0x00 }, /* ę to e */ + { 0x011A, 0x45, 0x00, 0x00, 0x00 }, /* Ě to E */ + { 0x011B, 0x65, 0x00, 0x00, 0x00 }, /* ě to e */ + { 0x011C, 0x47, 0x68, 0x00, 0x00 }, /* Ĝ to Gh */ + { 0x011D, 0x67, 0x68, 0x00, 0x00 }, /* ĝ to gh */ + { 0x011E, 0x47, 0x00, 0x00, 0x00 }, /* Ğ to G */ + { 0x011F, 0x67, 0x00, 0x00, 0x00 }, /* ğ to g */ + { 0x0120, 0x47, 0x00, 0x00, 0x00 }, /* Ġ to G */ + { 0x0121, 0x67, 0x00, 0x00, 0x00 }, /* ġ to g */ + { 0x0122, 0x47, 0x00, 0x00, 0x00 }, /* Ģ to G */ + { 0x0123, 0x67, 0x00, 0x00, 0x00 }, /* ģ to g */ + { 0x0124, 0x48, 0x68, 0x00, 0x00 }, /* Ĥ to Hh */ + { 0x0125, 0x68, 0x68, 0x00, 0x00 }, /* ĥ to hh */ + { 0x0126, 0x48, 0x00, 0x00, 0x00 }, /* Ħ to H */ + { 0x0127, 0x68, 0x00, 0x00, 0x00 }, /* ħ to h */ + { 0x0128, 0x49, 0x00, 0x00, 0x00 }, /* Ĩ to I */ + { 0x0129, 0x69, 0x00, 0x00, 0x00 }, /* ĩ to i */ + { 0x012A, 0x49, 0x00, 0x00, 0x00 }, /* Ī to I */ + { 0x012B, 0x69, 0x00, 0x00, 0x00 }, /* ī to i */ + { 0x012C, 0x49, 0x00, 0x00, 0x00 }, /* Ĭ to I */ + { 0x012D, 0x69, 0x00, 0x00, 0x00 }, /* ĭ to i */ + { 0x012E, 0x49, 0x00, 0x00, 0x00 }, /* Į to I */ + { 0x012F, 0x69, 0x00, 0x00, 0x00 }, /* į to i */ + { 0x0130, 0x49, 0x00, 0x00, 0x00 }, /* İ to I */ + { 0x0131, 0x69, 0x00, 0x00, 0x00 }, /* ı to i */ + { 0x0132, 0x49, 0x4A, 0x00, 0x00 }, /* IJ to IJ */ + { 0x0133, 0x69, 0x6A, 0x00, 0x00 }, /* ij to ij */ + { 0x0134, 0x4A, 0x68, 0x00, 0x00 }, /* Ĵ to Jh */ + { 0x0135, 0x6A, 0x68, 0x00, 0x00 }, /* ĵ to jh */ + { 0x0136, 0x4B, 0x00, 0x00, 0x00 }, /* Ķ to K */ + { 0x0137, 0x6B, 0x00, 0x00, 0x00 }, /* ķ to k */ + { 0x0138, 0x6B, 0x00, 0x00, 0x00 }, /* ĸ to k */ + { 0x0139, 0x4C, 0x00, 0x00, 0x00 }, /* Ĺ to L */ + { 0x013A, 0x6C, 0x00, 0x00, 0x00 }, /* ĺ to l */ + { 0x013B, 0x4C, 0x00, 0x00, 0x00 }, /* Ļ to L */ + { 0x013C, 0x6C, 0x00, 0x00, 0x00 }, /* ļ to l */ + { 0x013D, 0x4C, 0x00, 0x00, 0x00 }, /* Ľ to L */ + { 0x013E, 0x6C, 0x00, 0x00, 0x00 }, /* ľ to l */ + { 0x013F, 0x4C, 0x2E, 0x00, 0x00 }, /* Ŀ to L. */ + { 0x0140, 0x6C, 0x2E, 0x00, 0x00 }, /* ŀ to l. */ + { 0x0141, 0x4C, 0x00, 0x00, 0x00 }, /* Ł to L */ + { 0x0142, 0x6C, 0x00, 0x00, 0x00 }, /* ł to l */ + { 0x0143, 0x4E, 0x00, 0x00, 0x00 }, /* Ń to N */ + { 0x0144, 0x6E, 0x00, 0x00, 0x00 }, /* ń to n */ + { 0x0145, 0x4E, 0x00, 0x00, 0x00 }, /* Ņ to N */ + { 0x0146, 0x6E, 0x00, 0x00, 0x00 }, /* ņ to n */ + { 0x0147, 0x4E, 0x00, 0x00, 0x00 }, /* Ň to N */ + { 0x0148, 0x6E, 0x00, 0x00, 0x00 }, /* ň to n */ + { 0x0149, 0x27, 0x6E, 0x00, 0x00 }, /* ʼn to 'n */ + { 0x014A, 0x4E, 0x47, 0x00, 0x00 }, /* Ŋ to NG */ + { 0x014B, 0x6E, 0x67, 0x00, 0x00 }, /* ŋ to ng */ + { 0x014C, 0x4F, 0x00, 0x00, 0x00 }, /* Ō to O */ + { 0x014D, 0x6F, 0x00, 0x00, 0x00 }, /* ō to o */ + { 0x014E, 0x4F, 0x00, 0x00, 0x00 }, /* Ŏ to O */ + { 0x014F, 0x6F, 0x00, 0x00, 0x00 }, /* ŏ to o */ + { 0x0150, 0x4F, 0x00, 0x00, 0x00 }, /* Ő to O */ + { 0x0151, 0x6F, 0x00, 0x00, 0x00 }, /* ő to o */ + { 0x0152, 0x4F, 0x45, 0x00, 0x00 }, /* Œ to OE */ + { 0x0153, 0x6F, 0x65, 0x00, 0x00 }, /* œ to oe */ + { 0x0154, 0x52, 0x00, 0x00, 0x00 }, /* Ŕ to R */ + { 0x0155, 0x72, 0x00, 0x00, 0x00 }, /* ŕ to r */ + { 0x0156, 0x52, 0x00, 0x00, 0x00 }, /* Ŗ to R */ + { 0x0157, 0x72, 0x00, 0x00, 0x00 }, /* ŗ to r */ + { 0x0158, 0x52, 0x00, 0x00, 0x00 }, /* Ř to R */ + { 0x0159, 0x72, 0x00, 0x00, 0x00 }, /* ř to r */ + { 0x015A, 0x53, 0x00, 0x00, 0x00 }, /* Ś to S */ + { 0x015B, 0x73, 0x00, 0x00, 0x00 }, /* ś to s */ + { 0x015C, 0x53, 0x68, 0x00, 0x00 }, /* Ŝ to Sh */ + { 0x015D, 0x73, 0x68, 0x00, 0x00 }, /* ŝ to sh */ + { 0x015E, 0x53, 0x00, 0x00, 0x00 }, /* Ş to S */ + { 0x015F, 0x73, 0x00, 0x00, 0x00 }, /* ş to s */ + { 0x0160, 0x53, 0x00, 0x00, 0x00 }, /* Š to S */ + { 0x0161, 0x73, 0x00, 0x00, 0x00 }, /* š to s */ + { 0x0162, 0x54, 0x00, 0x00, 0x00 }, /* Ţ to T */ + { 0x0163, 0x74, 0x00, 0x00, 0x00 }, /* ţ to t */ + { 0x0164, 0x54, 0x00, 0x00, 0x00 }, /* Ť to T */ + { 0x0165, 0x74, 0x00, 0x00, 0x00 }, /* ť to t */ + { 0x0166, 0x54, 0x00, 0x00, 0x00 }, /* Ŧ to T */ + { 0x0167, 0x74, 0x00, 0x00, 0x00 }, /* ŧ to t */ + { 0x0168, 0x55, 0x00, 0x00, 0x00 }, /* Ũ to U */ + { 0x0169, 0x75, 0x00, 0x00, 0x00 }, /* ũ to u */ + { 0x016A, 0x55, 0x00, 0x00, 0x00 }, /* Ū to U */ + { 0x016B, 0x75, 0x00, 0x00, 0x00 }, /* ū to u */ + { 0x016C, 0x55, 0x00, 0x00, 0x00 }, /* Ŭ to U */ + { 0x016D, 0x75, 0x00, 0x00, 0x00 }, /* ŭ to u */ + { 0x016E, 0x55, 0x00, 0x00, 0x00 }, /* Ů to U */ + { 0x016F, 0x75, 0x00, 0x00, 0x00 }, /* ů to u */ + { 0x0170, 0x55, 0x00, 0x00, 0x00 }, /* Ű to U */ + { 0x0171, 0x75, 0x00, 0x00, 0x00 }, /* ű to u */ + { 0x0172, 0x55, 0x00, 0x00, 0x00 }, /* Ų to U */ + { 0x0173, 0x75, 0x00, 0x00, 0x00 }, /* ų to u */ + { 0x0174, 0x57, 0x00, 0x00, 0x00 }, /* Ŵ to W */ + { 0x0175, 0x77, 0x00, 0x00, 0x00 }, /* ŵ to w */ + { 0x0176, 0x59, 0x00, 0x00, 0x00 }, /* Ŷ to Y */ + { 0x0177, 0x79, 0x00, 0x00, 0x00 }, /* ŷ to y */ + { 0x0178, 0x59, 0x00, 0x00, 0x00 }, /* Ÿ to Y */ + { 0x0179, 0x5A, 0x00, 0x00, 0x00 }, /* Ź to Z */ + { 0x017A, 0x7A, 0x00, 0x00, 0x00 }, /* ź to z */ + { 0x017B, 0x5A, 0x00, 0x00, 0x00 }, /* Ż to Z */ + { 0x017C, 0x7A, 0x00, 0x00, 0x00 }, /* ż to z */ + { 0x017D, 0x5A, 0x00, 0x00, 0x00 }, /* Ž to Z */ + { 0x017E, 0x7A, 0x00, 0x00, 0x00 }, /* ž to z */ + { 0x017F, 0x73, 0x00, 0x00, 0x00 }, /* ſ to s */ + { 0x0192, 0x66, 0x00, 0x00, 0x00 }, /* ƒ to f */ + { 0x0218, 0x53, 0x00, 0x00, 0x00 }, /* Ș to S */ + { 0x0219, 0x73, 0x00, 0x00, 0x00 }, /* ș to s */ + { 0x021A, 0x54, 0x00, 0x00, 0x00 }, /* Ț to T */ + { 0x021B, 0x74, 0x00, 0x00, 0x00 }, /* ț to t */ + { 0x0386, 0x41, 0x00, 0x00, 0x00 }, /* Ά to A */ + { 0x0388, 0x45, 0x00, 0x00, 0x00 }, /* Έ to E */ + { 0x0389, 0x49, 0x00, 0x00, 0x00 }, /* Ή to I */ + { 0x038A, 0x49, 0x00, 0x00, 0x00 }, /* Ί to I */ + { 0x038C, 0x4f, 0x00, 0x00, 0x00 }, /* Ό to O */ + { 0x038E, 0x59, 0x00, 0x00, 0x00 }, /* Ύ to Y */ + { 0x038F, 0x4f, 0x00, 0x00, 0x00 }, /* Ώ to O */ + { 0x0390, 0x69, 0x00, 0x00, 0x00 }, /* ΐ to i */ + { 0x0391, 0x41, 0x00, 0x00, 0x00 }, /* Α to A */ + { 0x0392, 0x42, 0x00, 0x00, 0x00 }, /* Β to B */ + { 0x0393, 0x47, 0x00, 0x00, 0x00 }, /* Γ to G */ + { 0x0394, 0x44, 0x00, 0x00, 0x00 }, /* Δ to D */ + { 0x0395, 0x45, 0x00, 0x00, 0x00 }, /* Ε to E */ + { 0x0396, 0x5a, 0x00, 0x00, 0x00 }, /* Ζ to Z */ + { 0x0397, 0x49, 0x00, 0x00, 0x00 }, /* Η to I */ + { 0x0398, 0x54, 0x68, 0x00, 0x00 }, /* Θ to Th */ + { 0x0399, 0x49, 0x00, 0x00, 0x00 }, /* Ι to I */ + { 0x039A, 0x4b, 0x00, 0x00, 0x00 }, /* Κ to K */ + { 0x039B, 0x4c, 0x00, 0x00, 0x00 }, /* Λ to L */ + { 0x039C, 0x4d, 0x00, 0x00, 0x00 }, /* Μ to M */ + { 0x039D, 0x4e, 0x00, 0x00, 0x00 }, /* Ν to N */ + { 0x039E, 0x58, 0x00, 0x00, 0x00 }, /* Ξ to X */ + { 0x039F, 0x4f, 0x00, 0x00, 0x00 }, /* Ο to O */ + { 0x03A0, 0x50, 0x00, 0x00, 0x00 }, /* Π to P */ + { 0x03A1, 0x52, 0x00, 0x00, 0x00 }, /* Ρ to R */ + { 0x03A3, 0x53, 0x00, 0x00, 0x00 }, /* Σ to S */ + { 0x03A4, 0x54, 0x00, 0x00, 0x00 }, /* Τ to T */ + { 0x03A5, 0x59, 0x00, 0x00, 0x00 }, /* Υ to Y */ + { 0x03A6, 0x46, 0x00, 0x00, 0x00 }, /* Φ to F */ + { 0x03A7, 0x43, 0x68, 0x00, 0x00 }, /* Χ to Ch */ + { 0x03A8, 0x50, 0x73, 0x00, 0x00 }, /* Ψ to Ps */ + { 0x03A9, 0x4f, 0x00, 0x00, 0x00 }, /* Ω to O */ + { 0x03AA, 0x49, 0x00, 0x00, 0x00 }, /* Ϊ to I */ + { 0x03AB, 0x59, 0x00, 0x00, 0x00 }, /* Ϋ to Y */ + { 0x03AC, 0x61, 0x00, 0x00, 0x00 }, /* ά to a */ + { 0x03AD, 0x65, 0x00, 0x00, 0x00 }, /* έ to e */ + { 0x03AE, 0x69, 0x00, 0x00, 0x00 }, /* ή to i */ + { 0x03AF, 0x69, 0x00, 0x00, 0x00 }, /* ί to i */ + { 0x03B1, 0x61, 0x00, 0x00, 0x00 }, /* α to a */ + { 0x03B2, 0x62, 0x00, 0x00, 0x00 }, /* β to b */ + { 0x03B3, 0x67, 0x00, 0x00, 0x00 }, /* γ to g */ + { 0x03B4, 0x64, 0x00, 0x00, 0x00 }, /* δ to d */ + { 0x03B5, 0x65, 0x00, 0x00, 0x00 }, /* ε to e */ + { 0x03B6, 0x7a, 0x00, 0x00, 0x00 }, /* ζ to z */ + { 0x03B7, 0x69, 0x00, 0x00, 0x00 }, /* η to i */ + { 0x03B8, 0x74, 0x68, 0x00, 0x00 }, /* θ to th */ + { 0x03B9, 0x69, 0x00, 0x00, 0x00 }, /* ι to i */ + { 0x03BA, 0x6b, 0x00, 0x00, 0x00 }, /* κ to k */ + { 0x03BB, 0x6c, 0x00, 0x00, 0x00 }, /* λ to l */ + { 0x03BC, 0x6d, 0x00, 0x00, 0x00 }, /* μ to m */ + { 0x03BD, 0x6e, 0x00, 0x00, 0x00 }, /* ν to n */ + { 0x03BE, 0x78, 0x00, 0x00, 0x00 }, /* ξ to x */ + { 0x03BF, 0x6f, 0x00, 0x00, 0x00 }, /* ο to o */ + { 0x03C0, 0x70, 0x00, 0x00, 0x00 }, /* π to p */ + { 0x03C1, 0x72, 0x00, 0x00, 0x00 }, /* ρ to r */ + { 0x03C3, 0x73, 0x00, 0x00, 0x00 }, /* σ to s */ + { 0x03C4, 0x74, 0x00, 0x00, 0x00 }, /* τ to t */ + { 0x03C5, 0x79, 0x00, 0x00, 0x00 }, /* υ to y */ + { 0x03C6, 0x66, 0x00, 0x00, 0x00 }, /* φ to f */ + { 0x03C7, 0x63, 0x68, 0x00, 0x00 }, /* χ to ch */ + { 0x03C8, 0x70, 0x73, 0x00, 0x00 }, /* ψ to ps */ + { 0x03C9, 0x6f, 0x00, 0x00, 0x00 }, /* ω to o */ + { 0x03CA, 0x69, 0x00, 0x00, 0x00 }, /* ϊ to i */ + { 0x03CB, 0x79, 0x00, 0x00, 0x00 }, /* ϋ to y */ + { 0x03CC, 0x6f, 0x00, 0x00, 0x00 }, /* ό to o */ + { 0x03CD, 0x79, 0x00, 0x00, 0x00 }, /* ύ to y */ + { 0x03CE, 0x69, 0x00, 0x00, 0x00 }, /* ώ to i */ + { 0x0400, 0x45, 0x00, 0x00, 0x00 }, /* Ѐ to E */ + { 0x0401, 0x45, 0x00, 0x00, 0x00 }, /* Ё to E */ + { 0x0402, 0x44, 0x00, 0x00, 0x00 }, /* Ђ to D */ + { 0x0403, 0x47, 0x00, 0x00, 0x00 }, /* Ѓ to G */ + { 0x0404, 0x45, 0x00, 0x00, 0x00 }, /* Є to E */ + { 0x0405, 0x5a, 0x00, 0x00, 0x00 }, /* Ѕ to Z */ + { 0x0406, 0x49, 0x00, 0x00, 0x00 }, /* І to I */ + { 0x0407, 0x49, 0x00, 0x00, 0x00 }, /* Ї to I */ + { 0x0408, 0x4a, 0x00, 0x00, 0x00 }, /* Ј to J */ + { 0x0409, 0x49, 0x00, 0x00, 0x00 }, /* Љ to I */ + { 0x040A, 0x4e, 0x00, 0x00, 0x00 }, /* Њ to N */ + { 0x040B, 0x44, 0x00, 0x00, 0x00 }, /* Ћ to D */ + { 0x040C, 0x4b, 0x00, 0x00, 0x00 }, /* Ќ to K */ + { 0x040D, 0x49, 0x00, 0x00, 0x00 }, /* Ѝ to I */ + { 0x040E, 0x55, 0x00, 0x00, 0x00 }, /* Ў to U */ + { 0x040F, 0x44, 0x00, 0x00, 0x00 }, /* Џ to D */ + { 0x0410, 0x41, 0x00, 0x00, 0x00 }, /* А to A */ + { 0x0411, 0x42, 0x00, 0x00, 0x00 }, /* Б to B */ + { 0x0412, 0x56, 0x00, 0x00, 0x00 }, /* В to V */ + { 0x0413, 0x47, 0x00, 0x00, 0x00 }, /* Г to G */ + { 0x0414, 0x44, 0x00, 0x00, 0x00 }, /* Д to D */ + { 0x0415, 0x45, 0x00, 0x00, 0x00 }, /* Е to E */ + { 0x0416, 0x5a, 0x68, 0x00, 0x00 }, /* Ж to Zh */ + { 0x0417, 0x5a, 0x00, 0x00, 0x00 }, /* З to Z */ + { 0x0418, 0x49, 0x00, 0x00, 0x00 }, /* И to I */ + { 0x0419, 0x49, 0x00, 0x00, 0x00 }, /* Й to I */ + { 0x041A, 0x4b, 0x00, 0x00, 0x00 }, /* К to K */ + { 0x041B, 0x4c, 0x00, 0x00, 0x00 }, /* Л to L */ + { 0x041C, 0x4d, 0x00, 0x00, 0x00 }, /* М to M */ + { 0x041D, 0x4e, 0x00, 0x00, 0x00 }, /* Н to N */ + { 0x041E, 0x4f, 0x00, 0x00, 0x00 }, /* О to O */ + { 0x041F, 0x50, 0x00, 0x00, 0x00 }, /* П to P */ + { 0x0420, 0x52, 0x00, 0x00, 0x00 }, /* Р to R */ + { 0x0421, 0x53, 0x00, 0x00, 0x00 }, /* С to S */ + { 0x0422, 0x54, 0x00, 0x00, 0x00 }, /* Т to T */ + { 0x0423, 0x55, 0x00, 0x00, 0x00 }, /* У to U */ + { 0x0424, 0x46, 0x00, 0x00, 0x00 }, /* Ф to F */ + { 0x0425, 0x4b, 0x68, 0x00, 0x00 }, /* Х to Kh */ + { 0x0426, 0x54, 0x63, 0x00, 0x00 }, /* Ц to Tc */ + { 0x0427, 0x43, 0x68, 0x00, 0x00 }, /* Ч to Ch */ + { 0x0428, 0x53, 0x68, 0x00, 0x00 }, /* Ш to Sh */ + { 0x0429, 0x53, 0x68, 0x63, 0x68 }, /* Щ to Shch */ + { 0x042A, 0x61, 0x00, 0x00, 0x00 }, /* to A */ + { 0x042B, 0x59, 0x00, 0x00, 0x00 }, /* Ы to Y */ + { 0x042C, 0x59, 0x00, 0x00, 0x00 }, /* to Y */ + { 0x042D, 0x45, 0x00, 0x00, 0x00 }, /* Э to E */ + { 0x042E, 0x49, 0x75, 0x00, 0x00 }, /* Ю to Iu */ + { 0x042F, 0x49, 0x61, 0x00, 0x00 }, /* Я to Ia */ + { 0x0430, 0x61, 0x00, 0x00, 0x00 }, /* а to a */ + { 0x0431, 0x62, 0x00, 0x00, 0x00 }, /* б to b */ + { 0x0432, 0x76, 0x00, 0x00, 0x00 }, /* в to v */ + { 0x0433, 0x67, 0x00, 0x00, 0x00 }, /* г to g */ + { 0x0434, 0x64, 0x00, 0x00, 0x00 }, /* д to d */ + { 0x0435, 0x65, 0x00, 0x00, 0x00 }, /* е to e */ + { 0x0436, 0x7a, 0x68, 0x00, 0x00 }, /* ж to zh */ + { 0x0437, 0x7a, 0x00, 0x00, 0x00 }, /* з to z */ + { 0x0438, 0x69, 0x00, 0x00, 0x00 }, /* и to i */ + { 0x0439, 0x69, 0x00, 0x00, 0x00 }, /* й to i */ + { 0x043A, 0x6b, 0x00, 0x00, 0x00 }, /* к to k */ + { 0x043B, 0x6c, 0x00, 0x00, 0x00 }, /* л to l */ + { 0x043C, 0x6d, 0x00, 0x00, 0x00 }, /* м to m */ + { 0x043D, 0x6e, 0x00, 0x00, 0x00 }, /* н to n */ + { 0x043E, 0x6f, 0x00, 0x00, 0x00 }, /* о to o */ + { 0x043F, 0x70, 0x00, 0x00, 0x00 }, /* п to p */ + { 0x0440, 0x72, 0x00, 0x00, 0x00 }, /* р to r */ + { 0x0441, 0x73, 0x00, 0x00, 0x00 }, /* с to s */ + { 0x0442, 0x74, 0x00, 0x00, 0x00 }, /* т to t */ + { 0x0443, 0x75, 0x00, 0x00, 0x00 }, /* у to u */ + { 0x0444, 0x66, 0x00, 0x00, 0x00 }, /* ф to f */ + { 0x0445, 0x6b, 0x68, 0x00, 0x00 }, /* х to kh */ + { 0x0446, 0x74, 0x63, 0x00, 0x00 }, /* ц to tc */ + { 0x0447, 0x63, 0x68, 0x00, 0x00 }, /* ч to ch */ + { 0x0448, 0x73, 0x68, 0x00, 0x00 }, /* ш to sh */ + { 0x0449, 0x73, 0x68, 0x63, 0x68 }, /* щ to shch */ + { 0x044A, 0x61, 0x00, 0x00, 0x00 }, /* to a */ + { 0x044B, 0x79, 0x00, 0x00, 0x00 }, /* ы to y */ + { 0x044C, 0x79, 0x00, 0x00, 0x00 }, /* to y */ + { 0x044D, 0x65, 0x00, 0x00, 0x00 }, /* э to e */ + { 0x044E, 0x69, 0x75, 0x00, 0x00 }, /* ю to iu */ + { 0x044F, 0x69, 0x61, 0x00, 0x00 }, /* я to ia */ + { 0x0450, 0x65, 0x00, 0x00, 0x00 }, /* ѐ to e */ + { 0x0451, 0x65, 0x00, 0x00, 0x00 }, /* ё to e */ + { 0x0452, 0x64, 0x00, 0x00, 0x00 }, /* ђ to d */ + { 0x0453, 0x67, 0x00, 0x00, 0x00 }, /* ѓ to g */ + { 0x0454, 0x65, 0x00, 0x00, 0x00 }, /* є to e */ + { 0x0455, 0x7a, 0x00, 0x00, 0x00 }, /* ѕ to z */ + { 0x0456, 0x69, 0x00, 0x00, 0x00 }, /* і to i */ + { 0x0457, 0x69, 0x00, 0x00, 0x00 }, /* ї to i */ + { 0x0458, 0x6a, 0x00, 0x00, 0x00 }, /* ј to j */ + { 0x0459, 0x69, 0x00, 0x00, 0x00 }, /* љ to i */ + { 0x045A, 0x6e, 0x00, 0x00, 0x00 }, /* њ to n */ + { 0x045B, 0x64, 0x00, 0x00, 0x00 }, /* ћ to d */ + { 0x045C, 0x6b, 0x00, 0x00, 0x00 }, /* ќ to k */ + { 0x045D, 0x69, 0x00, 0x00, 0x00 }, /* ѝ to i */ + { 0x045E, 0x75, 0x00, 0x00, 0x00 }, /* ў to u */ + { 0x045F, 0x64, 0x00, 0x00, 0x00 }, /* џ to d */ + { 0x1E02, 0x42, 0x00, 0x00, 0x00 }, /* Ḃ to B */ + { 0x1E03, 0x62, 0x00, 0x00, 0x00 }, /* ḃ to b */ + { 0x1E0A, 0x44, 0x00, 0x00, 0x00 }, /* Ḋ to D */ + { 0x1E0B, 0x64, 0x00, 0x00, 0x00 }, /* ḋ to d */ + { 0x1E1E, 0x46, 0x00, 0x00, 0x00 }, /* Ḟ to F */ + { 0x1E1F, 0x66, 0x00, 0x00, 0x00 }, /* ḟ to f */ + { 0x1E40, 0x4D, 0x00, 0x00, 0x00 }, /* Ṁ to M */ + { 0x1E41, 0x6D, 0x00, 0x00, 0x00 }, /* ṁ to m */ + { 0x1E56, 0x50, 0x00, 0x00, 0x00 }, /* Ṗ to P */ + { 0x1E57, 0x70, 0x00, 0x00, 0x00 }, /* ṗ to p */ + { 0x1E60, 0x53, 0x00, 0x00, 0x00 }, /* Ṡ to S */ + { 0x1E61, 0x73, 0x00, 0x00, 0x00 }, /* ṡ to s */ + { 0x1E6A, 0x54, 0x00, 0x00, 0x00 }, /* Ṫ to T */ + { 0x1E6B, 0x74, 0x00, 0x00, 0x00 }, /* ṫ to t */ + { 0x1E80, 0x57, 0x00, 0x00, 0x00 }, /* Ẁ to W */ + { 0x1E81, 0x77, 0x00, 0x00, 0x00 }, /* ẁ to w */ + { 0x1E82, 0x57, 0x00, 0x00, 0x00 }, /* Ẃ to W */ + { 0x1E83, 0x77, 0x00, 0x00, 0x00 }, /* ẃ to w */ + { 0x1E84, 0x57, 0x00, 0x00, 0x00 }, /* Ẅ to W */ + { 0x1E85, 0x77, 0x00, 0x00, 0x00 }, /* ẅ to w */ + { 0x1EF2, 0x59, 0x00, 0x00, 0x00 }, /* Ỳ to Y */ + { 0x1EF3, 0x79, 0x00, 0x00, 0x00 }, /* ỳ to y */ + { 0xFB00, 0x66, 0x66, 0x00, 0x00 }, /* ff to ff */ + { 0xFB01, 0x66, 0x69, 0x00, 0x00 }, /* fi to fi */ + { 0xFB02, 0x66, 0x6C, 0x00, 0x00 }, /* fl to fl */ + { 0xFB05, 0x73, 0x74, 0x00, 0x00 }, /* ſt to st */ + { 0xFB06, 0x73, 0x74, 0x00, 0x00 }, /* st to st */ }; +static const Transliteration *spellfixFindTranslit(int c, int *pxTop){ + *pxTop = (sizeof(translit)/sizeof(translit[0])) - 1; + return translit; +} + /* ** Convert the input string from UTF-8 into pure ASCII by converting ** all non-ASCII characters to some combination of characters in the @@ -1621,23 +1629,24 @@ static unsigned char *transliterate(const unsigned char *zIn, int nIn){ zOut[nOut++] = (unsigned char)c; }else{ int xTop, xBtm, x; - xTop = sizeof(translit)/sizeof(translit[0]) - 1; + const Transliteration *tbl = spellfixFindTranslit(c, &xTop); xBtm = 0; while( xTop>=xBtm ){ x = (xTop + xBtm)/2; - if( translit[x].cFrom==c ){ - zOut[nOut++] = translit[x].cTo0; - if( translit[x].cTo1 ){ - zOut[nOut++] = translit[x].cTo1; - /* Add an extra "ch" after the "sh" for Щ and щ */ - if( c==0x0429 || c== 0x0449 ){ - zOut[nOut++] = 'c'; - zOut[nOut++] = 'h'; + if( tbl[x].cFrom==c ){ + zOut[nOut++] = tbl[x].cTo0; + if( tbl[x].cTo1 ){ + zOut[nOut++] = tbl[x].cTo1; + if( tbl[x].cTo2 ){ + zOut[nOut++] = tbl[x].cTo2; + if( tbl[x].cTo3 ){ + zOut[nOut++] = tbl[x].cTo3; + } } } c = 0; break; - }else if( translit[x].cFrom>c ){ + }else if( tbl[x].cFrom>c ){ xTop = x-1; }else{ xBtm = x+1; @@ -1668,15 +1677,22 @@ static int translen_to_charlen(const char *zIn, int nIn, int nTrans){ nOut++; if( c>=128 ){ int xTop, xBtm, x; - xTop = sizeof(translit)/sizeof(translit[0]) - 1; + const Transliteration *tbl = spellfixFindTranslit(c, &xTop); xBtm = 0; while( xTop>=xBtm ){ x = (xTop + xBtm)/2; - if( translit[x].cFrom==c ){ - if( translit[x].cTo1 ) nOut++; - if( c==0x0429 || c== 0x0449 ) nOut += 2; + if( tbl[x].cFrom==c ){ + if( tbl[x].cTo1 ){ + nOut++; + if( tbl[x].cTo2 ){ + nOut++; + if( tbl[x].cTo3 ){ + nOut++; + } + } + } break; - }else if( translit[x].cFrom>c ){ + }else if( tbl[x].cFrom>c ){ xTop = x-1; }else{ xBtm = x+1; diff --git a/manifest b/manifest index e20e592681..db881f438c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Rearrange\sfields\sof\sthe\sBtCursor\sobject\sso\sthat\sit\sis\ssmaller\sand\srequires\sless\ninitialization,\sfor\sa\ssmall\sperformance\simprovement. -D 2018-01-24T16:04:21.587 +C Reorganize\sspellfix.c\sto\smake\sit\seasier\sto\sedit\sautomatically\s(e.g.\susing\sa\s\nscript).\sNo\schanges\sto\sfunctionality. +D 2018-01-24T18:28:39.575 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F Makefile.in 7a3f714b4fcf793108042b7b0a5c720b0b310ec84314d61ba7f3f49f27e550ea @@ -294,7 +294,7 @@ F ext/misc/series.c f3c0dba5c5c749ce1782b53076108f87cf0b71041eb6023f727a9c50681d F ext/misc/sha1.c 0b9e9b855354910d3ca467bf39099d570e73db56 F ext/misc/shathree.c 9e960ba50483214c6a7a4b1517f8d8cef799e9db381195178c3fd3ad207e10c0 F ext/misc/showauth.c 732578f0fe4ce42d577e1c86dc89dd14a006ab52 -F ext/misc/spellfix.c 41cf26c6b89fcaa8798ae10ae64d39c1f1d9d6995152e545bd491c13058b8fac +F ext/misc/spellfix.c 705ce1ed701b38f43c8ee7dd848877ff51c09eda322e39ccac6873adb4b74735 F ext/misc/sqlar.c 57d5bc45cd5492208e451f697404be88f8612527d64c9d42f96b325b64983d74 F ext/misc/stmt.c 6f16443abb3551e3f5813bb13ba19a30e7032830015b0f92fe0c0453045c0a11 F ext/misc/totype.c 4a167594e791abeed95e0a8db028822b5e8fe512 @@ -1702,7 +1702,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 16ebe558d2f0982f630c39b6c1b00d14d27011d454cced213f17c26491a2c06e -R fc9e02eb09b9540ef2f7fa9944ac0566 -U drh -Z c7e32730b85a19ed6c94c7a445674e7d +P 0ddf5292cc0411ec6fcb7399ecf2904c899e0488404d3f65490fbe1db15efdf4 +R 9f30c4a7d442a62a313819b7586e7fb7 +U dan +Z 5b6a5ad739adf30d436b457769ddc2cb diff --git a/manifest.uuid b/manifest.uuid index 1dd418e847..795b17508a 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -0ddf5292cc0411ec6fcb7399ecf2904c899e0488404d3f65490fbe1db15efdf4 \ No newline at end of file +090a64faaac579c6ed1cddb02d14e7089468a4aeaeec11e78d461790bcb6eb0a \ No newline at end of file