src/fl_utf.c: fixed UCS handling up to and including 0x10FFFF [STR 2349]
also enabled doxygen comments for ERRORS_TO_CP1252, STRICT_RFC3629 and ERRORS_TO_ISO8859_1 preprocessor #defines, and updated other documentation. git-svn-id: file:///fltk/svn/fltk/branches/branch-1.3@7609 ea41ed52-d2ee-0310-a9c1-e6b18d33e121
This commit is contained in:
parent
c1fbbf03ac
commit
20a837c756
19
src/fl_utf.c
19
src/fl_utf.c
@ -68,17 +68,17 @@
|
|||||||
/** @} */
|
/** @} */
|
||||||
#endif /* 0 */
|
#endif /* 0 */
|
||||||
|
|
||||||
/* Set to 1 to turn bad UTF8 bytes into ISO-8859-1. If this is set to zero
|
/*!Set to 1 to turn bad UTF8 bytes into ISO-8859-1. If this is set to zero
|
||||||
they are instead turned into the Unicode REPLACEMENT CHARACTER, of
|
they are instead turned into the Unicode REPLACEMENT CHARACTER, of
|
||||||
value 0xfffd.
|
value 0xfffd.
|
||||||
If this is on fl_utf8decode will correctly map most (perhaps all)
|
If this is on fl_utf8decode() will correctly map most (perhaps all)
|
||||||
human-readable text that is in ISO-8859-1. This may allow you
|
human-readable text that is in ISO-8859-1. This may allow you
|
||||||
to completely ignore character sets in your code because virtually
|
to completely ignore character sets in your code because virtually
|
||||||
everything is either ISO-8859-1 or UTF-8.
|
everything is either ISO-8859-1 or UTF-8.
|
||||||
*/
|
*/
|
||||||
#define ERRORS_TO_ISO8859_1 1
|
#define ERRORS_TO_ISO8859_1 1
|
||||||
|
|
||||||
/* Set to 1 to turn bad UTF8 bytes in the 0x80-0x9f range into the
|
/*!Set to 1 to turn bad UTF8 bytes in the 0x80-0x9f range into the
|
||||||
Unicode index for Microsoft's CP1252 character set. You should
|
Unicode index for Microsoft's CP1252 character set. You should
|
||||||
also set ERRORS_TO_ISO8859_1. With this, a far larger amount of
|
also set ERRORS_TO_ISO8859_1. With this, a far larger amount of
|
||||||
available text (such as all web pages) is correctly converted
|
available text (such as all web pages) is correctly converted
|
||||||
@ -86,7 +86,7 @@
|
|||||||
*/
|
*/
|
||||||
#define ERRORS_TO_CP1252 1
|
#define ERRORS_TO_CP1252 1
|
||||||
|
|
||||||
/* A number of Unicode code points are in fact illegal and should not
|
/*!A number of Unicode code points are in fact illegal and should not
|
||||||
be produced by a UTF-8 converter. Turning this on will replace the
|
be produced by a UTF-8 converter. Turning this on will replace the
|
||||||
bytes in those encodings with errors. If you do this then converting
|
bytes in those encodings with errors. If you do this then converting
|
||||||
arbitrary 16-bit data to UTF-8 and then back is not an identity,
|
arbitrary 16-bit data to UTF-8 and then back is not an identity,
|
||||||
@ -286,7 +286,7 @@ int fl_utf8bytes(unsigned ucs) {
|
|||||||
return 2;
|
return 2;
|
||||||
} else if (ucs < 0x010000U) {
|
} else if (ucs < 0x010000U) {
|
||||||
return 3;
|
return 3;
|
||||||
} else if (ucs < 0x10ffffU) {
|
} else if (ucs <= 0x10ffffU) {
|
||||||
return 4;
|
return 4;
|
||||||
} else {
|
} else {
|
||||||
return 3; /* length of the illegal character encoding */
|
return 3; /* length of the illegal character encoding */
|
||||||
@ -322,7 +322,7 @@ int fl_utf8encode(unsigned ucs, char* buf) {
|
|||||||
buf[1] = 0x80 | ((ucs >> 6) & 0x3F);
|
buf[1] = 0x80 | ((ucs >> 6) & 0x3F);
|
||||||
buf[2] = 0x80 | (ucs & 0x3F);
|
buf[2] = 0x80 | (ucs & 0x3F);
|
||||||
return 3;
|
return 3;
|
||||||
} else if (ucs < 0x0010ffffU) {
|
} else if (ucs <= 0x0010ffffU) {
|
||||||
buf[0] = 0xf0 | (ucs >> 18);
|
buf[0] = 0xf0 | (ucs >> 18);
|
||||||
buf[1] = 0x80 | ((ucs >> 12) & 0x3F);
|
buf[1] = 0x80 | ((ucs >> 12) & 0x3F);
|
||||||
buf[2] = 0x80 | ((ucs >> 6) & 0x3F);
|
buf[2] = 0x80 | ((ucs >> 6) & 0x3F);
|
||||||
@ -868,13 +868,14 @@ int fl_utf8test(const char* src, unsigned srclen) {
|
|||||||
\param [in] ucs Unicode character value
|
\param [in] ucs Unicode character value
|
||||||
\returns width of character in columns
|
\returns width of character in columns
|
||||||
|
|
||||||
This is an implementation of wcwidth() and wcswidth()
|
See http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c for Markus Kuhn's
|
||||||
|
original implementation of wcwidth() and wcswidth()
|
||||||
(defined in IEEE Std 1003.1-2001) for Unicode.
|
(defined in IEEE Std 1003.1-2001) for Unicode.
|
||||||
See http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
|
|
||||||
|
|
||||||
WARNING: this function returns widths for "raw" Unicode characters.
|
\b WARNING: this function returns widths for "raw" Unicode characters.
|
||||||
It does not even try to map C1 control characters (0x80 to 0x9F) to
|
It does not even try to map C1 control characters (0x80 to 0x9F) to
|
||||||
CP1252, and C0/C1 control characters and DEL will return -1.
|
CP1252, and C0/C1 control characters and DEL will return -1.
|
||||||
|
You are advised to use fl_width(const char* src) instead.
|
||||||
*/
|
*/
|
||||||
int fl_wcwidth_(unsigned int ucs) {
|
int fl_wcwidth_(unsigned int ucs) {
|
||||||
return mk_wcwidth(ucs);
|
return mk_wcwidth(ucs);
|
||||||
|
Loading…
Reference in New Issue
Block a user