From d2391b2dfdf3b9f43616a9822f82e39376639a86 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Thu, 3 Oct 2019 00:28:08 +0100 Subject: [PATCH] make the use of libutf8proc conditional allow the browser to be compiled without libutf8proc. Without this library safe utf8 conversion and verification is not possible so the browser will lose the ability to navigate to addresses using the idna punycode scheme. --- Makefile | 2 +- Makefile.defaults | 12 +- utils/idna.c | 431 +++++++++++++++++++++++++++------------------- utils/idna.h | 9 +- 4 files changed, 269 insertions(+), 185 deletions(-) diff --git a/Makefile b/Makefile index b2e89ac5c..ebde9dc93 100644 --- a/Makefile +++ b/Makefile @@ -528,7 +528,6 @@ $(eval $(call feature_switch,DUKTAPE,Javascript (Duktape),,,,,)) $(eval $(call pkg_config_find_and_add,libcss,CSS)) $(eval $(call pkg_config_find_and_add,libdom,DOM)) $(eval $(call pkg_config_find_and_add,libnsutils,nsutils)) -$(eval $(call pkg_config_find_and_add,libutf8proc,utf8proc)) # Common libraries without pkg-config support LDFLAGS += -lz @@ -554,6 +553,7 @@ else endif $(eval $(call pkg_config_find_and_add_enabled,OPENSSL,openssl,OpenSSL)) +$(eval $(call pkg_config_find_and_add_enabled,UTF8PROC,libutf8proc,utf8)) $(eval $(call pkg_config_find_and_add_enabled,WEBP,libwebp,WEBP)) $(eval $(call pkg_config_find_and_add_enabled,PNG,libpng,PNG)) $(eval $(call pkg_config_find_and_add_enabled,BMP,libnsbmp,BMP)) diff --git a/Makefile.defaults b/Makefile.defaults index 31d4a424c..c7edf7b7d 100644 --- a/Makefile.defaults +++ b/Makefile.defaults @@ -89,6 +89,14 @@ NETSURF_BUILTIN_LOG_FILTER := "(level:WARNING || cat:jserrors)" # if the logging level is set to verbose NETSURF_BUILTIN_VERBOSE_FILTER := "(level:VERBOSE || cat:jserrors)" +# Force using glibc internal iconv implementation instead of external libiconv +# Valid options: YES, NO +NETSURF_USE_LIBICONV_PLUG := YES + +# Enable use of utf8proc for international domain name processing +# Valid options: YES, NO, AUTO 
(highly recommended) +NETSURF_USE_UTF8PROC := YES + # Enable stripping the NetSurf binary # Valid options: YES, NO NETSURF_STRIP_BINARY := NO @@ -104,10 +112,6 @@ NETSURF_UA_FORMAT_STRING := "NetSurf/%d.%d (%s)" # riscos/gui.c NETSURF_HOMEPAGE := "about:welcome" -# Force using glibc internal iconv implementation instead of external libiconv -# Valid options: YES, NO -NETSURF_USE_LIBICONV_PLUG := YES - # Enable building the source object cache filesystem based backing store. # implementation. # Valid options: YES, NO diff --git a/utils/idna.c b/utils/idna.c index d657f90e2..f00631635 100644 --- a/utils/idna.c +++ b/utils/idna.c @@ -27,7 +27,6 @@ #include #include #include -#include #include "netsurf/inttypes.h" @@ -40,14 +39,6 @@ #include "utils/utils.h" -int32_t idna_contexto[] = { - /* CONTEXTO codepoints which have a rule defined */ - 0x00b7, 0x0375, 0x05f3, 0x05f4, 0x30fb, 0x0660, 0x0661, - 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, - 0x0669, 0x06f0, 0x06f1, 0x06f2, 0x06f3, 0x06f4, 0x06f5, - 0x06f6, 0x06f7, 0x06f8, 0x06f9, 0 -}; - /** * Convert punycode status into nserror. * @@ -84,6 +75,108 @@ static nserror punycode_status_to_nserror(enum punycode_status status) return ret; } + +/** + * Convert a host label in UCS-4 to an ACE version + * + * \param ucs4_label UCS-4 NFC string containing host label + * \param len Length of host label (in characters/codepoints) + * \param ace_label ASCII-compatible encoded version + * \param out_len Length of ace_label + * \return NSERROR_OK on success, appropriate error otherwise + * + * If return value != NSERROR_OK, output will be left untouched. 
+ */ +static nserror +idna__ucs4_to_ace(int32_t *ucs4_label, + size_t len, + char **ace_label, + size_t *out_len) +{ + char punycode[65]; /* max length of host label + NULL */ + size_t output_length = 60; /* punycode length - 4 - 1 */ + nserror ret; + + punycode[0] = 'x'; + punycode[1] = 'n'; + punycode[2] = '-'; + punycode[3] = '-'; + + ret = punycode_status_to_nserror(punycode_encode(len, + (const punycode_uint *)ucs4_label, NULL, + &output_length, punycode + 4)); + if (ret != NSERROR_OK) { + return ret; + } + + output_length += SLEN("xn--"); + punycode[output_length] = '\0'; + + *ace_label = strdup(punycode); + *out_len = output_length; + + return NSERROR_OK; +} + + +/** + * Convert a host label in ACE format to UCS-4 + * + * \param ace_label ASCII string containing host label + * \param ace_len Length of host label + * \param ucs4_label Pointer to hold UCS4 decoded version + * \param ucs4_len Pointer to hold length of ucs4_label + * \return NSERROR_OK on success, appropriate error otherwise + * + * If return value != NSERROR_OK, output will be left untouched. 
+ */ +static nserror +idna__ace_to_ucs4(const char *ace_label, + size_t ace_len, + int32_t **ucs4_label, + size_t *ucs4_len) +{ + int32_t *ucs4; + nserror ret; + size_t output_length = ace_len; /* never exceeds input length */ + + /* The header should always have been checked before calling */ + assert((ace_label[0] == 'x') && (ace_label[1] == 'n') && + (ace_label[2] == '-') && (ace_label[3] == '-')); + + ucs4 = malloc(output_length * 4); + if (ucs4 == NULL) { + return NSERROR_NOMEM; + } + + ret = punycode_status_to_nserror(punycode_decode(ace_len - 4, + ace_label + 4, &output_length, (punycode_uint *)ucs4, NULL)); + if (ret != NSERROR_OK) { + free(ucs4); + return ret; + } + + ucs4[output_length] = '\0'; + + *ucs4_label = ucs4; + *ucs4_len = output_length; + + return NSERROR_OK; +} + + +#ifdef WITH_UTF8PROC + +#include + +int32_t idna_contexto[] = { + /* CONTEXTO codepoints which have a rule defined */ + 0x00b7, 0x0375, 0x05f3, 0x05f4, 0x30fb, 0x0660, 0x0661, + 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, + 0x0669, 0x06f0, 0x06f1, 0x06f2, 0x06f3, 0x06f4, 0x06f5, + 0x06f6, 0x06f7, 0x06f8, 0x06f9, 0 +}; + /** * Find the IDNA property of a UCS-4 codepoint * @@ -305,119 +398,6 @@ idna__ucs4_to_utf8(const int32_t *ucs4_label, } -/** - * Convert a host label in UCS-4 to an ACE version - * - * \param ucs4_label UCS-4 NFC string containing host label - * \param len Length of host label (in characters/codepoints) - * \param ace_label ASCII-compatible encoded version - * \param out_len Length of ace_label - * \return NSERROR_OK on success, appropriate error otherwise - * - * If return value != NSERROR_OK, output will be left untouched. 
- */ -static nserror -idna__ucs4_to_ace(int32_t *ucs4_label, - size_t len, - char **ace_label, - size_t *out_len) -{ - char punycode[65]; /* max length of host label + NULL */ - size_t output_length = 60; /* punycode length - 4 - 1 */ - nserror ret; - - punycode[0] = 'x'; - punycode[1] = 'n'; - punycode[2] = '-'; - punycode[3] = '-'; - - ret = punycode_status_to_nserror(punycode_encode(len, - (const punycode_uint *)ucs4_label, NULL, - &output_length, punycode + 4)); - if (ret != NSERROR_OK) { - return ret; - } - - output_length += SLEN("xn--"); - punycode[output_length] = '\0'; - - *ace_label = strdup(punycode); - *out_len = output_length; - - return NSERROR_OK; -} - - -/** - * Convert a host label in ACE format to UCS-4 - * - * \param ace_label ASCII string containing host label - * \param ace_len Length of host label - * \param ucs4_label Pointer to hold UCS4 decoded version - * \param ucs4_len Pointer to hold length of ucs4_label - * \return NSERROR_OK on success, appropriate error otherwise - * - * If return value != NSERROR_OK, output will be left untouched. 
- */ -static nserror -idna__ace_to_ucs4(const char *ace_label, - size_t ace_len, - int32_t **ucs4_label, - size_t *ucs4_len) -{ - int32_t *ucs4; - nserror ret; - size_t output_length = ace_len; /* never exceeds input length */ - - /* The header should always have been checked before calling */ - assert((ace_label[0] == 'x') && (ace_label[1] == 'n') && - (ace_label[2] == '-') && (ace_label[3] == '-')); - - ucs4 = malloc(output_length * 4); - if (ucs4 == NULL) { - return NSERROR_NOMEM; - } - - ret = punycode_status_to_nserror(punycode_decode(ace_len - 4, - ace_label + 4, &output_length, (punycode_uint *)ucs4, NULL)); - if (ret != NSERROR_OK) { - free(ucs4); - return ret; - } - - ucs4[output_length] = '\0'; - - *ucs4_label = ucs4; - *ucs4_len = output_length; - - return NSERROR_OK; -} - - -/** - * Find the length of a host label - * - * \param host String containing a host or FQDN - * \param max_length Length of host string to search (in bytes) - * \return Distance to next separator character or end of string - */ -static size_t idna__host_label_length(const char *host, size_t max_length) -{ - const char *p = host; - size_t length = 0; - - while (length < max_length) { - if ((*p == '.') || (*p == ':') || (*p == '\0')) { - break; - } - length++; - p++; - } - - return length; -} - - /** * Check if a host label is valid for IDNA2008 * @@ -505,6 +485,155 @@ static bool idna__is_valid(int32_t *label, size_t len) } +/** + * Verify an ACE label is valid + * + * \param label Host label to check + * \param len Length of label + * \return true if valid, false otherwise + */ +static bool idna__verify(const char *label, size_t len) +{ + nserror error; + int32_t *ucs4; + char *ace; + ssize_t ucs4_len; + size_t u_ucs4_len, ace_len; + + /* Convert our ACE label back to UCS-4 */ + error = idna__ace_to_ucs4(label, len, &ucs4, &u_ucs4_len); + if (error != NSERROR_OK) { + return false; + } + + /* Perform NFC normalisation */ + ucs4_len = utf8proc_normalize_utf32(ucs4, u_ucs4_len, + 
UTF8PROC_STABLE | UTF8PROC_COMPOSE); + if (ucs4_len < 0) { + free(ucs4); + return false; + } + + /* Convert the UCS-4 label back to ACE */ + error = idna__ucs4_to_ace(ucs4, (size_t)ucs4_len, + &ace, &ace_len); + free(ucs4); + if (error != NSERROR_OK) { + return false; + } + + /* Check if it matches the input */ + if ((len == ace_len) && (strncmp(label, ace, len) == 0)) { + free(ace); + return true; + } + + NSLOG(netsurf, INFO, "Re-encoded ACE label %s does not match input", + ace); + free(ace); + + return false; +} + + +#else /* WITH_UTF8PROC */ + + +/** + * Convert a UTF-8 string to UCS-4 + * + * \param utf8_label UTF-8 string containing host label + * \param len Length of host label (in bytes) + * \param ucs4_label Pointer to update with the output + * \param ucs4_len Pointer to update with the length + * \return NSERROR_OK on success, appropriate error otherwise + * + * If return value != NSERROR_OK, output will be left untouched. + */ +static nserror +idna__utf8_to_ucs4(const char *utf8_label, + size_t len, + int32_t **ucs4_label, + size_t *ucs4_len) +{ + return NSERROR_NOT_IMPLEMENTED; +} + + +/** + * Convert a UCS-4 string to UTF-8 + * + * \param ucs4_label UCS-4 string containing host label + * \param ucs4_len Length of host label (in bytes) + * \param utf8_label Pointer to update with the output + * \param utf8_len Pointer to update with the length + * \return NSERROR_OK on success, appropriate error otherwise + * + * If return value != NSERROR_OK, output will be left untouched. 
+ */ +static nserror +idna__ucs4_to_utf8(const int32_t *ucs4_label, + size_t ucs4_len, + char **utf8_label, + size_t *utf8_len) +{ + return NSERROR_NOT_IMPLEMENTED; +} + + +/** + * Check if a host label is valid for IDNA2008 + * + * \param label Host label to check (UCS-4) + * \param len Length of host label (in characters/codepoints) + * \return true if compliant, false otherwise + */ +static bool idna__is_valid(int32_t *label, size_t len) +{ + return true; +} + + +/** + * Verify an ACE label is valid + * + * \param label Host label to check + * \param len Length of label + * \return true if valid, false otherwise + */ +static bool idna__verify(const char *label, size_t len) +{ + return true; +} + + +#endif /* WITH_UTF8PROC */ + + +/** + * Find the length of a host label + * + * \param host String containing a host or FQDN + * \param max_length Length of host string to search (in bytes) + * \return Distance to next separator character or end of string + */ +static size_t idna__host_label_length(const char *host, size_t max_length) +{ + const char *p = host; + size_t length = 0; + + while (length < max_length) { + if ((*p == '.') || (*p == ':') || (*p == '\0')) { + break; + } + length++; + p++; + } + + return length; +} + + /** * Check if a host label is LDH * @@ -560,57 +689,6 @@ static bool idna__is_ace(const char *label, size_t len) } -/** - * Verify an ACE label is valid - * - * \param label Host label to check - * \param len Length of label - * \return true if valid, false otherwise - */ -static bool idna__verify(const char *label, size_t len) -{ - nserror error; - int32_t *ucs4; - char *ace; - ssize_t ucs4_len; - size_t u_ucs4_len, ace_len; - - /* Convert our ACE label back to UCS-4 */ - error = idna__ace_to_ucs4(label, len, &ucs4, &u_ucs4_len); - if (error != NSERROR_OK) { - return false; - } - - /* Perform NFC normalisation */ - ucs4_len = utf8proc_normalize_utf32(ucs4, u_ucs4_len, - UTF8PROC_STABLE | UTF8PROC_COMPOSE); - if (ucs4_len < 0) { - free(ucs4); - 
return false; - } - - /* Convert the UCS-4 label back to ACE */ - error = idna__ucs4_to_ace(ucs4, (size_t)ucs4_len, - &ace, &ace_len); - free(ucs4); - if (error != NSERROR_OK) { - return false; - } - - /* Check if it matches the input */ - if ((len == ace_len) && (strncmp(label, ace, len) == 0)) { - free(ace); - return true; - } - - NSLOG(netsurf, INFO, "Re-encoded ACE label %s does not match input", - ace); - free(ace); - - return false; -} - - /* exported interface documented in idna.h */ nserror idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_len) @@ -631,8 +709,9 @@ idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_len) /* This string is IDN or invalid */ /* Convert to Unicode */ - if ((error = idna__utf8_to_ucs4(host, label_len, - &ucs4_host, &ucs4_len)) != NSERROR_OK) { + error = idna__utf8_to_ucs4(host, label_len, + &ucs4_host, &ucs4_len); + if (error != NSERROR_OK) { return error; } @@ -710,7 +789,7 @@ idna_decode(const char *ace_host, size_t ace_len, char **host, size_t *host_len) /* Decode to Unicode */ error = idna__ace_to_ucs4(ace_host, label_len, - &ucs4_host, &ucs4_len); + &ucs4_host, &ucs4_len); if (error != NSERROR_OK) { return error; } diff --git a/utils/idna.h b/utils/idna.h index 1fb344730..efc73eb72 100644 --- a/utils/idna.h +++ b/utils/idna.h @@ -16,12 +16,13 @@ * along with this program. If not, see . */ -/** \file - * NetSurf international domain name handling (interface). +/** + * \file + * interface to international domain name handling. */ -#ifndef _NETSURF_UTILS_IDNA_H_ -#define _NETSURF_UTILS_IDNA_H_ +#ifndef NETSURF_UTILS_IDNA_H_ +#define NETSURF_UTILS_IDNA_H_ /** * Unicode canonical combining class for virama