mirror of
https://github.com/netsurf-browser/netsurf
synced 2024-12-22 12:12:35 +03:00
make the use of libutf8proc conditional
Allow the browser to be compiled without libutf8proc. Without this library, safe UTF-8 conversion and verification are not possible, so the browser will lose the ability to navigate to addresses using the IDNA punycode scheme.
This commit is contained in:
parent
79e406d951
commit
d2391b2dfd
2
Makefile
2
Makefile
@ -528,7 +528,6 @@ $(eval $(call feature_switch,DUKTAPE,Javascript (Duktape),,,,,))
|
||||
$(eval $(call pkg_config_find_and_add,libcss,CSS))
|
||||
$(eval $(call pkg_config_find_and_add,libdom,DOM))
|
||||
$(eval $(call pkg_config_find_and_add,libnsutils,nsutils))
|
||||
$(eval $(call pkg_config_find_and_add,libutf8proc,utf8proc))
|
||||
|
||||
# Common libraries without pkg-config support
|
||||
LDFLAGS += -lz
|
||||
@ -554,6 +553,7 @@ else
|
||||
endif
|
||||
$(eval $(call pkg_config_find_and_add_enabled,OPENSSL,openssl,OpenSSL))
|
||||
|
||||
$(eval $(call pkg_config_find_and_add_enabled,UTF8PROC,libutf8proc,utf8))
|
||||
$(eval $(call pkg_config_find_and_add_enabled,WEBP,libwebp,WEBP))
|
||||
$(eval $(call pkg_config_find_and_add_enabled,PNG,libpng,PNG))
|
||||
$(eval $(call pkg_config_find_and_add_enabled,BMP,libnsbmp,BMP))
|
||||
|
@ -89,6 +89,14 @@ NETSURF_BUILTIN_LOG_FILTER := "(level:WARNING || cat:jserrors)"
|
||||
# if the logging level is set to verbose
|
||||
NETSURF_BUILTIN_VERBOSE_FILTER := "(level:VERBOSE || cat:jserrors)"
|
||||
|
||||
# Force using glibc internal iconv implementation instead of external libiconv
|
||||
# Valid options: YES, NO
|
||||
NETSURF_USE_LIBICONV_PLUG := YES
|
||||
|
||||
# Enable use of utf8proc for international domain name processing
|
||||
# Valid options: YES, NO, AUTO (highly recommended)
|
||||
NETSURF_USE_UTF8PROC := YES
|
||||
|
||||
# Enable stripping the NetSurf binary
|
||||
# Valid options: YES, NO
|
||||
NETSURF_STRIP_BINARY := NO
|
||||
@ -104,10 +112,6 @@ NETSURF_UA_FORMAT_STRING := "NetSurf/%d.%d (%s)"
|
||||
# riscos/gui.c
|
||||
NETSURF_HOMEPAGE := "about:welcome"
|
||||
|
||||
# Force using glibc internal iconv implementation instead of external libiconv
|
||||
# Valid options: YES, NO
|
||||
NETSURF_USE_LIBICONV_PLUG := YES
|
||||
|
||||
# Enable building the source object cache filesystem based backing store.
|
||||
# implementation.
|
||||
# Valid options: YES, NO
|
||||
|
431
utils/idna.c
431
utils/idna.c
@ -27,7 +27,6 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <libutf8proc/utf8proc.h>
|
||||
|
||||
#include "netsurf/inttypes.h"
|
||||
|
||||
@ -40,14 +39,6 @@
|
||||
#include "utils/utils.h"
|
||||
|
||||
|
||||
int32_t idna_contexto[] = {
|
||||
/* CONTEXTO codepoints which have a rule defined */
|
||||
0x00b7, 0x0375, 0x05f3, 0x05f4, 0x30fb, 0x0660, 0x0661,
|
||||
0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668,
|
||||
0x0669, 0x06f0, 0x06f1, 0x06f2, 0x06f3, 0x06f4, 0x06f5,
|
||||
0x06f6, 0x06f7, 0x06f8, 0x06f9, 0
|
||||
};
|
||||
|
||||
/**
|
||||
* Convert punycode status into nserror.
|
||||
*
|
||||
@ -84,6 +75,108 @@ static nserror punycode_status_to_nserror(enum punycode_status status)
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a host label in UCS-4 to an ACE version
|
||||
*
|
||||
* \param ucs4_label UCS-4 NFC string containing host label
|
||||
* \param len Length of host label (in characters/codepoints)
|
||||
* \param ace_label ASCII-compatible encoded version
|
||||
* \param out_len Length of ace_label
|
||||
* \return NSERROR_OK on success, appropriate error otherwise
|
||||
*
|
||||
* If return value != NSERROR_OK, output will be left untouched.
|
||||
*/
|
||||
static nserror
|
||||
idna__ucs4_to_ace(int32_t *ucs4_label,
|
||||
size_t len,
|
||||
char **ace_label,
|
||||
size_t *out_len)
|
||||
{
|
||||
char punycode[65]; /* max length of host label + NULL */
|
||||
size_t output_length = 60; /* punycode length - 4 - 1 */
|
||||
nserror ret;
|
||||
|
||||
punycode[0] = 'x';
|
||||
punycode[1] = 'n';
|
||||
punycode[2] = '-';
|
||||
punycode[3] = '-';
|
||||
|
||||
ret = punycode_status_to_nserror(punycode_encode(len,
|
||||
(const punycode_uint *)ucs4_label, NULL,
|
||||
&output_length, punycode + 4));
|
||||
if (ret != NSERROR_OK) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
output_length += SLEN("xn--");
|
||||
punycode[output_length] = '\0';
|
||||
|
||||
*ace_label = strdup(punycode);
|
||||
*out_len = output_length;
|
||||
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a host label in ACE format to UCS-4
|
||||
*
|
||||
* \param ace_label ASCII string containing host label
|
||||
* \param ace_len Length of host label
|
||||
* \param ucs4_label Pointer to hold UCS4 decoded version
|
||||
* \param ucs4_len Pointer to hold length of ucs4_label
|
||||
* \return NSERROR_OK on success, appropriate error otherwise
|
||||
*
|
||||
* If return value != NSERROR_OK, output will be left untouched.
|
||||
*/
|
||||
static nserror
|
||||
idna__ace_to_ucs4(const char *ace_label,
|
||||
size_t ace_len,
|
||||
int32_t **ucs4_label,
|
||||
size_t *ucs4_len)
|
||||
{
|
||||
int32_t *ucs4;
|
||||
nserror ret;
|
||||
size_t output_length = ace_len; /* never exceeds input length */
|
||||
|
||||
/* The header should always have been checked before calling */
|
||||
assert((ace_label[0] == 'x') && (ace_label[1] == 'n') &&
|
||||
(ace_label[2] == '-') && (ace_label[3] == '-'));
|
||||
|
||||
ucs4 = malloc(output_length * 4);
|
||||
if (ucs4 == NULL) {
|
||||
return NSERROR_NOMEM;
|
||||
}
|
||||
|
||||
ret = punycode_status_to_nserror(punycode_decode(ace_len - 4,
|
||||
ace_label + 4, &output_length, (punycode_uint *)ucs4, NULL));
|
||||
if (ret != NSERROR_OK) {
|
||||
free(ucs4);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ucs4[output_length] = '\0';
|
||||
|
||||
*ucs4_label = ucs4;
|
||||
*ucs4_len = output_length;
|
||||
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
|
||||
#ifdef WITH_UTF8PROC
|
||||
|
||||
#include <libutf8proc/utf8proc.h>
|
||||
|
||||
int32_t idna_contexto[] = {
|
||||
/* CONTEXTO codepoints which have a rule defined */
|
||||
0x00b7, 0x0375, 0x05f3, 0x05f4, 0x30fb, 0x0660, 0x0661,
|
||||
0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668,
|
||||
0x0669, 0x06f0, 0x06f1, 0x06f2, 0x06f3, 0x06f4, 0x06f5,
|
||||
0x06f6, 0x06f7, 0x06f8, 0x06f9, 0
|
||||
};
|
||||
|
||||
/**
|
||||
* Find the IDNA property of a UCS-4 codepoint
|
||||
*
|
||||
@ -305,119 +398,6 @@ idna__ucs4_to_utf8(const int32_t *ucs4_label,
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a host label in UCS-4 to an ACE version
|
||||
*
|
||||
* \param ucs4_label UCS-4 NFC string containing host label
|
||||
* \param len Length of host label (in characters/codepoints)
|
||||
* \param ace_label ASCII-compatible encoded version
|
||||
* \param out_len Length of ace_label
|
||||
* \return NSERROR_OK on success, appropriate error otherwise
|
||||
*
|
||||
* If return value != NSERROR_OK, output will be left untouched.
|
||||
*/
|
||||
static nserror
|
||||
idna__ucs4_to_ace(int32_t *ucs4_label,
|
||||
size_t len,
|
||||
char **ace_label,
|
||||
size_t *out_len)
|
||||
{
|
||||
char punycode[65]; /* max length of host label + NULL */
|
||||
size_t output_length = 60; /* punycode length - 4 - 1 */
|
||||
nserror ret;
|
||||
|
||||
punycode[0] = 'x';
|
||||
punycode[1] = 'n';
|
||||
punycode[2] = '-';
|
||||
punycode[3] = '-';
|
||||
|
||||
ret = punycode_status_to_nserror(punycode_encode(len,
|
||||
(const punycode_uint *)ucs4_label, NULL,
|
||||
&output_length, punycode + 4));
|
||||
if (ret != NSERROR_OK) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
output_length += SLEN("xn--");
|
||||
punycode[output_length] = '\0';
|
||||
|
||||
*ace_label = strdup(punycode);
|
||||
*out_len = output_length;
|
||||
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a host label in ACE format to UCS-4
|
||||
*
|
||||
* \param ace_label ASCII string containing host label
|
||||
* \param ace_len Length of host label
|
||||
* \param ucs4_label Pointer to hold UCS4 decoded version
|
||||
* \param ucs4_len Pointer to hold length of ucs4_label
|
||||
* \return NSERROR_OK on success, appropriate error otherwise
|
||||
*
|
||||
* If return value != NSERROR_OK, output will be left untouched.
|
||||
*/
|
||||
static nserror
|
||||
idna__ace_to_ucs4(const char *ace_label,
|
||||
size_t ace_len,
|
||||
int32_t **ucs4_label,
|
||||
size_t *ucs4_len)
|
||||
{
|
||||
int32_t *ucs4;
|
||||
nserror ret;
|
||||
size_t output_length = ace_len; /* never exceeds input length */
|
||||
|
||||
/* The header should always have been checked before calling */
|
||||
assert((ace_label[0] == 'x') && (ace_label[1] == 'n') &&
|
||||
(ace_label[2] == '-') && (ace_label[3] == '-'));
|
||||
|
||||
ucs4 = malloc(output_length * 4);
|
||||
if (ucs4 == NULL) {
|
||||
return NSERROR_NOMEM;
|
||||
}
|
||||
|
||||
ret = punycode_status_to_nserror(punycode_decode(ace_len - 4,
|
||||
ace_label + 4, &output_length, (punycode_uint *)ucs4, NULL));
|
||||
if (ret != NSERROR_OK) {
|
||||
free(ucs4);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ucs4[output_length] = '\0';
|
||||
|
||||
*ucs4_label = ucs4;
|
||||
*ucs4_len = output_length;
|
||||
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Find the length of a host label
|
||||
*
|
||||
* \param host String containing a host or FQDN
|
||||
* \param max_length Length of host string to search (in bytes)
|
||||
* \return Distance to next separator character or end of string
|
||||
*/
|
||||
static size_t idna__host_label_length(const char *host, size_t max_length)
|
||||
{
|
||||
const char *p = host;
|
||||
size_t length = 0;
|
||||
|
||||
while (length < max_length) {
|
||||
if ((*p == '.') || (*p == ':') || (*p == '\0')) {
|
||||
break;
|
||||
}
|
||||
length++;
|
||||
p++;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Check if a host label is valid for IDNA2008
|
||||
*
|
||||
@ -505,6 +485,155 @@ static bool idna__is_valid(int32_t *label, size_t len)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Verify an ACE label is valid
|
||||
*
|
||||
* \param label Host label to check
|
||||
* \param len Length of label
|
||||
* \return true if valid, false otherwise
|
||||
*/
|
||||
static bool idna__verify(const char *label, size_t len)
|
||||
{
|
||||
nserror error;
|
||||
int32_t *ucs4;
|
||||
char *ace;
|
||||
ssize_t ucs4_len;
|
||||
size_t u_ucs4_len, ace_len;
|
||||
|
||||
/* Convert our ACE label back to UCS-4 */
|
||||
error = idna__ace_to_ucs4(label, len, &ucs4, &u_ucs4_len);
|
||||
if (error != NSERROR_OK) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Perform NFC normalisation */
|
||||
ucs4_len = utf8proc_normalize_utf32(ucs4, u_ucs4_len,
|
||||
UTF8PROC_STABLE | UTF8PROC_COMPOSE);
|
||||
if (ucs4_len < 0) {
|
||||
free(ucs4);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Convert the UCS-4 label back to ACE */
|
||||
error = idna__ucs4_to_ace(ucs4, (size_t)ucs4_len,
|
||||
&ace, &ace_len);
|
||||
free(ucs4);
|
||||
if (error != NSERROR_OK) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Check if it matches the input */
|
||||
if ((len == ace_len) && (strncmp(label, ace, len) == 0)) {
|
||||
free(ace);
|
||||
return true;
|
||||
}
|
||||
|
||||
NSLOG(netsurf, INFO, "Re-encoded ACE label %s does not match input",
|
||||
ace);
|
||||
free(ace);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
#else /* WITH_UTF8PROC */
|
||||
|
||||
|
||||
/**
|
||||
* Convert a UTF-8 string to UCS-4
|
||||
*
|
||||
* \param utf8_label UTF-8 string containing host label
|
||||
* \param len Length of host label (in bytes)
|
||||
* \param ucs4_label Pointer to update with the output
|
||||
* \param ucs4_len Pointer to update with the length
|
||||
* \return NSERROR_OK on success, appropriate error otherwise
|
||||
*
|
||||
* If return value != NSERROR_OK, output will be left untouched.
|
||||
*/
|
||||
static nserror
|
||||
idna__utf8_to_ucs4(const char *utf8_label,
|
||||
size_t len,
|
||||
int32_t **ucs4_label,
|
||||
size_t *ucs4_len)
|
||||
{
|
||||
return NSERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a UCS-4 string to UTF-8
|
||||
*
|
||||
* \param ucs4_label UCS-4 string containing host label
|
||||
* \param ucs4_len Length of host label (in bytes)
|
||||
* \param utf8_label Pointer to update with the output
|
||||
* \param utf8_len Pointer to update with the length
|
||||
* \return NSERROR_OK on success, appropriate error otherwise
|
||||
*
|
||||
* If return value != NSERROR_OK, output will be left untouched.
|
||||
*/
|
||||
static nserror
|
||||
idna__ucs4_to_utf8(const int32_t *ucs4_label,
|
||||
size_t ucs4_len,
|
||||
char **utf8_label,
|
||||
size_t *utf8_len)
|
||||
{
|
||||
return NSERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Check if a host label is valid for IDNA2008
|
||||
*
|
||||
* \param label Host label to check (UCS-4)
|
||||
* \param len Length of host label (in characters/codepoints)
|
||||
* \return true if compliant, false otherwise
|
||||
*/
|
||||
static bool idna__is_valid(int32_t *label, size_t len)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Verify an ACE label is valid
|
||||
*
|
||||
* \param label Host label to check
|
||||
* \param len Length of label
|
||||
* \return true if valid, false otherwise
|
||||
*/
|
||||
static bool idna__verify(const char *label, size_t len)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
#endif /* WITH_UTF8PROC */
|
||||
|
||||
|
||||
/**
|
||||
* Find the length of a host label
|
||||
*
|
||||
* \param host String containing a host or FQDN
|
||||
* \param max_length Length of host string to search (in bytes)
|
||||
* \return Distance to next separator character or end of string
|
||||
*/
|
||||
static size_t idna__host_label_length(const char *host, size_t max_length)
|
||||
{
|
||||
const char *p = host;
|
||||
size_t length = 0;
|
||||
|
||||
while (length < max_length) {
|
||||
if ((*p == '.') || (*p == ':') || (*p == '\0')) {
|
||||
break;
|
||||
}
|
||||
length++;
|
||||
p++;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Check if a host label is LDH
|
||||
*
|
||||
@ -560,57 +689,6 @@ static bool idna__is_ace(const char *label, size_t len)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Verify an ACE label is valid
|
||||
*
|
||||
* \param label Host label to check
|
||||
* \param len Length of label
|
||||
* \return true if valid, false otherwise
|
||||
*/
|
||||
static bool idna__verify(const char *label, size_t len)
|
||||
{
|
||||
nserror error;
|
||||
int32_t *ucs4;
|
||||
char *ace;
|
||||
ssize_t ucs4_len;
|
||||
size_t u_ucs4_len, ace_len;
|
||||
|
||||
/* Convert our ACE label back to UCS-4 */
|
||||
error = idna__ace_to_ucs4(label, len, &ucs4, &u_ucs4_len);
|
||||
if (error != NSERROR_OK) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Perform NFC normalisation */
|
||||
ucs4_len = utf8proc_normalize_utf32(ucs4, u_ucs4_len,
|
||||
UTF8PROC_STABLE | UTF8PROC_COMPOSE);
|
||||
if (ucs4_len < 0) {
|
||||
free(ucs4);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Convert the UCS-4 label back to ACE */
|
||||
error = idna__ucs4_to_ace(ucs4, (size_t)ucs4_len,
|
||||
&ace, &ace_len);
|
||||
free(ucs4);
|
||||
if (error != NSERROR_OK) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Check if it matches the input */
|
||||
if ((len == ace_len) && (strncmp(label, ace, len) == 0)) {
|
||||
free(ace);
|
||||
return true;
|
||||
}
|
||||
|
||||
NSLOG(netsurf, INFO, "Re-encoded ACE label %s does not match input",
|
||||
ace);
|
||||
free(ace);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/* exported interface documented in idna.h */
|
||||
nserror
|
||||
idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_len)
|
||||
@ -631,8 +709,9 @@ idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_len)
|
||||
/* This string is IDN or invalid */
|
||||
|
||||
/* Convert to Unicode */
|
||||
if ((error = idna__utf8_to_ucs4(host, label_len,
|
||||
&ucs4_host, &ucs4_len)) != NSERROR_OK) {
|
||||
error = idna__utf8_to_ucs4(host, label_len,
|
||||
&ucs4_host, &ucs4_len);
|
||||
if (error != NSERROR_OK) {
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -710,7 +789,7 @@ idna_decode(const char *ace_host, size_t ace_len, char **host, size_t *host_len)
|
||||
|
||||
/* Decode to Unicode */
|
||||
error = idna__ace_to_ucs4(ace_host, label_len,
|
||||
&ucs4_host, &ucs4_len);
|
||||
&ucs4_host, &ucs4_len);
|
||||
if (error != NSERROR_OK) {
|
||||
return error;
|
||||
}
|
||||
|
@ -16,12 +16,13 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* NetSurf international domain name handling (interface).
|
||||
/**
|
||||
* \file
|
||||
* interface to international domain name handling.
|
||||
*/
|
||||
|
||||
#ifndef _NETSURF_UTILS_IDNA_H_
|
||||
#define _NETSURF_UTILS_IDNA_H_
|
||||
#ifndef NETSURF_UTILS_IDNA_H_
|
||||
#define NETSURF_UTILS_IDNA_H_
|
||||
|
||||
/**
|
||||
* Unicode canonical combining class for virama
|
||||
|
Loading…
Reference in New Issue
Block a user