Fall back to the old URL creation behaviour if the IDNA host parse fails

This commit is contained in:
Vincent Sanders 2014-06-09 18:05:36 +01:00
parent 8e29e517d5
commit 4488c8a2e4
3 changed files with 61 additions and 23 deletions

View File

@ -22,7 +22,7 @@ urldbtest_SRCS := content/urldb.c utils/url.c utils/utils.c utils/log.c \
urldbtest_CFLAGS := $(shell pkg-config --cflags libwapcaplet libdom) -O2 urldbtest_CFLAGS := $(shell pkg-config --cflags libwapcaplet libdom) -O2
urldbtest_LDFLAGS := $(shell pkg-config --libs libwapcaplet libdom) urldbtest_LDFLAGS := $(shell pkg-config --libs libwapcaplet libdom)
nsurl_SRCS := utils/corestrings.c utils/log.c utils/nsurl.c test/nsurl.c nsurl_SRCS := utils/corestrings.c utils/log.c utils/nsurl.c utils/idna.c utils/utf8proc.c test/nsurl.c
nsurl_CFLAGS := $(shell pkg-config --cflags libwapcaplet libdom) nsurl_CFLAGS := $(shell pkg-config --cflags libwapcaplet libdom)
nsurl_LDFLAGS := $(shell pkg-config --libs libwapcaplet libdom) nsurl_LDFLAGS := $(shell pkg-config --libs libwapcaplet libdom)

View File

@ -141,6 +141,7 @@ static const struct test_pairs join_tests[] = {
{ " / ", "http://a/" }, { " / ", "http://a/" },
{ " ? ", "http://a/b/c/d;p?" }, { " ? ", "http://a/b/c/d;p?" },
{ " h ", "http://a/b/c/h" }, { " h ", "http://a/b/c/h" },
{ "http://<!--#echo var=", "http://<!--/#echo%20var="},
/* [1] Extra slash beyond rfc3986 5.4.1 example, since we're /* [1] Extra slash beyond rfc3986 5.4.1 example, since we're
* testing normalisation in addition to joining */ * testing normalisation in addition to joining */
/* [2] Using the strict parsers option */ /* [2] Using the strict parsers option */

View File

@ -685,6 +685,7 @@ static nserror nsurl__create_from_section(const char * const url_s,
char *pos_norm, char *pos_norm,
struct nsurl_components *url) struct nsurl_components *url)
{ {
nserror ret;
int ascii_offset; int ascii_offset;
int start = 0; int start = 0;
int end = 0; int end = 0;
@ -961,14 +962,20 @@ static nserror nsurl__create_from_section(const char * const url_s,
/* host */ /* host */
/* Encode host according to IDNA2008 */ /* Encode host according to IDNA2008 */
if (idna_encode(norm_start, length, &host, &host_len) == NSERROR_OK) { ret = idna_encode(norm_start, length, &host, &host_len);
if (ret == NSERROR_OK) {
/* valid idna encoding */
if (lwc_intern_string(host, host_len, if (lwc_intern_string(host, host_len,
&url->host) != lwc_error_ok) { &url->host) != lwc_error_ok) {
return NSERROR_NOMEM; return NSERROR_NOMEM;
} }
free(host); free(host);
} else { } else {
return NSERROR_BAD_URL; /* fall back to straight interning */
if (lwc_intern_string(norm_start, length,
&url->host) != lwc_error_ok) {
return NSERROR_NOMEM;
}
} }
} }
@ -1736,6 +1743,8 @@ nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined)
assert(base != NULL); assert(base != NULL);
assert(rel != NULL); assert(rel != NULL);
LOG(("base \"%s\" rel \"%s\"", nsurl_access(base), rel));
/* Peg out the URL sections */ /* Peg out the URL sections */
nsurl__get_string_markers(rel, &m, true); nsurl__get_string_markers(rel, &m, true);
@ -1743,11 +1752,13 @@ nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined)
length = nsurl__get_longest_section(&m); length = nsurl__get_longest_section(&m);
/* Initially assume that the joined URL can be formed entirely from /* Initially assume that the joined URL can be formed entirely from
* the relative URL. */ * the relative URL.
*/
joined_parts = NSURL_F_REL; joined_parts = NSURL_F_REL;
/* Update joined_parts to indicate any required parts from the /* Update joined_parts to indicate any required parts from the
* base URL. */ * base URL.
*/
if (m.scheme_end - m.start <= 0) { if (m.scheme_end - m.start <= 0) {
/* The relative url has no scheme. /* The relative url has no scheme.
* Use base URL's scheme. */ * Use base URL's scheme. */
@ -1777,7 +1788,8 @@ nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined)
} }
/* Allocate enough memory to url escape the longest section, plus /* Allocate enough memory to url escape the longest section, plus
* space for path merging (if required). */ * space for path merging (if required).
*/
if (joined_parts & NSURL_F_MERGED_PATH) { if (joined_parts & NSURL_F_MERGED_PATH) {
/* Need to merge paths */ /* Need to merge paths */
length += (base->components.path != NULL) ? length += (base->components.path != NULL) ?
@ -1789,8 +1801,9 @@ nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined)
lwc_string_length(base->components.path) : 0); lwc_string_length(base->components.path) : 0);
buff = malloc(length + 5); buff = malloc(length + 5);
if (buff == NULL) if (buff == NULL) {
return NSERROR_NOMEM; return NSERROR_NOMEM;
}
buff_pos = buff; buff_pos = buff;
@ -1803,8 +1816,11 @@ nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined)
} else { } else {
c.scheme_type = m.scheme_type; c.scheme_type = m.scheme_type;
error |= nsurl__create_from_section(rel, URL_SCHEME, &m, error = nsurl__create_from_section(rel, URL_SCHEME, &m, buff, &c);
buff, &c); if (error != NSERROR_OK) {
free(buff);
return error;
}
} }
if (joined_parts & NSURL_F_BASE_AUTHORITY) { if (joined_parts & NSURL_F_BASE_AUTHORITY) {
@ -1813,10 +1829,16 @@ nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined)
c.host = nsurl__component_copy(base->components.host); c.host = nsurl__component_copy(base->components.host);
c.port = nsurl__component_copy(base->components.port); c.port = nsurl__component_copy(base->components.port);
} else { } else {
error |= nsurl__create_from_section(rel, URL_CREDENTIALS, &m, error = nsurl__create_from_section(rel, URL_CREDENTIALS, &m,
buff, &c); buff, &c);
error |= nsurl__create_from_section(rel, URL_HOST, &m, if (error == NSERROR_OK) {
buff, &c); error = nsurl__create_from_section(rel, URL_HOST, &m,
buff, &c);
}
if (error != NSERROR_OK) {
free(buff);
return error;
}
} }
if (joined_parts & NSURL_F_BASE_PATH) { if (joined_parts & NSURL_F_BASE_PATH) {
@ -1866,8 +1888,12 @@ nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined)
m_path.query = new_length; m_path.query = new_length;
buff_start = buff_pos + new_length; buff_start = buff_pos + new_length;
error |= nsurl__create_from_section(buff_pos, URL_PATH, &m_path, error = nsurl__create_from_section(buff_pos, URL_PATH, &m_path,
buff_start, &c); buff_start, &c);
if (error != NSERROR_OK) {
free(buff);
return error;
}
} else { } else {
struct url_markers m_path; struct url_markers m_path;
@ -1883,24 +1909,34 @@ nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined)
m_path.query = new_length; m_path.query = new_length;
buff_start = buff_pos + new_length; buff_start = buff_pos + new_length;
error |= nsurl__create_from_section(buff_pos, URL_PATH, &m_path,
error = nsurl__create_from_section(buff_pos, URL_PATH, &m_path,
buff_start, &c); buff_start, &c);
if (error != NSERROR_OK) {
free(buff);
return error;
}
} }
if (joined_parts & NSURL_F_BASE_QUERY) if (joined_parts & NSURL_F_BASE_QUERY) {
c.query = nsurl__component_copy(base->components.query); c.query = nsurl__component_copy(base->components.query);
else } else {
error |= nsurl__create_from_section(rel, URL_QUERY, &m, error = nsurl__create_from_section(rel, URL_QUERY, &m,
buff, &c); buff, &c);
if (error != NSERROR_OK) {
free(buff);
return error;
}
}
error |= nsurl__create_from_section(rel, URL_FRAGMENT, &m, error = nsurl__create_from_section(rel, URL_FRAGMENT, &m, buff, &c);
buff, &c);
/* Free temporary buffer */ /* Free temporary buffer */
free(buff); free(buff);
if (error != NSERROR_OK) if (error != NSERROR_OK) {
return NSERROR_NOMEM; return error;
}
/* Get the string length and find which parts of url are present */ /* Get the string length and find which parts of url are present */
nsurl__get_string_data(&c, NSURL_WITH_FRAGMENT, &length, nsurl__get_string_data(&c, NSURL_WITH_FRAGMENT, &length,
@ -1908,8 +1944,9 @@ nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined)
/* Create NetSurf URL object */ /* Create NetSurf URL object */
*joined = malloc(sizeof(nsurl) + length + 1); /* Add 1 for \0 */ *joined = malloc(sizeof(nsurl) + length + 1); /* Add 1 for \0 */
if (*joined == NULL) if (*joined == NULL) {
return NSERROR_NOMEM; return NSERROR_NOMEM;
}
(*joined)->components = c; (*joined)->components = c;
(*joined)->length = length; (*joined)->length = length;