mirror of
https://github.com/netsurf-browser/netsurf
synced 2024-12-21 03:32:35 +03:00
2598 lines
63 KiB
C
2598 lines
63 KiB
C
/*
|
|
* Copyright 2011 Michael Drake <tlsa@netsurf-browser.org>
|
|
*
|
|
* This file is part of NetSurf, http://www.netsurf-browser.org/
|
|
*
|
|
* NetSurf is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; version 2 of the License.
|
|
*
|
|
* NetSurf is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
/**
|
|
* \file
|
|
* NetSurf URL handling implementation.
|
|
*
|
|
* This is the common implementation of all URL handling within the
|
|
* browser. This implementation is based upon RFC3986 although this has
|
|
* been superceeded by https://url.spec.whatwg.org/ which is based on
|
|
* actual contemporary implementations.
|
|
*
|
|
* Care must be taken with character encodings within this module as
|
|
* the specifications work with specific ascii ranges and must not be
|
|
* affected by locale. Hence the c library character type functions
|
|
* are not used.
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
#include <libwapcaplet/libwapcaplet.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <strings.h>
|
|
|
|
#include "utils/corestrings.h"
|
|
#include "utils/errors.h"
|
|
#include "utils/idna.h"
|
|
#include "utils/log.h"
|
|
#include "utils/nsurl.h"
|
|
#include "utils/utils.h"
|
|
|
|
/* Define to enable NSURL debugging */
|
|
#undef NSURL_DEBUG
|
|
|
|
/** ascii character codes */
|
|
enum ascii_codepoints {
|
|
ASCII_NUL = 0,
|
|
ASCII_SPC = 0x20,
|
|
ASCII_FF = 0x0C,
|
|
ASCII_NL = 0x0A,
|
|
ASCII_CR = 0x0D,
|
|
ASCII_HT = 0x09,
|
|
ASCII_VT = 0x0B,
|
|
ASCII_PLUS = 0x2b,
|
|
ASCII_MINUS = 0x2d,
|
|
ASCII_FULLSTOP = 0x2e,
|
|
ASCII_SLASH = 0x2F,
|
|
ASCII_0 = 0x30,
|
|
ASCII_9 = 0x39,
|
|
ASCII_COLON = 0x3A,
|
|
ASCII_A = 0x41,
|
|
ASCII_Z = 0x5A,
|
|
ASCII_a = 0x61,
|
|
ASCII_z = 0x7A
|
|
};
|
|
|
|
/**
|
|
* nsurl scheme type
|
|
*/
|
|
enum scheme_type {
|
|
NSURL_SCHEME_OTHER,
|
|
NSURL_SCHEME_HTTP,
|
|
NSURL_SCHEME_HTTPS,
|
|
NSURL_SCHEME_FTP,
|
|
NSURL_SCHEME_MAILTO
|
|
};
|
|
|
|
/**
|
|
* nsurl components
|
|
*
|
|
* [scheme]://[username]:[password]@[host]:[port][path][?query]#[fragment]
|
|
*
|
|
* Note:
|
|
* "path" string includes preceding '/', if needed for the scheme
|
|
* "query" string always includes preceding '?'
|
|
*
|
|
* The other spanned punctuation is to be inserted when building URLs from
|
|
* components.
|
|
*/
|
|
struct nsurl_components {
|
|
lwc_string *scheme;
|
|
lwc_string *username;
|
|
lwc_string *password;
|
|
lwc_string *host;
|
|
lwc_string *port;
|
|
lwc_string *path;
|
|
lwc_string *query;
|
|
lwc_string *fragment;
|
|
|
|
enum scheme_type scheme_type;
|
|
};
|
|
|
|
|
|
/**
|
|
* NetSurf URL object
|
|
*/
|
|
struct nsurl {
|
|
struct nsurl_components components;
|
|
|
|
int count; /* Number of references to NetSurf URL object */
|
|
uint32_t hash; /* Hash value for nsurl identification */
|
|
|
|
size_t length; /* Length of string */
|
|
char string[FLEX_ARRAY_LEN_DECL]; /* Full URL as a string */
|
|
};
|
|
|
|
|
|
/** Marker set, indicating positions of sections within a URL string */
|
|
struct url_markers {
|
|
size_t start; /** start of URL */
|
|
size_t scheme_end;
|
|
size_t authority;
|
|
|
|
size_t colon_first;
|
|
size_t at;
|
|
size_t colon_last;
|
|
|
|
size_t path;
|
|
size_t query;
|
|
size_t fragment;
|
|
|
|
size_t end; /** end of URL */
|
|
|
|
enum scheme_type scheme_type;
|
|
};
|
|
|
|
|
|
/** Marker set, indicating positions of sections within a URL string */
|
|
struct nsurl_component_lengths {
|
|
size_t scheme;
|
|
size_t username;
|
|
size_t password;
|
|
size_t host;
|
|
size_t port;
|
|
size_t path;
|
|
size_t query;
|
|
size_t fragment;
|
|
};
|
|
|
|
|
|
/** Flags indicating which parts of a URL string are required for a nsurl */
|
|
enum nsurl_string_flags {
|
|
NSURL_F_SCHEME = (1 << 0),
|
|
NSURL_F_SCHEME_PUNCTUATION = (1 << 1),
|
|
NSURL_F_AUTHORITY_PUNCTUATION = (1 << 2),
|
|
NSURL_F_USERNAME = (1 << 3),
|
|
NSURL_F_PASSWORD = (1 << 4),
|
|
NSURL_F_CREDENTIALS_PUNCTUATION = (1 << 5),
|
|
NSURL_F_HOST = (1 << 6),
|
|
NSURL_F_PORT = (1 << 7),
|
|
NSURL_F_AUTHORITY = (NSURL_F_USERNAME |
|
|
NSURL_F_PASSWORD |
|
|
NSURL_F_HOST |
|
|
NSURL_F_PORT),
|
|
NSURL_F_PATH = (1 << 8),
|
|
NSURL_F_QUERY = (1 << 9),
|
|
NSURL_F_FRAGMENT_PUNCTUATION = (1 << 10),
|
|
NSURL_F_FRAGMENT = (1 << 11)
|
|
};
|
|
|
|
|
|
/** Sections of a URL */
|
|
enum url_sections {
|
|
URL_SCHEME,
|
|
URL_CREDENTIALS,
|
|
URL_HOST,
|
|
URL_PATH,
|
|
URL_QUERY,
|
|
URL_FRAGMENT
|
|
};
|
|
|
|
|
|
#define nsurl__component_copy(c) (c == NULL) ? NULL : lwc_string_ref(c)
|
|
|
|
#define nsurl__component_compare(c1, c2, match) \
|
|
if (c1 && c2 && lwc_error_ok == \
|
|
lwc_string_isequal(c1, c2, match)) { \
|
|
/* do nothing */ \
|
|
} else if (c1 || c2) { \
|
|
*match = false; \
|
|
}
|
|
|
|
/**
|
|
* Return a hex digit for the given numerical value.
|
|
*
|
|
* \param digit the value to get the hex digit for.
|
|
* \return character in range 0-9A-F
|
|
*/
|
|
inline static char digit2uppercase_hex(unsigned char digit) {
|
|
assert(digit < 16);
|
|
return "0123456789ABCDEF"[digit];
|
|
}
|
|
|
|
/**
|
|
* determines if a character is a whitespace in the ascii character encoding
|
|
*
|
|
* whitespace characters are space, form feed, new line, carrige
|
|
* return, horizontal tab and vertical tab.
|
|
*
|
|
* \param c character to classify
|
|
* \return zero if the character is not whitespace else 1
|
|
*/
|
|
inline static int is_ascii_space(int c)
|
|
{
|
|
if (c == ASCII_SPC ||
|
|
c == ASCII_FF ||
|
|
c == ASCII_NL ||
|
|
c == ASCII_CR ||
|
|
c == ASCII_HT ||
|
|
c == ASCII_VT) {
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* determine if a character is alphabetical in the ascii character encoding
|
|
*
|
|
* characters in the range A-Z and a-z are considered alphabetical.
|
|
*
|
|
* \param c character to classify
|
|
* \return zero if the character is not alphabetical else 1
|
|
*/
|
|
inline static int is_ascii_alpha(int c)
|
|
{
|
|
if (((c >= ASCII_A) && (c <= ASCII_Z)) ||
|
|
((c >= ASCII_a) && (c <= ASCII_z))) {
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* determine if a character is a number in the ascii character encoding
|
|
*
|
|
* characters in the range 0-9 are considered numbers.
|
|
*
|
|
* \param c character to classify
|
|
* \return 1 if the character is a number else 0
|
|
*/
|
|
inline static int is_ascii_digit(int c)
|
|
{
|
|
if ((c >= ASCII_0) && (c <= ASCII_9)) {
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* determine if a character is alphanumerical in the ascii character encoding
|
|
*
|
|
* characters in the range A-Z, a-z and 0-9 are considered alphanumeric.
|
|
*
|
|
* \param c character to classify
|
|
* \return zero if the character is not alphanumerical else 1
|
|
*/
|
|
inline static int is_ascii_alnum(int c)
|
|
{
|
|
if (((c >= ASCII_0) && (c <= ASCII_9)) ||
|
|
((c >= ASCII_A) && (c <= ASCII_Z)) ||
|
|
((c >= ASCII_a) && (c <= ASCII_z))) {
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* determine if a character is unreserved
|
|
*
|
|
* \param c character to classify.
|
|
* \return true if the character is unreserved else false.
|
|
*/
|
|
static bool nsurl__is_unreserved(unsigned char c)
|
|
{
|
|
/* From RFC3986 section 2.3 (unreserved characters)
|
|
*
|
|
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
|
*
|
|
*/
|
|
static const bool unreserved[256] = {
|
|
false, false, false, false, false, false, false, false, /* 00 */
|
|
false, false, false, false, false, false, false, false, /* 08 */
|
|
false, false, false, false, false, false, false, false, /* 10 */
|
|
false, false, false, false, false, false, false, false, /* 18 */
|
|
false, false, false, false, false, false, false, false, /* 20 */
|
|
false, false, false, false, false, true, true, false, /* 28 */
|
|
true, true, true, true, true, true, true, true, /* 30 */
|
|
true, true, false, false, false, false, false, false, /* 38 */
|
|
false, true, true, true, true, true, true, true, /* 40 */
|
|
true, true, true, true, true, true, true, true, /* 48 */
|
|
true, true, true, true, true, true, true, true, /* 50 */
|
|
true, true, true, false, false, false, false, true, /* 58 */
|
|
false, true, true, true, true, true, true, true, /* 60 */
|
|
true, true, true, true, true, true, true, true, /* 68 */
|
|
true, true, true, true, true, true, true, true, /* 70 */
|
|
true, true, true, false, false, false, true, false, /* 78 */
|
|
false, false, false, false, false, false, false, false, /* 80 */
|
|
false, false, false, false, false, false, false, false, /* 88 */
|
|
false, false, false, false, false, false, false, false, /* 90 */
|
|
false, false, false, false, false, false, false, false, /* 98 */
|
|
false, false, false, false, false, false, false, false, /* A0 */
|
|
false, false, false, false, false, false, false, false, /* A8 */
|
|
false, false, false, false, false, false, false, false, /* B0 */
|
|
false, false, false, false, false, false, false, false, /* B8 */
|
|
false, false, false, false, false, false, false, false, /* C0 */
|
|
false, false, false, false, false, false, false, false, /* C8 */
|
|
false, false, false, false, false, false, false, false, /* D0 */
|
|
false, false, false, false, false, false, false, false, /* D8 */
|
|
false, false, false, false, false, false, false, false, /* E0 */
|
|
false, false, false, false, false, false, false, false, /* E8 */
|
|
false, false, false, false, false, false, false, false, /* F0 */
|
|
false, false, false, false, false, false, false, false /* F8 */
|
|
};
|
|
return unreserved[c];
|
|
}
|
|
|
|
/**
|
|
* determine if a character should be percent escaped.
|
|
*
|
|
* The ASCII codes which should not be percent escaped
|
|
*
|
|
* \param c character to classify.
|
|
* \return true if the character should not be escaped else false.
|
|
*/
|
|
static bool nsurl__is_no_escape(unsigned char c)
|
|
{
|
|
static const bool no_escape[256] = {
|
|
false, false, false, false, false, false, false, false, /* 00 */
|
|
false, false, false, false, false, false, false, false, /* 08 */
|
|
false, false, false, false, false, false, false, false, /* 10 */
|
|
false, false, false, false, false, false, false, false, /* 18 */
|
|
false, true, false, true, true, false, true, true, /* 20 */
|
|
true, true, true, true, true, true, true, true, /* 28 */
|
|
true, true, true, true, true, true, true, true, /* 30 */
|
|
true, true, true, true, false, true, false, true, /* 38 */
|
|
true, true, true, true, true, true, true, true, /* 40 */
|
|
true, true, true, true, true, true, true, true, /* 48 */
|
|
true, true, true, true, true, true, true, true, /* 50 */
|
|
true, true, true, true, false, true, false, true, /* 58 */
|
|
false, true, true, true, true, true, true, true, /* 60 */
|
|
true, true, true, true, true, true, true, true, /* 68 */
|
|
true, true, true, true, true, true, true, true, /* 70 */
|
|
true, true, true, false, true, false, true, false, /* 78 */
|
|
false, false, false, false, false, false, false, false, /* 80 */
|
|
false, false, false, false, false, false, false, false, /* 88 */
|
|
false, false, false, false, false, false, false, false, /* 90 */
|
|
false, false, false, false, false, false, false, false, /* 98 */
|
|
false, false, false, false, false, false, false, false, /* A0 */
|
|
false, false, false, false, false, false, false, false, /* A8 */
|
|
false, false, false, false, false, false, false, false, /* B0 */
|
|
false, false, false, false, false, false, false, false, /* B8 */
|
|
false, false, false, false, false, false, false, false, /* C0 */
|
|
false, false, false, false, false, false, false, false, /* C8 */
|
|
false, false, false, false, false, false, false, false, /* D0 */
|
|
false, false, false, false, false, false, false, false, /* D8 */
|
|
false, false, false, false, false, false, false, false, /* E0 */
|
|
false, false, false, false, false, false, false, false, /* E8 */
|
|
false, false, false, false, false, false, false, false, /* F0 */
|
|
false, false, false, false, false, false, false, false, /* F8 */
|
|
};
|
|
return no_escape[c];
|
|
}
|
|
|
|
|
|
/**
|
|
* Obtains a set of markers delimiting sections in a URL string
|
|
*
|
|
* \param url_s URL string
|
|
* \param markers Updated to mark sections in the URL string
|
|
* \param joining True iff URL string is a relative URL for joining
|
|
*/
|
|
static void nsurl__get_string_markers(const char * const url_s,
|
|
struct url_markers *markers, bool joining)
|
|
{
|
|
const char *pos = url_s; /** current position in url_s */
|
|
bool is_http = false;
|
|
bool trailing_whitespace = false;
|
|
|
|
/* Initialise marker set */
|
|
struct url_markers marker = { 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, NSURL_SCHEME_OTHER };
|
|
|
|
/* Skip any leading whitespace in url_s */
|
|
while (is_ascii_space(*pos))
|
|
pos++;
|
|
|
|
/* Record start point */
|
|
marker.start = pos - url_s;
|
|
|
|
marker.scheme_end = marker.authority = marker.colon_first = marker.at =
|
|
marker.colon_last = marker.path = marker.start;
|
|
|
|
if (*pos == ASCII_NUL) {
|
|
/* Nothing but whitespace, early exit */
|
|
marker.query = marker.fragment = marker.end = marker.path;
|
|
*markers = marker;
|
|
return;
|
|
}
|
|
|
|
/* Get scheme */
|
|
if (is_ascii_alpha(*pos)) {
|
|
pos++;
|
|
|
|
while (*pos != ASCII_COLON && *pos != ASCII_NUL) {
|
|
if (!is_ascii_alnum(*pos) &&
|
|
(*pos != ASCII_PLUS) &&
|
|
(*pos != ASCII_MINUS) &&
|
|
(*pos != ASCII_FULLSTOP)) {
|
|
/* This character is not valid in the
|
|
* scheme */
|
|
break;
|
|
}
|
|
pos++;
|
|
}
|
|
|
|
if (*pos == ASCII_COLON) {
|
|
/* This delimits the end of the scheme */
|
|
size_t off;
|
|
|
|
marker.scheme_end = pos - url_s;
|
|
|
|
off = marker.scheme_end - marker.start;
|
|
|
|
/* Detect http(s) and mailto for scheme specifc
|
|
* normalisation */
|
|
if (off == SLEN("http") &&
|
|
(((*(pos - off + 0) == 'h') ||
|
|
(*(pos - off + 0) == 'H')) &&
|
|
((*(pos - off + 1) == 't') ||
|
|
(*(pos - off + 1) == 'T')) &&
|
|
((*(pos - off + 2) == 't') ||
|
|
(*(pos - off + 2) == 'T')) &&
|
|
((*(pos - off + 3) == 'p') ||
|
|
(*(pos - off + 3) == 'P')))) {
|
|
marker.scheme_type = NSURL_SCHEME_HTTP;
|
|
is_http = true;
|
|
} else if (off == SLEN("https") &&
|
|
(((*(pos - off + 0) == 'h') ||
|
|
(*(pos - off + 0) == 'H')) &&
|
|
((*(pos - off + 1) == 't') ||
|
|
(*(pos - off + 1) == 'T')) &&
|
|
((*(pos - off + 2) == 't') ||
|
|
(*(pos - off + 2) == 'T')) &&
|
|
((*(pos - off + 3) == 'p') ||
|
|
(*(pos - off + 3) == 'P')) &&
|
|
((*(pos - off + 4) == 's') ||
|
|
(*(pos - off + 4) == 'S')))) {
|
|
marker.scheme_type = NSURL_SCHEME_HTTPS;
|
|
is_http = true;
|
|
} else if (off == SLEN("ftp") &&
|
|
(((*(pos - off + 0) == 'f') ||
|
|
(*(pos - off + 0) == 'F')) &&
|
|
((*(pos - off + 1) == 't') ||
|
|
(*(pos - off + 1) == 'T')) &&
|
|
((*(pos - off + 2) == 'p') ||
|
|
(*(pos - off + 2) == 'P')))) {
|
|
marker.scheme_type = NSURL_SCHEME_FTP;
|
|
} else if (off == SLEN("mailto") &&
|
|
(((*(pos - off + 0) == 'm') ||
|
|
(*(pos - off + 0) == 'M')) &&
|
|
((*(pos - off + 1) == 'a') ||
|
|
(*(pos - off + 1) == 'A')) &&
|
|
((*(pos - off + 2) == 'i') ||
|
|
(*(pos - off + 2) == 'I')) &&
|
|
((*(pos - off + 3) == 'l') ||
|
|
(*(pos - off + 3) == 'L')) &&
|
|
((*(pos - off + 4) == 't') ||
|
|
(*(pos - off + 4) == 'T')) &&
|
|
((*(pos - off + 5) == 'o') ||
|
|
(*(pos - off + 5) == 'O')))) {
|
|
marker.scheme_type = NSURL_SCHEME_MAILTO;
|
|
}
|
|
|
|
/* Skip over colon */
|
|
pos++;
|
|
|
|
/* Mark place as start of authority */
|
|
marker.authority = marker.colon_first = marker.at =
|
|
marker.colon_last = marker.path =
|
|
pos - url_s;
|
|
|
|
} else {
|
|
/* Not found a scheme */
|
|
if (joining == false) {
|
|
/* Assuming no scheme == http */
|
|
marker.scheme_type = NSURL_SCHEME_HTTP;
|
|
is_http = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Get authority
|
|
*
|
|
* Two slashes always indicates the start of an authority.
|
|
*
|
|
* We are more relaxed in the case of http:
|
|
* a. when joining, one or more slashes indicates start of authority
|
|
* b. when not joining, we assume authority if no scheme was present
|
|
* and in the case of mailto: when we assume there is an authority.
|
|
*/
|
|
if ((*pos == '/' && *(pos + 1) == '/') ||
|
|
(is_http && ((joining && *pos == '/') ||
|
|
(joining == false &&
|
|
marker.scheme_end != marker.start))) ||
|
|
marker.scheme_type == NSURL_SCHEME_MAILTO) {
|
|
|
|
/* Skip over leading slashes */
|
|
if (*pos == '/') {
|
|
if (is_http == false) {
|
|
if (*pos == '/') pos++;
|
|
if (*pos == '/') pos++;
|
|
} else {
|
|
while (*pos == '/')
|
|
pos++;
|
|
}
|
|
|
|
marker.authority = marker.colon_first = marker.at =
|
|
marker.colon_last = marker.path =
|
|
pos - url_s;
|
|
}
|
|
|
|
/* Need to get (or complete) the authority */
|
|
while (*pos != '\0') {
|
|
if (*pos == '/' || *pos == '?' || *pos == '#') {
|
|
/* End of the authority */
|
|
break;
|
|
|
|
} else if (marker.scheme_type != NSURL_SCHEME_MAILTO &&
|
|
*pos == ':' && marker.colon_first ==
|
|
marker.authority) {
|
|
/* could be username:password or host:port
|
|
* separator */
|
|
marker.colon_first = pos - url_s;
|
|
|
|
} else if (marker.scheme_type != NSURL_SCHEME_MAILTO &&
|
|
*pos == ':' && marker.colon_first !=
|
|
marker.authority) {
|
|
/* could be host:port separator */
|
|
marker.colon_last = pos - url_s;
|
|
|
|
} else if (*pos == '@' && marker.at ==
|
|
marker.authority) {
|
|
/* Credentials @ host separator */
|
|
marker.at = pos - url_s;
|
|
}
|
|
|
|
pos++;
|
|
}
|
|
|
|
marker.path = pos - url_s;
|
|
|
|
} else if ((*pos == '\0' || *pos == '/') &&
|
|
joining == false && is_http == true) {
|
|
marker.path = pos - url_s;
|
|
}
|
|
|
|
/* Get path
|
|
*
|
|
* Needs to start with '/' if there's no authority
|
|
*/
|
|
if (*pos == '/' || ((marker.path == marker.authority) &&
|
|
(*pos != '?') && (*pos != '#') && (*pos != '\0'))) {
|
|
while (*(++pos) != '\0') {
|
|
if (*pos == '?' || *pos == '#') {
|
|
/* End of the path */
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
marker.query = pos - url_s;
|
|
|
|
/* Get query */
|
|
if (*pos == '?') {
|
|
while (*(++pos) != '\0') {
|
|
if (*pos == '#') {
|
|
/* End of the query */
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
marker.fragment = pos - url_s;
|
|
|
|
/* Get fragment */
|
|
if (*pos == '#') {
|
|
while (*(++pos) != '\0')
|
|
;
|
|
}
|
|
|
|
/* We got to the end of url_s.
|
|
* Need to skip back over trailing whitespace to find end of URL */
|
|
pos--;
|
|
if (pos >= url_s && is_ascii_space(*pos)) {
|
|
trailing_whitespace = true;
|
|
while (pos >= url_s && is_ascii_space(*pos))
|
|
pos--;
|
|
}
|
|
|
|
marker.end = pos + 1 - url_s;
|
|
|
|
if (trailing_whitespace == true) {
|
|
/* Ensure last url section doesn't pass end */
|
|
if (marker.fragment > marker.end)
|
|
marker.fragment = marker.end;
|
|
if (marker.query > marker.end)
|
|
marker.query = marker.end;
|
|
if (marker.path > marker.end)
|
|
marker.path = marker.end;
|
|
if (marker.colon_last > marker.end)
|
|
marker.colon_last = marker.end;
|
|
if (marker.at > marker.end)
|
|
marker.at = marker.end;
|
|
if (marker.colon_last > marker.end)
|
|
marker.colon_last = marker.end;
|
|
if (marker.fragment > marker.end)
|
|
marker.fragment = marker.end;
|
|
}
|
|
|
|
#ifdef NSURL_DEBUG
|
|
LOG("marker.start: %i", marker.start);
|
|
LOG("marker.scheme_end: %i", marker.scheme_end);
|
|
LOG("marker.authority: %i", marker.authority);
|
|
|
|
LOG("marker.colon_first: %i", marker.colon_first);
|
|
LOG("marker.at: %i", marker.at);
|
|
LOG("marker.colon_last: %i", marker.colon_last);
|
|
|
|
LOG("marker.path: %i", marker.path);
|
|
LOG("marker.query: %i", marker.query);
|
|
LOG("marker.fragment: %i", marker.fragment);
|
|
|
|
LOG("marker.end: %i", marker.end);
|
|
#endif
|
|
|
|
/* Got all the URL components pegged out now */
|
|
*markers = marker;
|
|
}
|
|
|
|
|
|
/**
|
|
* Remove dot segments from a path, as per rfc 3986, 5.2.4
|
|
*
|
|
* \param path path to remove dot segments from ('\0' terminated)
|
|
* \param output path with dot segments removed
|
|
* \return size of output
|
|
*/
|
|
static size_t nsurl__remove_dot_segments(char *path, char *output)
|
|
{
|
|
char *path_pos = path;
|
|
char *output_pos = output;
|
|
|
|
while (*path_pos != '\0') {
|
|
#ifdef NSURL_DEBUG
|
|
LOG(" in:%s", path_pos);
|
|
LOG("out:%.*s", output_pos - output, output);
|
|
#endif
|
|
if (*path_pos == '.') {
|
|
if (*(path_pos + 1) == '.' &&
|
|
*(path_pos + 2) == '/') {
|
|
/* Found prefix of "../" */
|
|
path_pos += SLEN("../");
|
|
continue;
|
|
|
|
} else if (*(path_pos + 1) == '/') {
|
|
/* Found prefix of "./" */
|
|
path_pos += SLEN("./");
|
|
continue;
|
|
}
|
|
} else if (*path_pos == '/' && *(path_pos + 1) == '.') {
|
|
if (*(path_pos + 2) == '/') {
|
|
/* Found prefix of "/./" */
|
|
path_pos += SLEN("/.");
|
|
continue;
|
|
|
|
} else if (*(path_pos + 2) == '\0') {
|
|
/* Found "/." at end of path */
|
|
*(output_pos++) = '/';
|
|
|
|
/* End of input path */
|
|
break;
|
|
|
|
} else if (*(path_pos + 2) == '.') {
|
|
if (*(path_pos + 3) == '/') {
|
|
/* Found prefix of "/../" */
|
|
path_pos += SLEN("/..");
|
|
|
|
if (output_pos > output)
|
|
output_pos--;
|
|
while (output_pos > output &&
|
|
*output_pos != '/')
|
|
output_pos--;
|
|
|
|
continue;
|
|
|
|
} else if (*(path_pos + 3) == '\0') {
|
|
/* Found "/.." at end of path */
|
|
|
|
while (output_pos > output &&
|
|
*(output_pos -1 ) !='/')
|
|
output_pos--;
|
|
|
|
/* End of input path */
|
|
break;
|
|
}
|
|
}
|
|
} else if (*path_pos == '.') {
|
|
if (*(path_pos + 1) == '\0') {
|
|
/* Found "." at end of path */
|
|
|
|
/* End of input path */
|
|
break;
|
|
|
|
} else if (*(path_pos + 1) == '.' &&
|
|
*(path_pos + 2) == '\0') {
|
|
/* Found ".." at end of path */
|
|
|
|
/* End of input path */
|
|
break;
|
|
}
|
|
}
|
|
/* Copy first character into output path */
|
|
*output_pos++ = *path_pos++;
|
|
|
|
/* Copy up to but not including next '/' */
|
|
while ((*path_pos != '/') && (*path_pos != '\0'))
|
|
*output_pos++ = *path_pos++;
|
|
}
|
|
|
|
return output_pos - output;
|
|
}
|
|
|
|
|
|
/**
|
|
* Get the length of the longest section
|
|
*
|
|
* \param m markers delimiting url sections in a string
|
|
* \return the length of the longest section
|
|
*/
|
|
static size_t nsurl__get_longest_section(struct url_markers *m)
|
|
{
|
|
size_t length = m->scheme_end - m->start; /* scheme */
|
|
|
|
if (length < m->at - m->authority) /* credentials */
|
|
length = m->at - m->authority;
|
|
|
|
if (length < m->path - m->at) /* host */
|
|
length = m->path - m->at;
|
|
|
|
if (length < m->query - m->path) /* path */
|
|
length = m->query - m->path;
|
|
|
|
if (length < m->fragment - m->query) /* query */
|
|
length = m->fragment - m->query;
|
|
|
|
if (length < m->end - m->fragment) /* fragment */
|
|
length = m->end - m->fragment;
|
|
|
|
return length;
|
|
}
|
|
|
|
|
|
/**
|
|
* Converts two hexadecimal digits to a single number
|
|
*
|
|
* \param c1 most significant hex digit
|
|
* \param c2 least significant hex digit
|
|
* \return the total value of the two digit hex number, or -ve if input not hex
|
|
*
|
|
* For unescaping url encoded characters.
|
|
*/
|
|
static inline int nsurl__get_ascii_offset(char c1, char c2)
|
|
{
|
|
int offset;
|
|
|
|
/* Use 1st char as most significant hex digit */
|
|
if (is_ascii_digit(c1))
|
|
offset = 16 * (c1 - '0');
|
|
else if (c1 >= 'a' && c1 <= 'f')
|
|
offset = 16 * (c1 - 'a' + 10);
|
|
else if (c1 >= 'A' && c1 <= 'F')
|
|
offset = 16 * (c1 - 'A' + 10);
|
|
else
|
|
/* Not valid hex */
|
|
return -1;
|
|
|
|
/* Use 2nd char as least significant hex digit and sum */
|
|
if (is_ascii_digit(c2))
|
|
offset += c2 - '0';
|
|
else if (c2 >= 'a' && c2 <= 'f')
|
|
offset += c2 - 'a' + 10;
|
|
else if (c2 >= 'A' && c2 <= 'F')
|
|
offset += c2 - 'A' + 10;
|
|
else
|
|
/* Not valid hex */
|
|
return -1;
|
|
|
|
return offset;
|
|
}
|
|
|
|
|
|
/**
|
|
* Create the components of a NetSurf URL object for a section of a URL string
|
|
*
|
|
* \param url_s URL string
|
|
* \param section Sets which section of URL string is to be normalised
|
|
* \param pegs Set of markers delimiting the URL string's sections
|
|
* \param pos_norm A buffer large enough for the normalised string (*3 + 1)
|
|
* \param url A NetSurf URL object, to which components may be added
|
|
* \return NSERROR_OK on success, appropriate error otherwise
|
|
*
|
|
* The section of url_s is normalised appropriately.
|
|
*/
|
|
static nserror nsurl__create_from_section(const char * const url_s,
|
|
const enum url_sections section,
|
|
const struct url_markers *pegs,
|
|
char *pos_norm,
|
|
struct nsurl_components *url)
|
|
{
|
|
nserror ret;
|
|
int ascii_offset;
|
|
int start = 0;
|
|
int end = 0;
|
|
const char *pos;
|
|
const char *pos_url_s;
|
|
char *norm_start = pos_norm;
|
|
char *host;
|
|
size_t copy_len;
|
|
size_t length;
|
|
size_t host_len;
|
|
enum {
|
|
NSURL_F_NO_PORT = (1 << 0)
|
|
} flags = 0;
|
|
|
|
switch (section) {
|
|
case URL_SCHEME:
|
|
start = pegs->start;
|
|
end = pegs->scheme_end;
|
|
break;
|
|
|
|
case URL_CREDENTIALS:
|
|
start = pegs->authority;
|
|
end = pegs->at;
|
|
break;
|
|
|
|
case URL_HOST:
|
|
start = (pegs->at == pegs->authority &&
|
|
*(url_s + pegs->at) != '@') ?
|
|
pegs->at :
|
|
pegs->at + 1;
|
|
end = pegs->path;
|
|
break;
|
|
|
|
case URL_PATH:
|
|
start = pegs->path;
|
|
end = pegs->query;
|
|
break;
|
|
|
|
case URL_QUERY:
|
|
start = pegs->query;
|
|
end = pegs->fragment;
|
|
break;
|
|
|
|
case URL_FRAGMENT:
|
|
start = (*(url_s + pegs->fragment) != '#') ?
|
|
pegs->fragment :
|
|
pegs->fragment + 1;
|
|
end = pegs->end;
|
|
break;
|
|
}
|
|
|
|
if (end < start)
|
|
end = start;
|
|
|
|
length = end - start;
|
|
|
|
/* Stage 1: Normalise the required section */
|
|
|
|
pos = pos_url_s = url_s + start;
|
|
copy_len = 0;
|
|
for (; pos < url_s + end; pos++) {
|
|
if (*pos == '%' && (pos + 2 < url_s + end)) {
|
|
/* Might be an escaped character needing unescaped */
|
|
|
|
/* Find which character which was escaped */
|
|
ascii_offset = nsurl__get_ascii_offset(*(pos + 1),
|
|
*(pos + 2));
|
|
|
|
if (ascii_offset < 0) {
|
|
/* % with invalid hex digits. */
|
|
copy_len++;
|
|
continue;
|
|
}
|
|
|
|
if ((section != URL_SCHEME && section != URL_HOST) &&
|
|
(nsurl__is_unreserved(ascii_offset) == false)) {
|
|
/* This character should be escaped after all,
|
|
* just let it get copied */
|
|
copy_len += 3;
|
|
pos += 2;
|
|
continue;
|
|
}
|
|
|
|
if (copy_len > 0) {
|
|
/* Copy up to here */
|
|
memcpy(pos_norm, pos_url_s, copy_len);
|
|
pos_norm += copy_len;
|
|
copy_len = 0;
|
|
}
|
|
|
|
/* Put the unescaped character in the normalised URL */
|
|
*(pos_norm++) = (char)ascii_offset;
|
|
pos += 2;
|
|
pos_url_s = pos + 1;
|
|
|
|
length -= 2;
|
|
|
|
} else if ((section != URL_SCHEME && section != URL_HOST) &&
|
|
(nsurl__is_no_escape(*pos) == false)) {
|
|
|
|
/* This needs to be escaped */
|
|
if (copy_len > 0) {
|
|
/* Copy up to here */
|
|
memcpy(pos_norm, pos_url_s, copy_len);
|
|
pos_norm += copy_len;
|
|
copy_len = 0;
|
|
}
|
|
|
|
/* escape */
|
|
*(pos_norm++) = '%';
|
|
*(pos_norm++) = digit2uppercase_hex(
|
|
((unsigned char)*pos) >> 4);
|
|
*(pos_norm++) = digit2uppercase_hex(
|
|
((unsigned char)*pos) & 0xf);
|
|
pos_url_s = pos + 1;
|
|
|
|
length += 2;
|
|
|
|
} else if ((section == URL_SCHEME || section == URL_HOST) &&
|
|
isupper(*pos)) {
|
|
/* Lower case this letter */
|
|
|
|
if (copy_len > 0) {
|
|
/* Copy up to here */
|
|
memcpy(pos_norm, pos_url_s, copy_len);
|
|
pos_norm += copy_len;
|
|
copy_len = 0;
|
|
}
|
|
/* Copy lower cased letter into normalised URL */
|
|
*(pos_norm++) = tolower(*pos);
|
|
pos_url_s = pos + 1;
|
|
|
|
} else {
|
|
/* This character is safe in normalised URL */
|
|
copy_len++;
|
|
}
|
|
}
|
|
|
|
if (copy_len > 0) {
|
|
/* Copy up to here */
|
|
memcpy(pos_norm, pos_url_s, copy_len);
|
|
pos_norm += copy_len;
|
|
}
|
|
|
|
/* Mark end of section */
|
|
(*pos_norm) = '\0';
|
|
|
|
/* Stage 2: Create the URL components for the required section */
|
|
switch (section) {
|
|
case URL_SCHEME:
|
|
if (length == 0) {
|
|
/* No scheme, assuming http */
|
|
url->scheme = lwc_string_ref(corestring_lwc_http);
|
|
} else {
|
|
/* Add scheme to URL */
|
|
if (lwc_intern_string(norm_start, length,
|
|
&url->scheme) != lwc_error_ok) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
}
|
|
|
|
break;
|
|
|
|
case URL_CREDENTIALS:
|
|
url->username = NULL;
|
|
url->password = NULL;
|
|
|
|
if (length != 0 && *norm_start != ':') {
|
|
char *sec_start = norm_start;
|
|
if (pegs->colon_first != pegs->authority &&
|
|
pegs->at > pegs->colon_first + 1) {
|
|
/* there's a password */
|
|
sec_start += pegs->colon_first -
|
|
pegs->authority + 1;
|
|
if (lwc_intern_string(sec_start,
|
|
pegs->at - pegs->colon_first -1,
|
|
&url->password) !=
|
|
lwc_error_ok) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
/* update start pos and length for username */
|
|
sec_start = norm_start;
|
|
length -= pegs->at - pegs->colon_first;
|
|
} else if (pegs->colon_first != pegs->authority &&
|
|
pegs->at == pegs->colon_first + 1) {
|
|
/* strip username colon */
|
|
length--;
|
|
}
|
|
|
|
/* Username */
|
|
if (lwc_intern_string(sec_start, length,
|
|
&url->username) != lwc_error_ok) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
}
|
|
|
|
break;
|
|
|
|
case URL_HOST:
|
|
url->host = NULL;
|
|
url->port = NULL;
|
|
|
|
if (length != 0) {
|
|
size_t colon = 0;
|
|
char *sec_start = norm_start;
|
|
if (pegs->at < pegs->colon_first &&
|
|
pegs->colon_last == pegs->authority) {
|
|
/* There's one colon and it's after @ marker */
|
|
colon = pegs->colon_first;
|
|
} else if (pegs->colon_last != pegs->authority) {
|
|
/* There's more than one colon */
|
|
colon = pegs->colon_last;
|
|
} else {
|
|
/* There's no colon that could be a port
|
|
* separator */
|
|
flags |= NSURL_F_NO_PORT;
|
|
}
|
|
|
|
if (!(flags & NSURL_F_NO_PORT)) {
|
|
/* Determine whether colon is a port separator
|
|
*/
|
|
sec_start += colon - pegs->at;
|
|
while (++sec_start < norm_start + length) {
|
|
if (!is_ascii_digit(*sec_start)) {
|
|
/* Character after port isn't a
|
|
* digit; not a port separator
|
|
*/
|
|
flags |= NSURL_F_NO_PORT;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!(flags & NSURL_F_NO_PORT)) {
|
|
/* There's a port */
|
|
size_t skip = (pegs->at == pegs->authority) ?
|
|
1 : 0;
|
|
sec_start = norm_start + colon - pegs->at +
|
|
skip;
|
|
if (url->scheme != NULL &&
|
|
url->scheme_type ==
|
|
NSURL_SCHEME_HTTP &&
|
|
length -
|
|
(colon - pegs->at + skip) == 2 &&
|
|
*sec_start == '8' &&
|
|
*(sec_start + 1) == '0') {
|
|
/* Scheme is http, and port is default
|
|
* (80) */
|
|
flags |= NSURL_F_NO_PORT;
|
|
}
|
|
|
|
if (length <= (colon - pegs->at + skip)) {
|
|
/* No space for a port after the colon
|
|
*/
|
|
flags |= NSURL_F_NO_PORT;
|
|
}
|
|
|
|
/* Add non-redundant ports to NetSurf URL */
|
|
sec_start = norm_start + colon - pegs->at +
|
|
skip;
|
|
if (!(flags & NSURL_F_NO_PORT) &&
|
|
lwc_intern_string(sec_start,
|
|
length -
|
|
(colon - pegs->at + skip),
|
|
&url->port) != lwc_error_ok) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
/* update length for host */
|
|
skip = (pegs->at == pegs->authority) ? 0 : 1;
|
|
length = colon - pegs->at - skip;
|
|
}
|
|
|
|
/* host */
|
|
/* Encode host according to IDNA2008 */
|
|
ret = idna_encode(norm_start, length, &host, &host_len);
|
|
if (ret == NSERROR_OK) {
|
|
/* valid idna encoding */
|
|
if (lwc_intern_string(host, host_len,
|
|
&url->host) != lwc_error_ok) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
free(host);
|
|
} else {
|
|
/* fall back to straight interning */
|
|
if (lwc_intern_string(norm_start, length,
|
|
&url->host) != lwc_error_ok) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
}
|
|
}
|
|
|
|
break;
|
|
|
|
case URL_PATH:
|
|
if (length != 0) {
|
|
if (lwc_intern_string(norm_start, length,
|
|
&url->path) != lwc_error_ok) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
} else if (url->host != NULL &&
|
|
url->scheme_type != NSURL_SCHEME_MAILTO) {
|
|
/* Set empty path to "/", if there's a host */
|
|
if (lwc_intern_string("/", SLEN("/"),
|
|
&url->path) != lwc_error_ok) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
} else {
|
|
url->path = NULL;
|
|
}
|
|
|
|
break;
|
|
|
|
case URL_QUERY:
|
|
if (length != 0) {
|
|
if (lwc_intern_string(norm_start, length,
|
|
&url->query) != lwc_error_ok) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
} else {
|
|
url->query = NULL;
|
|
}
|
|
|
|
break;
|
|
|
|
case URL_FRAGMENT:
|
|
if (length != 0) {
|
|
if (lwc_intern_string(norm_start, length,
|
|
&url->fragment) != lwc_error_ok) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
} else {
|
|
url->fragment = NULL;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
|
|
/**
|
|
* Get nsurl string info; total length, component lengths, & components present
|
|
*
|
|
* \param url NetSurf URL components
|
|
* \param parts Which parts of the URL are required in the string
|
|
* \param url_l Updated to total string length
|
|
* \param lengths Updated with individual component lengths
|
|
* \param pflags Updated to contain relevant string flags
|
|
*/
|
|
static void nsurl__get_string_data(const struct nsurl_components *url,
|
|
nsurl_component parts, size_t *url_l,
|
|
struct nsurl_component_lengths *lengths,
|
|
enum nsurl_string_flags *pflags)
|
|
{
|
|
enum nsurl_string_flags flags = *pflags;
|
|
*url_l = 0;
|
|
|
|
/* Intersection of required parts and available parts gives
|
|
* the output parts */
|
|
if (url->scheme && parts & NSURL_SCHEME) {
|
|
flags |= NSURL_F_SCHEME;
|
|
|
|
lengths->scheme = lwc_string_length(url->scheme);
|
|
*url_l += lengths->scheme;
|
|
}
|
|
|
|
if (url->username && parts & NSURL_USERNAME) {
|
|
flags |= NSURL_F_USERNAME;
|
|
|
|
lengths->username = lwc_string_length(url->username);
|
|
*url_l += lengths->username;
|
|
}
|
|
|
|
if (url->password && parts & NSURL_PASSWORD) {
|
|
flags |= NSURL_F_PASSWORD;
|
|
|
|
lengths->password = lwc_string_length(url->password);
|
|
*url_l += SLEN(":") + lengths->password;
|
|
}
|
|
|
|
if (url->host && parts & NSURL_HOST) {
|
|
flags |= NSURL_F_HOST;
|
|
|
|
lengths->host = lwc_string_length(url->host);
|
|
*url_l += lengths->host;
|
|
}
|
|
|
|
if (url->port && parts & NSURL_PORT) {
|
|
flags |= NSURL_F_PORT;
|
|
|
|
lengths->port = lwc_string_length(url->port);
|
|
*url_l += SLEN(":") + lengths->port;
|
|
}
|
|
|
|
if (url->path && parts & NSURL_PATH) {
|
|
flags |= NSURL_F_PATH;
|
|
|
|
lengths->path = lwc_string_length(url->path);
|
|
*url_l += lengths->path;
|
|
}
|
|
|
|
if (url->query && parts & NSURL_QUERY) {
|
|
flags |= NSURL_F_QUERY;
|
|
|
|
lengths->query = lwc_string_length(url->query);
|
|
*url_l += lengths->query;
|
|
}
|
|
|
|
if (url->fragment && parts & NSURL_FRAGMENT) {
|
|
flags |= NSURL_F_FRAGMENT;
|
|
|
|
lengths->fragment = lwc_string_length(url->fragment);
|
|
*url_l += lengths->fragment;
|
|
}
|
|
|
|
/* Turn on any spanned punctuation */
|
|
if ((flags & NSURL_F_SCHEME) && (parts > NSURL_SCHEME)) {
|
|
flags |= NSURL_F_SCHEME_PUNCTUATION;
|
|
|
|
*url_l += SLEN(":");
|
|
}
|
|
|
|
if ((flags & NSURL_F_SCHEME) && (flags > NSURL_F_SCHEME) &&
|
|
url->path && lwc_string_data(url->path)[0] == '/') {
|
|
flags |= NSURL_F_AUTHORITY_PUNCTUATION;
|
|
|
|
*url_l += SLEN("//");
|
|
}
|
|
|
|
if ((flags & (NSURL_F_USERNAME | NSURL_F_PASSWORD)) &&
|
|
flags & NSURL_F_HOST) {
|
|
flags |= NSURL_F_CREDENTIALS_PUNCTUATION;
|
|
|
|
*url_l += SLEN("@");
|
|
}
|
|
|
|
if ((flags & ~NSURL_F_FRAGMENT) && (flags & NSURL_F_FRAGMENT)) {
|
|
flags |= NSURL_F_FRAGMENT_PUNCTUATION;
|
|
|
|
*url_l += SLEN("#");
|
|
}
|
|
|
|
*pflags = flags;
|
|
}
|
|
|
|
|
|
/**
|
|
* Get nsurl string info; total length, component lengths, & components present
|
|
*
|
|
* \param url NetSurf URL components
|
|
* \param url_s Updated to contain the string
|
|
* \param l Individual component lengths
|
|
* \param flags String flags
|
|
*/
|
|
static void nsurl_get_string(const struct nsurl_components *url, char *url_s,
|
|
struct nsurl_component_lengths *l,
|
|
enum nsurl_string_flags flags)
|
|
{
|
|
char *pos;
|
|
|
|
/* Copy the required parts into the url string */
|
|
pos = url_s;
|
|
|
|
if (flags & NSURL_F_SCHEME) {
|
|
memcpy(pos, lwc_string_data(url->scheme), l->scheme);
|
|
pos += l->scheme;
|
|
}
|
|
|
|
if (flags & NSURL_F_SCHEME_PUNCTUATION) {
|
|
*(pos++) = ':';
|
|
}
|
|
|
|
if (flags & NSURL_F_AUTHORITY_PUNCTUATION) {
|
|
*(pos++) = '/';
|
|
*(pos++) = '/';
|
|
}
|
|
|
|
if (flags & NSURL_F_USERNAME) {
|
|
memcpy(pos, lwc_string_data(url->username), l->username);
|
|
pos += l->username;
|
|
}
|
|
|
|
if (flags & NSURL_F_PASSWORD) {
|
|
*(pos++) = ':';
|
|
memcpy(pos, lwc_string_data(url->password), l->password);
|
|
pos += l->password;
|
|
}
|
|
|
|
if (flags & NSURL_F_CREDENTIALS_PUNCTUATION) {
|
|
*(pos++) = '@';
|
|
}
|
|
|
|
if (flags & NSURL_F_HOST) {
|
|
memcpy(pos, lwc_string_data(url->host), l->host);
|
|
pos += l->host;
|
|
}
|
|
|
|
if (flags & NSURL_F_PORT) {
|
|
*(pos++) = ':';
|
|
memcpy(pos, lwc_string_data(url->port), l->port);
|
|
pos += l->port;
|
|
}
|
|
|
|
if (flags & NSURL_F_PATH) {
|
|
memcpy(pos, lwc_string_data(url->path), l->path);
|
|
pos += l->path;
|
|
}
|
|
|
|
if (flags & NSURL_F_QUERY) {
|
|
memcpy(pos, lwc_string_data(url->query), l->query);
|
|
pos += l->query;
|
|
}
|
|
|
|
if (flags & NSURL_F_FRAGMENT) {
|
|
if (flags & NSURL_F_FRAGMENT_PUNCTUATION)
|
|
*(pos++) = '#';
|
|
memcpy(pos, lwc_string_data(url->fragment), l->fragment);
|
|
pos += l->fragment;
|
|
}
|
|
|
|
*pos = '\0';
|
|
}
|
|
|
|
|
|
/**
|
|
* Calculate hash value
|
|
*
|
|
* \param url NetSurf URL object to set hash value for
|
|
*/
|
|
static void nsurl_calc_hash(nsurl *url)
|
|
{
|
|
uint32_t hash = 0;
|
|
|
|
if (url->components.scheme)
|
|
hash ^= lwc_string_hash_value(url->components.scheme);
|
|
|
|
if (url->components.username)
|
|
hash ^= lwc_string_hash_value(url->components.username);
|
|
|
|
if (url->components.password)
|
|
hash ^= lwc_string_hash_value(url->components.password);
|
|
|
|
if (url->components.host)
|
|
hash ^= lwc_string_hash_value(url->components.host);
|
|
|
|
if (url->components.port)
|
|
hash ^= lwc_string_hash_value(url->components.port);
|
|
|
|
if (url->components.path)
|
|
hash ^= lwc_string_hash_value(url->components.path);
|
|
|
|
if (url->components.query)
|
|
hash ^= lwc_string_hash_value(url->components.query);
|
|
|
|
url->hash = hash;
|
|
}
|
|
|
|
|
|
/**
|
|
* Destroy components
|
|
*
|
|
* \param c url components
|
|
*/
|
|
static void nsurl_destroy_components(struct nsurl_components *c)
|
|
{
|
|
if (c->scheme)
|
|
lwc_string_unref(c->scheme);
|
|
|
|
if (c->username)
|
|
lwc_string_unref(c->username);
|
|
|
|
if (c->password)
|
|
lwc_string_unref(c->password);
|
|
|
|
if (c->host)
|
|
lwc_string_unref(c->host);
|
|
|
|
if (c->port)
|
|
lwc_string_unref(c->port);
|
|
|
|
if (c->path)
|
|
lwc_string_unref(c->path);
|
|
|
|
if (c->query)
|
|
lwc_string_unref(c->query);
|
|
|
|
if (c->fragment)
|
|
lwc_string_unref(c->fragment);
|
|
}
|
|
|
|
|
|
#ifdef NSURL_DEBUG
|
|
/**
|
|
* Dump a NetSurf URL's internal components
|
|
*
|
|
* \param url The NetSurf URL to dump components of
|
|
*/
|
|
static void nsurl__dump(const nsurl *url)
|
|
{
|
|
if (url->components.scheme)
|
|
LOG(" Scheme: %s", lwc_string_data(url->components.scheme));
|
|
|
|
if (url->components.username)
|
|
LOG("Username: %s", lwc_string_data(url->components.username));
|
|
|
|
if (url->components.password)
|
|
LOG("Password: %s", lwc_string_data(url->components.password));
|
|
|
|
if (url->components.host)
|
|
LOG(" Host: %s", lwc_string_data(url->components.host));
|
|
|
|
if (url->components.port)
|
|
LOG(" Port: %s", lwc_string_data(url->components.port));
|
|
|
|
if (url->components.path)
|
|
LOG(" Path: %s", lwc_string_data(url->components.path));
|
|
|
|
if (url->components.query)
|
|
LOG(" Query: %s", lwc_string_data(url->components.query));
|
|
|
|
if (url->components.fragment)
|
|
LOG("Fragment: %s", lwc_string_data(url->components.fragment));
|
|
}
|
|
#endif
|
|
|
|
/******************************************************************************
|
|
* NetSurf URL Public API *
|
|
******************************************************************************/
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
nserror nsurl_create(const char * const url_s, nsurl **url)
|
|
{
|
|
struct url_markers m;
|
|
struct nsurl_components c;
|
|
size_t length;
|
|
char *buff;
|
|
struct nsurl_component_lengths str_len = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
|
enum nsurl_string_flags str_flags = 0;
|
|
nserror e = NSERROR_OK;
|
|
bool match;
|
|
|
|
assert(url_s != NULL);
|
|
|
|
/* Peg out the URL sections */
|
|
nsurl__get_string_markers(url_s, &m, false);
|
|
|
|
/* Get the length of the longest section */
|
|
length = nsurl__get_longest_section(&m);
|
|
|
|
/* Allocate enough memory to url escape the longest section */
|
|
buff = malloc(length * 3 + 1);
|
|
if (buff == NULL)
|
|
return NSERROR_NOMEM;
|
|
|
|
/* Set scheme type */
|
|
c.scheme_type = m.scheme_type;
|
|
|
|
/* Build NetSurf URL object from sections */
|
|
e |= nsurl__create_from_section(url_s, URL_SCHEME, &m, buff, &c);
|
|
e |= nsurl__create_from_section(url_s, URL_CREDENTIALS, &m, buff, &c);
|
|
e |= nsurl__create_from_section(url_s, URL_HOST, &m, buff, &c);
|
|
e |= nsurl__create_from_section(url_s, URL_PATH, &m, buff, &c);
|
|
e |= nsurl__create_from_section(url_s, URL_QUERY, &m, buff, &c);
|
|
e |= nsurl__create_from_section(url_s, URL_FRAGMENT, &m, buff, &c);
|
|
|
|
/* Finished with buffer */
|
|
free(buff);
|
|
|
|
if (e != NSERROR_OK) {
|
|
nsurl_destroy_components(&c);
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
/* Validate URL */
|
|
if ((lwc_string_isequal(c.scheme, corestring_lwc_http,
|
|
&match) == lwc_error_ok && match == true) ||
|
|
(lwc_string_isequal(c.scheme, corestring_lwc_https,
|
|
&match) == lwc_error_ok && match == true)) {
|
|
/* http, https must have host */
|
|
if (c.host == NULL) {
|
|
nsurl_destroy_components(&c);
|
|
return NSERROR_BAD_URL;
|
|
}
|
|
}
|
|
|
|
/* Get the string length and find which parts of url are present */
|
|
nsurl__get_string_data(&c, NSURL_WITH_FRAGMENT, &length,
|
|
&str_len, &str_flags);
|
|
|
|
/* Create NetSurf URL object */
|
|
*url = malloc(sizeof(nsurl) + length + 1); /* Add 1 for \0 */
|
|
if (*url == NULL) {
|
|
nsurl_destroy_components(&c);
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
(*url)->components = c;
|
|
(*url)->length = length;
|
|
|
|
/* Fill out the url string */
|
|
nsurl_get_string(&c, (*url)->string, &str_len, str_flags);
|
|
|
|
/* Get the nsurl's hash */
|
|
nsurl_calc_hash(*url);
|
|
|
|
/* Give the URL a reference */
|
|
(*url)->count = 1;
|
|
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
nsurl *nsurl_ref(nsurl *url)
|
|
{
|
|
assert(url != NULL);
|
|
|
|
url->count++;
|
|
|
|
return url;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
void nsurl_unref(nsurl *url)
|
|
{
|
|
assert(url != NULL);
|
|
assert(url->count > 0);
|
|
|
|
if (--url->count > 0)
|
|
return;
|
|
|
|
#ifdef NSURL_DEBUG
|
|
nsurl__dump(url);
|
|
#endif
|
|
|
|
/* Release lwc strings */
|
|
nsurl_destroy_components(&url->components);
|
|
|
|
/* Free the NetSurf URL */
|
|
free(url);
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
bool nsurl_compare(const nsurl *url1, const nsurl *url2, nsurl_component parts)
|
|
{
|
|
bool match = true;
|
|
|
|
assert(url1 != NULL);
|
|
assert(url2 != NULL);
|
|
|
|
/* Compare URL components */
|
|
|
|
/* Path, host and query first, since they're most likely to differ */
|
|
|
|
if (parts & NSURL_PATH) {
|
|
nsurl__component_compare(url1->components.path,
|
|
url2->components.path, &match);
|
|
|
|
if (match == false)
|
|
return false;
|
|
}
|
|
|
|
if (parts & NSURL_HOST) {
|
|
nsurl__component_compare(url1->components.host,
|
|
url2->components.host, &match);
|
|
|
|
if (match == false)
|
|
return false;
|
|
}
|
|
|
|
if (parts & NSURL_QUERY) {
|
|
nsurl__component_compare(url1->components.query,
|
|
url2->components.query, &match);
|
|
|
|
if (match == false)
|
|
return false;
|
|
}
|
|
|
|
if (parts & NSURL_SCHEME) {
|
|
nsurl__component_compare(url1->components.scheme,
|
|
url2->components.scheme, &match);
|
|
|
|
if (match == false)
|
|
return false;
|
|
}
|
|
|
|
if (parts & NSURL_USERNAME) {
|
|
nsurl__component_compare(url1->components.username,
|
|
url2->components.username, &match);
|
|
|
|
if (match == false)
|
|
return false;
|
|
}
|
|
|
|
if (parts & NSURL_PASSWORD) {
|
|
nsurl__component_compare(url1->components.password,
|
|
url2->components.password, &match);
|
|
|
|
if (match == false)
|
|
return false;
|
|
}
|
|
|
|
if (parts & NSURL_PORT) {
|
|
nsurl__component_compare(url1->components.port,
|
|
url2->components.port, &match);
|
|
|
|
if (match == false)
|
|
return false;
|
|
}
|
|
|
|
if (parts & NSURL_FRAGMENT) {
|
|
nsurl__component_compare(url1->components.fragment,
|
|
url2->components.fragment, &match);
|
|
|
|
if (match == false)
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
nserror nsurl_get(const nsurl *url, nsurl_component parts,
|
|
char **url_s, size_t *url_l)
|
|
{
|
|
struct nsurl_component_lengths str_len = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
|
enum nsurl_string_flags str_flags = 0;
|
|
|
|
assert(url != NULL);
|
|
|
|
/* Get the string length and find which parts of url need copied */
|
|
nsurl__get_string_data(&(url->components), parts, url_l,
|
|
&str_len, &str_flags);
|
|
|
|
if (*url_l == 0) {
|
|
return NSERROR_BAD_URL;
|
|
}
|
|
|
|
/* Allocate memory for url string */
|
|
*url_s = malloc(*url_l + 1); /* adding 1 for '\0' */
|
|
if (*url_s == NULL) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
/* Copy the required parts into the url string */
|
|
nsurl_get_string(&(url->components), *url_s, &str_len, str_flags);
|
|
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
lwc_string *nsurl_get_component(const nsurl *url, nsurl_component part)
|
|
{
|
|
assert(url != NULL);
|
|
|
|
switch (part) {
|
|
case NSURL_SCHEME:
|
|
return (url->components.scheme != NULL) ?
|
|
lwc_string_ref(url->components.scheme) : NULL;
|
|
|
|
case NSURL_USERNAME:
|
|
return (url->components.username != NULL) ?
|
|
lwc_string_ref(url->components.username) : NULL;
|
|
|
|
case NSURL_PASSWORD:
|
|
return (url->components.password != NULL) ?
|
|
lwc_string_ref(url->components.password) : NULL;
|
|
|
|
case NSURL_HOST:
|
|
return (url->components.host != NULL) ?
|
|
lwc_string_ref(url->components.host) : NULL;
|
|
|
|
case NSURL_PORT:
|
|
return (url->components.port != NULL) ?
|
|
lwc_string_ref(url->components.port) : NULL;
|
|
|
|
case NSURL_PATH:
|
|
return (url->components.path != NULL) ?
|
|
lwc_string_ref(url->components.path) : NULL;
|
|
|
|
case NSURL_QUERY:
|
|
return (url->components.query != NULL) ?
|
|
lwc_string_ref(url->components.query) : NULL;
|
|
|
|
case NSURL_FRAGMENT:
|
|
return (url->components.fragment != NULL) ?
|
|
lwc_string_ref(url->components.fragment) : NULL;
|
|
|
|
default:
|
|
LOG("Unsupported value passed to part param.");
|
|
assert(0);
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
bool nsurl_has_component(const nsurl *url, nsurl_component part)
|
|
{
|
|
assert(url != NULL);
|
|
|
|
switch (part) {
|
|
case NSURL_SCHEME:
|
|
if (url->components.scheme != NULL)
|
|
return true;
|
|
else
|
|
return false;
|
|
|
|
case NSURL_CREDENTIALS:
|
|
/* Only username required for credentials section */
|
|
/* Fall through */
|
|
case NSURL_USERNAME:
|
|
if (url->components.username != NULL)
|
|
return true;
|
|
else
|
|
return false;
|
|
|
|
case NSURL_PASSWORD:
|
|
if (url->components.password != NULL)
|
|
return true;
|
|
else
|
|
return false;
|
|
|
|
case NSURL_HOST:
|
|
if (url->components.host != NULL)
|
|
return true;
|
|
else
|
|
return false;
|
|
|
|
case NSURL_PORT:
|
|
if (url->components.port != NULL)
|
|
return true;
|
|
else
|
|
return false;
|
|
|
|
case NSURL_PATH:
|
|
if (url->components.path != NULL)
|
|
return true;
|
|
else
|
|
return false;
|
|
|
|
case NSURL_QUERY:
|
|
if (url->components.query != NULL)
|
|
return true;
|
|
else
|
|
return false;
|
|
|
|
case NSURL_FRAGMENT:
|
|
if (url->components.fragment != NULL)
|
|
return true;
|
|
else
|
|
return false;
|
|
|
|
default:
|
|
LOG("Unsupported value passed to part param.");
|
|
assert(0);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
const char *nsurl_access(const nsurl *url)
|
|
{
|
|
assert(url != NULL);
|
|
|
|
return url->string;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
nserror nsurl_get_utf8(const nsurl *url, char **url_s, size_t *url_l)
|
|
{
|
|
nserror err;
|
|
lwc_string *host;
|
|
char *idna_host = NULL;
|
|
size_t idna_host_len;
|
|
char *scheme = NULL;
|
|
size_t scheme_len;
|
|
char *path = NULL;
|
|
size_t path_len;
|
|
|
|
assert(url != NULL);
|
|
|
|
if (url->components.host == NULL) {
|
|
return nsurl_get(url, NSURL_WITH_FRAGMENT, url_s, url_l);
|
|
}
|
|
|
|
host = url->components.host;
|
|
err = idna_decode(lwc_string_data(host), lwc_string_length(host),
|
|
&idna_host, &idna_host_len);
|
|
if (err != NSERROR_OK) {
|
|
goto cleanup;
|
|
}
|
|
|
|
err = nsurl_get(url,
|
|
NSURL_SCHEME | NSURL_CREDENTIALS,
|
|
&scheme, &scheme_len);
|
|
if (err != NSERROR_OK) {
|
|
goto cleanup;
|
|
}
|
|
|
|
err = nsurl_get(url,
|
|
NSURL_PORT | NSURL_PATH | NSURL_QUERY | NSURL_FRAGMENT,
|
|
&path, &path_len);
|
|
if (err != NSERROR_OK) {
|
|
goto cleanup;
|
|
}
|
|
|
|
*url_l = scheme_len + idna_host_len + path_len + 1; /* +1 for \0 */
|
|
*url_s = malloc(*url_l);
|
|
|
|
if (*url_s == NULL) {
|
|
err = NSERROR_NOMEM;
|
|
goto cleanup;
|
|
}
|
|
|
|
snprintf(*url_s, *url_l, "%s%s%s", scheme, idna_host, path);
|
|
|
|
err = NSERROR_OK;
|
|
|
|
cleanup:
|
|
free(idna_host);
|
|
free(scheme);
|
|
free(path);
|
|
|
|
return err;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
const char *nsurl_access_leaf(const nsurl *url)
|
|
{
|
|
size_t path_len;
|
|
const char *path;
|
|
const char *leaf;
|
|
|
|
assert(url != NULL);
|
|
|
|
if (url->components.path == NULL)
|
|
return "";
|
|
|
|
path = lwc_string_data(url->components.path);
|
|
path_len = lwc_string_length(url->components.path);
|
|
|
|
if (path_len == 0)
|
|
return "";
|
|
|
|
if (path_len == 1 && *path == '/')
|
|
return "/";
|
|
|
|
leaf = path + path_len;
|
|
|
|
do {
|
|
leaf--;
|
|
} while ((leaf != path) && (*leaf != '/'));
|
|
|
|
if (*leaf == '/')
|
|
leaf++;
|
|
|
|
return leaf;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
size_t nsurl_length(const nsurl *url)
|
|
{
|
|
assert(url != NULL);
|
|
|
|
return url->length;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
uint32_t nsurl_hash(const nsurl *url)
|
|
{
|
|
assert(url != NULL);
|
|
|
|
return url->hash;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined)
|
|
{
|
|
struct url_markers m;
|
|
struct nsurl_components c;
|
|
size_t length;
|
|
char *buff;
|
|
char *buff_pos;
|
|
char *buff_start;
|
|
struct nsurl_component_lengths str_len = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
|
enum nsurl_string_flags str_flags = 0;
|
|
nserror error = 0;
|
|
enum {
|
|
NSURL_F_REL = 0,
|
|
NSURL_F_BASE_SCHEME = (1 << 0),
|
|
NSURL_F_BASE_AUTHORITY = (1 << 1),
|
|
NSURL_F_BASE_PATH = (1 << 2),
|
|
NSURL_F_MERGED_PATH = (1 << 3),
|
|
NSURL_F_BASE_QUERY = (1 << 4)
|
|
} joined_parts;
|
|
|
|
assert(base != NULL);
|
|
assert(rel != NULL);
|
|
|
|
#ifdef NSURL_DEBUG
|
|
LOG("base: \"%s\", rel: \"%s\"", nsurl_access(base), rel);
|
|
#endif
|
|
|
|
/* Peg out the URL sections */
|
|
nsurl__get_string_markers(rel, &m, true);
|
|
|
|
/* Get the length of the longest section */
|
|
length = nsurl__get_longest_section(&m);
|
|
|
|
/* Initially assume that the joined URL can be formed entierly from
|
|
* the relative URL.
|
|
*/
|
|
joined_parts = NSURL_F_REL;
|
|
|
|
/* Update joined_compnents to indicate any required parts from the
|
|
* base URL.
|
|
*/
|
|
if (m.scheme_end - m.start <= 0) {
|
|
/* The relative url has no scheme.
|
|
* Use base URL's scheme. */
|
|
joined_parts |= NSURL_F_BASE_SCHEME;
|
|
|
|
if (m.path - m.authority <= 0) {
|
|
/* The relative URL has no authority.
|
|
* Use base URL's authority. */
|
|
joined_parts |= NSURL_F_BASE_AUTHORITY;
|
|
|
|
if (m.query - m.path <= 0) {
|
|
/* The relative URL has no path.
|
|
* Use base URL's path. */
|
|
joined_parts |= NSURL_F_BASE_PATH;
|
|
|
|
if (m.fragment - m.query <= 0) {
|
|
/* The relative URL has no query.
|
|
* Use base URL's query. */
|
|
joined_parts |= NSURL_F_BASE_QUERY;
|
|
}
|
|
|
|
} else if (*(rel + m.path) != '/') {
|
|
/* Relative URL has relative path */
|
|
joined_parts |= NSURL_F_MERGED_PATH;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Allocate enough memory to url escape the longest section, plus
|
|
* space for path merging (if required).
|
|
*/
|
|
if (joined_parts & NSURL_F_MERGED_PATH) {
|
|
/* Need to merge paths */
|
|
length += (base->components.path != NULL) ?
|
|
lwc_string_length(base->components.path) : 0;
|
|
}
|
|
length *= 4;
|
|
/* Plus space for removing dots from path */
|
|
length += (m.query - m.path) + ((base->components.path != NULL) ?
|
|
lwc_string_length(base->components.path) : 0);
|
|
|
|
buff = malloc(length + 5);
|
|
if (buff == NULL) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
buff_pos = buff;
|
|
|
|
/* Form joined URL from base or rel components, as appropriate */
|
|
|
|
if (joined_parts & NSURL_F_BASE_SCHEME) {
|
|
c.scheme_type = base->components.scheme_type;
|
|
|
|
c.scheme = nsurl__component_copy(base->components.scheme);
|
|
} else {
|
|
c.scheme_type = m.scheme_type;
|
|
|
|
error = nsurl__create_from_section(rel, URL_SCHEME, &m, buff, &c);
|
|
if (error != NSERROR_OK) {
|
|
free(buff);
|
|
return error;
|
|
}
|
|
}
|
|
|
|
if (joined_parts & NSURL_F_BASE_AUTHORITY) {
|
|
c.username = nsurl__component_copy(base->components.username);
|
|
c.password = nsurl__component_copy(base->components.password);
|
|
c.host = nsurl__component_copy(base->components.host);
|
|
c.port = nsurl__component_copy(base->components.port);
|
|
} else {
|
|
error = nsurl__create_from_section(rel, URL_CREDENTIALS, &m,
|
|
buff, &c);
|
|
if (error == NSERROR_OK) {
|
|
error = nsurl__create_from_section(rel, URL_HOST, &m,
|
|
buff, &c);
|
|
}
|
|
if (error != NSERROR_OK) {
|
|
free(buff);
|
|
return error;
|
|
}
|
|
}
|
|
|
|
if (joined_parts & NSURL_F_BASE_PATH) {
|
|
c.path = nsurl__component_copy(base->components.path);
|
|
|
|
} else if (joined_parts & NSURL_F_MERGED_PATH) {
|
|
struct url_markers m_path;
|
|
size_t new_length;
|
|
|
|
if (base->components.host != NULL &&
|
|
base->components.path == NULL) {
|
|
/* Append relative path to "/". */
|
|
*(buff_pos++) = '/';
|
|
memcpy(buff_pos, rel + m.path, m.query - m.path);
|
|
buff_pos += m.query - m.path;
|
|
|
|
} else {
|
|
/* Append relative path to all but last segment of
|
|
* base path. */
|
|
size_t path_end = lwc_string_length(
|
|
base->components.path);
|
|
const char *path = lwc_string_data(
|
|
base->components.path);
|
|
|
|
while (*(path + path_end) != '/' &&
|
|
path_end != 0) {
|
|
path_end--;
|
|
}
|
|
if (*(path + path_end) == '/')
|
|
path_end++;
|
|
|
|
/* Copy the base part */
|
|
memcpy(buff_pos, path, path_end);
|
|
buff_pos += path_end;
|
|
|
|
/* Copy the relative part */
|
|
memcpy(buff_pos, rel + m.path, m.query - m.path);
|
|
buff_pos += m.query - m.path;
|
|
}
|
|
|
|
/* add termination to string */
|
|
*buff_pos++ = '\0';
|
|
|
|
new_length = nsurl__remove_dot_segments(buff, buff_pos);
|
|
|
|
m_path.path = 0;
|
|
m_path.query = new_length;
|
|
|
|
buff_start = buff_pos + new_length;
|
|
error = nsurl__create_from_section(buff_pos, URL_PATH, &m_path,
|
|
buff_start, &c);
|
|
if (error != NSERROR_OK) {
|
|
free(buff);
|
|
return error;
|
|
}
|
|
|
|
} else {
|
|
struct url_markers m_path;
|
|
size_t new_length;
|
|
|
|
memcpy(buff_pos, rel + m.path, m.query - m.path);
|
|
buff_pos += m.query - m.path;
|
|
*(buff_pos++) = '\0';
|
|
|
|
new_length = nsurl__remove_dot_segments(buff, buff_pos);
|
|
|
|
m_path.path = 0;
|
|
m_path.query = new_length;
|
|
|
|
buff_start = buff_pos + new_length;
|
|
|
|
error = nsurl__create_from_section(buff_pos, URL_PATH, &m_path,
|
|
buff_start, &c);
|
|
if (error != NSERROR_OK) {
|
|
free(buff);
|
|
return error;
|
|
}
|
|
}
|
|
|
|
if (joined_parts & NSURL_F_BASE_QUERY) {
|
|
c.query = nsurl__component_copy(base->components.query);
|
|
} else {
|
|
error = nsurl__create_from_section(rel, URL_QUERY, &m,
|
|
buff, &c);
|
|
if (error != NSERROR_OK) {
|
|
free(buff);
|
|
return error;
|
|
}
|
|
}
|
|
|
|
error = nsurl__create_from_section(rel, URL_FRAGMENT, &m, buff, &c);
|
|
|
|
/* Free temporary buffer */
|
|
free(buff);
|
|
|
|
if (error != NSERROR_OK) {
|
|
return error;
|
|
}
|
|
|
|
/* Get the string length and find which parts of url are present */
|
|
nsurl__get_string_data(&c, NSURL_WITH_FRAGMENT, &length,
|
|
&str_len, &str_flags);
|
|
|
|
/* Create NetSurf URL object */
|
|
*joined = malloc(sizeof(nsurl) + length + 1); /* Add 1 for \0 */
|
|
if (*joined == NULL) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
(*joined)->components = c;
|
|
(*joined)->length = length;
|
|
|
|
/* Fill out the url string */
|
|
nsurl_get_string(&c, (*joined)->string, &str_len, str_flags);
|
|
|
|
/* Get the nsurl's hash */
|
|
nsurl_calc_hash(*joined);
|
|
|
|
/* Give the URL a reference */
|
|
(*joined)->count = 1;
|
|
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
nserror nsurl_defragment(const nsurl *url, nsurl **no_frag)
|
|
{
|
|
size_t length;
|
|
char *pos;
|
|
|
|
assert(url != NULL);
|
|
|
|
/* check for source url having no fragment already */
|
|
if (url->components.fragment == NULL) {
|
|
*no_frag = (nsurl *)url;
|
|
|
|
(*no_frag)->count++;
|
|
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
/* Find the change in length from url to new_url */
|
|
length = url->length;
|
|
if (url->components.fragment != NULL) {
|
|
length -= 1 + lwc_string_length(url->components.fragment);
|
|
}
|
|
|
|
/* Create NetSurf URL object */
|
|
*no_frag = malloc(sizeof(nsurl) + length + 1); /* Add 1 for \0 */
|
|
if (*no_frag == NULL) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
/* Copy components */
|
|
(*no_frag)->components.scheme =
|
|
nsurl__component_copy(url->components.scheme);
|
|
(*no_frag)->components.username =
|
|
nsurl__component_copy(url->components.username);
|
|
(*no_frag)->components.password =
|
|
nsurl__component_copy(url->components.password);
|
|
(*no_frag)->components.host =
|
|
nsurl__component_copy(url->components.host);
|
|
(*no_frag)->components.port =
|
|
nsurl__component_copy(url->components.port);
|
|
(*no_frag)->components.path =
|
|
nsurl__component_copy(url->components.path);
|
|
(*no_frag)->components.query =
|
|
nsurl__component_copy(url->components.query);
|
|
(*no_frag)->components.fragment = NULL;
|
|
|
|
(*no_frag)->components.scheme_type = url->components.scheme_type;
|
|
|
|
(*no_frag)->length = length;
|
|
|
|
/* Fill out the url string */
|
|
pos = (*no_frag)->string;
|
|
memcpy(pos, url->string, length);
|
|
pos += length;
|
|
*pos = '\0';
|
|
|
|
/* Get the nsurl's hash */
|
|
nsurl_calc_hash(*no_frag);
|
|
|
|
/* Give the URL a reference */
|
|
(*no_frag)->count = 1;
|
|
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
nserror nsurl_refragment(const nsurl *url, lwc_string *frag, nsurl **new_url)
|
|
{
|
|
int frag_len;
|
|
int base_len;
|
|
char *pos;
|
|
size_t len;
|
|
|
|
assert(url != NULL);
|
|
assert(frag != NULL);
|
|
|
|
/* Find the change in length from url to new_url */
|
|
base_len = url->length;
|
|
if (url->components.fragment != NULL) {
|
|
base_len -= 1 + lwc_string_length(url->components.fragment);
|
|
}
|
|
frag_len = lwc_string_length(frag);
|
|
|
|
/* Set new_url's length */
|
|
len = base_len + 1 /* # */ + frag_len;
|
|
|
|
/* Create NetSurf URL object */
|
|
*new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */
|
|
if (*new_url == NULL) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
(*new_url)->length = len;
|
|
|
|
/* Set string */
|
|
pos = (*new_url)->string;
|
|
memcpy(pos, url->string, base_len);
|
|
pos += base_len;
|
|
*pos = '#';
|
|
memcpy(++pos, lwc_string_data(frag), frag_len);
|
|
pos += frag_len;
|
|
*pos = '\0';
|
|
|
|
/* Copy components */
|
|
(*new_url)->components.scheme =
|
|
nsurl__component_copy(url->components.scheme);
|
|
(*new_url)->components.username =
|
|
nsurl__component_copy(url->components.username);
|
|
(*new_url)->components.password =
|
|
nsurl__component_copy(url->components.password);
|
|
(*new_url)->components.host =
|
|
nsurl__component_copy(url->components.host);
|
|
(*new_url)->components.port =
|
|
nsurl__component_copy(url->components.port);
|
|
(*new_url)->components.path =
|
|
nsurl__component_copy(url->components.path);
|
|
(*new_url)->components.query =
|
|
nsurl__component_copy(url->components.query);
|
|
(*new_url)->components.fragment =
|
|
lwc_string_ref(frag);
|
|
|
|
(*new_url)->components.scheme_type = url->components.scheme_type;
|
|
|
|
/* Get the nsurl's hash */
|
|
nsurl_calc_hash(*new_url);
|
|
|
|
/* Give the URL a reference */
|
|
(*new_url)->count = 1;
|
|
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
nserror nsurl_replace_query(const nsurl *url, const char *query,
|
|
nsurl **new_url)
|
|
{
|
|
int query_len; /* Length of new query string, including '?' */
|
|
int frag_len = 0; /* Length of fragment, including '#' */
|
|
int base_len; /* Length of URL up to start of query */
|
|
char *pos;
|
|
size_t len;
|
|
lwc_string *lwc_query;
|
|
|
|
assert(url != NULL);
|
|
assert(query != NULL);
|
|
assert(query[0] == '?');
|
|
|
|
/* Get the length of the new query */
|
|
query_len = strlen(query);
|
|
|
|
/* Find the change in length from url to new_url */
|
|
base_len = url->length;
|
|
if (url->components.query != NULL) {
|
|
base_len -= lwc_string_length(url->components.query);
|
|
}
|
|
if (url->components.fragment != NULL) {
|
|
frag_len = 1 + lwc_string_length(url->components.fragment);
|
|
base_len -= frag_len;
|
|
}
|
|
|
|
/* Set new_url's length */
|
|
len = base_len + query_len + frag_len;
|
|
|
|
/* Create NetSurf URL object */
|
|
*new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */
|
|
if (*new_url == NULL) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
if (lwc_intern_string(query, query_len, &lwc_query) != lwc_error_ok) {
|
|
free(*new_url);
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
(*new_url)->length = len;
|
|
|
|
/* Set string */
|
|
pos = (*new_url)->string;
|
|
memcpy(pos, url->string, base_len);
|
|
pos += base_len;
|
|
memcpy(pos, query, query_len);
|
|
pos += query_len;
|
|
if (url->components.fragment != NULL) {
|
|
const char *frag = lwc_string_data(url->components.fragment);
|
|
*pos = '#';
|
|
memcpy(++pos, frag, frag_len - 1);
|
|
pos += frag_len - 1;
|
|
}
|
|
*pos = '\0';
|
|
|
|
/* Copy components */
|
|
(*new_url)->components.scheme =
|
|
nsurl__component_copy(url->components.scheme);
|
|
(*new_url)->components.username =
|
|
nsurl__component_copy(url->components.username);
|
|
(*new_url)->components.password =
|
|
nsurl__component_copy(url->components.password);
|
|
(*new_url)->components.host =
|
|
nsurl__component_copy(url->components.host);
|
|
(*new_url)->components.port =
|
|
nsurl__component_copy(url->components.port);
|
|
(*new_url)->components.path =
|
|
nsurl__component_copy(url->components.path);
|
|
(*new_url)->components.query = lwc_query;
|
|
(*new_url)->components.fragment =
|
|
nsurl__component_copy(url->components.fragment);
|
|
|
|
(*new_url)->components.scheme_type = url->components.scheme_type;
|
|
|
|
/* Get the nsurl's hash */
|
|
nsurl_calc_hash(*new_url);
|
|
|
|
/* Give the URL a reference */
|
|
(*new_url)->count = 1;
|
|
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
|
|
/* exported interface documented in utils/nsurl.h */
|
|
nserror nsurl_nice(const nsurl *url, char **result, bool remove_extensions)
|
|
{
|
|
const char *data;
|
|
size_t len;
|
|
size_t pos;
|
|
bool match;
|
|
char *name;
|
|
|
|
assert(url != NULL);
|
|
|
|
*result = 0;
|
|
|
|
/* extract the last component of the path, if possible */
|
|
if ((url->components.path != NULL) &&
|
|
(lwc_string_length(url->components.path) != 0) &&
|
|
(lwc_string_isequal(url->components.path,
|
|
corestring_lwc_slash_, &match) == lwc_error_ok) &&
|
|
(match == false)) {
|
|
bool first = true;
|
|
bool keep_looking;
|
|
|
|
/* Get hold of the string data we're examining */
|
|
data = lwc_string_data(url->components.path);
|
|
len = lwc_string_length(url->components.path);
|
|
pos = len;
|
|
|
|
do {
|
|
keep_looking = false;
|
|
pos--;
|
|
|
|
/* Find last '/' with stuff after it */
|
|
while (pos != 0) {
|
|
if (data[pos] == '/' && pos < len - 1) {
|
|
break;
|
|
}
|
|
pos--;
|
|
}
|
|
|
|
if (pos == 0) {
|
|
break;
|
|
}
|
|
|
|
if (first) {
|
|
if (strncasecmp("/default.", data + pos,
|
|
SLEN("/default.")) == 0) {
|
|
keep_looking = true;
|
|
|
|
} else if (strncasecmp("/index.",
|
|
data + pos,
|
|
6) == 0) {
|
|
keep_looking = true;
|
|
|
|
}
|
|
first = false;
|
|
}
|
|
|
|
} while (keep_looking);
|
|
|
|
if (data[pos] == '/')
|
|
pos++;
|
|
|
|
if (strncasecmp("default.", data + pos, 8) != 0 &&
|
|
strncasecmp("index.", data + pos, 6) != 0) {
|
|
size_t end = pos;
|
|
while (data[end] != '\0' && data[end] != '/') {
|
|
end++;
|
|
}
|
|
if (end - pos != 0) {
|
|
name = malloc(end - pos + 1);
|
|
if (name == NULL) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
memcpy(name, data + pos, end - pos);
|
|
name[end - pos] = '\0';
|
|
if (remove_extensions) {
|
|
/* strip any extenstion */
|
|
char *dot = strchr(name, '.');
|
|
if (dot && dot != name) {
|
|
*dot = '\0';
|
|
}
|
|
}
|
|
*result = name;
|
|
return NSERROR_OK;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (url->components.host != NULL) {
|
|
name = strdup(lwc_string_data(url->components.host));
|
|
|
|
for (pos = 0; name[pos] != '\0'; pos++) {
|
|
if (name[pos] == '.') {
|
|
name[pos] = '_';
|
|
}
|
|
}
|
|
|
|
*result = name;
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
return NSERROR_NOT_FOUND;
|
|
}
|
|
|
|
|
|
/* exported interface, documented in nsurl.h */
|
|
nserror nsurl_parent(const nsurl *url, nsurl **new_url)
|
|
{
|
|
lwc_string *lwc_path;
|
|
size_t old_path_len, new_path_len;
|
|
size_t len;
|
|
const char* path = NULL;
|
|
char *pos;
|
|
|
|
assert(url != NULL);
|
|
|
|
old_path_len = (url->components.path == NULL) ? 0 :
|
|
lwc_string_length(url->components.path);
|
|
|
|
/* Find new path length */
|
|
if (old_path_len == 0) {
|
|
new_path_len = old_path_len;
|
|
} else {
|
|
path = lwc_string_data(url->components.path);
|
|
|
|
new_path_len = old_path_len;
|
|
if (old_path_len > 1) {
|
|
/* Skip over any trailing / */
|
|
if (path[new_path_len - 1] == '/')
|
|
new_path_len--;
|
|
|
|
/* Work back to next / */
|
|
while (new_path_len > 0 &&
|
|
path[new_path_len - 1] != '/')
|
|
new_path_len--;
|
|
}
|
|
}
|
|
|
|
/* Find the length of new_url */
|
|
len = url->length;
|
|
if (url->components.query != NULL) {
|
|
len -= lwc_string_length(url->components.query);
|
|
}
|
|
if (url->components.fragment != NULL) {
|
|
len -= 1; /* # */
|
|
len -= lwc_string_length(url->components.fragment);
|
|
}
|
|
len -= old_path_len - new_path_len;
|
|
|
|
/* Create NetSurf URL object */
|
|
*new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */
|
|
if (*new_url == NULL) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
/* Make new path */
|
|
if (old_path_len == 0) {
|
|
lwc_path = NULL;
|
|
} else if (old_path_len == new_path_len) {
|
|
lwc_path = lwc_string_ref(url->components.path);
|
|
} else {
|
|
if (lwc_intern_string(path, old_path_len - new_path_len,
|
|
&lwc_path) != lwc_error_ok) {
|
|
free(*new_url);
|
|
return NSERROR_NOMEM;
|
|
}
|
|
}
|
|
|
|
(*new_url)->length = len;
|
|
|
|
/* Set string */
|
|
pos = (*new_url)->string;
|
|
memcpy(pos, url->string, len);
|
|
pos += len;
|
|
*pos = '\0';
|
|
|
|
/* Copy components */
|
|
(*new_url)->components.scheme =
|
|
nsurl__component_copy(url->components.scheme);
|
|
(*new_url)->components.username =
|
|
nsurl__component_copy(url->components.username);
|
|
(*new_url)->components.password =
|
|
nsurl__component_copy(url->components.password);
|
|
(*new_url)->components.host =
|
|
nsurl__component_copy(url->components.host);
|
|
(*new_url)->components.port =
|
|
nsurl__component_copy(url->components.port);
|
|
(*new_url)->components.path = lwc_path;
|
|
(*new_url)->components.query = NULL;
|
|
(*new_url)->components.fragment = NULL;
|
|
|
|
(*new_url)->components.scheme_type = url->components.scheme_type;
|
|
|
|
/* Get the nsurl's hash */
|
|
nsurl_calc_hash(*new_url);
|
|
|
|
/* Give the URL a reference */
|
|
(*new_url)->count = 1;
|
|
|
|
return NSERROR_OK;
|
|
}
|
|
|