netsurf/utils/nsurl/nsurl.c

934 lines
21 KiB
C
Raw Normal View History

/*
* Copyright 2011 Michael Drake <tlsa@netsurf-browser.org>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file
* NetSurf URL handling implementation.
*
* This is the common implementation of all URL handling within the
* browser. This implementation is based upon RFC3986 although this has
* been superseded by https://url.spec.whatwg.org/ which is based on
* actual contemporary implementations.
*
* Care must be taken with character encodings within this module as
* the specifications work with specific ASCII ranges and must not be
* affected by locale. Hence the C library character type functions
* are not used.
*/
#include <assert.h>
#include <libwapcaplet/libwapcaplet.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "utils/ascii.h"
2014-01-25 03:19:46 +04:00
#include "utils/corestrings.h"
#include "utils/errors.h"
2014-05-30 23:03:04 +04:00
#include "utils/idna.h"
#include "utils/log.h"
#include "utils/nsurl/private.h"
#include "utils/nsurl.h"
#include "utils/utils.h"
/* Define to enable NSURL debugging */
#undef NSURL_DEBUG

/**
 * Take a new reference to a URL component, tolerating absent components.
 *
 * Evaluates to NULL when \a c is NULL, otherwise to lwc_string_ref(c).
 * The expansion is fully parenthesised so that it can safely be embedded
 * in a larger expression.  Note that \a c is evaluated more than once.
 */
#define nsurl__component_copy(c) \
	(((c) == NULL) ? NULL : lwc_string_ref(c))

/**
 * Compare one component of two URLs.
 *
 * - Both present: lwc_string_isequal() stores the result through \a match;
 *   if that call does not return lwc_error_ok, *match is set to false.
 * - Exactly one present: *match is set to false.
 * - Both absent: *match is left untouched.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement,
 * including when used as the unbraced body of an if/else.
 */
#define nsurl__component_compare(c1, c2, match)				\
	do {								\
		if ((c1) && (c2) && lwc_error_ok ==			\
				lwc_string_isequal((c1), (c2), (match))) { \
			/* do nothing */				\
		} else if ((c1) || (c2)) {				\
			*(match) = false;				\
		}							\
	} while (0)
/**
 * Release every component string held by a component set.
 *
 * Each non-NULL member of \a c has one reference dropped via
 * lwc_string_unref().  The members themselves are not cleared.
 *
 * \param c  url components whose strings are to be released
 */
static void nsurl_destroy_components(struct nsurl_components *c)
{
	if (c->scheme != NULL) {
		lwc_string_unref(c->scheme);
	}

	if (c->username != NULL) {
		lwc_string_unref(c->username);
	}

	if (c->password != NULL) {
		lwc_string_unref(c->password);
	}

	if (c->host != NULL) {
		lwc_string_unref(c->host);
	}

	if (c->port != NULL) {
		lwc_string_unref(c->port);
	}

	if (c->path != NULL) {
		lwc_string_unref(c->path);
	}

	if (c->query != NULL) {
		lwc_string_unref(c->query);
	}

	if (c->fragment != NULL) {
		lwc_string_unref(c->fragment);
	}
}
#ifdef NSURL_DEBUG
/**
 * Log each component that is present in a NetSurf URL.
 *
 * Only compiled when NSURL_DEBUG is defined.  Absent (NULL) components
 * produce no output.
 *
 * \param url  The NetSurf URL whose components are to be logged
 */
static void nsurl__dump(const nsurl *url)
{
	if (url->components.scheme != NULL) {
		LOG(" Scheme: %s", lwc_string_data(url->components.scheme));
	}

	if (url->components.username != NULL) {
		LOG("Username: %s", lwc_string_data(url->components.username));
	}

	if (url->components.password != NULL) {
		LOG("Password: %s", lwc_string_data(url->components.password));
	}

	if (url->components.host != NULL) {
		LOG(" Host: %s", lwc_string_data(url->components.host));
	}

	if (url->components.port != NULL) {
		LOG(" Port: %s", lwc_string_data(url->components.port));
	}

	if (url->components.path != NULL) {
		LOG(" Path: %s", lwc_string_data(url->components.path));
	}

	if (url->components.query != NULL) {
		LOG(" Query: %s", lwc_string_data(url->components.query));
	}

	if (url->components.fragment != NULL) {
		LOG("Fragment: %s", lwc_string_data(url->components.fragment));
	}
}
#endif
/******************************************************************************
* NetSurf URL Public API *
******************************************************************************/
/*
 * exported interface, documented in nsurl.h
 *
 * Adds one to the URL's reference count and hands the same object back.
 */
nsurl *nsurl_ref(nsurl *url)
{
	assert(url != NULL);

	url->count += 1;

	return url;
}
/*
 * exported interface, documented in nsurl.h
 *
 * Drops one reference.  Once the count is no longer positive the component
 * strings are released and the URL object itself is freed.
 */
void nsurl_unref(nsurl *url)
{
	assert(url != NULL);
	assert(url->count > 0);

	url->count--;
	if (url->count > 0) {
		/* Still referenced elsewhere; nothing more to do */
		return;
	}

#ifdef NSURL_DEBUG
	nsurl__dump(url);
#endif

	/* Last reference gone: release the lwc strings, then the object */
	nsurl_destroy_components(&url->components);
	free(url);
}
/*
 * exported interface, documented in nsurl.h
 *
 * Compares only the components selected in \a parts, returning false as
 * soon as one selected component differs and true otherwise.
 */
bool nsurl_compare(const nsurl *url1, const nsurl *url2, nsurl_component parts)
{
	bool same = true;

	assert(url1 != NULL);
	assert(url2 != NULL);

	/* Path, host and query are checked first, since they are the
	 * components most likely to differ. */
	if ((parts & NSURL_PATH) != 0) {
		nsurl__component_compare(url1->components.path,
				url2->components.path, &same);
		if (!same) {
			return false;
		}
	}

	if ((parts & NSURL_HOST) != 0) {
		nsurl__component_compare(url1->components.host,
				url2->components.host, &same);
		if (!same) {
			return false;
		}
	}

	if ((parts & NSURL_QUERY) != 0) {
		nsurl__component_compare(url1->components.query,
				url2->components.query, &same);
		if (!same) {
			return false;
		}
	}

	if ((parts & NSURL_SCHEME) != 0) {
		nsurl__component_compare(url1->components.scheme,
				url2->components.scheme, &same);
		if (!same) {
			return false;
		}
	}

	if ((parts & NSURL_USERNAME) != 0) {
		nsurl__component_compare(url1->components.username,
				url2->components.username, &same);
		if (!same) {
			return false;
		}
	}

	if ((parts & NSURL_PASSWORD) != 0) {
		nsurl__component_compare(url1->components.password,
				url2->components.password, &same);
		if (!same) {
			return false;
		}
	}

	if ((parts & NSURL_PORT) != 0) {
		nsurl__component_compare(url1->components.port,
				url2->components.port, &same);
		if (!same) {
			return false;
		}
	}

	if ((parts & NSURL_FRAGMENT) != 0) {
		nsurl__component_compare(url1->components.fragment,
				url2->components.fragment, &same);
		if (!same) {
			return false;
		}
	}

	return true;
}
/*
 * exported interface, documented in nsurl.h
 *
 * Builds a freshly malloc()ed, NUL-terminated string from the requested
 * parts of the URL.  *url_l receives the length computed by
 * nsurl__get_string_data(); one extra byte is allocated for the terminator.
 * Returns NSERROR_BAD_URL when that length is zero and NSERROR_NOMEM when
 * the allocation fails.
 */
nserror nsurl_get(const nsurl *url, nsurl_component parts,
		char **url_s, size_t *url_l)
{
	struct nsurl_component_lengths part_lengths = { 0, 0, 0, 0, 0, 0, 0, 0 };
	enum nsurl_string_flags part_flags = 0;
	char *buffer;

	assert(url != NULL);

	/* Work out the total length and which parts need copying */
	nsurl__get_string_data(&(url->components), parts, url_l,
			&part_lengths, &part_flags);

	if (*url_l == 0) {
		return NSERROR_BAD_URL;
	}

	/* Room for the string plus its '\0' */
	buffer = malloc(*url_l + 1);
	*url_s = buffer;
	if (buffer == NULL) {
		return NSERROR_NOMEM;
	}

	/* Assemble the selected parts into the new buffer */
	nsurl__get_string(&(url->components), buffer, &part_lengths,
			part_flags);

	return NSERROR_OK;
}
/*
 * exported interface, documented in nsurl.h
 *
 * Returns a new reference to the requested single component, or NULL when
 * the URL does not have it.  Any other value of \a part is logged and
 * trips an assertion.
 */
lwc_string *nsurl_get_component(const nsurl *url, nsurl_component part)
{
	lwc_string *component;

	assert(url != NULL);

	switch (part) {
	case NSURL_SCHEME:
		component = url->components.scheme;
		break;

	case NSURL_USERNAME:
		component = url->components.username;
		break;

	case NSURL_PASSWORD:
		component = url->components.password;
		break;

	case NSURL_HOST:
		component = url->components.host;
		break;

	case NSURL_PORT:
		component = url->components.port;
		break;

	case NSURL_PATH:
		component = url->components.path;
		break;

	case NSURL_QUERY:
		component = url->components.query;
		break;

	case NSURL_FRAGMENT:
		component = url->components.fragment;
		break;

	default:
		LOG("Unsupported value passed to part param.");
		assert(0);
		return NULL;
	}

	if (component == NULL) {
		return NULL;
	}

	return lwc_string_ref(component);
}
/*
 * exported interface, documented in nsurl.h
 *
 * Reports whether the requested component is present (non-NULL).
 * NSURL_CREDENTIALS is answered from the username alone.  Any other
 * unsupported value of \a part is logged and trips an assertion.
 */
bool nsurl_has_component(const nsurl *url, nsurl_component part)
{
	assert(url != NULL);

	switch (part) {
	case NSURL_SCHEME:
		return url->components.scheme != NULL;

	case NSURL_CREDENTIALS:
		/* Only username required for credentials section */
		/* Fall through */
	case NSURL_USERNAME:
		return url->components.username != NULL;

	case NSURL_PASSWORD:
		return url->components.password != NULL;

	case NSURL_HOST:
		return url->components.host != NULL;

	case NSURL_PORT:
		return url->components.port != NULL;

	case NSURL_PATH:
		return url->components.path != NULL;

	case NSURL_QUERY:
		return url->components.query != NULL;

	case NSURL_FRAGMENT:
		return url->components.fragment != NULL;

	default:
		LOG("Unsupported value passed to part param.");
		assert(0);
	}

	return false;
}
/*
 * exported interface, documented in nsurl.h
 *
 * Gives read-only access to the URL's stored string; nothing is copied.
 */
const char *nsurl_access(const nsurl *url)
{
	const char *text;

	assert(url != NULL);

	text = url->string;

	return text;
}
/* exported interface, documented in nsurl.h */
nserror nsurl_get_utf8(const nsurl *url, char **url_s, size_t *url_l)
{
nserror err;
lwc_string *host;
char *idna_host = NULL;
size_t idna_host_len;
char *scheme = NULL;
size_t scheme_len;
char *path = NULL;
size_t path_len;
assert(url != NULL);
2015-10-31 16:32:42 +03:00
if (url->components.host == NULL) {
return nsurl_get(url, NSURL_WITH_FRAGMENT, url_s, url_l);
}
2015-07-17 22:44:52 +03:00
2015-10-31 16:32:42 +03:00
host = url->components.host;
err = idna_decode(lwc_string_data(host), lwc_string_length(host),
2015-10-31 16:23:08 +03:00
&idna_host, &idna_host_len);
if (err != NSERROR_OK) {
goto cleanup;
2015-10-31 16:23:08 +03:00
}
2015-10-31 16:23:08 +03:00
err = nsurl_get(url,
NSURL_SCHEME | NSURL_CREDENTIALS,
&scheme, &scheme_len);
if (err != NSERROR_OK) {
goto cleanup;
2015-10-31 16:23:08 +03:00
}
2015-10-31 16:23:08 +03:00
err = nsurl_get(url,
NSURL_PORT | NSURL_PATH | NSURL_QUERY | NSURL_FRAGMENT,
&path, &path_len);
if (err != NSERROR_OK) {
goto cleanup;
2015-10-31 16:23:08 +03:00
}
*url_l = scheme_len + idna_host_len + path_len + 1; /* +1 for \0 */
*url_s = malloc(*url_l);
2015-10-31 16:23:08 +03:00
if (*url_s == NULL) {
err = NSERROR_NOMEM;
goto cleanup;
2015-10-31 16:23:08 +03:00
}
snprintf(*url_s, *url_l, "%s%s%s", scheme, idna_host, path);
err = NSERROR_OK;
cleanup:
free(idna_host);
free(scheme);
free(path);
return err;
}
2012-10-24 21:22:45 +04:00
/*
 * exported interface, documented in nsurl.h
 *
 * Returns a pointer to the text following the last '/' of the path.  A
 * missing or empty path yields "", the path "/" yields "/", and a path
 * containing no '/' is returned whole.  The result points either at a
 * string literal or into the URL's own path data; nothing is allocated.
 */
const char *nsurl_access_leaf(const nsurl *url)
{
	const char *path;
	size_t leaf_start;

	assert(url != NULL);

	if (url->components.path == NULL)
		return "";

	path = lwc_string_data(url->components.path);
	leaf_start = lwc_string_length(url->components.path);

	if (leaf_start == 0)
		return "";

	if (leaf_start == 1 && path[0] == '/')
		return "/";

	/* Step back until the character before the leaf is a '/', or the
	 * start of the path is reached. */
	while (leaf_start > 0 && path[leaf_start - 1] != '/') {
		leaf_start--;
	}

	return path + leaf_start;
}
/*
 * exported interface, documented in nsurl.h
 *
 * Reports the length value stored in the URL object.
 */
size_t nsurl_length(const nsurl *url)
{
	size_t stored_length;

	assert(url != NULL);

	stored_length = url->length;

	return stored_length;
}
/*
 * exported interface, documented in nsurl.h
 *
 * Reports the hash value stored in the URL object; nothing is recomputed.
 */
uint32_t nsurl_hash(const nsurl *url)
{
	uint32_t stored_hash;

	assert(url != NULL);

	stored_hash = url->hash;

	return stored_hash;
}
/*
 * exported interface, documented in nsurl.h
 *
 * Yields a URL equal to \a url but without its fragment.  When \a url has
 * no fragment the very same object is returned with its reference count
 * raised; otherwise a new object with a count of one is allocated.
 * Returns NSERROR_NOMEM if that allocation fails.
 */
nserror nsurl_defragment(const nsurl *url, nsurl **no_frag)
{
	nsurl *stripped;
	size_t stripped_len;

	assert(url != NULL);

	if (url->components.fragment == NULL) {
		/* Nothing to remove: share the existing object */
		stripped = (nsurl *)url;
		stripped->count++;
		*no_frag = stripped;
		return NSERROR_OK;
	}

	/* Drop the '#' and the fragment text from the length */
	stripped_len = url->length;
	stripped_len -= 1 + lwc_string_length(url->components.fragment);

	/* Object plus string plus '\0' */
	stripped = malloc(sizeof(nsurl) + stripped_len + 1);
	*no_frag = stripped;
	if (stripped == NULL) {
		return NSERROR_NOMEM;
	}

	/* The new string is simply a prefix of the old one */
	memcpy(stripped->string, url->string, stripped_len);
	stripped->string[stripped_len] = '\0';
	stripped->length = stripped_len;

	/* Share every component except the fragment */
	stripped->components.scheme =
			nsurl__component_copy(url->components.scheme);
	stripped->components.username =
			nsurl__component_copy(url->components.username);
	stripped->components.password =
			nsurl__component_copy(url->components.password);
	stripped->components.host =
			nsurl__component_copy(url->components.host);
	stripped->components.port =
			nsurl__component_copy(url->components.port);
	stripped->components.path =
			nsurl__component_copy(url->components.path);
	stripped->components.query =
			nsurl__component_copy(url->components.query);
	stripped->components.fragment = NULL;
	stripped->components.scheme_type = url->components.scheme_type;

	nsurl__calc_hash(stripped);

	/* The caller owns the only reference */
	stripped->count = 1;

	return NSERROR_OK;
}
/*
 * exported interface, documented in nsurl.h
 *
 * Creates a new URL that is \a url with its fragment (if any) replaced by
 * \a frag.  The new object takes its own reference to \a frag and to every
 * other component of \a url, and starts with a reference count of one.
 * Returns NSERROR_NOMEM if the object cannot be allocated.
 *
 * All lengths are held as size_t so that they are not narrowed to int.
 */
nserror nsurl_refragment(const nsurl *url, lwc_string *frag, nsurl **new_url)
{
	size_t frag_len;
	size_t base_len;
	size_t len;
	char *pos;

	assert(url != NULL);
	assert(frag != NULL);

	/* Length of url up to, but excluding, any existing '#fragment' */
	base_len = url->length;
	if (url->components.fragment != NULL) {
		base_len -= 1 + lwc_string_length(url->components.fragment);
	}
	frag_len = lwc_string_length(frag);

	/* Set new_url's length */
	len = base_len + 1 /* # */ + frag_len;

	/* Create NetSurf URL object */
	*new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */
	if (*new_url == NULL) {
		return NSERROR_NOMEM;
	}

	(*new_url)->length = len;

	/* Set string: base, then '#', then the new fragment */
	pos = (*new_url)->string;
	memcpy(pos, url->string, base_len);
	pos += base_len;
	*pos = '#';
	memcpy(++pos, lwc_string_data(frag), frag_len);
	pos += frag_len;
	*pos = '\0';

	/* Copy components */
	(*new_url)->components.scheme =
			nsurl__component_copy(url->components.scheme);
	(*new_url)->components.username =
			nsurl__component_copy(url->components.username);
	(*new_url)->components.password =
			nsurl__component_copy(url->components.password);
	(*new_url)->components.host =
			nsurl__component_copy(url->components.host);
	(*new_url)->components.port =
			nsurl__component_copy(url->components.port);
	(*new_url)->components.path =
			nsurl__component_copy(url->components.path);
	(*new_url)->components.query =
			nsurl__component_copy(url->components.query);
	(*new_url)->components.fragment =
			lwc_string_ref(frag);

	(*new_url)->components.scheme_type = url->components.scheme_type;

	/* Get the nsurl's hash */
	nsurl__calc_hash(*new_url);

	/* Give the URL a reference */
	(*new_url)->count = 1;

	return NSERROR_OK;
}
/*
 * exported interface, documented in nsurl.h
 *
 * Creates a new URL that is \a url with its query replaced by \a query.
 * \a query must start with '?' (asserted) and is interned as given to
 * become the new query component.  Any fragment on \a url is carried over.
 * The new object starts with a reference count of one.
 *
 * Returns NSERROR_NOMEM if the object cannot be allocated or the query
 * cannot be interned; in the latter case *new_url is reset to NULL rather
 * than left pointing at freed memory.
 *
 * All lengths are held as size_t so that they are not narrowed to int.
 */
nserror nsurl_replace_query(const nsurl *url, const char *query,
		nsurl **new_url)
{
	size_t query_len;    /* Length of new query string, including '?' */
	size_t frag_len = 0; /* Length of fragment, including '#' */
	size_t base_len;     /* Length of URL up to start of query */
	size_t len;
	char *pos;
	lwc_string *lwc_query;

	assert(url != NULL);
	assert(query != NULL);
	assert(query[0] == '?');

	/* Get the length of the new query */
	query_len = strlen(query);

	/* Find the change in length from url to new_url */
	base_len = url->length;
	if (url->components.query != NULL) {
		base_len -= lwc_string_length(url->components.query);
	}
	if (url->components.fragment != NULL) {
		frag_len = 1 + lwc_string_length(url->components.fragment);
		base_len -= frag_len;
	}

	/* Set new_url's length */
	len = base_len + query_len + frag_len;

	/* Create NetSurf URL object */
	*new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */
	if (*new_url == NULL) {
		return NSERROR_NOMEM;
	}

	if (lwc_intern_string(query, query_len, &lwc_query) != lwc_error_ok) {
		free(*new_url);
		*new_url = NULL; /* do not hand back a dangling pointer */
		return NSERROR_NOMEM;
	}

	(*new_url)->length = len;

	/* Set string: base, then the new query, then any old fragment */
	pos = (*new_url)->string;
	memcpy(pos, url->string, base_len);
	pos += base_len;
	memcpy(pos, query, query_len);
	pos += query_len;
	if (url->components.fragment != NULL) {
		const char *frag = lwc_string_data(url->components.fragment);
		*pos = '#';
		/* frag_len includes the '#', which was just written */
		memcpy(++pos, frag, frag_len - 1);
		pos += frag_len - 1;
	}
	*pos = '\0';

	/* Copy components */
	(*new_url)->components.scheme =
			nsurl__component_copy(url->components.scheme);
	(*new_url)->components.username =
			nsurl__component_copy(url->components.username);
	(*new_url)->components.password =
			nsurl__component_copy(url->components.password);
	(*new_url)->components.host =
			nsurl__component_copy(url->components.host);
	(*new_url)->components.port =
			nsurl__component_copy(url->components.port);
	(*new_url)->components.path =
			nsurl__component_copy(url->components.path);
	(*new_url)->components.query = lwc_query;
	(*new_url)->components.fragment =
			nsurl__component_copy(url->components.fragment);

	(*new_url)->components.scheme_type = url->components.scheme_type;

	/* Get the nsurl's hash */
	nsurl__calc_hash(*new_url);

	/* Give the URL a reference */
	(*new_url)->count = 1;

	return NSERROR_OK;
}
/*
 * exported interface documented in utils/nsurl.h
 *
 * Derives a short, human-friendly name for a URL.
 *
 * The last path segment is preferred.  If that segment begins with
 * "default." or "index." (case-insensitively) the preceding segment is
 * tried instead.  When \a remove_extensions is set, the name is cut at its
 * first '.', unless that '.' is the first character.  If no usable path
 * segment exists, the host is used with every '.' replaced by '_'.
 *
 * On success *result is a malloc()ed string owned by the caller.
 *
 * \return NSERROR_OK on success, NSERROR_NOMEM on allocation failure,
 *         NSERROR_NOT_FOUND when neither path nor host gives a name.
 */
nserror nsurl_nice(const nsurl *url, char **result, bool remove_extensions)
{
	const char *data;
	size_t len;
	size_t pos;
	bool match;
	char *name;

	assert(url != NULL);

	*result = NULL;

	/* extract the last component of the path, if possible */
	if ((url->components.path != NULL) &&
	    (lwc_string_length(url->components.path) != 0) &&
	    (lwc_string_isequal(url->components.path,
			corestring_lwc_slash_, &match) == lwc_error_ok) &&
	    (match == false)) {
		bool first = true;
		bool keep_looking;

		/* Get hold of the string data we're examining */
		data = lwc_string_data(url->components.path);
		len = lwc_string_length(url->components.path);
		pos = len;

		do {
			keep_looking = false;
			pos--;

			/* Find last '/' with stuff after it */
			while (pos != 0) {
				if (data[pos] == '/' && pos < len - 1) {
					break;
				}
				pos--;
			}

			if (pos == 0) {
				break;
			}

			if (first) {
				/* A trailing default.* or index.* segment is
				 * uninformative; look one segment further
				 * back.  The full "/index." prefix, dot
				 * included, must match so that names such as
				 * "/indexes" are not skipped. */
				if (strncasecmp("/default.", data + pos,
						SLEN("/default.")) == 0) {
					keep_looking = true;

				} else if (strncasecmp("/index.",
						data + pos,
						SLEN("/index.")) == 0) {
					keep_looking = true;
				}
				first = false;
			}

		} while (keep_looking);

		if (data[pos] == '/')
			pos++;

		if (strncasecmp("default.", data + pos,
				SLEN("default.")) != 0 &&
		    strncasecmp("index.", data + pos,
				SLEN("index.")) != 0) {
			size_t end = pos;

			/* The segment runs to the next '/' or end of data */
			while (data[end] != '\0' && data[end] != '/') {
				end++;
			}

			if (end - pos != 0) {
				name = malloc(end - pos + 1);
				if (name == NULL) {
					return NSERROR_NOMEM;
				}
				memcpy(name, data + pos, end - pos);
				name[end - pos] = '\0';

				if (remove_extensions) {
					/* strip any extension */
					char *dot = strchr(name, '.');
					if (dot && dot != name) {
						*dot = '\0';
					}
				}

				*result = name;
				return NSERROR_OK;
			}
		}
	}

	/* Fall back to the host name */
	if (url->components.host != NULL) {
		name = strdup(lwc_string_data(url->components.host));
		if (name == NULL) {
			/* strdup failed; do not walk a NULL pointer */
			return NSERROR_NOMEM;
		}

		for (pos = 0; name[pos] != '\0'; pos++) {
			if (name[pos] == '.') {
				name[pos] = '_';
			}
		}

		*result = name;
		return NSERROR_OK;
	}

	return NSERROR_NOT_FOUND;
}
/*
 * exported interface, documented in nsurl.h
 *
 * Creates a new URL for the parent of \a url's path.  The query and
 * fragment are dropped.  For paths longer than one character, one trailing
 * '/' is skipped and the path is then cut back to just after the previous
 * '/'.  Paths of length zero or one are kept as they are.  The new object
 * starts with a reference count of one.
 *
 * Returns NSERROR_NOMEM if the object cannot be allocated or the new path
 * cannot be interned; in the latter case *new_url is reset to NULL rather
 * than left pointing at freed memory.
 */
nserror nsurl_parent(const nsurl *url, nsurl **new_url)
{
	lwc_string *lwc_path;
	size_t old_path_len, new_path_len;
	size_t len;
	const char *path = NULL;
	char *pos;

	assert(url != NULL);

	old_path_len = (url->components.path == NULL) ? 0 :
			lwc_string_length(url->components.path);

	/* Find new path length */
	if (old_path_len == 0) {
		new_path_len = old_path_len;
	} else {
		path = lwc_string_data(url->components.path);

		new_path_len = old_path_len;
		if (old_path_len > 1) {
			/* Skip over any trailing / */
			if (path[new_path_len - 1] == '/')
				new_path_len--;

			/* Work back to next / */
			while (new_path_len > 0 &&
					path[new_path_len - 1] != '/')
				new_path_len--;
		}
	}

	/* Find the length of new_url */
	len = url->length;
	if (url->components.query != NULL) {
		len -= lwc_string_length(url->components.query);
	}
	if (url->components.fragment != NULL) {
		len -= 1; /* # */
		len -= lwc_string_length(url->components.fragment);
	}
	len -= old_path_len - new_path_len;

	/* Create NetSurf URL object */
	*new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */
	if (*new_url == NULL) {
		return NSERROR_NOMEM;
	}

	/* Make new path */
	if (old_path_len == 0) {
		lwc_path = NULL;
	} else if (old_path_len == new_path_len) {
		lwc_path = lwc_string_ref(url->components.path);
	} else {
		/* The parent path is the first new_path_len bytes of the old
		 * path.  (old_path_len - new_path_len is the number of bytes
		 * removed, not the number kept.) */
		if (lwc_intern_string(path, new_path_len,
				&lwc_path) != lwc_error_ok) {
			free(*new_url);
			*new_url = NULL; /* do not hand back a dangling pointer */
			return NSERROR_NOMEM;
		}
	}

	(*new_url)->length = len;

	/* Set string: the new URL is a prefix of the old one */
	pos = (*new_url)->string;
	memcpy(pos, url->string, len);
	pos += len;
	*pos = '\0';

	/* Copy components */
	(*new_url)->components.scheme =
			nsurl__component_copy(url->components.scheme);
	(*new_url)->components.username =
			nsurl__component_copy(url->components.username);
	(*new_url)->components.password =
			nsurl__component_copy(url->components.password);
	(*new_url)->components.host =
			nsurl__component_copy(url->components.host);
	(*new_url)->components.port =
			nsurl__component_copy(url->components.port);
	(*new_url)->components.path = lwc_path;
	(*new_url)->components.query = NULL;
	(*new_url)->components.fragment = NULL;

	(*new_url)->components.scheme_type = url->components.scheme_type;

	/* Get the nsurl's hash */
	nsurl__calc_hash(*new_url);

	/* Give the URL a reference */
	(*new_url)->count = 1;

	return NSERROR_OK;
}