netsurf/utils/url.c

/*
 * This file is part of NetSurf, http://netsurf.sourceforge.net/
 * Licensed under the GNU General Public License,
 *		  http://www.opensource.org/licenses/gpl-license
 * Copyright 2005 James Bursa <bursa@users.sourceforge.net>
 * Copyright 2005 John M Bell <jmb202@ecs.soton.ac.uk>
 */

/** \file
 * URL parsing and joining (implementation).
 */

#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/types.h>
#include <regex.h>
#include "netsurf/utils/log.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"


/* Compiled regular expressions shared by the url_ functions; both are
 * compiled by url_init(). url_re splits a URL into scheme, authority,
 * path, query and fragment; url_up_re finds a "/segment/.." sequence,
 * which url_join() uses to collapse parent-directory references. */
regex_t url_re, url_up_re;

/* Indices into the regmatch_t array filled in when url_re matches. */
#define URL_RE_SCHEME 2
#define URL_RE_AUTHORITY 4
#define URL_RE_PATH 5
#define URL_RE_QUERY 7
#define URL_RE_FRAGMENT 9

/**
 * Initialise URL routines.
 *
 * Compiles url_re and url_up_re, the regular expressions which the url_
 * functions in this file pass to regexec().
 */

void url_init(void)
{
	/* URL splitter (regex from RFC 2396), with optional whitespace
	 * permitted before and after the URL */
	regcomp_wrapper(&url_re,
			"^[[:space:]]*"
			"(([a-zA-Z][-a-zA-Z0-9+.]*):)?"
			"(//([^/?#[:space:]]*))?"
			"([^?#[:space:]]*)"
			"(\\?([^#[:space:]]*))?"
			"(#([^[:space:]]*))?"
			"[[:space:]]*$",
			REG_EXTENDED);

	/* "/segment/.." followed by '/' or end of string, where segment
	 * is any path segment other than ".." itself */
	regcomp_wrapper(&url_up_re,
			"/([^/]?|[.][^./]|[^./][.]|[^./][^./]|[^/][^/][^/]+)"
			"/[.][.](/|$)",
			REG_EXTENDED);
}


/**
 * Normalize a URL.
 *
 * \param  url	   an absolute URL
 * \param  result  pointer to pointer to buffer to hold cleaned up url
 * \return  URL_FUNC_OK on success, URL_FUNC_FAILED if the URL does not
 *	    parse, URL_FUNC_NOMEM on memory exhaustion
 *
 * If there is no scheme, http:// is added. The scheme and host are
 * lower-cased. Default ports are removed (http only). An empty path is
 * replaced with "/". Characters are unescaped if safe.
 *
 * On success *result is a malloc()ed string which the caller must free.
 * On any failure *result is NULL. Whitespace around the URL, which the
 * parsing regex tolerates, is not removed from the result.
 */

url_func_result url_normalize(const char *url, char **result)
{
	char c;
	int m;
	int i;
	size_t len;
	bool http = false;
	regmatch_t match[10];

	*result = NULL;

	if ((m = regexec(&url_re, url, 10, match, 0))) {
		LOG(("url '%s' failed to match regex", url));
		return URL_FUNC_FAILED;
	}

	len = strlen(url);

	if (match[URL_RE_SCHEME].rm_so == -1) {
		/* scheme missing: add http:// and reparse; the buffer has
		 * room for the prefix, an inserted '/' and the terminator */
		if ((*result = malloc(len + 13)) == NULL) {
			LOG(("malloc failed"));
			return URL_FUNC_NOMEM;
		}
		strcpy(*result, "http://");
		strcpy(*result + sizeof("http://")-1, url);
		if ((m = regexec(&url_re, *result, 10, match, 0))) {
			LOG(("url '%s' failed to match regex", (*result)));
			free(*result);
			/* never leave the caller with a freed pointer */
			*result = NULL;
			return URL_FUNC_FAILED;
		}
		len += sizeof("http://")-1;
	} else {
		/* room for an inserted '/' and the terminator */
		if ((*result = malloc(len + 6)) == NULL) {
			LOG(("malloc failed"));
			return URL_FUNC_NOMEM;
		}
		strcpy(*result, url);
	}

	/* see RFC 2616 section 3.2.3 */
	/* make scheme lower-case */
	if (match[URL_RE_SCHEME].rm_so != -1) {
		for (i = match[URL_RE_SCHEME].rm_so;
				i != match[URL_RE_SCHEME].rm_eo; i++)
			(*result)[i] = tolower((unsigned char) (*result)[i]);
		/* compare relative to the scheme's offset, so that leading
		 * whitespace does not hide an http scheme */
		if (match[URL_RE_SCHEME].rm_eo -
				match[URL_RE_SCHEME].rm_so == 4 &&
				strncmp((*result) + match[URL_RE_SCHEME].rm_so,
				"http", 4) == 0)
			http = true;
	}

	/* make empty path into "/" */
	if (match[URL_RE_PATH].rm_so != -1 &&
			match[URL_RE_PATH].rm_so == match[URL_RE_PATH].rm_eo) {
		/* shift the tail (including the terminator) up by one */
		memmove((*result) + match[URL_RE_PATH].rm_so + 1,
				(*result) + match[URL_RE_PATH].rm_so,
				len - match[URL_RE_PATH].rm_so + 1);
		(*result)[match[URL_RE_PATH].rm_so] = '/';
		len++;
	}

	/* make host lower-case; the authority precedes the path, so its
	 * offsets are unaffected by the insertion above */
	if (match[URL_RE_AUTHORITY].rm_so != -1) {
		for (i = match[URL_RE_AUTHORITY].rm_so;
				i != match[URL_RE_AUTHORITY].rm_eo; i++) {
			if ((*result)[i] == ':') {
				if (http && (*result)[i + 1] == '8' &&
						(*result)[i + 2] == '0' &&
						i + 3 ==
						match[URL_RE_AUTHORITY].rm_eo) {
					/* remove default http port ":80" */
					memmove((*result) + i,
							(*result) + i + 3,
							len -
							match[URL_RE_AUTHORITY].
							rm_eo);
					len -= 3;
					(*result)[len] = '\0';
				} else if (i + 1 ==
						match[URL_RE_AUTHORITY].rm_eo) {
					/* remove ':' with no port after it */
					memmove((*result) + i,
							(*result) + i + 1,
							len -
							match[URL_RE_AUTHORITY].
							rm_eo);
					len--;
					(*result)[len] = '\0';
				}
				/* nothing after the first ':' is lower-cased */
				break;
			}
			(*result)[i] = tolower((unsigned char) (*result)[i]);
		}
	}

	/* unescape non-"reserved" escaped characters */
	for (i = 0; (unsigned)i != len; i++) {
		if ((*result)[i] != '%')
			continue;
		/* decode two hex digits; the terminator stops us reading
		 * past the end of a truncated escape */
		c = tolower((unsigned char) (*result)[i + 1]);
		if ('0' <= c && c <= '9')
			m = 16 * (c - '0');
		else if ('a' <= c && c <= 'f')
			m = 16 * (c - 'a' + 10);
		else
			continue;
		c = tolower((unsigned char) (*result)[i + 2]);
		if ('0' <= c && c <= '9')
			m += c - '0';
		else if ('a' <= c && c <= 'f')
			m += c - 'a' + 10;
		else
			continue;

		/* leave controls, space, reserved/unsafe characters and
		 * non-ASCII bytes escaped */
		if (m <= 0x20 || strchr(";/?:@&=+$," "<>#%\"{}|\\^[]`", m) ||
				m >= 0x7f) {
			i += 2;
			continue;
		}

		/* replace "%XX" by the character and close the gap,
		 * moving the terminator too */
		(*result)[i] = m;
		memmove((*result) + i + 1, (*result) + i + 3, len - i - 2);
		len -= 2;
	}

	return URL_FUNC_OK;
}


/**
 * Resolve a relative URL to absolute form.
 *
 * \param  rel	   relative URL
 * \param  base	   base URL, must be absolute and cleaned as by url_normalize()
 * \param  result  pointer to pointer to buffer to hold absolute url
 * \return  URL_FUNC_OK on success, URL_FUNC_FAILED if either URL does not
 *	    parse or base has no scheme, URL_FUNC_NOMEM on memory exhaustion
 *
 * Follows the algorithm of RFC 2396 section 5.2; the numbered comments
 * refer to the steps given there. On success *result is a malloc()ed
 * string which the caller must free; on failure it is NULL.
 */

url_func_result url_join(const char *rel, const char *base, char **result)
{
	int m;
	int i, j;
	char *buf = 0;
	const char *scheme = 0, *authority = 0, *path = 0, *query = 0,
			*fragment = 0;
	int scheme_len = 0, authority_len = 0, path_len = 0, query_len = 0,
			fragment_len = 0;
	regmatch_t base_match[10];
	regmatch_t rel_match[10];
	regmatch_t up_match[3];

	(*result) = 0;

	/* see RFC 2396 section 5.2 */
	m = regexec(&url_re, base, 10, base_match, 0);
	if (m) {
		LOG(("base url '%s' failed to match regex", base));
		return URL_FUNC_FAILED;
	}
	if (base_match[URL_RE_SCHEME].rm_so == -1) {
		LOG(("base url '%s' is not absolute", base));
		return URL_FUNC_FAILED;
	}
	/* start from the base's components; the steps below override them */
	scheme = base + base_match[URL_RE_SCHEME].rm_so;
	scheme_len = base_match[URL_RE_SCHEME].rm_eo -
			base_match[URL_RE_SCHEME].rm_so;
	if (base_match[URL_RE_AUTHORITY].rm_so != -1) {
		authority = base + base_match[URL_RE_AUTHORITY].rm_so;
		authority_len = base_match[URL_RE_AUTHORITY].rm_eo -
				base_match[URL_RE_AUTHORITY].rm_so;
	}
	path = base + base_match[URL_RE_PATH].rm_so;
	path_len = base_match[URL_RE_PATH].rm_eo -
			base_match[URL_RE_PATH].rm_so;


	/* 1) */
	m = regexec(&url_re, rel, 10, rel_match, 0);
	if (m) {
		LOG(("relative url '%s' failed to match regex", rel));
		return URL_FUNC_FAILED;
	}

	/* 2) */
	/* base + "#s" = (current document)#s (see Appendix C.1) */
	if (rel_match[URL_RE_FRAGMENT].rm_so != -1) {
		fragment = rel + rel_match[URL_RE_FRAGMENT].rm_so;
		fragment_len = rel_match[URL_RE_FRAGMENT].rm_eo -
				rel_match[URL_RE_FRAGMENT].rm_so;
	}
	if (rel_match[URL_RE_PATH].rm_so == rel_match[URL_RE_PATH].rm_eo &&
			rel_match[URL_RE_SCHEME].rm_so == -1 &&
			rel_match[URL_RE_AUTHORITY].rm_so == -1 &&
			rel_match[URL_RE_QUERY].rm_so == -1) {
		if (base_match[URL_RE_QUERY].rm_so != -1) {
			/* normally the base query is discarded, but this is a
			 * "reference to the current document", so keep it */
			query = base + base_match[URL_RE_QUERY].rm_so;
			query_len = base_match[URL_RE_QUERY].rm_eo -
					base_match[URL_RE_QUERY].rm_so;
		}
		goto step7;
	}
	if (rel_match[URL_RE_QUERY].rm_so != -1) {
		query = rel + rel_match[URL_RE_QUERY].rm_so;
		query_len = rel_match[URL_RE_QUERY].rm_eo -
				rel_match[URL_RE_QUERY].rm_so;
	}

	/* base + "?y" = (base - query)?y
	 * e.g http://a/b/c/d;p?q + ?y = http://a/b/c/d;p?y */
	if (rel_match[URL_RE_PATH].rm_so == rel_match[URL_RE_PATH].rm_eo &&
			rel_match[URL_RE_SCHEME].rm_so == -1 &&
			rel_match[URL_RE_AUTHORITY].rm_so == -1 &&
			rel_match[URL_RE_QUERY].rm_so != -1)
		goto step7;

	/* 3) rel has its own scheme: it is already absolute */
	if (rel_match[URL_RE_SCHEME].rm_so != -1) {
		scheme = rel + rel_match[URL_RE_SCHEME].rm_so;
		scheme_len = rel_match[URL_RE_SCHEME].rm_eo -
				rel_match[URL_RE_SCHEME].rm_so;
		authority = 0;
		authority_len = 0;
		if (rel_match[URL_RE_AUTHORITY].rm_so != -1) {
			authority = rel + rel_match[URL_RE_AUTHORITY].rm_so;
			authority_len = rel_match[URL_RE_AUTHORITY].rm_eo -
					rel_match[URL_RE_AUTHORITY].rm_so;
		}
		path = rel + rel_match[URL_RE_PATH].rm_so;
		path_len = rel_match[URL_RE_PATH].rm_eo -
				rel_match[URL_RE_PATH].rm_so;
		goto step7;
	}

	/* 4) rel has an authority: keep only the base scheme */
	if (rel_match[URL_RE_AUTHORITY].rm_so != -1) {
		authority = rel + rel_match[URL_RE_AUTHORITY].rm_so;
		authority_len = rel_match[URL_RE_AUTHORITY].rm_eo -
				rel_match[URL_RE_AUTHORITY].rm_so;
		path = rel + rel_match[URL_RE_PATH].rm_so;
		path_len = rel_match[URL_RE_PATH].rm_eo -
				rel_match[URL_RE_PATH].rm_so;
		goto step7;
	}

	/* 5) rel path is absolute: it replaces the base path */
	if (rel[rel_match[URL_RE_PATH].rm_so] == '/') {
		path = rel + rel_match[URL_RE_PATH].rm_so;
		path_len = rel_match[URL_RE_PATH].rm_eo -
				rel_match[URL_RE_PATH].rm_so;
		goto step7;
	}

	/* 6) merge the relative path with the base path in a scratch buffer */
	buf = malloc(path_len + rel_match[URL_RE_PATH].rm_eo + 10);
	if (!buf) {
		LOG(("malloc failed"));
		return URL_FUNC_NOMEM;
	}
	/* a) base path up to and including its last '/' */
	memcpy(buf, path, path_len);
	for (; path_len != 0 && buf[path_len - 1] != '/'; path_len--)
		;
	/* b) append the relative path */
	memcpy(buf + path_len, rel + rel_match[URL_RE_PATH].rm_so,
			rel_match[URL_RE_PATH].rm_eo -
			rel_match[URL_RE_PATH].rm_so);
	path_len += rel_match[URL_RE_PATH].rm_eo - rel_match[URL_RE_PATH].rm_so;
	/* c) remove every "./" that is a complete segment */
	buf[path_len] = 0;
	for (i = j = 0; j != path_len; ) {
		if (j && buf[j - 1] == '/' && buf[j] == '.' &&
				buf[j + 1] == '/')
			j += 2;
		else
			buf[i++] = buf[j++];
	}
	path_len = i;
	/* d) remove a final "." segment */
	if (2 <= path_len && buf[path_len - 2] == '/' &&
			buf[path_len - 1] == '.')
		path_len--;
	/* e) and f) repeatedly remove "segment/../" and a final
	 * "segment/.." */
	while (1) {
		buf[path_len] = 0;
		m = regexec(&url_up_re, buf, 3, up_match, 0);
		if (m)
			break;
		if (up_match[1].rm_eo + 4 <= path_len) {
			/* "segment/../": close the gap */
			memmove(buf + up_match[1].rm_so,
					buf + up_match[1].rm_eo + 4,
					path_len - up_match[1].rm_eo - 4);
			path_len -= up_match[1].rm_eo - up_match[1].rm_so + 4;
		} else
			/* "segment/.." at the very end: truncate */
			path_len -= up_match[1].rm_eo - up_match[1].rm_so + 3;
	}
	/* g) (choose to remove) leading "/.." segments. Only a complete
	 * ".." segment is removed: a name which merely begins with ".."
	 * (e.g. "/..foo") is an ordinary segment and must be kept. */
	path = buf;
	while (3 <= path_len && path[0] == '/' && path[1] == '.' &&
			path[2] == '.' &&
			(path_len == 3 || path[3] == '/')) {
		path += 3;
		path_len -= 3;
	}

	buf[path - buf + path_len] = 0;

step7:	/* 7) assemble scheme ":" ["//" authority] path ["?" query]
	 * ["#" fragment] */
	(*result) = malloc(scheme_len + 1 + 2 + authority_len + path_len + 1 +
			1 + query_len + 1 + fragment_len + 1);
	if (!(*result)) {
		LOG(("malloc failed"));
		free(buf);
		return URL_FUNC_NOMEM;
	}

	memcpy((*result), scheme, scheme_len);
	(*result)[scheme_len] = ':';
	i = scheme_len + 1;
	if (authority) {
		(*result)[i++] = '/';
		(*result)[i++] = '/';
		memcpy((*result) + i, authority, authority_len);
		i += authority_len;
	}
	if (path_len) {
		memcpy((*result) + i, path, path_len);
		i += path_len;
	} else {
		/* an empty path becomes "/" */
		(*result)[i++] = '/';
	}
	if (query) {
		(*result)[i++] = '?';
		memcpy((*result) + i, query, query_len);
		i += query_len;
	}
	if (fragment) {
		(*result)[i++] = '#';
		memcpy((*result) + i, fragment, fragment_len);
		i += fragment_len;
	}
	(*result)[i] = 0;

	/* buf is NULL unless step 6 ran; free(NULL) is harmless */
	free(buf);

	return URL_FUNC_OK;
}


/**
 * Return the host name from an URL.
 *
 * \param  url	   an absolute URL
 * \param  result  pointer to pointer to buffer to hold host name
 * \return  URL_FUNC_OK on success, URL_FUNC_FAILED if the URL does not
 *	    parse or has no authority, URL_FUNC_NOMEM on memory exhaustion
 *
 * The text returned is everything the URL regex captures between "//"
 * and the next '/', '?', '#' or whitespace. On success *result is a
 * malloc()ed string which the caller must free; otherwise it is NULL.
 */

url_func_result url_host(const char *url, char **result)
{
	regmatch_t parts[10];
	const regmatch_t *auth = &parts[URL_RE_AUTHORITY];
	size_t auth_len;
	char *host;

	*result = NULL;

	if (regexec(&url_re, url, 10, parts, 0) != 0) {
		LOG(("url '%s' failed to match regex", url));
		return URL_FUNC_FAILED;
	}

	if (auth->rm_so == -1)
		return URL_FUNC_FAILED;

	auth_len = auth->rm_eo - auth->rm_so;

	host = malloc(auth_len + 1);
	if (host == NULL) {
		LOG(("malloc failed"));
		return URL_FUNC_NOMEM;
	}

	memcpy(host, url + auth->rm_so, auth_len);
	host[auth_len] = '\0';

	*result = host;
	return URL_FUNC_OK;
}


/**
 * Return the scheme name from an URL.
 *
 * See RFC 3986, 3.1 for reference.
 *
 * \param  url	   an absolute URL
 * \param  result  pointer to pointer to buffer to hold scheme name
 * \return  URL_FUNC_OK on success, URL_FUNC_FAILED if the URL does not
 *	    begin with a valid scheme followed by ':', URL_FUNC_NOMEM on
 *	    memory exhaustion
 *
 * On success *result is a malloc()ed copy of the scheme (without the
 * ':') which the caller must free; on failure it is NULL.
 */

url_func_result url_scheme(const char *url, char **result)
{
	const char *scheme_end;

	assert(url);

	/* as in the other url_ functions, never leave *result unset */
	(*result) = 0;

	/* ensure the first character is alpha; the cast keeps bytes
	 * above 0x7f from being passed to <ctype.h> as negative values */
	if (!isalpha((unsigned char) *url))
		return URL_FUNC_FAILED;

	/* continue checking until the end marker (':') of the scheme for
	 * the format ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
	for (scheme_end = url;
			((*scheme_end != '\0') && (*scheme_end != ':'));
			scheme_end++) {
		if (!isalnum((unsigned char) *scheme_end) &&
				(*scheme_end != '+') &&
				(*scheme_end != '-') &&
				(*scheme_end != '.'))
			return URL_FUNC_FAILED;
	}
	/* ran off the end without finding ':' - there is no scheme */
	if (*scheme_end == '\0')
		return URL_FUNC_FAILED;

	/* make a copy of the result for the caller */
	(*result) = malloc(scheme_end - url + 1);
	if (!(*result)) {
		LOG(("malloc failed"));
		return URL_FUNC_NOMEM;
	}
	memcpy((*result), url, scheme_end - url);
	(*result)[scheme_end - url] = '\0';
	return URL_FUNC_OK;
}


/**
 * Return the canonical root of an URL
 *
 * \param url	  an absolute URL
 * \param result  pointer to pointer to buffer to hold canonical root URL
 * \return  URL_FUNC_OK on success, URL_FUNC_FAILED if the URL does not
 *	    parse or lacks a scheme or authority, URL_FUNC_NOMEM on memory
 *	    exhaustion
 *
 * The root is "scheme://authority", with no path, query or fragment.
 * On success *result is a malloc()ed string which the caller must free;
 * otherwise it is NULL.
 */

url_func_result url_canonical_root(const char *url, char **result)
{
	regmatch_t parts[10];
	size_t scheme_len, authority_len;
	char *root, *out;

	*result = NULL;

	if (regexec(&url_re, url, 10, parts, 0) != 0) {
		LOG(("url '%s' failed to match regex", url));
		return URL_FUNC_FAILED;
	}

	if (parts[URL_RE_SCHEME].rm_so == -1)
		return URL_FUNC_FAILED;
	if (parts[URL_RE_AUTHORITY].rm_so == -1)
		return URL_FUNC_FAILED;

	scheme_len = parts[URL_RE_SCHEME].rm_eo - parts[URL_RE_SCHEME].rm_so;
	authority_len = parts[URL_RE_AUTHORITY].rm_eo -
			parts[URL_RE_AUTHORITY].rm_so;

	/* scheme, ':', "//", authority, terminator */
	root = malloc(scheme_len + 1 + 2 + authority_len + 1);
	if (root == NULL) {
		LOG(("malloc failed"));
		return URL_FUNC_NOMEM;
	}

	out = root;
	memcpy(out, url + parts[URL_RE_SCHEME].rm_so, scheme_len);
	out += scheme_len;
	*out++ = ':';
	*out++ = '/';
	*out++ = '/';
	memcpy(out, url + parts[URL_RE_AUTHORITY].rm_so, authority_len);
	out += authority_len;
	*out = '\0';

	*result = root;
	return URL_FUNC_OK;
}


/**
 * Strip leafname, query and fragment segments from an URL
 *
 * \param url	  an absolute URL
 * \param result  pointer to pointer to buffer to hold result
 * \return URL_FUNC_OK on success, URL_FUNC_FAILED if the URL does not
 *	   parse or lacks a scheme or authority, URL_FUNC_NOMEM on memory
 *	   exhaustion
 *
 * The result is "scheme://authority" followed by the path up to and
 * including its last '/', or by "/" if the path is empty. On success
 * *result is a malloc()ed string which the caller must free; otherwise
 * it is NULL.
 */

url_func_result url_strip_lqf(const char *url, char **result)
{
	int m, scheme_len, authority_len, path_len = 0;
	regmatch_t match[10];

	(*result) = 0;

	m = regexec(&url_re, url, 10, match, 0);
	if (m) {
		LOG(("url '%s' failed to match regex", url));
		return URL_FUNC_FAILED;
	}
	if (match[URL_RE_SCHEME].rm_so == -1 ||
			match[URL_RE_AUTHORITY].rm_so == -1)
		return URL_FUNC_FAILED;

	scheme_len = match[URL_RE_SCHEME].rm_eo - match[URL_RE_SCHEME].rm_so;
	authority_len = match[URL_RE_AUTHORITY].rm_eo -
			match[URL_RE_AUTHORITY].rm_so;
	if (match[URL_RE_PATH].rm_so != -1)
		path_len = match[URL_RE_PATH].rm_eo -
				match[URL_RE_PATH].rm_so;

	/* scheme, ':', "//", authority, path (or "/"), terminator */
	(*result) = malloc(scheme_len + 1 + 2 + authority_len +
			(path_len ? path_len : 1) + 1);
	if (!(*result)) {
		LOG(("malloc failed"));
		return URL_FUNC_NOMEM;
	}

	memcpy((*result), url + match[URL_RE_SCHEME].rm_so, scheme_len);
	m = scheme_len;
	(*result)[m++] = ':';
	(*result)[m++] = '/';
	(*result)[m++] = '/';
	memcpy((*result) + m, url + match[URL_RE_AUTHORITY].rm_so,
			authority_len);
	m += authority_len;

	if (path_len) {
		/* copy the path itself (not the authority again) ... */
		memcpy((*result) + m, url + match[URL_RE_PATH].rm_so,
				path_len);
		/* ... then drop the leafname by backing up to the last '/' */
		for (; path_len != 0 && (*result)[m + path_len - 1] != '/';
				path_len--)
			/* do nothing */;
		m += path_len;
	}
	else
		(*result)[m++] = '/';

	(*result)[m] = '\0';

	return URL_FUNC_OK;
}


/**
 * Extract path, leafname and query segments from an URL
 *
 * \param url	  an absolute URL
 * \param result  pointer to pointer to buffer to hold result
 * \return URL_FUNC_OK on success, URL_FUNC_FAILED if the URL does not
 *	   parse or lacks a scheme or authority, URL_FUNC_NOMEM on memory
 *	   exhaustion
 *
 * The result is the path (or "/" if the path is empty), followed by
 * "?query" when the query is non-empty. On success *result is a
 * malloc()ed string which the caller must free; otherwise it is NULL.
 */

url_func_result url_plq(const char *url, char **result)
{
	regmatch_t parts[10];
	size_t path_len = 0, query_len = 0;
	char *plq, *out;

	*result = NULL;

	if (regexec(&url_re, url, 10, parts, 0) != 0) {
		LOG(("url '%s' failed to match regex", url));
		return URL_FUNC_FAILED;
	}

	if (parts[URL_RE_SCHEME].rm_so == -1)
		return URL_FUNC_FAILED;
	if (parts[URL_RE_AUTHORITY].rm_so == -1)
		return URL_FUNC_FAILED;

	if (parts[URL_RE_PATH].rm_so != -1)
		path_len = parts[URL_RE_PATH].rm_eo -
				parts[URL_RE_PATH].rm_so;
	if (parts[URL_RE_QUERY].rm_so != -1)
		query_len = parts[URL_RE_QUERY].rm_eo -
				parts[URL_RE_QUERY].rm_so;

	/* path (or "/"), query, '?', terminator */
	plq = malloc((path_len ? path_len : 1) + query_len + 1 + 1);
	if (plq == NULL) {
		LOG(("malloc failed"));
		return URL_FUNC_NOMEM;
	}

	out = plq;

	if (path_len != 0) {
		memcpy(out, url + parts[URL_RE_PATH].rm_so, path_len);
		out += path_len;
	} else {
		*out++ = '/';
	}

	if (query_len != 0) {
		*out++ = '?';
		memcpy(out, url + parts[URL_RE_QUERY].rm_so, query_len);
		out += query_len;
	}

	*out = '\0';

	*result = plq;
	return URL_FUNC_OK;
}


/**
 * Extract path segment from an URL
 *
 * \param url	  an absolute URL
 * \param result  pointer to pointer to buffer to hold result
 * \return URL_FUNC_OK on success, URL_FUNC_FAILED if the URL does not
 *	   parse or lacks a scheme or authority, URL_FUNC_NOMEM on memory
 *	   exhaustion
 *
 * The leafname is not included: the result is the path up to and
 * including its last '/', or "/" when the path is at most one character
 * long. On success *result is a malloc()ed string which the caller must
 * free; otherwise it is NULL.
 */

url_func_result url_path(const char *url, char **result)
{
	regmatch_t parts[10];
	size_t path_len = 0;
	char *dir;

	*result = NULL;

	if (regexec(&url_re, url, 10, parts, 0) != 0) {
		LOG(("url '%s' failed to match regex", url));
		return URL_FUNC_FAILED;
	}

	if (parts[URL_RE_SCHEME].rm_so == -1)
		return URL_FUNC_FAILED;
	if (parts[URL_RE_AUTHORITY].rm_so == -1)
		return URL_FUNC_FAILED;

	if (parts[URL_RE_PATH].rm_so != -1)
		path_len = parts[URL_RE_PATH].rm_eo -
				parts[URL_RE_PATH].rm_so;

	dir = malloc((path_len ? path_len : 1) + 1);
	if (dir == NULL) {
		LOG(("malloc failed"));
		return URL_FUNC_NOMEM;
	}

	if (path_len > 1) {
		const char *src = url + parts[URL_RE_PATH].rm_so;
		size_t keep = path_len;

		/* back up over the leafname to the last '/' */
		while (keep != 0 && src[keep - 1] != '/')
			keep--;

		memcpy(dir, src, keep);
		dir[keep] = '\0';
	} else {
		dir[0] = '/';
		dir[1] = '\0';
	}

	*result = dir;
	return URL_FUNC_OK;
}


/**
 * Find the path component which ends at or before a given offset.
 *
 * \param  url	       URL being examined
 * \param  path_start  offset in url of the first character of the path
 * \param  from	       offset in url to begin scanning backwards from
 * \param  start       updated to the offset of the component's first character
 * \param  end	       updated to the offset one past the component's last
 *		       character
 * \return  true if a component was found; false if skipping '/'
 *	    characters backwards from \a from reached \a path_start, in
 *	    which case \a start and \a end are left untouched
 */

static bool url_nice_component(const char *url, regoff_t path_start,
		regoff_t from, regoff_t *start, regoff_t *end)
{
	regoff_t last = from;
	regoff_t first;

	/* skip any '/' characters which trail the component */
	while (last != path_start && url[last] == '/')
		last--;
	if (last == path_start)
		return false;
	last++;

	/* walk back to the '/' which precedes the component */
	first = last - 1;
	while (first != path_start && url[first] != '/')
		first--;
	if (url[first] == '/')
		first++;

	*start = first;
	*end = last;
	return true;
}


/**
 * Attempt to find a nice filename for a URL.
 *
 * \param  url	   an absolute URL
 * \param  result  pointer to pointer to buffer to hold filename
 * \param  remove_extensions  remove any extensions from the filename
 * \return  URL_FUNC_OK on success, URL_FUNC_FAILED if the URL does not
 *	    parse or offers neither a usable path component nor a host,
 *	    URL_FUNC_NOMEM on memory exhaustion
 *
 * The last component of the path is preferred. If that component starts
 * with "index." or "default." (compared without regard to case), the
 * component before it is used instead. Failing that, the host name is
 * used, with each '.' replaced by '_'. On success *result is a
 * malloc()ed string which the caller must free; otherwise it is NULL.
 */

url_func_result url_nice(const char *url, char **result,
		bool remove_extensions)
{
	regmatch_t parts[10];
	const regmatch_t *path = &parts[URL_RE_PATH];
	const regmatch_t *auth = &parts[URL_RE_AUTHORITY];
	regoff_t start = 0, end = 0;
	bool have_leaf;
	size_t n, k;
	char *name, *dot;

	*result = NULL;

	if (regexec(&url_re, url, 10, parts, 0) != 0) {
		LOG(("url '%s' failed to match regex", url));
		return URL_FUNC_FAILED;
	}

	/* extract the last component of the path, if possible */
	have_leaf = path->rm_so != -1 && path->rm_so != path->rm_eo &&
			url_nice_component(url, path->rm_so,
					path->rm_eo - 1, &start, &end);

	if (have_leaf && (strncasecmp(url + start, "index.", 6) == 0 ||
			strncasecmp(url + start, "default.", 8) == 0)) {
		/* uninformative leafname: try the component before it */
		have_leaf = start != path->rm_so &&
				url_nice_component(url, path->rm_so,
						start - 1, &start, &end);
	}

	if (have_leaf) {
		n = end - start;
		name = malloc(n + 1);
		if (name == NULL) {
			LOG(("malloc failed"));
			return URL_FUNC_NOMEM;
		}
		memcpy(name, url + start, n);
		name[n] = '\0';

		if (remove_extensions) {
			/* cut at the first '.', unless the name begins
			 * with it */
			dot = strchr(name, '.');
			if (dot != NULL && dot != name)
				*dot = '\0';
		}

		*result = name;
		return URL_FUNC_OK;
	}

	/* otherwise, use the host name, with '.' replaced by '_' */
	if (auth->rm_so == -1 || auth->rm_so == auth->rm_eo)
		return URL_FUNC_FAILED;

	n = auth->rm_eo - auth->rm_so;
	name = malloc(n + 1);
	if (name == NULL) {
		LOG(("malloc failed"));
		return URL_FUNC_NOMEM;
	}
	memcpy(name, url + auth->rm_so, n);
	name[n] = '\0';

	for (k = 0; name[k] != '\0'; k++) {
		if (name[k] == '.')
			name[k] = '_';
	}

	*result = name;
	return URL_FUNC_OK;
}


/**
 * Escape a string suitable for inclusion in an URL.
 *
 * \param  unescaped  the unescaped string
 * \param  result     pointer to pointer to buffer to hold escaped string
 * \return  URL_FUNC_OK on success, URL_FUNC_FAILED if either argument is
 *	    NULL, URL_FUNC_NOMEM on memory exhaustion
 *
 * Control characters, space, DEL, bytes above 0x7f and the characters
 * ;/?:@&=+$,<>#%"{}|\^[]` are replaced by "%XX" (upper-case hex); every
 * other byte is copied unchanged. On success *result is a malloc()ed
 * string which the caller must free.
 */

url_func_result url_escape(const char *unescaped, char **result)
{
	static const char hex[] = "0123456789ABCDEF";
	size_t len;
	char *escaped, *shrunk, *d;
	const char *c;

	if (!unescaped || !result)
		return URL_FUNC_FAILED;

	*result = NULL;

	len = strlen(unescaped);

	/* worst case every byte becomes "%XX"; refuse any length whose
	 * tripled size (plus terminator) does not fit in a size_t */
	if (len > ((size_t) -1 - 1) / 3)
		return URL_FUNC_NOMEM;

	escaped = malloc(len * 3 + 1);
	if (!escaped)
		return URL_FUNC_NOMEM;

	for (c = unescaped, d = escaped; *c; c++) {
		/* work on the byte value so that the comparisons and
		 * shifts do not depend on the signedness of char */
		unsigned char ch = (unsigned char) *c;

		if (ch <= 0x20 || ch >= 0x7f ||
				strchr(";/?:@&=+$," "<>#%\"{}|\\^[]`", ch)) {
			*d++ = '%';
			*d++ = hex[ch >> 4];
			*d++ = hex[ch & 0xf];
		}
		else {
			/* unreserved characters: [a-zA-Z0-9-_.!~*'()] */
			*d++ = *c;
		}
	}

	*d++ = '\0';

	/* trim the buffer to the size used; if that fails the original,
	 * larger, buffer is still valid and is returned instead */
	shrunk = realloc(escaped, d - escaped);
	(*result) = shrunk ? shrunk : escaped;

	return URL_FUNC_OK;
}


#ifdef TEST

/**
 * Test driver: resolve each argument after the first against the first.
 *
 * argv[1] is used as the base URL. Every later argument is joined to it
 * with url_join() and, when that succeeds, the base, the argument and
 * the joined URL are printed on one line.
 */

int main(int argc, char *argv[])
{
	char *joined;
	int arg;

	url_init();

	for (arg = 1; arg != argc; arg++) {
		/* the base URL is not joined with itself */
		if (arg == 1)
			continue;

		if (url_join(argv[arg], argv[1], &joined) != URL_FUNC_OK)
			continue;

		printf("'%s' + '%s' \t= '%s'\n", argv[1], argv[arg], joined);
		free(joined);
	}

	return 0;
}

/**
 * Compile a regular expression, terminating the program on failure.
 *
 * \param  preg	   regex_t to receive the compiled expression
 * \param  regex   source text of the expression
 * \param  cflags  flags passed through to regcomp()
 *
 * If regcomp() reports an error, the expression and the regerror()
 * message are written to stderr and the program exits with status 1.
 */

void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	fprintf(stderr, "error: %s\n", message);
	exit(1);
}

#endif
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								/*
 								 * This file is part of NetSurf, http://netsurf.sourceforge.net/
 								 * Licensed under the GNU General Public License,
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
+								 *		  http://www.opensource.org/licenses/gpl-license
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+								 * Copyright 2005 James Bursa <bursa@users.sourceforge.net>
 								 * Copyright 2005 John M Bell <jmb202@ecs.soton.ac.uk>
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								 */
 								/** \file
 								 * URL parsing and joining (implementation).
 								 */
-												[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename.

svn path=/import/netsurf/; revision=682

											
										
										
											2004-03-28 03:18:52 +04:00
+								#include <assert.h>
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								#include <ctype.h>
 								#include <stdbool.h>
 								#include <stdlib.h>
 								#include <string.h>
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+								#include <strings.h>
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								#include <sys/types.h>
 								#include <regex.h>
 								#include "netsurf/utils/log.h"
 								#include "netsurf/utils/url.h"
 								#include "netsurf/utils/utils.h"
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+								regex_t url_re, url_up_re;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
 								/**
 								 * Initialise URL routines.
 								 *
 								 * Compiles regular expressions required by the url_ functions.
 								 */
 								void url_init(void)
 								{
 									/* regex from RFC 2396 */
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									regcomp_wrapper(&url_re, "^[[:space:]]*"
 								#define URL_RE_SCHEME 2
 											"(([a-zA-Z][-a-zA-Z0-9+.]*):)?"
 								#define URL_RE_AUTHORITY 4
 											"(//([^/?#[:space:]]*))?"
 								#define URL_RE_PATH 5
 											"([^?#[:space:]]*)"
 								#define URL_RE_QUERY 7
 											"(\\?([^#[:space:]]*))?"
 								#define URL_RE_FRAGMENT 9
 											"(#([^[:space:]]*))?"
-												[project @ 2004-06-08 10:56:21 by bursa]
Ignore whitespace at the start and end of URLs.

svn path=/import/netsurf/; revision=935

											
										
										
											2004-06-08 14:56:21 +04:00
+											"[[:space:]]*$", REG_EXTENDED);
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									regcomp_wrapper(&url_up_re,
-												Fix crash when a relative URL contains "//../".

svn path=/trunk/netsurf/; revision=2543

											
										
										
											2006-04-22 13:07:28 +04:00
+											"/([^/]?|[.][^./]|[^./][.]|[^./][^./]|[^/][^/][^/]+)"
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+											"/[.][.](/|$)",
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+											REG_EXTENDED);
 								}
 								/**
 								 * Normalize a URL.
 								 *
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
+								 * \param  url	   an absolute URL
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+								 * \param  result  pointer to pointer to buffer to hold cleaned up url
 								 * \return  URL_FUNC_OK on success
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								 *
 								 * If there is no scheme, http:// is added. The scheme and host are
 								 * lower-cased. Default ports are removed (http only). An empty path is
 								 * replaced with "/". Characters are unescaped if safe.
 								 */
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+								url_func_result url_normalize(const char *url, char **result)
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								{
 									char c;
 									int m;
 									int i;
-												[project @ 2004-08-14 15:07:19 by joty]
- Rename len() to css_len2px().
- Less compiler warnings concerning float/int implicit casts.
- More stddef.h type usuage.

svn path=/import/netsurf/; revision=1232

											
										
										
											2004-08-14 19:07:21 +04:00
+									size_t len;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									bool http = false;
 									regmatch_t match[10];
-												[project @ 2004-08-14 15:07:19 by joty]
- Rename len() to css_len2px().
- Less compiler warnings concerning float/int implicit casts.
- More stddef.h type usuage.

svn path=/import/netsurf/; revision=1232

											
										
										
											2004-08-14 19:07:21 +04:00
+									*result = NULL;
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
-												[project @ 2005-01-02 14:41:07 by bursa]
regexec() returns int, not a pointer.

svn path=/import/netsurf/; revision=1425

											
										
										
											2005-01-02 17:41:07 +03:00
+									if ((m = regexec(&url_re, url, 10, match, 0))) {
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										LOG(("url '%s' failed to match regex", url));
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										return URL_FUNC_FAILED;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									}
 									len = strlen(url);
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									if (match[URL_RE_SCHEME].rm_so == -1) {
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										/* scheme missing: add http:// and reparse */
-												[project @ 2005-02-03 13:18:22 by rjw]
Implementation of URL suggestion

svn path=/import/netsurf/; revision=1488

											
										
										
											2005-02-03 16:18:22 +03:00
+								/*		LOG(("scheme missing: using http"));*/
-												[project @ 2004-08-14 15:07:19 by joty]
- Rename len() to css_len2px().
- Less compiler warnings concerning float/int implicit casts.
- More stddef.h type usuage.

svn path=/import/netsurf/; revision=1232

											
										
										
											2004-08-14 19:07:21 +04:00
+										if ((*result = malloc(len + 13)) == NULL) {
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+											LOG(("malloc failed"));
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+											return URL_FUNC_NOMEM;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										}
-												[project @ 2004-08-14 15:07:19 by joty]
- Rename len() to css_len2px().
- Less compiler warnings concerning float/int implicit casts.
- More stddef.h type usuage.

svn path=/import/netsurf/; revision=1232

											
										
										
											2004-08-14 19:07:21 +04:00
+										strcpy(*result, "http://");
 										strcpy(*result + sizeof("http://")-1, url);
-												[project @ 2005-01-02 14:41:07 by bursa]
regexec() returns int, not a pointer.

svn path=/import/netsurf/; revision=1425

											
										
										
											2005-01-02 17:41:07 +03:00
+										if ((m = regexec(&url_re, *result, 10, match, 0))) {
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+											LOG(("url '%s' failed to match regex", (*result)));
-												[project @ 2004-08-14 15:07:19 by joty]
- Rename len() to css_len2px().
- Less compiler warnings concerning float/int implicit casts.
- More stddef.h type usuage.

svn path=/import/netsurf/; revision=1232

											
										
										
											2004-08-14 19:07:21 +04:00
+											free(*result);
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+											return URL_FUNC_FAILED;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										}
-												[project @ 2004-08-14 15:07:19 by joty]
- Rename len() to css_len2px().
- Less compiler warnings concerning float/int implicit casts.
- More stddef.h type usuage.

svn path=/import/netsurf/; revision=1232

											
										
										
											2004-08-14 19:07:21 +04:00
+										len += sizeof("http://")-1;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									} else {
-												[project @ 2004-08-14 15:07:19 by joty]
- Rename len() to css_len2px().
- Less compiler warnings concerning float/int implicit casts.
- More stddef.h type usuage.

svn path=/import/netsurf/; revision=1232

											
										
										
											2004-08-14 19:07:21 +04:00
+										if ((*result = malloc(len + 6)) == NULL) {
-												[project @ 2005-02-14 21:20:26 by jmb]
Make regex work with latest unixlib.
Fix incorrect return value

svn path=/import/netsurf/; revision=1512

											
										
										
											2005-02-15 00:20:26 +03:00
+											LOG(("malloc failed"));
 											return URL_FUNC_NOMEM;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										}
-												[project @ 2004-08-14 15:07:19 by joty]
- Rename len() to css_len2px().
- Less compiler warnings concerning float/int implicit casts.
- More stddef.h type usuage.

svn path=/import/netsurf/; revision=1232

											
										
										
											2004-08-14 19:07:21 +04:00
+										strcpy(*result, url);
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									}
 									/*for (unsigned int i = 0; i != 10; i++) {
 										if (match[i].rm_so == -1)
 											continue;
 										fprintf(stderr, "%i: '%.*s'\n", i,
 												match[i].rm_eo - match[i].rm_so,
 												res + match[i].rm_so);
 									}*/
 									/* see RFC 2616 section 3.2.3 */
 									/* make scheme lower-case */
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									if (match[URL_RE_SCHEME].rm_so != -1) {
 										for (i = match[URL_RE_SCHEME].rm_so;
 												i != match[URL_RE_SCHEME].rm_eo; i++)
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+											(*result)[i] = tolower((*result)[i]);
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+										if (match[URL_RE_SCHEME].rm_eo == 4
-												[project @ 2004-08-14 15:07:19 by joty]
- Rename len() to css_len2px().
- Less compiler warnings concerning float/int implicit casts.
- More stddef.h type usuage.

svn path=/import/netsurf/; revision=1232

											
										
										
											2004-08-14 19:07:21 +04:00
+												&& (*result)[0] == 'h'
 												&& (*result)[1] == 't'
 												&& (*result)[2] == 't'
 												&& (*result)[3] == 'p')
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+											http = true;
 									}
 									/* make empty path into "/" */
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									if (match[URL_RE_PATH].rm_so != -1 &&
 											match[URL_RE_PATH].rm_so == match[URL_RE_PATH].rm_eo) {
 										memmove((*result) + match[URL_RE_PATH].rm_so + 1,
 												(*result) + match[URL_RE_PATH].rm_so,
 												len - match[URL_RE_PATH].rm_so + 1);
 										(*result)[match[URL_RE_PATH].rm_so] = '/';
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										len++;
 									}
 									/* make host lower-case */
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									if (match[URL_RE_AUTHORITY].rm_so != -1) {
 										for (i = match[URL_RE_AUTHORITY].rm_so;
 												i != match[URL_RE_AUTHORITY].rm_eo; i++) {
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+											if ((*result)[i] == ':') {
 												if (http && (*result)[i + 1] == '8' &&
 														(*result)[i + 2] == '0' &&
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+														i + 3 ==
 														match[URL_RE_AUTHORITY].rm_eo) {
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+													memmove((*result) + i,
 															(*result) + i + 3,
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+															len -
 															match[URL_RE_AUTHORITY].
 															rm_eo);
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+													len -= 3;
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+													(*result)[len] = '\0';
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+												} else if (i + 1 == match[4].rm_eo) {
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+													memmove((*result) + i,
 															(*result) + i + 1,
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+															len -
 															match[URL_RE_AUTHORITY].
 															rm_eo);
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+													len--;
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+													(*result)[len] = '\0';
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+												}
 												break;
 											}
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+											(*result)[i] = tolower((*result)[i]);
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										}
 									}
 									/* unescape non-"reserved" escaped characters */
-												[project @ 2005-01-02 04:01:21 by jmb]
Lose warning

svn path=/import/netsurf/; revision=1420

											
										
										
											2005-01-02 07:01:21 +03:00
+									for (i = 0; (unsigned)i != len; i++) {
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										if ((*result)[i] != '%')
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+											continue;
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										c = tolower((*result)[i + 1]);
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										if ('0' <= c && c <= '9')
 											m = 16 * (c - '0');
 										else if ('a' <= c && c <= 'f')
 											m = 16 * (c - 'a' + 10);
 										else
 											continue;
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										c = tolower((*result)[i + 2]);
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										if ('0' <= c && c <= '9')
 											m += c - '0';
 										else if ('a' <= c && c <= 'f')
 											m += c - 'a' + 10;
 										else
 											continue;
-												[project @ 2005-04-16 05:10:08 by jmb]
Don't unescape characters 0x80->0xFF

svn path=/import/netsurf/; revision=1648

											
										
										
											2005-04-16 09:10:08 +04:00
+										if (m <= 0x20 || strchr(";/?:@&=+$," "<>#%\"{}|\\^[]`", m) ||
 												m >= 0x7f) {
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+											i += 2;
 											continue;
 										}
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										(*result)[i] = m;
 										memmove((*result) + i + 1, (*result) + i + 3, len - i - 2);
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										len -= 2;
 									}
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+									return URL_FUNC_OK;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								}
 								/**
 								 * Resolve a relative URL to absolute form.
 								 *
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
+								 * \param  rel	   relative URL
 								 * \param  base	   base URL, must be absolute and cleaned as by url_normalize()
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+								 * \param  result  pointer to pointer to buffer to hold absolute url
 								 * \return  URL_FUNC_OK on success
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								 */
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+								url_func_result url_join(const char *rel, const char *base, char **result)
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								{
 									int m;
 									int i, j;
 									char *buf = 0;
 									const char *scheme = 0, *authority = 0, *path = 0, *query = 0,
 											*fragment = 0;
 									int scheme_len = 0, authority_len = 0, path_len = 0, query_len = 0,
 											fragment_len = 0;
 									regmatch_t base_match[10];
 									regmatch_t rel_match[10];
 									regmatch_t up_match[3];
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+									(*result) = 0;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									/* see RFC 2396 section 5.2 */
 									m = regexec(&url_re, base, 10, base_match, 0);
 									if (m) {
 										LOG(("base url '%s' failed to match regex", base));
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										return URL_FUNC_FAILED;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									}
 									/*for (unsigned int i = 0; i != 10; i++) {
 										if (base_match[i].rm_so == -1)
 											continue;
 										fprintf(stderr, "%i: '%.*s'\n", i,
 												base_match[i].rm_eo - base_match[i].rm_so,
 												base + base_match[i].rm_so);
 									}*/
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									if (base_match[URL_RE_SCHEME].rm_so == -1) {
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										LOG(("base url '%s' is not absolute", base));
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										return URL_FUNC_FAILED;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									}
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									scheme = base + base_match[URL_RE_SCHEME].rm_so;
 									scheme_len = base_match[URL_RE_SCHEME].rm_eo -
 											base_match[URL_RE_SCHEME].rm_so;
 									if (base_match[URL_RE_AUTHORITY].rm_so != -1) {
 										authority = base + base_match[URL_RE_AUTHORITY].rm_so;
 										authority_len = base_match[URL_RE_AUTHORITY].rm_eo -
 												base_match[URL_RE_AUTHORITY].rm_so;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									}
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									path = base + base_match[URL_RE_PATH].rm_so;
 									path_len = base_match[URL_RE_PATH].rm_eo -
 											base_match[URL_RE_PATH].rm_so;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
-												[project @ 2005-04-23 22:26:05 by jmb]
Make url_join match the spec - see http://www.ics.uci.edu/~fielding/url/test1.html for testcases.

svn path=/import/netsurf/; revision=1682

											
										
										
											2005-04-24 02:26:05 +04:00
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									/* 1) */
 									m = regexec(&url_re, rel, 10, rel_match, 0);
 									if (m) {
 										LOG(("relative url '%s' failed to match regex", rel));
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										return URL_FUNC_FAILED;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									}
 									/* 2) */
-												[project @ 2004-08-06 22:20:36 by jmb]
Make the likes of <a href="#foo"> work

svn path=/import/netsurf/; revision=1188

											
										
										
											2004-08-07 02:20:36 +04:00
+									/* base + "#s" = (current document)#s (see Appendix C.1) */
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									if (rel_match[URL_RE_FRAGMENT].rm_so != -1) {
 										fragment = rel + rel_match[URL_RE_FRAGMENT].rm_so;
 										fragment_len = rel_match[URL_RE_FRAGMENT].rm_eo -
 												rel_match[URL_RE_FRAGMENT].rm_so;
-												[project @ 2004-08-06 22:20:36 by jmb]
Make the likes of <a href="#foo"> work

svn path=/import/netsurf/; revision=1188

											
										
										
											2004-08-07 02:20:36 +04:00
+									}
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									if (rel_match[URL_RE_PATH].rm_so == rel_match[URL_RE_PATH].rm_eo &&
 											rel_match[URL_RE_SCHEME].rm_so == -1 &&
 											rel_match[URL_RE_AUTHORITY].rm_so == -1 &&
 											rel_match[URL_RE_QUERY].rm_so == -1) {
 										if (base_match[URL_RE_QUERY].rm_so != -1) {
 											/* normally the base query is discarded, but this is a
 											 * "reference to the current document", so keep it */
 											query = base + base_match[URL_RE_QUERY].rm_so;
 											query_len = base_match[URL_RE_QUERY].rm_eo -
 													base_match[URL_RE_QUERY].rm_so;
-												[project @ 2005-04-23 22:26:05 by jmb]
Make url_join match the spec - see http://www.ics.uci.edu/~fielding/url/test1.html for testcases.

svn path=/import/netsurf/; revision=1682

											
										
										
											2005-04-24 02:26:05 +04:00
+										}
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										goto step7;
 									}
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									if (rel_match[URL_RE_QUERY].rm_so != -1) {
 										query = rel + rel_match[URL_RE_QUERY].rm_so;
 										query_len = rel_match[URL_RE_QUERY].rm_eo -
 												rel_match[URL_RE_QUERY].rm_so;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									}
-												[project @ 2005-04-23 22:26:05 by jmb]
Make url_join match the spec - see http://www.ics.uci.edu/~fielding/url/test1.html for testcases.

svn path=/import/netsurf/; revision=1682

											
										
										
											2005-04-24 02:26:05 +04:00
+									/* base + "?y" = (base - query)?y
 									 * e.g http://a/b/c/d;p?q + ?y = http://a/b/c/d;p?y */
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									if (rel_match[URL_RE_PATH].rm_so == rel_match[URL_RE_PATH].rm_eo &&
 											rel_match[URL_RE_SCHEME].rm_so == -1 &&
 											rel_match[URL_RE_AUTHORITY].rm_so == -1 &&
 											rel_match[URL_RE_QUERY].rm_so != -1)
-												[project @ 2005-04-23 22:26:05 by jmb]
Make url_join match the spec - see http://www.ics.uci.edu/~fielding/url/test1.html for testcases.

svn path=/import/netsurf/; revision=1682

											
										
										
											2005-04-24 02:26:05 +04:00
+										goto step7;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									/* 3) */
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									if (rel_match[URL_RE_SCHEME].rm_so != -1) {
 										scheme = rel + rel_match[URL_RE_SCHEME].rm_so;
 										scheme_len = rel_match[URL_RE_SCHEME].rm_eo -
 												rel_match[URL_RE_SCHEME].rm_so;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										authority = 0;
 										authority_len = 0;
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+										if (rel_match[URL_RE_AUTHORITY].rm_so != -1) {
 											authority = rel + rel_match[URL_RE_AUTHORITY].rm_so;
 											authority_len = rel_match[URL_RE_AUTHORITY].rm_eo -
 													rel_match[URL_RE_AUTHORITY].rm_so;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										}
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+										path = rel + rel_match[URL_RE_PATH].rm_so;
 										path_len = rel_match[URL_RE_PATH].rm_eo -
 												rel_match[URL_RE_PATH].rm_so;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										goto step7;
 									}
 									/* 4) */
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									if (rel_match[URL_RE_AUTHORITY].rm_so != -1) {
 										authority = rel + rel_match[URL_RE_AUTHORITY].rm_so;
 										authority_len = rel_match[URL_RE_AUTHORITY].rm_eo -
 												rel_match[URL_RE_AUTHORITY].rm_so;
 										path = rel + rel_match[URL_RE_PATH].rm_so;
 										path_len = rel_match[URL_RE_PATH].rm_eo -
 												rel_match[URL_RE_PATH].rm_so;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										goto step7;
 									}
 									/* 5) */
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									if (rel[rel_match[URL_RE_PATH].rm_so] == '/') {
 										path = rel + rel_match[URL_RE_PATH].rm_so;
 										path_len = rel_match[URL_RE_PATH].rm_eo -
 												rel_match[URL_RE_PATH].rm_so;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										goto step7;
 									}
 									/* 6) */
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									buf = malloc(path_len + rel_match[URL_RE_PATH].rm_eo + 10);
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									if (!buf) {
 										LOG(("malloc failed"));
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										return URL_FUNC_NOMEM;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									}
 									/* a) */
 									strncpy(buf, path, path_len);
 									for (; path_len != 0 && buf[path_len - 1] != '/'; path_len--)
 										;
 									/* b) */
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									strncpy(buf + path_len, rel + rel_match[URL_RE_PATH].rm_so,
 											rel_match[URL_RE_PATH].rm_eo -
 											rel_match[URL_RE_PATH].rm_so);
 									path_len += rel_match[URL_RE_PATH].rm_eo - rel_match[URL_RE_PATH].rm_so;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									/* c) */
 									buf[path_len] = 0;
 									for (i = j = 0; j != path_len; ) {
 										if (j && buf[j - 1] == '/' && buf[j] == '.' &&
 												buf[j + 1] == '/')
 											j += 2;
 										else
 											buf[i++] = buf[j++];
 									}
 									path_len = i;
 									/* d) */
-												[project @ 2004-05-07 19:04:59 by bursa]
Fix possible buffer under-read.

svn path=/import/netsurf/; revision=840

											
										
										
											2004-05-07 23:04:59 +04:00
+									if (2 <= path_len && buf[path_len - 2] == '/' &&
 											buf[path_len - 1] == '.')
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										path_len--;
 									/* e) and f) */
 									while (1) {
 										buf[path_len] = 0;
 										m = regexec(&url_up_re, buf, 3, up_match, 0);
 										if (m)
 											break;
 										if (up_match[1].rm_eo + 4 <= path_len) {
 											memmove(buf + up_match[1].rm_so,
 													buf + up_match[1].rm_eo + 4,
 													path_len - up_match[1].rm_eo - 4);
 											path_len -= up_match[1].rm_eo - up_match[1].rm_so + 4;
 										} else
 											path_len -= up_match[1].rm_eo - up_match[1].rm_so + 3;
 									}
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									/* g) (choose to remove) */
-												[project @ 2005-04-23 22:26:05 by jmb]
Make url_join match the spec - see http://www.ics.uci.edu/~fielding/url/test1.html for testcases.

svn path=/import/netsurf/; revision=1682

											
										
										
											2005-04-24 02:26:05 +04:00
+									path = buf;
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									while (3 <= path_len && path[1] == '.' && path[2] == '.') {
 										path += 3;
 										path_len -= 3;
 									}
 									buf[path - buf + path_len] = 0;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
 								step7:	/* 7) */
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									(*result) = malloc(scheme_len + 1 + 2 + authority_len + path_len + 1 +
 + query_len + 1 + fragment_len + 1);
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+									if (!(*result)) {
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										LOG(("malloc failed"));
 										free(buf);
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										return URL_FUNC_NOMEM;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									}
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+									strncpy((*result), scheme, scheme_len);
 									(*result)[scheme_len] = ':';
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									i = scheme_len + 1;
 									if (authority) {
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										(*result)[i++] = '/';
 										(*result)[i++] = '/';
 										strncpy((*result) + i, authority, authority_len);
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										i += authority_len;
 									}
-												[project @ 2004-04-12 20:43:29 by bursa]
Fix bug in url_join().

svn path=/import/netsurf/; revision=763

											
										
										
											2004-04-13 00:43:29 +04:00
+									if (path_len) {
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										strncpy((*result) + i, path, path_len);
-												[project @ 2004-04-12 20:43:29 by bursa]
Fix bug in url_join().

svn path=/import/netsurf/; revision=763

											
										
										
											2004-04-13 00:43:29 +04:00
+										i += path_len;
 									} else {
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										(*result)[i++] = '/';
-												[project @ 2004-04-12 20:43:29 by bursa]
Fix bug in url_join().

svn path=/import/netsurf/; revision=763

											
										
										
											2004-04-13 00:43:29 +04:00
+									}
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									if (query) {
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										(*result)[i++] = '?';
 										strncpy((*result) + i, query, query_len);
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										i += query_len;
 									}
 									if (fragment) {
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										(*result)[i++] = '#';
 										strncpy((*result) + i, fragment, fragment_len);
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										i += fragment_len;
 									}
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+									(*result)[i] = 0;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
 									free(buf);
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+									return URL_FUNC_OK;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								}
 								/**
 								 * Return the host name from an URL.
 								 *
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
+								 * \param  url	   an absolute URL
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+								 * \param  result  pointer to pointer to buffer to hold host name
 								 * \return  URL_FUNC_OK on success
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								 */
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+								url_func_result url_host(const char *url, char **result)
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								{
 									int m;
 									regmatch_t match[10];
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+									(*result) = 0;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									m = regexec(&url_re, url, 10, match, 0);
 									if (m) {
 										LOG(("url '%s' failed to match regex", url));
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										return URL_FUNC_FAILED;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									}
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									if (match[URL_RE_AUTHORITY].rm_so == -1)
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										return URL_FUNC_FAILED;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									(*result) = malloc(match[URL_RE_AUTHORITY].rm_eo -
 											match[URL_RE_AUTHORITY].rm_so + 1);
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+									if (!(*result)) {
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+										LOG(("malloc failed"));
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										return URL_FUNC_NOMEM;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									}
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									strncpy((*result), url + match[URL_RE_AUTHORITY].rm_so,
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+											match[URL_RE_AUTHORITY].rm_eo -
 											match[URL_RE_AUTHORITY].rm_so);
-												[project @ 2005-04-30 14:31:48 by bursa]
Fix bug in url_up_re which caused /xy/../ not to be recognised when xy was 2 characters. Add defines for match part numbers. Simplify part 6(g) of url_join().

svn path=/import/netsurf/; revision=1704

											
										
										
											2005-04-30 18:31:48 +04:00
+									(*result)[match[URL_RE_AUTHORITY].rm_eo -
 											match[URL_RE_AUTHORITY].rm_so] = 0;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+									return URL_FUNC_OK;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								}
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
-												[project @ 2004-10-01 21:31:55 by jmb]
A somewhat better implementation of referrers which no longer sends the referer if the URL schemes don't match.

Things to do:
1) Preservation of referer across redirects (see comment in browser.c:284)
2) GUI templates/code for configuration of referer sending (simple on/off toggle only)
3) Make referer sending when fetching objects/stylesheets for a page pay attention to option_send_referer?
4) Handle the case where the referer is in the form of http://moo:foo@mysite.com/ (ie the login details embedded in the referer - not good).

svn path=/import/netsurf/; revision=1297

											
										
										
											2004-10-02 01:31:55 +04:00
+								/**
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+								 * Return the scheme name from an URL.
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
+								 *
 								 * See RFC 3986, 3.1 for reference.
-												[project @ 2004-10-01 21:31:55 by jmb]
A somewhat better implementation of referrers which no longer sends the referer if the URL schemes don't match.

Things to do:
1) Preservation of referer across redirects (see comment in browser.c:284)
2) GUI templates/code for configuration of referer sending (simple on/off toggle only)
3) Make referer sending when fetching objects/stylesheets for a page pay attention to option_send_referer?
4) Handle the case where the referer is in the form of http://moo:foo@mysite.com/ (ie the login details embedded in the referer - not good).

svn path=/import/netsurf/; revision=1297

											
										
										
											2004-10-02 01:31:55 +04:00
+								 *
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
+								 * \param  url	   an absolute URL
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+								 * \param  result  pointer to pointer to buffer to hold scheme name
 								 * \return  URL_FUNC_OK on success
-												[project @ 2004-10-01 21:31:55 by jmb]
A somewhat better implementation of referrers which no longer sends the referer if the URL schemes don't match.

Things to do:
1) Preservation of referer across redirects (see comment in browser.c:284)
2) GUI templates/code for configuration of referer sending (simple on/off toggle only)
3) Make referer sending when fetching objects/stylesheets for a page pay attention to option_send_referer?
4) Handle the case where the referer is in the form of http://moo:foo@mysite.com/ (ie the login details embedded in the referer - not good).

svn path=/import/netsurf/; revision=1297

											
										
										
											2004-10-02 01:31:55 +04:00
+								 */
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
-												[project @ 2004-10-01 21:31:55 by jmb]
A somewhat better implementation of referrers which no longer sends the referer if the URL schemes don't match.

Things to do:
1) Preservation of referer across redirects (see comment in browser.c:284)
2) GUI templates/code for configuration of referer sending (simple on/off toggle only)
3) Make referer sending when fetching objects/stylesheets for a page pay attention to option_send_referer?
4) Handle the case where the referer is in the form of http://moo:foo@mysite.com/ (ie the login details embedded in the referer - not good).

svn path=/import/netsurf/; revision=1297

											
										
										
											2004-10-02 01:31:55 +04:00
+								url_func_result url_scheme(const char *url, char **result)
 								{
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
+									const char *scheme_end;
 									assert(url);
 									/* ensure the first character is alpha */
 									if (!isalpha(*url))
 										return URL_FUNC_FAILED;
 									/* continue checking until the end marker (':') of the scheme for
 									 * the format ALPHA<EFBFBD>*(<EFBFBD>ALPHA<EFBFBD>/<EFBFBD>DIGIT<EFBFBD>/<EFBFBD>"+"<EFBFBD>/<EFBFBD>"-"<EFBFBD>/<EFBFBD>"."<EFBFBD>) */
 									for (scheme_end = url;
 											((*scheme_end != '\0') && (*scheme_end != ':'));
 											scheme_end++) {
 										if (!isalnum(*scheme_end) &&
 												(*scheme_end != '+') &&
 												(*scheme_end != '-') &&
 												(*scheme_end != '.'))
 											return URL_FUNC_FAILED;
 									}
 									if (*scheme_end == '\0')
-												[project @ 2004-10-01 21:31:55 by jmb]
A somewhat better implementation of referrers which no longer sends the referer if the URL schemes don't match.

Things to do:
1) Preservation of referer across redirects (see comment in browser.c:284)
2) GUI templates/code for configuration of referer sending (simple on/off toggle only)
3) Make referer sending when fetching objects/stylesheets for a page pay attention to option_send_referer?
4) Handle the case where the referer is in the form of http://moo:foo@mysite.com/ (ie the login details embedded in the referer - not good).

svn path=/import/netsurf/; revision=1297

											
										
										
											2004-10-02 01:31:55 +04:00
+										return URL_FUNC_FAILED;
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
 									/* make a copy of the result for the caller */
 									(*result) = malloc(scheme_end - url + 1);
-												[project @ 2004-10-01 21:31:55 by jmb]
A somewhat better implementation of referrers which no longer sends the referer if the URL schemes don't match.

Things to do:
1) Preservation of referer across redirects (see comment in browser.c:284)
2) GUI templates/code for configuration of referer sending (simple on/off toggle only)
3) Make referer sending when fetching objects/stylesheets for a page pay attention to option_send_referer?
4) Handle the case where the referer is in the form of http://moo:foo@mysite.com/ (ie the login details embedded in the referer - not good).

svn path=/import/netsurf/; revision=1297

											
										
										
											2004-10-02 01:31:55 +04:00
+									if (!(*result)) {
 										LOG(("malloc failed"));
 										return URL_FUNC_NOMEM;
 									}
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
+									strncpy((*result), url, scheme_end - url);
 									(*result)[scheme_end - url] = '\0';
-												[project @ 2004-10-01 21:31:55 by jmb]
A somewhat better implementation of referrers which no longer sends the referer if the URL schemes don't match.

Things to do:
1) Preservation of referer across redirects (see comment in browser.c:284)
2) GUI templates/code for configuration of referer sending (simple on/off toggle only)
3) Make referer sending when fetching objects/stylesheets for a page pay attention to option_send_referer?
4) Handle the case where the referer is in the form of http://moo:foo@mysite.com/ (ie the login details embedded in the referer - not good).

svn path=/import/netsurf/; revision=1297

											
										
										
											2004-10-02 01:31:55 +04:00
+									return URL_FUNC_OK;
 								}
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
-												[project @ 2006-02-19 18:26:23 by jmb]
Rewrite HTTP authentication.
Fix extraction of realm from WWW-Authenticate header.
Tidy up login dialog code.

svn path=/import/netsurf/; revision=2085

											
										
										
											2006-02-19 21:26:23 +03:00
+								/**
 								 * Return the canonical root of an URL
 								 *
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
+								 * \param url	  an absolute URL
-												[project @ 2006-02-19 18:26:23 by jmb]
Rewrite HTTP authentication.
Fix extraction of realm from WWW-Authenticate header.
Tidy up login dialog code.

svn path=/import/netsurf/; revision=2085

											
										
										
											2006-02-19 21:26:23 +03:00
+								 * \param result  pointer to pointer to buffer to hold canonical root URL
 								 * \return  URL_FUNC_OK on success
 								 */
 								url_func_result url_canonical_root(const char *url, char **result)
 								{
 									/* Builds "<scheme>://<authority>" from the regex sub-matches of
 									 * url; both components must be present or the call fails. */
 									regmatch_t match[10];
 									const regmatch_t *scheme = &match[URL_RE_SCHEME];
 									const regmatch_t *authority = &match[URL_RE_AUTHORITY];
 									int scheme_len, authority_len, pos;
 									char *root;
 									*result = 0;
 									if (regexec(&url_re, url, 10, match, 0) != 0) {
 										LOG(("url '%s' failed to match regex", url));
 										return URL_FUNC_FAILED;
 									}
 									if (scheme->rm_so == -1 || authority->rm_so == -1)
 										return URL_FUNC_FAILED;
 									scheme_len = scheme->rm_eo - scheme->rm_so;
 									authority_len = authority->rm_eo - authority->rm_so;
 									/* scheme + ':' + "//" + authority + terminator */
 									root = malloc(scheme_len + 1 + 2 + authority_len + 1);
 									if (root == NULL) {
 										LOG(("malloc failed"));
 										return URL_FUNC_NOMEM;
 									}
 									/* both spans lie inside url, so exact-length copies are safe */
 									memcpy(root, url + scheme->rm_so, scheme_len);
 									pos = scheme_len;
 									root[pos++] = ':';
 									root[pos++] = '/';
 									root[pos++] = '/';
 									memcpy(root + pos, url + authority->rm_so, authority_len);
 									pos += authority_len;
 									root[pos] = '\0';
 									*result = root;
 									return URL_FUNC_OK;
 								}
 								/**
 								 * Strip leafname, query and fragment segments from an URL
 								 *
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
+								 * \param url	  an absolute URL
-												[project @ 2006-02-19 18:26:23 by jmb]
Rewrite HTTP authentication.
Fix extraction of realm from WWW-Authenticate header.
Tidy up login dialog code.

svn path=/import/netsurf/; revision=2085

											
										
										
											2006-02-19 21:26:23 +03:00
+								 * \param result  pointer to pointer to buffer to hold result
 								 * \return URL_FUNC_OK on success
 								 */
 								url_func_result url_strip_lqf(const char *url, char **result)
 								{
 									/* Produces "<scheme>://<authority><path up to and including its
 									 * last '/'>". An empty path is emitted as "/". The query and
 									 * fragment are never copied. Fails with URL_FUNC_FAILED when the
 									 * URL does not match the regex or lacks a scheme or authority,
 									 * and with URL_FUNC_NOMEM when the buffer cannot be allocated.
 									 * On success *result is a malloc()ed string. */
 									int m, scheme_len, authority_len, path_len = 0;
 									regmatch_t match[10];
 									(*result) = 0;
 									m = regexec(&url_re, url, 10, match, 0);
 									if (m) {
 										LOG(("url '%s' failed to match regex", url));
 										return URL_FUNC_FAILED;
 									}
 									if (match[URL_RE_SCHEME].rm_so == -1 ||
 											match[URL_RE_AUTHORITY].rm_so == -1)
 										return URL_FUNC_FAILED;
 									scheme_len = match[URL_RE_SCHEME].rm_eo - match[URL_RE_SCHEME].rm_so;
 									authority_len = match[URL_RE_AUTHORITY].rm_eo -
 											match[URL_RE_AUTHORITY].rm_so;
 									if (match[URL_RE_PATH].rm_so != -1)
 										path_len = match[URL_RE_PATH].rm_eo -
 												match[URL_RE_PATH].rm_so;
 									/* sized for the whole path even though the leafname is dropped */
 									(*result) = malloc(scheme_len + 1 + 2 + authority_len +
 											(path_len ? path_len : 1) + 1);
 									if (!(*result)) {
 										LOG(("malloc failed"));
 										return URL_FUNC_NOMEM;
 									}
 									memcpy((*result), url + match[URL_RE_SCHEME].rm_so, scheme_len);
 									m = scheme_len;
 									(*result)[m++] = ':';
 									(*result)[m++] = '/';
 									(*result)[m++] = '/';
 									memcpy((*result) + m, url + match[URL_RE_AUTHORITY].rm_so,
 											authority_len);
 									m += authority_len;
 									if (path_len) {
 										/* The source must be the PATH sub-match; copying from the
 										 * authority offset duplicated the host into the result. */
 										const char *path = url + match[URL_RE_PATH].rm_so;
 										/* drop the leafname: keep up to and including the last '/' */
 										while (path_len != 0 && path[path_len - 1] != '/')
 											path_len--;
 										memcpy((*result) + m, path, path_len);
 										m += path_len;
 									}
 									else
 										(*result)[m++] = '/';
 									(*result)[m] = '\0';
 									return URL_FUNC_OK;
 								}
-												Unify information databases

svn path=/trunk/netsurf/; revision=2519

											
										
										
											2006-04-10 03:21:13 +04:00
+								/**
 								 * Extract path, leafname and query segments from an URL
 								 *
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
+								 * \param url	  an absolute URL
-												Unify information databases

svn path=/trunk/netsurf/; revision=2519

											
										
										
											2006-04-10 03:21:13 +04:00
+								 * \param result  pointer to pointer to buffer to hold result
 								 * \return URL_FUNC_OK on success
 								 */
 								url_func_result url_plq(const char *url, char **result)
 								{
 									/* Emits the path (or "/" when it is empty) followed by
 									 * "?<query>" when the query is non-empty. */
 									regmatch_t match[10];
 									const regmatch_t *path = &match[URL_RE_PATH];
 									const regmatch_t *query = &match[URL_RE_QUERY];
 									int path_len = 0, query_len = 0;
 									char *out, *cursor;
 									*result = 0;
 									if (regexec(&url_re, url, 10, match, 0) != 0) {
 										LOG(("url '%s' failed to match regex", url));
 										return URL_FUNC_FAILED;
 									}
 									if (match[URL_RE_SCHEME].rm_so == -1 ||
 											match[URL_RE_AUTHORITY].rm_so == -1)
 										return URL_FUNC_FAILED;
 									if (path->rm_so != -1)
 										path_len = path->rm_eo - path->rm_so;
 									if (query->rm_so != -1)
 										query_len = query->rm_eo - query->rm_so;
 									/* path (or "/") + '?' + query + terminator */
 									out = malloc((path_len ? path_len : 1) + query_len + 1 + 1);
 									if (out == NULL) {
 										LOG(("malloc failed"));
 										return URL_FUNC_NOMEM;
 									}
 									cursor = out;
 									if (path_len == 0) {
 										*cursor++ = '/';
 									} else {
 										memcpy(cursor, url + path->rm_so, path_len);
 										cursor += path_len;
 									}
 									if (query_len != 0) {
 										*cursor++ = '?';
 										memcpy(cursor, url + query->rm_so, query_len);
 										cursor += query_len;
 									}
 									*cursor = '\0';
 									*result = out;
 									return URL_FUNC_OK;
 								}
-												Merge cookies changes into head - unvalidated transactions and a UI 
still need implementing.

svn path=/trunk/netsurf/; revision=2632

											
										
										
											2006-06-20 01:49:25 +04:00
+								/**
 								 * Extract path segment from an URL
 								 *
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
+								 * \param url	  an absolute URL
-												Merge cookies changes into head - unvalidated transactions and a UI 
still need implementing.

svn path=/trunk/netsurf/; revision=2632

											
										
										
											2006-06-20 01:49:25 +04:00
+								 * \param result  pointer to pointer to buffer to hold result
 								 * \return URL_FUNC_OK on success
 								 */
 								url_func_result url_path(const char *url, char **result)
 								{
 									/* Emits the path with its leafname removed (everything up to and
 									 * including the last '/'); a path of length 0 or 1 becomes "/". */
 									regmatch_t match[10];
 									const regmatch_t *path = &match[URL_RE_PATH];
 									int path_len = 0, kept = 0;
 									char *out;
 									*result = 0;
 									if (regexec(&url_re, url, 10, match, 0) != 0) {
 										LOG(("url '%s' failed to match regex", url));
 										return URL_FUNC_FAILED;
 									}
 									if (match[URL_RE_SCHEME].rm_so == -1 ||
 											match[URL_RE_AUTHORITY].rm_so == -1)
 										return URL_FUNC_FAILED;
 									if (path->rm_so != -1)
 										path_len = path->rm_eo - path->rm_so;
 									out = malloc((path_len ? path_len : 1) + 1);
 									if (out == NULL) {
 										LOG(("malloc failed"));
 										return URL_FUNC_NOMEM;
 									}
 									if (path_len <= 1) {
 										out[kept++] = '/';
 									} else {
 										/* copy the whole path, then back up to the last '/' */
 										memcpy(out, url + path->rm_so, path_len);
 										kept = path_len;
 										while (kept != 0 && out[kept - 1] != '/')
 											kept--;
 									}
 									out[kept] = '\0';
 									*result = out;
 									return URL_FUNC_OK;
 								}
-												[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename.

svn path=/import/netsurf/; revision=682

											
										
										
											2004-03-28 03:18:52 +04:00
+								/**
 								 * Attempt to find a nice filename for a URL.
 								 *
-												Don't use a regexp to extract scheme name.

svn path=/trunk/netsurf/; revision=2655

											
										
										
											2006-06-27 04:53:39 +04:00
+								 * \param  url	   an absolute URL
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+								 * \param  result  pointer to pointer to buffer to hold filename
 								 * \param  remove_extensions  remove any extensions from the filename
 								 * \return  URL_FUNC_OK on success
-												[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename.

svn path=/import/netsurf/; revision=682

											
										
										
											2004-03-28 03:18:52 +04:00
+								 */
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+								url_func_result url_nice(const char *url, char **result,
 										bool remove_extensions)
-												[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename.

svn path=/import/netsurf/; revision=682

											
										
										
											2004-03-28 03:18:52 +04:00
+								{
 									int m;
 									regmatch_t match[10];
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+									regoff_t start, end;
 									size_t i;
 									char *dot;
-												[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename.

svn path=/import/netsurf/; revision=682

											
										
										
											2004-03-28 03:18:52 +04:00
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+									*result = 0;
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+									m = regexec(&url_re, url, 10, match, 0);
 									if (m) {
 										LOG(("url '%s' failed to match regex", url));
 										return URL_FUNC_FAILED;
-												[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename.

svn path=/import/netsurf/; revision=682

											
										
										
											2004-03-28 03:18:52 +04:00
+									}
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+									/* extract the last component of the path, if possible */
 									if (match[URL_RE_PATH].rm_so == -1 || match[URL_RE_PATH].rm_so ==
 											match[URL_RE_PATH].rm_eo)
 										goto no_path;  /* no path, or empty */
 									for (end = match[URL_RE_PATH].rm_eo - 1;
 											end != match[URL_RE_PATH].rm_so && url[end] == '/';
 											end--)
 										;
 									if (end == match[URL_RE_PATH].rm_so)
 										goto no_path;  /* path is a string of '/' */
 									end++;
 									for (start = end - 1;
 											start != match[URL_RE_PATH].rm_so && url[start] != '/';
 											start--)
 										;
 									if (url[start] == '/')
 										start++;
 									if (!strncasecmp(url + start, "index.", 6) ||
 											!strncasecmp(url + start, "default.", 8)) {
 										/* try again */
 										if (start == match[URL_RE_PATH].rm_so)
 											goto no_path;
 										for (end = start - 1;
 												end != match[URL_RE_PATH].rm_so &&
 												url[end] == '/';
 												end--)
 											;
 										if (end == match[URL_RE_PATH].rm_so)
 											goto no_path;
 										end++;
 										for (start = end - 1;
 												start != match[URL_RE_PATH].rm_so &&
 												url[start] != '/';
 												start--)
 										;
 										if (url[start] == '/')
 											start++;
 									}
-												[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename.

svn path=/import/netsurf/; revision=682

											
										
										
											2004-03-28 03:18:52 +04:00
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+									*result = malloc(end - start + 1);
 									if (!*result) {
 										LOG(("malloc failed"));
 										return URL_FUNC_NOMEM;
 									}
 									strncpy(*result, url + start, end - start);
 									(*result)[end - start] = 0;
-												[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename.

svn path=/import/netsurf/; revision=682

											
										
										
											2004-03-28 03:18:52 +04:00
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+									if (remove_extensions) {
 										dot = strchr(*result, '.');
 										if (dot && dot != *result)
 											*dot = 0;
-												[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename.

svn path=/import/netsurf/; revision=682

											
										
										
											2004-03-28 03:18:52 +04:00
+									}
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+									return URL_FUNC_OK;
-												[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename.

svn path=/import/netsurf/; revision=682

											
										
										
											2004-03-28 03:18:52 +04:00
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+								no_path:
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+									/* otherwise, use the host name, with '.' replaced by '_' */
 									if (match[URL_RE_AUTHORITY].rm_so != -1 &&
 											match[URL_RE_AUTHORITY].rm_so !=
 											match[URL_RE_AUTHORITY].rm_eo) {
 										*result = malloc(match[URL_RE_AUTHORITY].rm_eo -
 												match[URL_RE_AUTHORITY].rm_so + 1);
 										if (!*result) {
 											LOG(("malloc failed"));
 											return URL_FUNC_NOMEM;
 										}
 										strncpy(*result, url + match[URL_RE_AUTHORITY].rm_so,
 												match[URL_RE_AUTHORITY].rm_eo -
 												match[URL_RE_AUTHORITY].rm_so);
 										(*result)[match[URL_RE_AUTHORITY].rm_eo -
 												match[URL_RE_AUTHORITY].rm_so] = 0;
-												[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename.

svn path=/import/netsurf/; revision=682

											
										
										
											2004-03-28 03:18:52 +04:00
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+										for (i = 0; (*result)[i]; i++)
 											if ((*result)[i] == '.')
 												(*result)[i] = '_';
 										return URL_FUNC_OK;
-												[project @ 2006-02-19 18:26:23 by jmb]
Rewrite HTTP authentication.
Fix extraction of realm from WWW-Authenticate header.
Tidy up login dialog code.

svn path=/import/netsurf/; revision=2085

											
										
										
											2006-02-19 21:26:23 +03:00
+									}
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+									return URL_FUNC_FAILED;
-												[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename.

svn path=/import/netsurf/; revision=682

											
										
										
											2004-03-28 03:18:52 +04:00
+								}
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
-												[project @ 2005-06-26 22:18:37 by jmb]
Improve clarity of use of utf8_to_enc.
Remove use of curl_escape - url_escape does similar things, just better.

svn path=/import/netsurf/; revision=1766

											
										
										
											2005-06-27 02:18:37 +04:00
+								/**
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+								 * Escape a string suitable for inclusion in an URL.
-												[project @ 2005-06-26 22:18:37 by jmb]
Improve clarity of use of utf8_to_enc.
Remove use of curl_escape - url_escape does similar things, just better.

svn path=/import/netsurf/; revision=1766

											
										
										
											2005-06-27 02:18:37 +04:00
+								 *
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+								 * \param  unescaped  the unescaped string
 								 * \param  result     pointer to pointer to buffer to hold escaped string
 								 * \return  URL_FUNC_OK on success
-												[project @ 2005-06-26 22:18:37 by jmb]
Improve clarity of use of utf8_to_enc.
Remove use of curl_escape - url_escape does similar things, just better.

svn path=/import/netsurf/; revision=1766

											
										
										
											2005-06-27 02:18:37 +04:00
+								 */
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
-												[project @ 2005-06-26 22:18:37 by jmb]
Improve clarity of use of utf8_to_enc.
Remove use of curl_escape - url_escape does similar things, just better.

svn path=/import/netsurf/; revision=1766

											
										
										
											2005-06-27 02:18:37 +04:00
+								url_func_result url_escape(const char *unescaped, char **result)
 								{
 									int len;
 									char *escaped, *d;
 									const char *c;
 									if (!unescaped || !result)
 										return URL_FUNC_FAILED;
 									*result = NULL;
 									len = strlen(unescaped);
 									escaped = malloc(len * 3 + 1);
 									if (!escaped)
 										return URL_FUNC_NOMEM;
-												[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename.

svn path=/import/netsurf/; revision=682

											
										
										
											2004-03-28 03:18:52 +04:00
-												[project @ 2005-06-26 22:18:37 by jmb]
Improve clarity of use of utf8_to_enc.
Remove use of curl_escape - url_escape does similar things, just better.

svn path=/import/netsurf/; revision=1766

											
										
										
											2005-06-27 02:18:37 +04:00
+									for (c = unescaped, d = escaped; *c; c++) {
 										if (!isascii(*c) ||
 												strchr(";/?:@&=+$," "<>#%\"{}|\\^[]`", *c) ||
 												*c <= 0x20 || *c == 0x7f) {
 											*d++ = '%';
 											*d++ = "0123456789ABCDEF"[((*c >> 4) & 0xf)];
 											*d++ = "0123456789ABCDEF"[(*c & 0xf)];
 										}
 										else {
 											/* unreserved characters: [a-zA-Z0-9-_.!~*'()] */
 											*d++ = *c;
 										}
 									}
-												[project @ 2005-06-27 01:57:54 by adrianl]
Fix termination of output from url_escape

svn path=/import/netsurf/; revision=1770

											
										
										
											2005-06-27 05:57:54 +04:00
+									*d++ = '\0';
 									(*result) = malloc(d - escaped);
-												[project @ 2005-06-26 22:18:37 by jmb]
Improve clarity of use of utf8_to_enc.
Remove use of curl_escape - url_escape does similar things, just better.

svn path=/import/netsurf/; revision=1766

											
										
										
											2005-06-27 02:18:37 +04:00
+									if (!(*result)) {
 										free(escaped);
 										return URL_FUNC_NOMEM;
 									}
-												[project @ 2005-06-27 01:57:54 by adrianl]
Fix termination of output from url_escape

svn path=/import/netsurf/; revision=1770

											
										
										
											2005-06-27 05:57:54 +04:00
 									memcpy((*result), escaped, d - escaped);
-												[project @ 2005-06-26 22:18:37 by jmb]
Improve clarity of use of utf8_to_enc.
Remove use of curl_escape - url_escape does similar things, just better.

svn path=/import/netsurf/; revision=1766

											
										
										
											2005-06-27 02:18:37 +04:00
 									free(escaped);
 									return URL_FUNC_OK;
 								}
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								#ifdef TEST
 								int main(int argc, char *argv[])
 								{
 									int i;
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+									url_func_result res;
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									char *s;
 									url_init();
-												[project @ 2004-10-01 21:31:55 by jmb]
A somewhat better implementation of referrers which no longer sends the referer if the URL schemes don't match.

Things to do:
1) Preservation of referer across redirects (see comment in browser.c:284)
2) GUI templates/code for configuration of referer sending (simple on/off toggle only)
3) Make referer sending when fetching objects/stylesheets for a page pay attention to option_send_referer?
4) Handle the case where the referer is in the form of http://moo:foo@mysite.com/ (ie the login details embedded in the referer - not good).

svn path=/import/netsurf/; revision=1297

											
										
										
											2004-10-02 01:31:55 +04:00
+									for (i = 1; i != argc; i++) {
 								/*		printf("==> '%s'\n", argv[i]);
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										res = url_normalize(argv[i], &s);
 										if (res == URL_FUNC_OK) {
 											printf("<== '%s'\n", s);
 											free(s);
 										}*/
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+								/*		printf("==> '%s'\n", argv[i]);
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										res = url_host(argv[i], &s);
 										if (res == URL_FUNC_OK) {
 											printf("<== '%s'\n", s);
 											free(s);
 										}*/
-												Fix crash when a relative URL contains "//../".

svn path=/trunk/netsurf/; revision=2543

											
										
										
											2006-04-22 13:07:28 +04:00
+										if (1 != i) {
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+											res = url_join(argv[i], argv[1], &s);
 											if (res == URL_FUNC_OK) {
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+												printf("'%s' + '%s' \t= '%s'\n", argv[1],
 														argv[i], s);
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+												free(s);
 											}
-												Fix crash when a relative URL contains "//../".

svn path=/trunk/netsurf/; revision=2543

											
										
										
											2006-04-22 13:07:28 +04:00
+										}
 								/*		printf("'%s' => ", argv[i]);
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+										res = url_nice(argv[i], &s, true);
 										if (res == URL_FUNC_OK) {
 											printf("'%s', ", s);
 											free(s);
 										} else {
 											printf("failed %u, ", res);
-												[project @ 2004-10-01 21:31:55 by jmb]
A somewhat better implementation of referrers which no longer sends the referer if the URL schemes don't match.

Things to do:
1) Preservation of referer across redirects (see comment in browser.c:284)
2) GUI templates/code for configuration of referer sending (simple on/off toggle only)
3) Make referer sending when fetching objects/stylesheets for a page pay attention to option_send_referer?
4) Handle the case where the referer is in the form of http://moo:foo@mysite.com/ (ie the login details embedded in the referer - not good).

svn path=/import/netsurf/; revision=1297

											
										
										
											2004-10-02 01:31:55 +04:00
+										}
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+										res = url_nice(argv[i], &s, false);
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+										if (res == URL_FUNC_OK) {
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+											printf("'%s', ", s);
-												[project @ 2004-08-09 16:11:58 by jmb]
Rework the interface of the URL handing module to allow for multiple error types.
Modify save_complete URL rewriting appropriately.

svn path=/import/netsurf/; revision=1206

											
										
										
											2004-08-09 20:11:58 +04:00
+											free(s);
-												[project @ 2005-07-23 20:43:37 by bursa]
Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814

											
										
										
											2005-07-24 00:43:37 +04:00
+										} else {
 											printf("failed %u, ", res);
 										}
-												Fix crash when a relative URL contains "//../".

svn path=/trunk/netsurf/; revision=2543

											
										
										
											2006-04-22 13:07:28 +04:00
+										printf("\n");*/
-												[project @ 2004-03-02 18:02:17 by bursa]
Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578

											
										
										
											2004-03-02 21:02:41 +03:00
+									}
 									return 0;
 								}
 								/**
 								 * Compile a regular expression, terminating the program on failure.
 								 *
 								 * \param  preg    compiled pattern is stored here by regcomp()
 								 * \param  regex   pattern source text
 								 * \param  cflags  flags passed through to regcomp()
 								 *
 								 * If regcomp() reports an error, the pattern and the regerror() text
 								 * are written to stderr and the process exits with status 1.
 								 */
 								void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
 								{
 									char message[200];
 									const int status = regcomp(preg, regex, cflags);
 									if (status == 0)
 										return;
 									regerror(status, preg, message, sizeof message);
 									fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
 									fprintf(stderr, "error: %s\n", message);
 									exit(1);
 								}
 								#endif