netsurf/content/urldb.c

/*
 * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk>
 * Copyright 2009 John Tytgat <joty@netsurf-browser.org>
 *
 * This file is part of NetSurf, http://www.netsurf-browser.org/
 *
 * NetSurf is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * NetSurf is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * \file
 * Unified URL information database implementation
 *
 * URLs are stored in a tree-based structure as follows:
 *
 * The host component is extracted from each URL and, if a FQDN, split on
 * every '.'. The tree is constructed by inserting each FQDN segment in
 * reverse order. Duplicate nodes are merged.
 *
 * If the host part of an URL is an IP address, then this is added to the
 * tree verbatim (as if it were a TLD).
 *
 * This provides something looking like:
 *
 *			      root (a sentinel)
 *				|
 *	-------------------------------------------------
 *	|	|	|	|	|	|	|
 *     com     edu     gov  127.0.0.1  net     org     uk	TLDs
 *	|	|	|		|	|	|
 *    google   ...     ...             ...     ...     co	2LDs
 *	|						|
 *     www					       bbc  Hosts/Subdomains
 *							|
 *						       www	...
 *
 * Each of the nodes in this tree is a struct host_part. This stores the
 * FQDN segment (or IP address) with which the node is concerned. Each node
 * may contain further information about paths on a host (struct path_data)
 * or SSL certificate processing on a host-wide basis
 * (host_part::permit_invalid_certs).
 *
 * Path data is concerned with storing various metadata about the path in
 * question. This includes global history data, HTTP authentication details
 * and any associated HTTP cookies. This is stored as a tree of path segments
 * hanging off the relevant host_part node.
 *
 * Therefore, to find the last visited time of the URL
 * http://www.example.com/path/to/resource.html, the FQDN tree would be
 * traversed in the order root -> "com" -> "example" -> "www". The "www"
 * node would have attached to it a tree of struct path_data:
 *
 *			    (sentinel)
 *				|
 *			       path
 *				|
 *			       to
 *				|
 *			   resource.html
 *
 * This represents the absolute path "/path/to/resource.html". The leaf node
 * "resource.html" contains the last visited time of the resource.
 *
 * The mechanism described above is, however, not particularly conducive to
 * fast searching of the database for a given URL (or URLs beginning with a
 * given prefix). Therefore, an ancillary data structure is used to enable
 * fast searching. This structure simply reflects the contents of the
 * database, with entries being added/removed at the same time as for the
 * core database. In order to ensure that degenerate cases are kept to a
 * minimum, we use an AAtree. This is an approximation of a Red-Black tree
 * with similar performance characteristics, but with a significantly
 * simpler implementation. Entries in this tree comprise pointers to the
 * leaf nodes of the host tree described above.
 *
 * REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of
 * non-normalised URLs with urldb will result in undefined behaviour and
 * potential crashes.
 */

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#ifdef WITH_NSPSL
#include <nspsl.h>
#endif

#include "utils/inet.h"
#include "utils/nsoption.h"
#include "utils/log.h"
#include "utils/corestrings.h"
#include "utils/url.h"
#include "utils/utils.h"
#include "utils/bloom.h"
#include "utils/time.h"
#include "utils/nsurl.h"
#include "utils/ascii.h"
#include "utils/http.h"
#include "netsurf/bitmap.h"
#include "desktop/cookie_manager.h"
#include "desktop/gui_internal.h"

#include "content/content.h"
#include "content/urldb.h"

#ifdef WITH_AMISSL
/* AmiSSL needs everything to be using bsdsocket directly to avoid conflicts */
#include <proto/bsdsocket.h>
#endif

/**
 * cookie entry.
 *
 * Cookies are chained into a doubly-linked list through ::prev and ::next;
 * struct path_data holds the head (path_data::cookies) and the tail
 * (path_data::cookies_end) of such a list.
 *
 * Pointers to this structure are cast to struct cookie_data when handed
 * to iteration callbacks, which is why the layouts have to agree.
 *
 * \warning This *must* be kept in sync with the public interface in
 *   netsurf/cookie_db.h
 */
struct cookie_internal_data {
	struct cookie_internal_data *prev;	/**< Previous in list */
	struct cookie_internal_data *next;	/**< Next in list */

	char *name;		/**< Cookie name */
	char *value;		/**< Cookie value */
	bool value_was_quoted;	/**< Value was quoted in Set-Cookie: */
	char *comment;		/**< Cookie comment */
	bool domain_from_set;	/**< Domain came from Set-Cookie: header */
	char *domain;		/**< Domain */
	bool path_from_set;	/**< Path came from Set-Cookie: header */
	char *path;		/**< Path */
	time_t expires;		/**< Expiry timestamp, or -1 for session */
	time_t last_used;	/**< Last used time */
	bool secure;		/**< Only send for HTTPS requests */
	bool http_only;		/**< Only expose to HTTP(S) requests */
	enum cookie_version version;	/**< Specification compliance */
	bool no_destroy;	/**< Never destroy this cookie,
				 * unless it's expired */

};


/**
 * A protection space
 *
 * A protection space is defined as the tuple (canonical_root_url, realm).
 * Each instance is an element of the singly-linked list hanging off a
 * leaf struct host_part (host_part::prot_space). The host itself is
 * implied by that host_part, so only the scheme and port need to be
 * stored here to complete the canonical_root_url.
 */
struct prot_space_data {
	/**
	 * URL scheme of canonical hostname of this protection space.
	 */
	lwc_string *scheme;
	/**
	 * Port number of canonical hostname of this protection
	 * space. When 0, it means the default port for given scheme,
	 * i.e. 80 (http), 443 (https).
	 */
	unsigned int port;
	/** Protection realm */
	char *realm;

	/**
	 * Authentication details for this protection space in form
	 * username:password
	 */
	char *auth;
	/** Next sibling (next protection space known for the same host) */
	struct prot_space_data *next;
};


/**
 * meta data about a url
 *
 * Embedded in every struct path_data (path_data::urld). Pointers to this
 * structure are cast to struct url_data when passed to URL iteration
 * callbacks, hence the layout requirement below.
 *
 * \warning must be kept in sync with url_data structure in netsurf/url_db.h
 */
struct url_internal_data {
	char *title;		/**< Resource title */
	unsigned int visits;	/**< Visit count */
	time_t last_visit;	/**< Last visit time */
	content_type type;	/**< Type of resource */
};


/**
 * data entry for url
 *
 * One node of the per-host path tree. Each node represents a single path
 * segment; siblings are linked through ::next / ::prev and the tree is
 * navigated with ::parent / ::children. The node embedded at the start
 * of struct host_part acts as the root of the tree for that host.
 */
struct path_data {
	nsurl *url;		/**< Full URL */
	lwc_string *scheme;	/**< URL scheme for data */
	unsigned int port;	/**< Port number for data. When 0, it means
				 * the default port for given scheme, i.e.
				 * 80 (http), 443 (https). */
	char *segment;		/**< Path segment for this node */
	unsigned int frag_cnt;	/**< Number of entries in path_data::fragment */
	char **fragment;	/**< Array of fragments */
	bool persistent;	/**< This entry should persist */

	struct url_internal_data urld;	/**< URL data for resource */

	/**
	 * Protection space to which this resource belongs. Can be
	 * NULL when it does not belong to a protection space or when
	 * it is not known. Not owned by this node (ownership is with
	 * struct host_part::prot_space).
	 */
	const struct prot_space_data *prot_space;
	/** Cookies associated with resource (head of list) */
	struct cookie_internal_data *cookies;
	/** Last cookie in list */
	struct cookie_internal_data *cookies_end;

	struct path_data *next;	/**< Next sibling */
	struct path_data *prev;	/**< Previous sibling */
	struct path_data *parent; /**< Parent path segment */
	struct path_data *children; /**< Child path segments (first child) */
	struct path_data *last; /**< Last child */
};

/**
 * HSTS (HTTP Strict Transport Security) state recorded for a host.
 *
 * Stored per host in host_part::hsts.
 */
struct hsts_data {
	time_t expires; /**< Expiry time */
	bool include_sub_domains; /**< Whether to include subdomains */
};

/**
 * One node of the host tree: a single FQDN segment (or a whole IP
 * address) together with the data known about that host.
 */
struct host_part {
	/**
	 * Known paths on this host. This _must_ be first so that
	 * struct host_part *h = (struct host_part *)mypath; works
	 */
	struct path_data paths;
	/**
	 * Allow access to SSL protected resources on this host
	 * without verifying certificate authenticity
	 */
	bool permit_invalid_certs;
	/* HSTS data */
	struct hsts_data hsts;

	/**
	 * Part of host string
	 */
	char *part;

	/**
	 * Linked list of all protection spaces known for this
	 * host and all its schemes and ports.
	 */
	struct prot_space_data *prot_space;

	struct host_part *next;	/**< Next sibling */
	struct host_part *prev;	/**< Previous sibling */
	struct host_part *parent; /**< Parent host part */
	struct host_part *children; /**< Child host parts */
};


/**
 * search index node
 *
 * Node of one of the balanced search trees that index the host tree.
 * Missing children are represented by a pointer to the shared sentinel
 * node (the file-scope `empty` object) rather than by NULL.
 */
struct search_node {
	const struct host_part *data;	/**< Host tree entry */

	unsigned int level;		/**< Node level */

	struct search_node *left;	/**< Left subtree */
	struct search_node *right;	/**< Right subtree */
};

/** Root database handle (sentinel root of the host tree) */
static struct host_part db_root;

/**
 * Search trees - one per letter + 1 for IPs + 1 for Everything Else
 *
 * ST_IP and ST_EE are the indices of the IP-address tree and of the
 * "everything else" tree. ST_DN is the index of the tree for hosts
 * starting with 'a'; the trees for the remaining letters follow it.
 */
#define NUM_SEARCH_TREES 28
#define ST_IP 0
#define ST_EE 1
#define ST_DN 2
/** Sentinel search node; its left and right links point back at itself */
static struct search_node empty = { 0, 0, &empty, &empty };
/** Roots of the search trees; every tree starts out as the sentinel */
static struct search_node *search_trees[NUM_SEARCH_TREES] = {
	&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
	&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
	&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
	&empty, &empty, &empty, &empty
};

/** Minimum cookie database file version */
#define MIN_COOKIE_FILE_VERSION 100
/** Current cookie database file version */
#define COOKIE_FILE_VERSION 102
/** loaded cookie file version */
static int loaded_cookie_file_version;

/** Minimum URL database file version */
#define MIN_URL_FILE_VERSION 106
/** Current URL database file version */
#define URL_FILE_VERSION 107

/**
 * filter for url presence in database
 *
 * Bloom filter used for short-circuiting the false case of "is this
 * URL in the database?".  BLOOM_SIZE controls how large the filter is
 * in bytes.  Primitive experimentation shows that for a filter of X
 * bytes filled with X items, searching for X items not in the filter
 * has a 5% false-positive rate.  We set it to 32kB, which should be
 * enough for all but the largest databases, while not being
 * shockingly wasteful on memory.
 */
static struct bloom_filter *url_bloom;
/**
 * Size of url filter, in bytes
 */
#define BLOOM_SIZE (1024 * 32)


/**
 * Emit a time_t value as one line of text in a portable form.
 *
 * The value is formatted with nsc_sntimet(). Should that yield no
 * characters, the value is written as a plain integer instead.
 *
 * \param fp File to write to
 * \param val the unix time value to output
 * \return NSERROR_OK; failures of the underlying write are not detected
 */
static nserror urldb_write_timet(FILE *fp, time_t val)
{
	char text[32];
	int text_len = nsc_sntimet(text, sizeof text, &val);

	if (text_len != 0) {
		fprintf(fp, "%.*s\n", text_len, text);
	} else {
		/* formatting produced nothing; fall back to an integer */
		fprintf(fp, "%i\n", (int)val);
	}

	return NSERROR_OK;
}

/**
 * Write paths associated with a host
 *
 * Walks the path tree rooted at \a parent without recursion: descend to
 * the first child while there is one, emit a record for each leaf that
 * qualifies, then move to the next sibling or climb back towards
 * \a parent. The textual path of the node being visited is maintained
 * incrementally in the caller-supplied buffer.
 *
 * A leaf is written when it is marked persistent, or when it has been
 * visited at least once and its last visit is newer than \a expiry.
 * Each record consists of the lines: scheme, port (empty when 0), path,
 * visit count, last visit time, content type, an empty line, and the
 * title (empty when there is none).
 *
 * Note that the stored title is sanitised in place before being
 * written: control characters become spaces and trailing spaces are
 * stripped.
 *
 * If the path buffer cannot be grown the function returns early, leaving
 * the remaining paths unwritten.
 *
 * \param parent Root of (sub)tree to write
 * \param host Current host name (not referenced by this function)
 * \param fp File to write to
 * \param path Current path string (may be reallocated)
 * \param path_alloc Allocated size of path
 * \param path_used Used size of path, including the terminator
 * \param expiry Expiry time of URLs
 */
static void
urldb_write_paths(const struct path_data *parent,
		  const char *host,
		  FILE *fp,
		  char **path,
		  int *path_alloc,
		  int *path_used,
		  time_t expiry)
{
	const struct path_data *p = parent;
	int i;

	do {
		int seglen = p->segment != NULL ? strlen(p->segment) : 0;
		int len = *path_used + seglen + 1;

		if (*path_alloc < len) {
			/* Grow the buffer so that this segment and a
			 * trailing separator fit */
			int new_alloc = (len > 64) ? len : *path_alloc + 64;
			char *temp = realloc(*path, new_alloc);
			if (!temp) {
				return;
			}
			*path = temp;
			*path_alloc = new_alloc;
		}

		/* *path_used counts the terminator, so the segment is
		 * appended over it, at offset *path_used - 1 */
		if (p->segment != NULL) {
			memcpy(*path + *path_used - 1, p->segment, seglen);
		}

		if (p->children != NULL) {
			/* Interior node: follow the segment with '/' */
			(*path)[*path_used + seglen - 1] = '/';
			(*path)[*path_used + seglen] = '\0';
		} else {
			/* Leaf: no separator, so one byte fewer is used */
			(*path)[*path_used + seglen - 1] = '\0';
			len -= 1;
		}

		*path_used = len;

		if (p->children != NULL) {
			/* Drill down into children */
			p = p->children;
		} else {
			/* leaf node */
			if (p->persistent ||
			    ((p->urld.last_visit > expiry) &&
			     (p->urld.visits > 0))) {
				fprintf(fp, "%s\n", lwc_string_data(p->scheme));

				/* port is unsigned, so print it as such */
				if (p->port) {
					fprintf(fp, "%u\n", p->port);
				} else {
					fprintf(fp, "\n");
				}

				fprintf(fp, "%s\n", *path);

				/** \todo handle fragments? */

				/* number of visits (unsigned) */
				fprintf(fp, "%u\n", p->urld.visits);

				/* time entry was last used */
				urldb_write_timet(fp, p->urld.last_visit);

				/* entry type */
				fprintf(fp, "%i\n", (int)p->urld.type);

				fprintf(fp, "\n");

				if (p->urld.title) {
					uint8_t *s = (uint8_t *) p->urld.title;

					/* A title occupies exactly one line:
					 * blank out control characters ... */
					for (i = 0; s[i] != '\0'; i++)
						if (s[i] < 32)
							s[i] = ' ';
					/* ... and strip trailing spaces */
					for (--i; ((i > 0) && (s[i] == ' '));
					     i--)
						s[i] = '\0';
					fprintf(fp, "%s\n", p->urld.title);
				} else {
					fprintf(fp, "\n");
				}
			}

			/* Now, find next node to process. */
			while (p != parent) {
				int cur_len = p->segment != NULL
					? strlen(p->segment) : 0;

				/* Remove our segment from the path */
				*path_used -= cur_len;
				(*path)[*path_used - 1] = '\0';

				if (p->next != NULL) {
					/* Have a sibling, process that */
					p = p->next;
					break;
				}

				/* Going up, so remove '/' */
				*path_used -= 1;
				(*path)[*path_used - 1] = '\0';

				/* Ascend tree */
				p = p->parent;
			}
		}
	} while (p != parent);
}


/**
 * Count the URLs below a path tree that would be saved
 *
 * Every leaf of the tree rooted at \a root is examined. A leaf is
 * counted when it is persistent, or when it has at least one visit and
 * was last visited after \a expiry. The count is added to \a *count;
 * it is not reset first.
 *
 * \param root Root of path data tree
 * \param expiry Expiry time for URLs
 * \param count Pointer to count
 */
static void
urldb_count_urls(const struct path_data *root,
		 time_t expiry,
		 unsigned int *count)
{
	const struct path_data *node = root;

	do {
		bool recent;

		if (node->children != NULL) {
			/* Interior node: keep descending */
			node = node->children;
			continue;
		}

		/* Leaf: decide whether it counts */
		recent = (node->urld.last_visit > expiry) &&
			 (node->urld.visits > 0);
		if (node->persistent || recent) {
			(*count)++;
		}

		/* Climb until a node with a sibling (or the root) is
		 * reached, then step across to that sibling */
		while (node != root && node->next == NULL) {
			node = node->parent;
		}
		if (node != root) {
			node = node->next;
		}
	} while (node != root);
}


/**
 * Save a search (sub)tree
 *
 * Performs an in-order walk of the search tree. For every node the host
 * name is rebuilt by following the host_part parent chain from the
 * node's entry towards the root, joining the parts with '.'. A host
 * line is written when the host has at least one URL worth saving (see
 * urldb_count_urls()) or has HSTS data expiring after the URL expiry
 * time; the line is followed by the host's paths, if any.
 *
 * A host name too long for the internal buffer is written truncated.
 * If the temporary path buffer cannot be allocated, or the host name
 * cannot be formatted, the function returns without processing this
 * node or its right subtree.
 *
 * \param parent root node of search tree to save.
 * \param fp File to write to
 */
static void urldb_save_search_tree(struct search_node *parent, FILE *fp)
{
	char host[256];
	const struct host_part *h;
	unsigned int path_count = 0;
	char *path, *p, *end;
	int path_alloc = 64, path_used = 1;
	time_t expiry, hsts_expiry = 0;
	int hsts_include_subdomains = 0;

	if (parent == &empty)
		return;

	/* Entries last visited at or before this time are not saved */
	expiry = time(NULL) - ((60 * 60 * 24) * nsoption_int(expire_url));

	urldb_save_search_tree(parent->left, fp);

	path = malloc(path_alloc);
	if (!path)
		return;

	path[0] = '\0';
	/* Make sure host is a valid string even if nothing is appended */
	host[0] = '\0';

	for (h = parent->data, p = host, end = host + sizeof host;
	     h && h != &db_root && p < end; h = h->parent) {
		/* A separator follows every part except the last one
		 * (the part whose parent is the root) */
		int written = snprintf(p, end - p, "%s%s", h->part,
				       (h->parent && h->parent->parent) ? "." : "");
		if (written < 0) {
			free(path);
			return;
		}
		if (written >= end - p) {
			/* Output was truncated; stop here rather than
			 * moving the cursor beyond the buffer */
			break;
		}
		p += written;
	}

	h = parent->data;
	if (h && h->hsts.expires > expiry) {
		hsts_expiry = h->hsts.expires;
		hsts_include_subdomains = h->hsts.include_sub_domains;
	}

	urldb_count_urls(&parent->data->paths, expiry, &path_count);

	if (path_count > 0) {
		fprintf(fp, "%s %i ", host, hsts_include_subdomains);
		urldb_write_timet(fp, hsts_expiry);
		fprintf(fp, "%u\n", path_count);

		urldb_write_paths(&parent->data->paths, host, fp,
				  &path, &path_alloc, &path_used, expiry);
	} else if (hsts_expiry) {
		/* No URLs to save, but the HSTS state is still wanted */
		fprintf(fp, "%s %i ", host, hsts_include_subdomains);
		urldb_write_timet(fp, hsts_expiry);
		fprintf(fp, "0\n");
	}

	free(path);

	urldb_save_search_tree(parent->right, fp);
}


/**
 * Path data iterator (internal)
 *
 * Visits every leaf of the path tree rooted at \a parent and reports it
 * through one of the callbacks. When \a url_callback is given it is
 * called with the leaf's URL and URL data, and \a cookie_callback is not
 * used; otherwise \a cookie_callback is called for each cookie attached
 * to the leaf. Nodes that have children are only traversed, never
 * reported.
 *
 * \param parent Root of subtree to iterate over
 * \param url_callback Callback function
 * \param cookie_callback Callback function
 * \return true to continue, false otherwise
 */
static bool
urldb_iterate_entries_path(const struct path_data *parent,
		bool (*url_callback)(nsurl *url, const struct url_data *data),
		bool (*cookie_callback)(const struct cookie_data *data))
{
	const struct path_data *node = parent;

	do {
		if (node->children != NULL) {
			/* Interior node: keep descending */
			node = node->children;
			continue;
		}

		/* All leaf nodes in the path tree should have an URL or
		 * cookies attached to them. If this is not the case, it
		 * indicates that there's a bug in the file loader/URL
		 * insertion code. Therefore, assert this here. */
		assert(url_callback || cookie_callback);

		/** \todo handle fragments? */
		if (url_callback) {
			assert(node->url);

			if (!url_callback(node->url,
					  (const struct url_data *) &node->urld)) {
				return false;
			}
		} else {
			const struct cookie_data *cookie;

			for (cookie = (const struct cookie_data *) node->cookies;
			     cookie != NULL;
			     cookie = cookie->next) {
				if (!cookie_callback(cookie)) {
					return false;
				}
			}
		}

		/* Climb until a node with a sibling (or the start node)
		 * is reached, then step across to that sibling */
		while (node != parent && node->next == NULL) {
			node = node->parent;
		}
		if (node != parent) {
			node = node->next;
		}
	} while (node != parent);

	return true;
}


/**
 * Check whether a host string is an IP address.
 *
 * This call detects IPv4 addresses (only the full dotted-quad form is
 * accepted, in any numeric notation inet_aton() understands) and, unless
 * NO_IPV6 is defined, IPv6 addresses enclosed in square brackets
 * (including those containing embedded IPv4 addresses.)
 *
 * If \a host contains a '/', only the part before the first '/' is
 * examined (see the note in the function body). Should the temporary
 * copy needed for that not be allocatable, false is returned.
 *
 * \param host a hostname terminated by '\0'
 * \return true if the hostname is an IP address, false otherwise
 */
static bool urldb__host_is_ip_address(const char *host)
{
	struct in_addr ipv4;
	size_t host_len = strlen(host);
	const char *sane_host = host;
	char *host_copy = NULL; /* owned copy cut at '/', when needed */
	const char *slash;
	bool is_ip = false;
#ifndef NO_IPV6
	struct in6_addr ipv6;
	char ipv6_addr[64];
	size_t ipv6_addr_len;
#endif
	/**
	 * @todo FIXME Some parts of urldb.c make confusions between hosts
	 * and "prefixes", we can sometimes be erroneously passed more than
	 * just a host.  Sometimes we may be passed trailing slashes, or even
	 * whole path segments.  A specific criminal in this class is
	 * urldb_iterate_partial, which takes a prefix to search for, but
	 * passes that prefix to functions that expect only hosts.
	 *
	 * For the time being, we will accept such calls; we check if there
	 * is a / in the host parameter, and if there is, we take a copy and
	 * replace the / with a \0.  This is not a permanent solution; we
	 * should search through NetSurf and find all the callers that are
	 * in error and fix them.  When doing this task, it might be wise
	 * to replace the hideousness below with code that doesn't have to do
	 * this, and add assert(strchr(host, '/') == NULL); somewhere.
	 * -- rjek - 2010-11-04
	 */

	slash = strchr(host, '/');
	if (slash != NULL) {
		host_len = slash - host;

		host_copy = strdup(host);
		if (host_copy == NULL) {
			/* Out of memory: cannot inspect the host */
			return false;
		}
		host_copy[host_len] = '\0';
		sane_host = host_copy;

		NSLOG(netsurf, INFO, "WARNING: called with non-host '%s'",
		      host);
	}

	/* Cheap rejection: an address consists solely of these characters */
	if (strspn(sane_host, "0123456789abcdefABCDEF[].:") < host_len)
		goto out;

	if (inet_aton(sane_host, &ipv4) != 0) {
		/* This can only be a sane IPv4 address if it contains 3 dots.
		 * Helpfully, inet_aton is happy to treat "a", "a.b", "a.b.c",
		 * and "a.b.c.d" as valid IPv4 address strings where we only
		 * support the full, dotted-quad, form.
		 */
		int num_dots = 0;
		size_t index;

		for (index = 0; index < host_len; index++) {
			if (sane_host[index] == '.')
				num_dots++;
		}

		is_ip = (num_dots == 3);
		goto out;
	}

#ifndef NO_IPV6
	/* An IPv6 literal is written between square brackets */
	if ((host_len < 6) ||
	    (sane_host[0] != '[') ||
	    (sane_host[host_len - 1] != ']')) {
		goto out;
	}

	ipv6_addr_len = host_len - 2;
	if (ipv6_addr_len >= sizeof(ipv6_addr)) {
		/* Longer than any textual IPv6 address can be; do not
		 * parse a truncated copy */
		goto out;
	}
	memcpy(ipv6_addr, sane_host + 1, ipv6_addr_len);
	ipv6_addr[ipv6_addr_len] = '\0';

	if (inet_pton(AF_INET6, ipv6_addr, &ipv6) == 1)
		is_ip = true;
#endif

out:
	free(host_copy);
	return is_ip;
}


/**
 * Compare host_part with prefix
 *
 * The prefix is consumed one '.'-separated segment at a time while the
 * host is followed from \a a up through its parents. Every segment of
 * the prefix that is followed by a '.' has to equal the corresponding
 * host part exactly (ignoring case); the final segment only has to be a
 * leading substring of its host part. When the prefix is an IP address
 * it is compared, length-limited, against \a a's part alone.
 *
 * \param a host part
 * \param b prefix
 * \return 0 if match, non-zero, otherwise
 */
static int urldb_search_match_prefix(const struct host_part *a, const char *b)
{
	const char *limit;
	bool host_exhausted;

	assert(a && a != &db_root && b);

	if (urldb__host_is_ip_address(b)) {
		/* IP address */
		return strncasecmp(a->part, b, strlen(b));
	}

	/* One past the terminator, so the terminator itself is visited
	 * as a (possibly empty) final segment */
	limit = b + strlen(b) + 1;

	while (b < limit && a && a != &db_root) {
		const char *sep = strchr(b, '.');
		bool last_segment = (sep == NULL);
		size_t seg_len;
		int diff;

		if (last_segment) {
			/* segment runs up to the terminator */
			sep = limit - 1;
		}
		seg_len = (size_t)(sep - b);

		/* Compare strings (length limited) */
		diff = strncasecmp(a->part, b, seg_len);
		if (diff != 0) {
			/* didn't match => return difference */
			return diff;
		}

		if (!last_segment) {
			/* Lengths only matter for segments that are
			 * followed by further segments */
			size_t part_len = strlen(a->part);

			if (part_len > seg_len) {
				return 1;
			}
			if (part_len < seg_len) {
				return -1;
			}
		}

		b = sep + 1;
		a = a->parent;
	}

	/* Prefix left over once the host has run out => no match.
	 * Otherwise the host equals the prefix or extends it. */
	host_exhausted = (a == NULL || a == &db_root);
	if (host_exhausted && b < limit) {
		return -1;
	}

	return 0;
}


/**
 * Partial host iterator (internal)
 *
 * Searches a host search tree for entries matching \a prefix. A node
 * that does not match steers the search into one subtree only; a node
 * that matches has its left subtree, its own paths and its right
 * subtree processed, in that order.
 *
 * \param root Root of (sub)tree to traverse
 * \param prefix Prefix to match
 * \param callback Callback function
 * \return true to continue, false otherwise
 */
static bool
urldb_iterate_partial_host(struct search_node *root,
		const char *prefix,
		bool (*callback)(nsurl *url, const struct url_data *data))
{
	int order;

	assert(root && prefix && callback);

	if (root == &empty) {
		return true;
	}

	order = urldb_search_match_prefix(root->data, prefix);

	if (order > 0) {
		/* No match => look in left subtree */
		return urldb_iterate_partial_host(root->left, prefix, callback);
	}

	if (order < 0) {
		/* No match => look in right subtree */
		return urldb_iterate_partial_host(root->right, prefix, callback);
	}

	/* Match => iterate over l/r subtrees & process this node */
	if (!urldb_iterate_partial_host(root->left, prefix, callback)) {
		return false;
	}

	/* extract all paths attached to this host, if it has any */
	if (root->data->paths.children &&
	    !urldb_iterate_entries_path(&root->data->paths, callback, NULL)) {
		return false;
	}

	return urldb_iterate_partial_host(root->right, prefix, callback);
}


/**
 * Partial path iterator (internal)
 *
 * Given: http://www.example.org/a/b/c/d//e
 * and assuming a path tree:
 *     ^
 *    /	\
 *   a1 b1
 *  / \
 * a2 b2
 *    /|\
 *   a b c
 *   3 3 |
 *       d
 *       |
 *       e
 *      / \
 *      f g
 *
 * Prefix will be:	p will be:
 *
 * a/b/c/d//e		a1
 *   b/c/d//e		a2
 *   b/c/d//e		b2
 *     c/d//e		a3
 *     c/d//e		b3
 *     c/d//e		c
 *       d//e		d
 *         /e		e		(skip /)
 *          e		e
 *
 * I.E. the siblings on one level are scanned in order; as soon as one
 * matches the current prefix segment, the search moves down to that
 * node's children with the remainder of the prefix. Once the prefix has
 * been used up, every entry below each sibling matching the final
 * segment is reported.
 *
 * Segments are compared ignoring case and only over the length of the
 * prefix segment. A \a parent without children yields no callbacks.
 *
 * \param parent Root of (sub)tree to traverse
 * \param prefix Prefix to match
 * \param callback Callback function
 * \return true to continue, false otherwise
 */
static bool
urldb_iterate_partial_path(const struct path_data *parent,
		const char *prefix,
		bool (*callback)(nsurl *url, const struct url_data *data))
{
	const struct path_data *p = parent->children;
	const char *slash, *end = prefix + strlen(prefix);

	/* Checked before the first use so that an empty tree is safe */
	while (p != NULL) {
		slash = strchr(prefix, '/');
		if (!slash) {
			slash = end;
		}

		if (slash == prefix && *prefix == '/') {
			/* Ignore "//" */
			prefix++;
			continue;
		}

		if (strncasecmp(p->segment, prefix, slash - prefix) == 0) {
			/* prefix matches so far */
			if (slash == end) {
				/* we've run out of prefix, so all
				 * paths below this one match */
				if (!urldb_iterate_entries_path(p,
								callback,
								NULL)) {
					return false;
				}

				/* Progress to next sibling */
				p = p->next;
			} else {
				/* Skip over this segment */
				prefix = slash + 1;

				p = p->children;
			}
		} else {
			/* Doesn't match this segment, try next sibling */
			p = p->next;
		}
	}

	return true;
}


/**
 * Host data iterator (internal)
 *
 * In-order traversal of a host search tree. For each host that has
 * paths, or that has cookies on its root path node while a cookie
 * callback is supplied, the host's path tree is handed to
 * urldb_iterate_entries_path().
 *
 * \param parent Root of subtree to iterate over
 * \param url_callback Callback function
 * \param cookie_callback Callback function
 * \return true to continue, false otherwise
 */
static bool
urldb_iterate_entries_host(struct search_node *parent,
		bool (*url_callback)(nsurl *url, const struct url_data *data),
		bool (*cookie_callback)(const struct cookie_data *data))
{
	const struct path_data *paths;
	bool has_domain_cookies;

	if (parent == &empty) {
		return true;
	}

	if (!urldb_iterate_entries_host(parent->left,
					url_callback, cookie_callback)) {
		return false;
	}

	paths = &parent->data->paths;
	has_domain_cookies = cookie_callback && paths->cookies;

	if (paths->children || has_domain_cookies) {
		/* We have paths (or domain cookies), so iterate them */
		if (!urldb_iterate_entries_path(paths,
						url_callback, cookie_callback)) {
			return false;
		}
	}

	return urldb_iterate_entries_host(parent->right,
					  url_callback, cookie_callback);
}


/**
 * Add a host node to the tree
 *
 * The new node is zero-initialised, given its own copy of \a part and
 * linked in as the first child of \a parent.
 *
 * \param part Host segment to add (or whole IP address) (copied)
 * \param parent Parent node to add to
 * \return Pointer to added node, or NULL on memory exhaustion
 */
static struct host_part *
urldb_add_host_node(const char *part, struct host_part *parent)
{
	struct host_part *node;

	assert(part && parent);

	node = calloc(1, sizeof *node);
	if (node == NULL) {
		return NULL;
	}

	node->part = strdup(part);
	if (node->part == NULL) {
		free(node);
		return NULL;
	}

	/* Push onto the front of the parent's list of children */
	node->parent = parent;
	node->next = parent->children;
	if (node->next != NULL) {
		node->next->prev = node;
	}
	parent->children = node;

	return node;
}


/**
 * Fragment comparator callback for qsort
 *
 * Both arguments point at array elements, i.e. at pointers to the
 * strings being ordered. The strings are compared ignoring case.
 *
 * \param a first value
 * \param b second value
 * \return 0 for equal else positive or negative value on comparison
 */
static int urldb_add_path_fragment_cmp(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcasecmp(*lhs, *rhs);
}


/**
 * Add a fragment to a path segment
 *
 * The fragment array of \a segment is grown by one slot, a copy of the
 * fragment text is stored in it and the array is re-sorted
 * (case-insensitively). Passing no fragment is allowed and does nothing.
 *
 * \param segment Path segment to add to
 * \param fragment Fragment to add (copied), or NULL
 * \return segment or NULL on memory exhaustion
 */
static struct path_data *
urldb_add_path_fragment(struct path_data *segment, lwc_string *fragment)
{
	char **grown;
	char *copy;

	assert(segment);

	/* If no fragment, this function is a NOP
	 * This may seem strange, but it makes the rest
	 * of the code cleaner */
	if (fragment == NULL) {
		return segment;
	}

	grown = realloc(segment->fragment,
			(segment->frag_cnt + 1) * sizeof *grown);
	if (grown == NULL) {
		return NULL;
	}
	segment->fragment = grown;

	copy = strdup(lwc_string_data(fragment));
	segment->fragment[segment->frag_cnt] = copy;
	if (copy == NULL) {
		/* The enlarged array stays with the segment; the count
		 * is left alone so the new slot is not considered used */
		return NULL;
	}

	segment->frag_cnt++;

	/* We want fragments in alphabetical order, so sort them
	 * It may prove better to insert in alphabetical order instead */
	qsort(segment->fragment,
	      segment->frag_cnt,
	      sizeof *segment->fragment,
	      urldb_add_path_fragment_cmp);

	return segment;
}


/**
 * Add a path node to the tree
 *
 * A new node is created and inserted among the children of \a parent so
 * that siblings stay ordered by segment (byte-wise comparison): the node
 * goes immediately before the first sibling whose segment compares
 * greater, or at the end of the list when there is none.
 *
 * On failure nothing is linked into the tree and everything allocated
 * for the new node is released again.
 *
 * \param scheme URL scheme associated with path (copied)
 * \param port Port number on host associated with path
 * \param segment Path segment to add (copied)
 * \param fragment URL fragment (copied), or NULL
 * \param parent Parent node to add to
 * \return Pointer to added node, or NULL on memory exhaustion
 */
static struct path_data *
urldb_add_path_node(lwc_string *scheme,
		    unsigned int port,
		    const char *segment,
		    lwc_string *fragment,
		    struct path_data *parent)
{
	struct path_data *d, *e;

	assert(scheme && segment && parent);

	d = calloc(1, sizeof(struct path_data));
	if (!d)
		return NULL;

	d->scheme = lwc_string_ref(scheme);

	d->port = port;

	d->segment = strdup(segment);
	if (!d->segment) {
		lwc_string_unref(d->scheme);
		free(d);
		return NULL;
	}

	if (fragment) {
		if (!urldb_add_path_fragment(d, fragment)) {
			/* The fragment array may already have been
			 * allocated when copying the fragment text
			 * failed, so release it too */
			free(d->fragment);
			free(d->segment);
			lwc_string_unref(d->scheme);
			free(d);
			return NULL;
		}
	}

	/* Find the first sibling that sorts after the new segment */
	for (e = parent->children; e; e = e->next) {
		if (strcmp(e->segment, d->segment) > 0)
			break;
	}

	if (e) {
		/* Insert before e */
		d->prev = e->prev;
		d->next = e;
		if (e->prev)
			e->prev->next = d;
		else
			parent->children = d;
		e->prev = d;
	} else if (!parent->children) {
		/* First child of this parent */
		d->prev = d->next = NULL;
		parent->children = parent->last = d;
	} else {
		/* Sorts after every existing sibling: append */
		d->next = NULL;
		d->prev = parent->last;
		parent->last->next = d;
		parent->last = d;
	}
	d->parent = parent;

	return d;
}


/**
 * Get the slot holding the search tree for a particular host
 *
 * IP addresses use the ST_IP tree, hosts starting with a letter use the
 * tree for that letter (case-insensitively) and everything else goes to
 * the ST_EE tree.
 *
 * \param host the host to lookup
 * \return pointer to the entry of the search tree table for the host
 */
static struct search_node **urldb_get_search_tree_direct(const char *host)
{
	int slot = ST_EE;

	assert(host);

	if (urldb__host_is_ip_address(host)) {
		slot = ST_IP;
	} else if (ascii_is_alpha(*host)) {
		slot = ST_DN + ascii_to_lower(*host) - 'a';
	}

	return &search_trees[slot];
}


/**
 * Get the search tree for a particular host
 *
 * Convenience wrapper around urldb_get_search_tree_direct() that yields
 * the tree's root node rather than the slot containing it.
 *
 * \param host the host to lookup
 * \return the corresponding search tree
 */
static struct search_node *urldb_get_search_tree(const char *host)
{
	struct search_node **slot = urldb_get_search_tree_direct(host);

	return *slot;
}


/**
 * Compare host part with a string
 *
 * The string is consumed one '.'-separated segment at a time while the
 * host is followed from \a a up through its parents. Corresponding
 * segments are compared ignoring case and must also agree in length.
 * When the string is an IP address it is compared as a whole against
 * \a a's part.
 *
 * \param a host part
 * \param b string to compare
 * \return 0 if match, non-zero, otherwise
 */
static int urldb_search_match_string(const struct host_part *a, const char *b)
{
	const char *limit;
	bool host_left, string_left;

	assert(a && a != &db_root && b);

	if (urldb__host_is_ip_address(b)) {
		/* IP address */
		return strcasecmp(a->part, b);
	}

	/* One past the terminator, so the terminator itself is visited
	 * as a (possibly empty) final segment */
	limit = b + strlen(b) + 1;

	while (b < limit && a && a != &db_root) {
		const char *sep = strchr(b, '.');
		size_t seg_len, part_len;
		int diff;

		if (sep == NULL) {
			/* last segment */
			sep = limit - 1;
		}
		seg_len = (size_t)(sep - b);

		/* Compare strings (length limited) */
		diff = strncasecmp(a->part, b, seg_len);
		if (diff != 0) {
			/* didn't match => return difference */
			return diff;
		}

		/* The strings matched, now check that the lengths do, too */
		part_len = strlen(a->part);
		if (part_len > seg_len) {
			return 1;
		}
		if (part_len < seg_len) {
			return -1;
		}

		b = sep + 1;
		a = a->parent;
	}

	/* Whichever side still has segments left is the longer one */
	host_left = (a != NULL && a != &db_root);
	string_left = (b < limit);

	if (host_left && !string_left) {
		return 1;
	}
	if (!host_left && string_left) {
		return -1;
	}

	/* Identical */
	return 0;
}


/**
 * Find a node in a search tree
 *
 * Descends from \a root, using urldb_search_match_string() to choose
 * between the left and right subtree, until the host is found or the
 * sentinel node is reached.
 *
 * \param root Tree to look in
 * \param host Host to find
 * \return Pointer to host tree node, or NULL if not found
 */
static const struct host_part *
urldb_search_find(struct search_node *root, const char *host)
{
	assert(root && host);

	while (root != &empty) {
		int order = urldb_search_match_string(root->data, host);

		if (order == 0) {
			return root->data;
		}

		root = (order > 0) ? root->left : root->right;
		assert(root);
	}

	return NULL;
}


/**
 * Match a path string
 *
 * The path is processed one '/'-separated segment at a time. On each
 * level the children are searched for a node whose segment equals the
 * current path segment exactly and whose scheme and port agree with the
 * ones given; the search then continues below that node with the rest
 * of the path.
 *
 * \param parent Path (sub)tree to look in; must be a root node, i.e.
 *               one without a segment
 * \param path The path to search for; must start with '/'
 * \param scheme The URL scheme associated with the path
 * \param port The port associated with the path
 * \return Pointer to path data or NULL if not found.
 */
static struct path_data *
urldb_match_path(const struct path_data *parent,
		 const char *path,
		 lwc_string *scheme,
		 unsigned short port)
{
	const struct path_data *p;
	const char *slash;
	bool match;

	assert(parent != NULL);
	assert(parent->segment == NULL);

	if (path[0] != '/') {
		NSLOG(netsurf, INFO, "path is %s", path);
	}

	assert(path[0] == '/');

	/* Start with children, as parent has no segment */
	p = parent->children;

	while (p != NULL) {
		size_t seglen;

		slash = strchr(path + 1, '/');
		if (!slash) {
			slash = path + strlen(path);
		}
		seglen = slash - path - 1;

		/* The length-limited comparison only proves that the
		 * node's segment starts with the wanted text, so the
		 * segment must also end right there; otherwise "/foo"
		 * would match a node called "foobar". */
		if (strncmp(p->segment, path + 1, seglen) == 0 &&
		    p->segment[seglen] == '\0' &&
		    lwc_string_isequal(p->scheme, scheme, &match) == lwc_error_ok &&
		    match == true &&
		    p->port == port) {
			if (*slash == '\0') {
				/* Complete match */
				return (struct path_data *) p;
			}

			/* Match so far, go down tree */
			p = p->children;

			path = slash;
		} else {
			/* No match, try next sibling */
			p = p->next;
		}
	}

	return NULL;
}


/**
 * Find an URL in the database
 *
 * The bloom filter (when present) is consulted first. mailto: URLs and
 * URLs without a host (other than file: URLs, which are looked up under
 * "localhost") are never found.
 *
 * \param url Absolute URL to find
 * \return Pointer to path data, or NULL if not found
 */
static struct path_data *urldb_find_url(nsurl *url)
{
	struct path_data *found = NULL;
	const struct host_part *h;
	lwc_string *scheme, *host, *port;
	const char *host_str;
	char *plq;
	size_t len = 0;
	unsigned int port_int = 0;
	bool match;

	assert(url);

	if (url_bloom != NULL &&
	    bloom_search_hash(url_bloom, nsurl_hash(url)) == false) {
		return NULL;
	}

	scheme = nsurl_get_component(url, NSURL_SCHEME);
	if (scheme == NULL) {
		return NULL;
	}

	/* From here on the scheme reference is released at "done" */

	if (lwc_string_isequal(scheme, corestring_lwc_mailto, &match) ==
	    lwc_error_ok && match == true) {
		goto done;
	}

	host = nsurl_get_component(url, NSURL_HOST);
	if (host != NULL) {
		/* The string data is used after dropping this reference */
		host_str = lwc_string_data(host);
		lwc_string_unref(host);

	} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
		   lwc_error_ok && match == true) {
		host_str = "localhost";

	} else {
		goto done;
	}

	h = urldb_search_find(urldb_get_search_tree(host_str), host_str);
	if (h == NULL) {
		goto done;
	}

	/* generate plq (path, leaf, query) */
	if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) != NSERROR_OK) {
		goto done;
	}

	/* Port defaults to 0 when the URL does not specify one */
	port = nsurl_get_component(url, NSURL_PORT);
	if (port != NULL) {
		port_int = atoi(lwc_string_data(port));
		lwc_string_unref(port);
	}

	found = urldb_match_path(&h->paths, plq, scheme, port_int);

	free(plq);

done:
	lwc_string_unref(scheme);

	return found;
}


/**
 * Log every path node in a path tree
 *
 * Performs an iterative depth-first walk starting at \a parent. For each
 * node which has a segment, its scheme, port, segment and fragments are
 * logged.
 *
 * \param parent Parent node of tree to dump
 */
static void urldb_dump_paths(struct path_data *parent)
{
	const struct path_data *node = parent;
	unsigned int f;

	do {
		if (node->segment != NULL) {
			NSLOG(netsurf, INFO, "\t%s : %u",
			      lwc_string_data(node->scheme), node->port);

			NSLOG(netsurf, INFO, "\t\t'%s'", node->segment);

			for (f = 0; f != node->frag_cnt; f++) {
				NSLOG(netsurf, INFO, "\t\t\t#%s",
				      node->fragment[f]);
			}
		}

		if (node->children != NULL) {
			/* descend first */
			node = node->children;
			continue;
		}

		/* Leaf: climb until a node with an unvisited sibling is
		 * found, or we arrive back at the starting node */
		while (node != parent && node->next == NULL) {
			node = node->parent;
		}

		if (node != parent) {
			node = node->next;
		}
	} while (node != parent);
}


/**
 * Log a host tree
 *
 * Logs the host part and its invalid-certificate policy (when the node has
 * a part), then the node's paths, then recurses into every child host.
 *
 * \param parent Parent node of tree to dump
 */
static void urldb_dump_hosts(struct host_part *parent)
{
	struct host_part *child;

	if (parent->part) {
		const char *policy =
			parent->permit_invalid_certs ? "Permits" : "Denies";

		NSLOG(netsurf, INFO, "%s", parent->part);

		NSLOG(netsurf, INFO, "\t%s invalid SSL certs", policy);
	}

	/* Paths hanging off this host */
	urldb_dump_paths(&parent->paths);

	/* Then each sub-host in turn */
	child = parent->children;
	while (child != NULL) {
		urldb_dump_hosts(child);
		child = child->next;
	}
}


/**
 * Dump search tree
 *
 * Performs an in-order walk of the tree. Each node is logged as one line:
 * \a depth spaces of indentation followed by the host parts found by
 * following parent links from the node's host, separated by '.'.
 *
 * Output which does not fit the internal line buffer is truncated at a
 * host part boundary rather than overrunning the buffer.
 *
 * \param parent Parent node of tree to dump
 * \param depth Tree depth
 */
static void urldb_dump_search(struct search_node *parent, int depth)
{
	const struct host_part *h;
	char s[1024];
	/* Highest index usable for text: leaves room for one '.' plus NUL */
	const int limit = sizeof(s) - 2;
	int i; /* index into string */
	int r;

	if (parent == &empty)
		return;

	urldb_dump_search(parent->left, depth + 1);

	/* Indent, but never beyond the buffer */
	for (i = 0; i < depth && i < limit; i++) {
		s[i] = ' ';
	}

	for (h = parent->data; h; h = h->parent) {
		if (i >= limit) {
			/* buffer is full */
			break;
		}

		if (h->part) {
			r = snprintf(&s[i], limit - i, "%s", h->part);
			if (r < 0 || r >= limit - i) {
				/* error or truncated: the partial text is
				 * discarded by the terminator written below */
				break;
			}
			i += r;
		}

		/* Separator, unless the next node up is the top of the tree */
		if (h->parent && h->parent->parent) {
			s[i] = '.';
			i++;
		}
	}
	s[i] = 0;

	NSLOG(netsurf, INFO, "%s", s);

	urldb_dump_search(parent->right, depth + 1);
}


/**
 * Compare a pair of host parts
 *
 * Both hosts are walked towards the root in step, comparing the part
 * strings case-insensitively. If all compared parts are equal, the host
 * with parts left over orders after the other.
 *
 * \param a first host part
 * \param b second host part
 * \return 0 if match, non-zero, otherwise
 */
static int
urldb_search_match_host(const struct host_part *a, const struct host_part *b)
{
	int diff;
	bool a_left, b_left;

	assert(a && b);

	/* climb both chains together until either one runs out */
	while (a && a != &db_root && b && b != &db_root) {
		diff = strcasecmp(a->part, b->part);
		if (diff != 0) {
			/* first differing part decides the ordering */
			return diff;
		}

		a = a->parent;
		b = b->parent;
	}

	/* All compared parts were equal; does either side have more? */
	a_left = (a != NULL && a != &db_root);
	b_left = (b != NULL && b != &db_root);

	if (a_left && !b_left) {
		/* len(a) > len(b) */
		return 1;
	}
	if (!a_left && b_left) {
		/* len(a) < len(b) */
		return -1;
	}

	/* identical */
	return 0;
}


/**
 * Rotate a subtree right
 *
 * The rotation only happens when the left child is at the same level as
 * \a root; otherwise the subtree is returned untouched.
 *
 * \param root Root of subtree to rotate
 * \return new root of subtree
 */
static struct search_node *urldb_search_skew(struct search_node *root)
{
	struct search_node *pivot;

	assert(root);

	if (root->left->level != root->level) {
		/* nothing to do */
		return root;
	}

	/* left child becomes the new root; old root becomes its right child */
	pivot = root->left;
	root->left = pivot->right;
	pivot->right = root;

	return pivot;
}


/**
 * Rotate a node left, increasing the parent's level
 *
 * The rotation only happens when the right child's right child is at the
 * same level as \a root; otherwise the subtree is returned untouched.
 *
 * \param root Root of subtree to rotate
 * \return New root of subtree
 */
static struct search_node *urldb_search_split(struct search_node *root)
{
	struct search_node *pivot;

	assert(root);

	if (root->right->right->level != root->level) {
		/* nothing to do */
		return root;
	}

	/* right child becomes the new root, one level higher */
	pivot = root->right;
	root->right = pivot->left;
	pivot->left = root;
	pivot->level++;

	return pivot;
}


/**
 * Insert node into search tree
 *
 * If a node for the same host is already present, \a n is freed and the
 * tree is left unchanged.
 *
 * \param root Root of (sub)tree to insert into
 * \param n Node to insert
 * \return Pointer to updated root
 */
static struct search_node *
urldb_search_insert_internal(struct search_node *root, struct search_node *n)
{
	int order;

	assert(root && n);

	if (root == &empty) {
		/* reached a vacant position: the new node goes here */
		return n;
	}

	order = urldb_search_match_host(root->data, n->data);
	if (order == 0) {
		/* exact match: discard the duplicate */
		free(n);
		return root;
	}

	if (order > 0) {
		root->left = urldb_search_insert_internal(root->left, n);
	} else {
		root->right = urldb_search_insert_internal(root->right, n);
	}

	/* Rebalance on the way back up */
	return urldb_search_split(urldb_search_skew(root));
}


/**
 * Insert a node into the search tree
 *
 * Allocates a fresh level 1 node for \a data and inserts it into the tree.
 *
 * \param root Root of tree to insert into
 * \param data User data to insert
 * \return Pointer to updated root, or NULL if failed
 */
static struct search_node *
urldb_search_insert(struct search_node *root, const struct host_part *data)
{
	struct search_node *node;

	assert(root && data);

	node = malloc(sizeof(*node));
	if (node == NULL) {
		return NULL;
	}

	/* New nodes start as leaves whose children are the sentinel */
	node->data = data;
	node->level = 1;
	node->left = &empty;
	node->right = &empty;

	return urldb_search_insert_internal(root, node);
}


/**
 * Parse a cookie avpair
 *
 * Leading and trailing spaces/tabs are removed from both the name and the
 * value (the buffers are modified in place). The pair is then interpreted
 * as one of the known cookie attributes (Comment, Domain, Max-Age, Path,
 * Version, Expires, Secure, HttpOnly); if it is none of these and the
 * cookie has no name yet, it becomes the cookie's name and value.
 *
 * \param c Cookie struct to populate
 * \param n Name component
 * \param v Value component
 * \param was_quoted Whether \a v was quoted in the input
 * \return true on success, false on memory exhaustion
 */
static bool
urldb_parse_avpair(struct cookie_internal_data *c,
		   char *n,
		   char *v,
		   bool was_quoted)
{
	int vlen;

	assert(c && n && v);

	/* Strip whitespace from start of name */
	for (; *n; n++) {
		if (*n != ' ' && *n != '\t')
			break;
	}

	/* Strip whitespace from end of name.
	 * The last character lives at index vlen - 1; index vlen is the
	 * NUL terminator, so testing that would never strip anything. */
	for (vlen = strlen(n); vlen > 0; vlen--) {
		if (n[vlen - 1] == ' ' || n[vlen - 1] == '\t')
			n[vlen - 1] = '\0';
		else
			break;
	}

	/* Strip whitespace from start of value */
	for (; *v; v++) {
		if (*v != ' ' && *v != '\t')
			break;
	}

	/* Strip whitespace from end of value (see note on name above) */
	for (vlen = strlen(v); vlen > 0; vlen--) {
		if (v[vlen - 1] == ' ' || v[vlen - 1] == '\t')
			v[vlen - 1] = '\0';
		else
			break;
	}

	if (!c->comment && strcasecmp(n, "Comment") == 0) {
		c->comment = strdup(v);
		if (!c->comment)
			return false;
	} else if (!c->domain && strcasecmp(n, "Domain") == 0) {
		if (v[0] == '.') {
			/* Domain must start with a dot */
			c->domain_from_set = true;
			c->domain = strdup(v);
			if (!c->domain)
				return false;
		}
	} else if (strcasecmp(n, "Max-Age") == 0) {
		int temp = atoi(v);
		if (temp == 0)
			/* Special case - 0 means delete */
			c->expires = 0;
		else
			c->expires = time(NULL) + temp;
	} else if (!c->path && strcasecmp(n, "Path") == 0) {
		c->path_from_set = true;
		c->path = strdup(v);
		if (!c->path)
			return false;
	} else if (strcasecmp(n, "Version") == 0) {
		c->version = atoi(v);
	} else if (strcasecmp(n, "Expires") == 0) {
		char *datenoday;
		time_t expires;
		nserror res;

		/* Strip dayname from date (these are hugely variable
		 * and liable to break the parser.  They also serve no
		 * useful purpose) */
		for (datenoday = v;
		     *datenoday && !ascii_is_digit(*datenoday);
		     datenoday++) {
			/* do nothing */
		}

		res = nsc_strntimet(datenoday, strlen(datenoday), &expires);
		if (res != NSERROR_OK) {
			/* assume we have an unrepresentable date =>
			 * force it to the maximum possible value of a
			 * 32bit time_t (this may break in 2038. We'll
			 * deal with that once we come to it) */
			expires = (time_t)0x7fffffff;
		}
		c->expires = expires;
	} else if (strcasecmp(n, "Secure") == 0) {
		c->secure = true;
	} else if (strcasecmp(n, "HttpOnly") == 0) {
		c->http_only = true;
	} else if (!c->name) {
		/* First unrecognised pair is the cookie's NAME=VALUE */
		c->name = strdup(n);
		c->value = strdup(v);
		c->value_was_quoted = was_quoted;
		if (!c->name || !c->value) {
			return false;
		}
	}

	return true;
}


/**
 * Free a cookie
 *
 * Releases every string owned by the cookie and then the cookie itself.
 *
 * \param c The cookie to free
 */
static void urldb_free_cookie(struct cookie_internal_data *c)
{
	assert(c);

	/* owned strings */
	free(c->name);
	free(c->value);
	free(c->path);
	free(c->domain);
	free(c->comment);

	/* the structure itself */
	free(c);
}


/**
 * Parse a cookie
 *
 * Consumes one cookie from the header text at \a *cookie, splitting it
 * into avpairs which are handed to urldb_parse_avpair(). Parsing stops at
 * CRLF, at the end of the string, or at a comma judged to separate two
 * cookies. If the cookie did not set a domain or path, defaults are
 * derived from \a url.
 *
 * Names longer than 1023 bytes and values longer than 4095 bytes are
 * silently truncated.
 *
 * \param url URL being fetched
 * \param cookie Pointer to cookie string (updated on exit)
 * \return Pointer to cookie structure (on heap, caller frees) or NULL
 */
static struct cookie_internal_data *
urldb_parse_cookie(nsurl *url, const char **cookie)
{
	struct cookie_internal_data *c;
	const char *cur;
	char name[1024], value[4096];
	char *n = name, *v = value;
	bool in_value = false;
	bool had_value_data = false;
	bool value_verbatim = false;
	bool quoted = false;
	bool was_quoted = false;

	assert(url && cookie && *cookie);

	c = calloc(1, sizeof(struct cookie_internal_data));
	if (c == NULL)
		return NULL;

	/* -1 marks "no expiry given" */
	c->expires = -1;

	name[0] = '\0';
	value[0] = '\0';

	for (cur = *cookie; *cur; cur++) {
		if (*cur == '\r' && *(cur + 1) == '\n') {
			/* End of header */
			if (quoted) {
				/* Unmatched quote encountered */

				/* Match Firefox 2.0.0.11 */
				value[0] = '\0';

			}

			break;
		} else if (*cur == '\r') {
			/* Spurious linefeed */
			continue;
		} else if (*cur == '\n') {
			/* Spurious newline */
			continue;
		}

		if (in_value && !had_value_data) {
			if (*cur == ' ' || *cur == '\t') {
				/* Strip leading whitespace from value */
				continue;
			} else {
				had_value_data = true;

				/* Value is taken verbatim if first non-space
				 * character is not a " */
				if (*cur != '"') {
					value_verbatim = true;
				}
			}
		}

		if (in_value && !value_verbatim && (*cur == '"')) {
			/* Only non-verbatim values may be quoted */
			if (cur == *cookie || *(cur - 1) != '\\') {
				/* Only unescaped quotes count */
				was_quoted = quoted;
				quoted = !quoted;

				continue;
			}
		}

		if (!quoted && !in_value && *cur == '=') {
			/* First equals => attr-value separator */
			in_value = true;
			continue;
		}

		if (!quoted && (was_quoted || *cur == ';')) {
			/* Semicolon or after quoted value
			 * => end of current avpair */

			/* NUL-terminate tokens */
			*n = '\0';
			*v = '\0';

			if (!urldb_parse_avpair(c, name, value, was_quoted)) {
				/* Memory exhausted */
				urldb_free_cookie(c);
				return NULL;
			}

			/* And reset to start */
			n = name;
			v = value;
			in_value = false;
			had_value_data = false;
			value_verbatim = false;
			was_quoted = false;

			/* Now, if the current input is anything other than a
			 * semicolon, we must be sure to reprocess it */
			if (*cur != ';') {
				cur--;
			}

			continue;
		}

		/* And now handle commas. These are a pain as they may mean
		 * any of the following:
		 *
		 * + End of cookie
		 * + Day separator in Expires avpair
		 * + (Invalid) comma in unquoted value
		 *
		 * Therefore, in order to handle all 3 cases (2 and 3 are
		 * identical, the difference being that 2 is in the spec and
		 * 3 isn't), we need to determine where the comma actually
		 * lies. We use the following heuristic:
		 *
		 *   Given a comma at the current input position, find the
		 *   immediately following semicolon (or end of input if none
		 *   found). Then, consider the input characters between
		 *   these two positions. If any of these characters is an
		 *   '=', we must assume that the comma signified the end of
		 *   the current cookie.
		 *
		 * This holds as the first avpair of any cookie must be
		 * NAME=VALUE, so the '=' is guaranteed to appear in the
		 * case where the comma marks the end of a cookie.
		 *
		 * This will fail, however, in the case where '=' appears in
		 * the value of the current avpair after the comma or the
		 * subsequent cookie does not start with NAME=VALUE. Neither
		 * of these is particularly likely and if they do occur, the
		 * website is more broken than we can be bothered to handle.
		 */
		if (!quoted && *cur == ',') {
			/* Find semi-colon, if any */
			const char *p;
			const char *semi = strchr(cur + 1, ';');
			/* NOTE(review): the fallback below assumes the input
			 * ends with CRLF -- confirm against callers */
			if (!semi)
				semi = cur + strlen(cur) - 2 /* CRLF */;

			/* Look for equals sign between comma and semi */
			for (p = cur + 1; p < semi; p++)
				if (*p == '=')
					break;

			if (p == semi) {
				/* none found => comma internal to value */
				/* do nothing */
			} else {
				/* found one => comma marks end of cookie */
				cur++;
				break;
			}
		}

		/* Accumulate into buffers, always leaving space for a NUL */
		/** \todo is silently truncating overlong names/values wise? */
		if (!in_value) {
			if (n < name + (sizeof(name) - 1))
				*n++ = *cur;
		} else {
			if (v < value + (sizeof(value) - 1))
				*v++ = *cur;
		}
	}

	/* Parse final avpair */
	*n = '\0';
	*v = '\0';

	if (!urldb_parse_avpair(c, name, value, was_quoted)) {
		/* Memory exhausted */
		urldb_free_cookie(c);
		return NULL;
	}

	/* Now fix-up default values */
	if (c->domain == NULL) {
		lwc_string *host = nsurl_get_component(url, NSURL_HOST);
		if (host == NULL) {
			urldb_free_cookie(c);
			return NULL;
		}
		c->domain = strdup(lwc_string_data(host));
		lwc_string_unref(host);
		if (c->domain == NULL) {
			/* Memory exhausted: never hand back a cookie
			 * without a domain */
			urldb_free_cookie(c);
			return NULL;
		}
	}

	if (c->path == NULL) {
		const char *path_data;
		char *path, *slash;
		lwc_string *path_lwc;

		path_lwc = nsurl_get_component(url, NSURL_PATH);
		if (path_lwc == NULL) {
			urldb_free_cookie(c);
			return NULL;
		}
		path_data = lwc_string_data(path_lwc);

		/* Strip leafname and trailing slash (4.3.1) */
		slash = strrchr(path_data, '/');
		if (slash != NULL) {
			/* Special case: retain first slash in path */
			if (slash == path_data)
				slash++;

			/* slash is reused to hold the truncated copy */
			slash = strndup(path_data, slash - path_data);
			if (slash == NULL) {
				lwc_string_unref(path_lwc);
				urldb_free_cookie(c);
				return NULL;
			}

			path = slash;
			lwc_string_unref(path_lwc);
		} else {
			path = strdup(lwc_string_data(path_lwc));
			lwc_string_unref(path_lwc);
			if (path == NULL) {
				urldb_free_cookie(c);
				return NULL;
			}
		}

		c->path = path;
	}

	/* Write back current position */
	*cookie = cur;

	return c;
}


/**
 * Find the child of a path node matching a segment, scheme and port
 *
 * \param parent Path node whose children are searched
 * \param segment Segment text to match exactly
 * \param scheme URL scheme to match
 * \param port Port number to match
 * \return Pointer to matching child, or NULL if there is none
 */
static struct path_data *
urldb_find_path_child(const struct path_data *parent,
		      const char *segment,
		      lwc_string *scheme,
		      unsigned int port)
{
	struct path_data *e;
	bool match;

	for (e = parent->children; e; e = e->next) {
		if (strcmp(segment, e->segment) == 0 &&
		    lwc_string_isequal(scheme, e->scheme,
				       &match) == lwc_error_ok &&
		    match == true &&
		    e->port == port)
			break;
	}

	return e;
}


/**
 * Add a path to the database, creating any intermediate entries
 *
 * The path is split on '/' and each segment is looked up beneath the
 * previous one, creating nodes where none exist. The leaf node receives
 * the fragment and, if it has no URL yet, the defragmented \a url.
 *
 * \param scheme URL scheme associated with path
 * \param port Port number on host associated with path
 * \param host Host tree node to attach to
 * \param path_query Absolute path plus query to add (freed). May be NULL
 *                   (e.g. a failed allocation in the caller), in which
 *                   case NULL is returned.
 * \param fragment URL fragment, or NULL
 * \param url URL (fragment ignored)
 * \return Pointer to leaf node, or NULL on memory exhaustion
 */
static struct path_data *
urldb_add_path(lwc_string *scheme,
	       unsigned int port,
	       const struct host_part *host,
	       char *path_query,
	       lwc_string *fragment,
	       nsurl *url)
{
	struct path_data *d, *e;
	char *segment, *slash;

	assert(scheme && host && url);

	if (path_query == NULL) {
		/* Nothing to add; treat as memory exhaustion */
		return NULL;
	}

	d = (struct path_data *) &host->paths;

	/* skip leading '/' */
	segment = path_query;
	if (*segment == '/')
		segment++;

	/* Process path segments */
	do {
		slash = strchr(segment, '/');
		if (!slash) {
			/* last segment: reuse or create the leaf */
			e = urldb_find_path_child(d, segment, scheme, port);

			d = e ? urldb_add_path_fragment(e, fragment) :
				urldb_add_path_node(scheme, port,
						    segment, fragment, d);
			break;
		}

		/* Terminate this segment in place (buffer is ours to edit) */
		*slash = '\0';

		/* intermediate segment: reuse or create, then descend */
		e = urldb_find_path_child(d, segment, scheme, port);

		d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d);
		if (!d)
			break;

		segment = slash + 1;
	} while (1);

	free(path_query);

	if (d && !d->url) {
		/* Insert defragmented URL */
		if (nsurl_defragment(url, &d->url) != NSERROR_OK)
			return NULL;
	}

	return d;
}


/**
 * Add a host to the database, creating any intermediate entries
 *
 * An IP address is added verbatim as a direct child of the root. Any other
 * host is split on '.' and inserted segment by segment, last segment
 * first. The resulting leaf is also inserted into the relevant search
 * tree.
 *
 * \param host Hostname to add
 * \return Pointer to leaf node, or NULL on memory exhaustion
 */
static struct host_part *urldb_add_host(const char *host)
{
	struct host_part *d = (struct host_part *) &db_root, *e;
	struct search_node *s;
	char buf[256]; /* 256 bytes is sufficient - domain names are
			* limited to 255 chars. */
	char *part;

	assert(host);

	if (urldb__host_is_ip_address(host)) {
		/* Host is an IP, so simply add as TLD */

		/* Check for existing entry */
		for (e = d->children; e; e = e->next)
			if (strcasecmp(host, e->part) == 0)
				/* found => return it */
				return e;

		d = urldb_add_host_node(host, d);
		if (!d) {
			/* failed: there is nothing to index */
			return NULL;
		}

		s = urldb_search_insert(search_trees[ST_IP], d);
		if (!s) {
			/* failed */
			d = NULL;
		} else {
			search_trees[ST_IP] = s;
		}

		return d;
	}

	/* Copy host string, so we can corrupt it */
	strncpy(buf, host, sizeof buf);
	buf[sizeof buf - 1] = '\0';

	/* Process FQDN segments backwards */
	do {
		part = strrchr(buf, '.');
		if (!part) {
			/* last segment */
			/* Check for existing entry */
			for (e = d->children; e; e = e->next)
				if (strcasecmp(buf, e->part) == 0)
					break;

			if (e) {
				d = e;
			} else {
				d = urldb_add_host_node(buf, d);
			}

			/* And insert into search tree */
			if (d) {
				struct search_node **r;

				r = urldb_get_search_tree_direct(buf);
				s = urldb_search_insert(*r, d);
				if (!s) {
					/* failed */
					d = NULL;
				} else {
					*r = s;
				}
			}
			break;
		}

		/* Check for existing entry */
		for (e = d->children; e; e = e->next)
			if (strcasecmp(part + 1, e->part) == 0)
				break;

		d = e ? e : urldb_add_host_node(part + 1, d);
		if (!d)
			break;

		/* Chop the segment just handled off the end of the buffer */
		*part = '\0';
	} while (1);

	return d;
}


/**
 * Insert a cookie into the database
 *
 * A cookie whose domain starts with '.' is attached to the root path of
 * that domain's host node; any other cookie is attached to the path node
 * for its path on its host. Hosts and paths are created as needed.
 *
 * If a cookie with the same domain, path and name is already stored there
 * it is either removed (when \a c has an expiry time in the past) or
 * replaced by \a c. Otherwise \a c is appended to the node's cookie list.
 *
 * \param c The cookie to insert
 * \param scheme URL scheme associated with cookie path
 * \param url URL (sans fragment) associated with cookie
 * \return true on success, false on memory exhaustion (c will be freed)
 */
static bool
urldb_insert_cookie(struct cookie_internal_data *c,
		    lwc_string *scheme,
		    nsurl *url)
{
	struct cookie_internal_data *d;
	const struct host_part *h;
	struct path_data *p;
	time_t now = time(NULL);

	assert(c);

	if (c->domain[0] == '.') {
		/* Domain cookie: host lookup skips the leading dot */
		h = urldb_search_find(
			urldb_get_search_tree(&(c->domain[1])),
			c->domain + 1);
		if (!h) {
			h = urldb_add_host(c->domain + 1);
			if (!h) {
				urldb_free_cookie(c);
				return false;
			}
		}

		p = (struct path_data *) &h->paths;
	} else {
		char *path_copy;

		/* Need to have a URL and scheme, if it's not a domain cookie */
		assert(url != NULL);
		assert(scheme != NULL);

		h = urldb_search_find(
			urldb_get_search_tree(c->domain),
			c->domain);

		if (!h) {
			h = urldb_add_host(c->domain);
			if (!h) {
				urldb_free_cookie(c);
				return false;
			}
		}

		/* urldb_add_path() consumes its path argument, so give it
		 * a private copy -- and make sure the copy exists */
		path_copy = strdup(c->path);
		if (path_copy == NULL) {
			urldb_free_cookie(c);
			return false;
		}

		/* find path */
		p = urldb_add_path(scheme, 0, h, path_copy, NULL, url);
		if (!p) {
			urldb_free_cookie(c);
			return false;
		}
	}

	/* add cookie */
	for (d = p->cookies; d; d = d->next) {
		if (!strcmp(d->domain, c->domain) &&
		    !strcmp(d->path, c->path) &&
		    !strcmp(d->name, c->name))
			break;
	}

	if (d) {
		if (c->expires != -1 && c->expires < now) {
			/* remove cookie */
			if (d->next)
				d->next->prev = d->prev;
			else
				p->cookies_end = d->prev;
			if (d->prev)
				d->prev->next = d->next;
			else
				p->cookies = d->next;

			cookie_manager_remove((struct cookie_data *)d);

			urldb_free_cookie(d);
			urldb_free_cookie(c);
		} else {
			/* replace d with c */
			c->prev = d->prev;
			c->next = d->next;
			if (c->next)
				c->next->prev = c;
			else
				p->cookies_end = c;
			if (c->prev)
				c->prev->next = c;
			else
				p->cookies = c;

			cookie_manager_remove((struct cookie_data *)d);
			urldb_free_cookie(d);

			cookie_manager_add((struct cookie_data *)c);
		}
	} else {
		/* No existing cookie: append to the end of the list */
		c->prev = p->cookies_end;
		c->next = NULL;
		if (p->cookies_end)
			p->cookies_end->next = c;
		else
			p->cookies = c;
		p->cookies_end = c;

		cookie_manager_add((struct cookie_data *)c);
	}

	return true;
}


/**
 * Concatenate a cookie into the provided buffer
 *
 * Appends "; NAME=VALUE" (plus "$Path" and "$Domain" attributes for
 * non-Netscape cookies that set them) to the string in \a *buf. The
 * buffer is grown in 4096 byte steps until the cookie is certain to fit.
 *
 * New text is written starting at offset \a *used - 1.
 *
 * \param c Cookie to concatenate
 * \param version The version of the cookie string to output
 * \param used Pointer to amount of buffer used (updated)
 * \param alloc Pointer to allocated size of buffer (updated)
 * \param buf Pointer to Pointer to buffer (updated)
 * \return true on success, false on memory exhaustion
 */
static bool
urldb_concat_cookie(struct cookie_internal_data *c,
		    int version,
		    int *used,
		    int *alloc,
		    char **buf)
{
	/* Combined (A)BNF for the Cookie: request header:
	 *
	 * CHAR           = <any US-ASCII character (octets 0 - 127)>
	 * CTL            = <any US-ASCII control character
	 *                  (octets 0 - 31) and DEL (127)>
	 * CR             = <US-ASCII CR, carriage return (13)>
	 * LF             = <US-ASCII LF, linefeed (10)>
	 * SP             = <US-ASCII SP, space (32)>
	 * HT             = <US-ASCII HT, horizontal-tab (9)>
	 * <">            = <US-ASCII double-quote mark (34)>
	 *
	 * CRLF           = CR LF
	 *
	 * LWS            = [CRLF] 1*( SP | HT )
	 *
	 * TEXT           = <any OCTET except CTLs,
	 *                  but including LWS>
	 *
	 * token          = 1*<any CHAR except CTLs or separators>
	 * separators     = "(" | ")" | "<" | ">" | "@"
	 *                | "," | ";" | ":" | "\" | <">
	 *                | "/" | "[" | "]" | "?" | "="
	 *                | "{" | "}" | SP | HT
	 *
	 * quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
	 * qdtext         = <any TEXT except <">>
	 * quoted-pair    = "\" CHAR
	 *
	 * attr            =       token
	 * value           =       word
	 * word            =       token | quoted-string
	 *
	 * cookie          =       "Cookie:" cookie-version
	 *                         1*((";" | ",") cookie-value)
	 * cookie-value    =       NAME "=" VALUE [";" path] [";" domain]
	 * cookie-version  =       "$Version" "=" value
	 * NAME            =       attr
	 * VALUE           =       value
	 * path            =       "$Path" "=" value
	 * domain          =       "$Domain" "=" value
	 *
	 * A note on quoted-string handling:
	 *   The cookie data stored in the db is verbatim (i.e. sans enclosing
	 *   <">, if any, and with all quoted-pairs intact) thus all that we
	 *   need to do here is ensure that value strings which were quoted
	 *   in Set-Cookie or which include any of the separators are quoted
	 *   before use.
	 *
	 * A note on cookie-value separation:
	 *   We use semicolons for all separators, including between
	 *   cookie-values. This simplifies things and is backwards compatible.
	 */
	const char * const separators = "()<>@,;:\\\"/[]?={} \t";

	int max_len;

	assert(c && used && alloc && buf && *buf);

	/* "; " cookie-value
	 * We allow for the possibility that values are quoted
	 */
	max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 +
		(c->path_from_set ?
		 8 + strlen(c->path) + 2 : 0) +
		(c->domain_from_set ?
		 10 + strlen(c->domain) + 2 : 0);

	if (*used + max_len >= *alloc) {
		int new_alloc = *alloc;
		char *temp;

		/* A single cookie can need more than one 4096 byte step
		 * (name and value alone may exceed it), so keep stepping
		 * until the worst case definitely fits. */
		do {
			new_alloc += 4096;
		} while (*used + max_len >= new_alloc);

		temp = realloc(*buf, new_alloc);
		if (!temp) {
			return false;
		}
		*buf = temp;
		*alloc = new_alloc;
	}

	if (version == COOKIE_NETSCAPE) {
		/* Original Netscape cookie */
		sprintf(*buf + *used - 1, "; %s=", c->name);
		*used += 2 + strlen(c->name) + 1;

		/* The Netscape spec doesn't mention quoting of cookie values.
		 * RFC 2109 $10.1.3 indicates that values must not be quoted.
		 *
		 * However, other browsers preserve quoting, so we should, too
		 */
		if (c->value_was_quoted) {
			sprintf(*buf + *used - 1, "\"%s\"", c->value);
			*used += 1 + strlen(c->value) + 1;
		} else {
			/** \todo should we %XX-encode [;HT,SP] ? */
			/** \todo Should we strip escaping backslashes? */
			sprintf(*buf + *used - 1, "%s", c->value);
			*used += strlen(c->value);
		}

		/* We don't send path/domain information -- that's what the
		 * Netscape spec suggests we should do, anyway. */
	} else {
		/* RFC2109 or RFC2965 cookie */
		sprintf(*buf + *used - 1, "; %s=", c->name);
		*used += 2 + strlen(c->name) + 1;

		/* Value needs quoting if it contains any separator or if
		 * it needs preserving from the Set-Cookie header */
		if (c->value_was_quoted ||
		    strpbrk(c->value, separators) != NULL) {
			sprintf(*buf + *used - 1, "\"%s\"", c->value);
			*used += 1 + strlen(c->value) + 1;
		} else {
			sprintf(*buf + *used - 1, "%s", c->value);
			*used += strlen(c->value);
		}

		if (c->path_from_set) {
			/* Path, quoted if necessary */
			sprintf(*buf + *used - 1, "; $Path=");
			*used += 8;

			if (strpbrk(c->path, separators) != NULL) {
				sprintf(*buf + *used - 1, "\"%s\"", c->path);
				*used += 1 + strlen(c->path) + 1;
			} else {
				sprintf(*buf + *used - 1, "%s", c->path);
				*used += strlen(c->path);
			}
		}

		if (c->domain_from_set) {
			/* Domain, quoted if necessary */
			sprintf(*buf + *used - 1, "; $Domain=");
			*used += 10;

			if (strpbrk(c->domain, separators) != NULL) {
				sprintf(*buf + *used - 1, "\"%s\"", c->domain);
				*used += 1 + strlen(c->domain) + 1;
			} else {
				sprintf(*buf + *used - 1, "%s", c->domain);
				*used += strlen(c->domain);
			}
		}
	}

	return true;
}


/**
 * Delete a cookie from a path tree
 *
 * Walks the path tree rooted at \a parent and unlinks and frees the first
 * cookie whose domain, path and name all match. The walk stops as soon as
 * one cookie has been removed.
 *
 * \param domain the cookie domain
 * \param path the cookie path
 * \param name The cookie name
 * \param parent The url data of the cookie
 */
static void
urldb_delete_cookie_paths(const char *domain,
			  const char *path,
			  const char *name,
			  struct path_data *parent)
{
	struct path_data *node = parent;
	struct cookie_internal_data *c;

	assert(parent);

	do {
		for (c = node->cookies; c; c = c->next) {
			if (strcmp(c->domain, domain) != 0 ||
			    strcmp(c->path, path) != 0 ||
			    strcmp(c->name, name) != 0) {
				/* not the cookie we want */
				continue;
			}

			/* Unlink from the node's doubly linked list */
			if (c->prev) {
				c->prev->next = c->next;
			} else {
				node->cookies = c->next;
			}

			if (c->next) {
				c->next->prev = c->prev;
			} else {
				node->cookies_end = c->prev;
			}

			urldb_free_cookie(c);

			return;
		}

		if (node->children) {
			/* descend first */
			node = node->children;
			continue;
		}

		/* Leaf: climb until a node with an unvisited sibling is
		 * found, or we arrive back at the starting node */
		while (node != parent && node->next == NULL) {
			node = node->parent;
		}

		if (node != parent) {
			node = node->next;
		}
	} while (node != parent);
}


/**
 * Delete a cookie from a host and all of its sub-hosts
 *
 * Applies urldb_delete_cookie_paths() to the paths of \a parent and then
 * recurses into each child host.
 *
 * \param domain the cookie domain
 * \param path the cookie path
 * \param name The cookie name
 * \param parent The url data of the cookie
 */
static void
urldb_delete_cookie_hosts(const char *domain,
			  const char *path,
			  const char *name,
			  struct host_part *parent)
{
	struct host_part *child;

	assert(parent);

	/* this host's own paths */
	urldb_delete_cookie_paths(domain, path, name, &parent->paths);

	/* then every sub-host */
	child = parent->children;
	while (child != NULL) {
		urldb_delete_cookie_hosts(domain, path, name, child);
		child = child->next;
	}
}


/**
 * Save a path subtree's cookies
 *
 * Walks the path tree rooted at \a parent and writes one tab-separated
 * line per cookie to \a fp. Session cookies (expiry of -1) and cookies
 * which have already expired are not written.
 *
 * \param fp File pointer to write to
 * \param parent Parent path
 */
static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
{
	struct path_data *node = parent;
	struct cookie_internal_data *c;
	const time_t now = time(NULL);

	assert(fp && parent);

	do {
		for (c = node->cookies; c != NULL; c = c->next) {
			const char *scheme_str;
			const char *url_str;
			const char *comment_str;

			if (c->expires == -1 || c->expires < now) {
				/* Skip expired & session cookies */
				continue;
			}

			/* Placeholders for fields the node does not have */
			scheme_str = node->scheme ?
				lwc_string_data(node->scheme) : "unused";
			url_str = node->url ?
				nsurl_access(node->url) : "unused";
			comment_str = c->comment ? c->comment : "";

			fprintf(fp,
				"%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t"
				"%s\t%s\t%d\t%s\t%s\t%s\n",
				c->version, c->domain,
				c->domain_from_set, c->path,
				c->path_from_set, c->secure,
				c->http_only,
				(int)c->expires, (int)c->last_used,
				c->no_destroy, c->name, c->value,
				c->value_was_quoted,
				scheme_str,
				url_str,
				comment_str);
		}

		if (node->children != NULL) {
			/* descend first */
			node = node->children;
			continue;
		}

		/* Leaf: climb until a node with an unvisited sibling is
		 * found, or we arrive back at the starting node */
		while (node != parent && node->next == NULL) {
			node = node->parent;
		}

		if (node != parent) {
			node = node->next;
		}
	} while (node != parent);
}


/**
 * Save a host subtree's cookies
 *
 * Writes the cookies of \a parent's own paths, then recurses into each
 * child host.
 *
 * \param fp File pointer to write to
 * \param parent Parent host
 */
static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
{
	struct host_part *child;

	assert(fp && parent);

	urldb_save_cookie_paths(fp, &parent->paths);

	child = parent->children;
	while (child != NULL) {
		urldb_save_cookie_hosts(fp, child);
		child = child->next;
	}
}


/**
 * Destroy a cookie node
 *
 * Frees each string held by the cookie, then the cookie structure.
 *
 * \param c Cookie to destroy
 */
static void urldb_destroy_cookie(struct cookie_internal_data *c)
{
	/* strings first ... */
	free(c->path);
	free(c->domain);
	free(c->comment);
	free(c->value);
	free(c->name);

	/* ... then the node */
	free(c);
}


/**
 * Destroy the contents of a path node
 *
 * Releases the node's URL and scheme references, its segment, fragments,
 * title and every cookie on its cookie list. The node structure itself is
 * left for the caller to deal with.
 *
 * \param node Node to destroy contents of (does not destroy node)
 */
static void urldb_destroy_path_node_content(struct path_data *node)
{
	struct cookie_internal_data *cookie, *following;
	unsigned int f;

	/* Reference counted members */
	if (node->url != NULL) {
		nsurl_unref(node->url);
	}

	if (node->scheme != NULL) {
		lwc_string_unref(node->scheme);
	}

	/* Plain heap members */
	free(node->segment);

	for (f = 0; f < node->frag_cnt; f++) {
		free(node->fragment[f]);
	}
	free(node->fragment);

	free(node->urld.title);

	/* Cookie list: remember the successor before freeing each entry */
	cookie = node->cookies;
	while (cookie != NULL) {
		following = cookie->next;
		urldb_destroy_cookie(cookie);
		cookie = following;
	}
}


/**
 * Destroy protection space data
 *
 * Drops the scheme reference, frees the realm and auth strings, and then
 * frees the structure itself.
 *
 * \param space Protection space to destroy
 */
static void urldb_destroy_prot_space(struct prot_space_data *space)
{
	/* heap strings */
	free(space->auth);
	free(space->realm);

	/* reference counted scheme */
	lwc_string_unref(space->scheme);

	free(space);
}


/**
 * Destroy a path tree
 *
 * Iteratively walks the tree below \a root, destroying the content of and
 * freeing each node once all of its descendants have been dealt with.
 * \a root itself is destroyed and freed last. Siblings of \a root are not
 * visited.
 *
 * \param root Root node of tree to destroy
 */
static void urldb_destroy_path_tree(struct path_data *root)
{
	struct path_data *p = root;

	do {
		if (p->children != NULL) {
			/* Descend as far as possible before freeing anything */
			p = p->children;
		} else {
			/* q trails p: it is the node that is finished with
			 * and is to be freed once p has moved off it */
			struct path_data *q = p;

			while (p != root) {
				if (p->next != NULL) {
					/* Move on to the sibling; q (the node
					 * just left) is freed after the loop */
					p = p->next;
					break;
				}

				/* No sibling: step up to the parent before
				 * freeing the child, as the parent pointer
				 * lives inside the child */
				p = p->parent;

				urldb_destroy_path_node_content(q);
				free(q);

				q = p;
			}

			/* Frees either the node we moved sideways from, or
			 * root itself when the climb reached the top */
			urldb_destroy_path_node_content(q);
			free(q);
		}
	} while (p != root);
}


/**
 * Destroy a host tree
 *
 * Recursively destroys every child host, then this host's path trees,
 * the content of its root path node, its protection space list and
 * finally the host node itself.
 *
 * \param root Root node of tree to destroy
 */
static void urldb_destroy_host_tree(struct host_part *root)
{
	struct host_part *host, *next_host;
	struct path_data *path, *next_path;
	struct prot_space_data *space, *next_space;

	/* Child hosts: save the successor before each is freed */
	host = root->children;
	while (host != NULL) {
		next_host = host->next;
		urldb_destroy_host_tree(host);
		host = next_host;
	}

	/* Each top-level path subtree */
	path = root->paths.children;
	while (path != NULL) {
		next_path = path->next;
		urldb_destroy_path_tree(path);
		path = next_path;
	}

	/* The root path node is embedded in the host, so only its
	 * content is destroyed here */
	urldb_destroy_path_node_content(&root->paths);

	/* Protection space data */
	space = root->prot_space;
	while (space != NULL) {
		next_space = space->next;
		urldb_destroy_prot_space(space);
		space = next_space;
	}

	/* And ourselves */
	free(root->part);
	free(root);
}


/**
 * Free a search tree node and everything below it
 *
 * The shared `empty` sentinel marks absent children and is never freed.
 *
 * \param root Root node of the search tree to destroy; must not be the
 *             sentinel itself
 */
static void urldb_destroy_search_tree(struct search_node *root)
{
	struct search_node *left = root->left;
	struct search_node *right = root->right;

	/* Recurse into whichever subtrees actually exist */
	if (left != &empty)
		urldb_destroy_search_tree(left);

	if (right != &empty)
		urldb_destroy_search_tree(right);

	/* Both subtrees are gone; release this node */
	free(root);
}


/*************** External interface ***************/


/* exported interface documented in content/urldb.h */
void urldb_destroy(void)
{
	struct host_part *host, *next_host;
	int tree;

	/* Search trees: free each populated tree and point its slot back at
	 * the sentinel */
	for (tree = 0; tree != NUM_SEARCH_TREES; tree++) {
		if (search_trees[tree] == &empty)
			continue;

		urldb_destroy_search_tree(search_trees[tree]);
		search_trees[tree] = &empty;
	}

	/* Host database: destroy every top-level host, then wipe the root,
	 * which is a static object and is therefore cleared rather than freed */
	host = db_root.children;
	while (host != NULL) {
		next_host = host->next;
		urldb_destroy_host_tree(host);
		host = next_host;
	}
	memset(&db_root, 0, sizeof(db_root));

	/* Bloom filter, if one was ever created */
	if (url_bloom != NULL) {
		bloom_destroy(url_bloom);
		url_bloom = NULL;
	}
}


/* exported interface documented in netsurf/url_db.h */
nserror urldb_load(const char *filename)
{
#define MAXIMUM_URL_LENGTH 4096
	char s[MAXIMUM_URL_LENGTH];
	char host[256];
	struct host_part *h;
	int urls;
	int i;
	int version;
	int length;
	FILE *fp;

	assert(filename);

	NSLOG(netsurf, INFO, "Loading URL file %s", filename);

	if (url_bloom == NULL)
		url_bloom = bloom_create(BLOOM_SIZE);

	fp = fopen(filename, "r");
	if (!fp) {
		NSLOG(netsurf, INFO, "Failed to open file '%s' for reading",
		      filename);
		return NSERROR_NOT_FOUND;
	}

	if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
		fclose(fp);
		return NSERROR_NEED_DATA;
	}

	version = atoi(s);
	if (version < MIN_URL_FILE_VERSION) {
		NSLOG(netsurf, INFO, "Unsupported URL file version.");
		fclose(fp);
		return NSERROR_INVALID;
	}
	if (version > URL_FILE_VERSION) {
		NSLOG(netsurf, INFO, "Unknown URL file version.");
		fclose(fp);
		return NSERROR_INVALID;
	}

	while (fgets(host, sizeof host, fp)) {
		time_t hsts_expiry = 0;
		int hsts_include_sub_domains = 0;

		/* get the hostname */
		length = strlen(host) - 1;
		host[length] = '\0';

		/* skip data that has ended up with a host of '' */
		if (length == 0) {
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			urls = atoi(s);
			/* Eight fields/url */
			for (i = 0; i < (8 * urls); i++) {
				if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
					break;
			}
			continue;
		}

		if (version >= 107) {
			char *p = host;
			while (*p && *p != ' ') p++;
			while (*p && *p == ' ') { *p = '\0'; p++; }
			hsts_include_sub_domains = (*p == '1');
			while (*p && *p != ' ') p++;
			while (*p && *p == ' ') p++;
			nsc_snptimet(p, strlen(p), &hsts_expiry);
		}

		h = urldb_add_host(host);
		if (!h) {
			NSLOG(netsurf, INFO, "Failed adding host: '%s'", host);
			fclose(fp);
			return NSERROR_NOMEM;
		}
		h->hsts.expires = hsts_expiry;
		h->hsts.include_sub_domains = hsts_include_sub_domains;

		/* read number of URLs */
		if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
			break;
		urls = atoi(s);

		/* no URLs => try next host */
		if (urls == 0) {
			NSLOG(netsurf, INFO, "No URLs for '%s'", host);
			continue;
		}

		/* load the non-corrupt data */
		for (i = 0; i < urls; i++) {
			struct path_data *p = NULL;
			char scheme[64], ports[10];
			char url[64 + 3 + 256 + 6 + 4096 + 1 + 1];
			unsigned int port;
			bool is_file = false;
			nsurl *nsurl;
			lwc_string *scheme_lwc, *fragment_lwc;
			char *path_query;
			size_t len;

			if (!fgets(scheme, sizeof scheme, fp))
				break;
			length = strlen(scheme) - 1;
			scheme[length] = '\0';

			if (!fgets(ports, sizeof ports, fp))
				break;
			length = strlen(ports) - 1;
			ports[length] = '\0';
			port = atoi(ports);

			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			length = strlen(s) - 1;
			s[length] = '\0';

			if (!strcasecmp(host, "localhost") &&
			    !strcasecmp(scheme, "file"))
				is_file = true;

			snprintf(url, sizeof url, "%s://%s%s%s%s",
				 scheme,
				 /* file URLs have no host */
				 (is_file ? "" : host),
				 (port ? ":" : ""),
				 (port ? ports : ""),
				 s);

			/* TODO: store URLs in pre-parsed state, and make
			 *       a nsurl_load to generate the nsurl more
			 *       swiftly.
			 *       Need a nsurl_save too.
			 */
			if (nsurl_create(url, &nsurl) != NSERROR_OK) {
				NSLOG(netsurf, INFO, "Failed inserting '%s'",
				      url);
				fclose(fp);
				return NSERROR_NOMEM;
			}

			if (url_bloom != NULL) {
				uint32_t hash = nsurl_hash(nsurl);
				bloom_insert_hash(url_bloom, hash);
			}

			/* Copy and merge path/query strings */
			if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY,
				      &path_query, &len) != NSERROR_OK) {
				NSLOG(netsurf, INFO, "Failed inserting '%s'",
				      url);
				fclose(fp);
				return NSERROR_NOMEM;
			}

			scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME);
			fragment_lwc = nsurl_get_component(nsurl,
							   NSURL_FRAGMENT);
			p = urldb_add_path(scheme_lwc, port, h, path_query,
					   fragment_lwc, nsurl);
			if (!p) {
				NSLOG(netsurf, INFO, "Failed inserting '%s'",
				      url);
				fclose(fp);
				return NSERROR_NOMEM;
			}
			nsurl_unref(nsurl);
			lwc_string_unref(scheme_lwc);
			if (fragment_lwc != NULL)
				lwc_string_unref(fragment_lwc);

			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			if (p)
				p->urld.visits = (unsigned int)atoi(s);

			/* entry last use time */
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
				break;
			}
			if (p) {
				nsc_snptimet(s, strlen(s) - 1, &p->urld.last_visit);
			}

			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			if (p)
				p->urld.type = (content_type)atoi(s);

			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;


			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			length = strlen(s) - 1;
			if (p && length > 0) {
				s[length] = '\0';
				p->urld.title = malloc(length + 1);
				if (p->urld.title)
					memcpy(p->urld.title, s, length + 1);
			}
		}
	}

	fclose(fp);
	NSLOG(netsurf, INFO, "Successfully loaded URL file");
#undef MAXIMUM_URL_LENGTH

	return NSERROR_OK;
}

/* exported interface documented in netsurf/url_db.h */
nserror urldb_save(const char *filename)
{
	FILE *fp;
	int i;
	bool failed;

	assert(filename);

	fp = fopen(filename, "w");
	if (!fp) {
		NSLOG(netsurf, INFO, "Failed to open file '%s' for writing",
		      filename);
		return NSERROR_SAVE_FAILED;
	}

	/* file format version number */
	fprintf(fp, "%d\n", URL_FILE_VERSION);

	/* one pass over every search tree writes out all hosts */
	for (i = 0; i != NUM_SEARCH_TREES; i++) {
		urldb_save_search_tree(search_trees[i], fp);
	}

	/* The stream's error indicator records any write that failed above.
	 * fclose() flushes buffered data and can itself fail (e.g. disk
	 * full), so its result must be checked too. The stream is always
	 * closed, even when an earlier write failed. */
	failed = (ferror(fp) != 0);
	if (fclose(fp) != 0)
		failed = true;

	if (failed) {
		NSLOG(netsurf, INFO, "Failed writing file '%s'", filename);
		return NSERROR_SAVE_FAILED;
	}

	return NSERROR_OK;
}


/* exported interface documented in content/urldb.h */
nserror urldb_set_url_persistence(nsurl *url, bool persist)
{
	struct path_data *entry;

	assert(url);

	/* Only URLs already in the database can be flagged */
	entry = urldb_find_url(url);
	if (entry != NULL) {
		entry->persistent = persist;
		return NSERROR_OK;
	}

	return NSERROR_NOT_FOUND;
}


/* exported interface documented in content/urldb.h */
bool urldb_add_url(nsurl *url)
{
	struct host_part *h;
	struct path_data *p;
	lwc_string *scheme;
	lwc_string *port;
	lwc_string *host;
	lwc_string *fragment;
	const char *host_str;
	char *path_query = NULL;
	size_t len;
	bool match;
	unsigned int port_int;

	assert(url);

	/* The bloom filter is created lazily on first use */
	if (url_bloom == NULL)
		url_bloom = bloom_create(BLOOM_SIZE);

	if (url_bloom != NULL) {
		uint32_t hash = nsurl_hash(url);
		bloom_insert_hash(url_bloom, hash);
	}

	/* Copy and merge path/query strings */
	if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
	    NSERROR_OK) {
		return false;
	}
	assert(path_query != NULL);

	scheme = nsurl_get_component(url, NSURL_SCHEME);
	if (scheme == NULL) {
		free(path_query);
		return false;
	}

	host = nsurl_get_component(url, NSURL_HOST);
	if (host != NULL) {
		/* host_str points into the host string, so our reference is
		 * kept until urldb_add_host() has been called */
		host_str = lwc_string_data(host);

	} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
		   lwc_error_ok && match == true) {
		/* file: URLs have no host; file them under "localhost" */
		host_str = "localhost";

	} else {
		/* No host, and not a file: URL, so nowhere to store it */
		lwc_string_unref(scheme);
		free(path_query);
		return false;
	}

	fragment = nsurl_get_component(url, NSURL_FRAGMENT);

	/* A URL without an explicit port is recorded with port 0 */
	port = nsurl_get_component(url, NSURL_PORT);
	if (port != NULL) {
		port_int = atoi(lwc_string_data(port));
		lwc_string_unref(port);
	} else {
		port_int = 0;
	}

	/* Get host entry */
	h = urldb_add_host(host_str);

	/* host_str is not used beyond this point */
	if (host != NULL)
		lwc_string_unref(host);

	/* Get path entry */
	if (h != NULL) {
		/* NOTE(review): path_query is not freed after this call;
		 * presumably urldb_add_path takes ownership - confirm. */
		p = urldb_add_path(scheme,
				   port_int,
				   h,
				   path_query,
				   fragment,
				   url);
	} else {
		/* path_query never reached urldb_add_path: release it here */
		free(path_query);
		p = NULL;
	}

	lwc_string_unref(scheme);
	if (fragment != NULL)
		lwc_string_unref(fragment);

	return (p != NULL);
}


/* exported interface documented in content/urldb.h */
nserror urldb_set_url_title(nsurl *url, const char *title)
{
	struct path_data *entry;
	char *copy = NULL;

	assert(url);

	entry = urldb_find_url(url);
	if (entry == NULL)
		return NSERROR_NOT_FOUND;

	/* Take a private copy; a NULL title simply clears the stored one.
	 * The old title is left untouched if the copy cannot be made. */
	if (title != NULL) {
		copy = strdup(title);
		if (copy == NULL)
			return NSERROR_NOMEM;
	}

	free(entry->urld.title);
	entry->urld.title = copy;

	return NSERROR_OK;
}


/* exported interface documented in content/urldb.h */
nserror urldb_set_url_content_type(nsurl *url, content_type type)
{
	struct path_data *entry;

	assert(url);

	/* Only URLs already in the database can be updated */
	entry = urldb_find_url(url);
	if (entry != NULL) {
		entry->urld.type = type;
		return NSERROR_OK;
	}

	return NSERROR_NOT_FOUND;
}


/* exported interface documented in content/urldb.h */
nserror urldb_update_url_visit_data(nsurl *url)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry == NULL)
		return NSERROR_NOT_FOUND;

	/* Stamp the visit with the current time and count it */
	entry->urld.last_visit = time(NULL);
	entry->urld.visits += 1;

	return NSERROR_OK;
}


/* exported interface documented in content/urldb.h */
void urldb_reset_url_visit_data(nsurl *url)
{
	struct path_data *entry;

	assert(url);

	/* Unknown URLs are silently ignored */
	entry = urldb_find_url(url);
	if (entry != NULL) {
		entry->urld.last_visit = (time_t)0;
		entry->urld.visits = 0;
	}
}


/* exported interface documented in netsurf/url_db.h */
const struct url_data *urldb_get_url_data(nsurl *url)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry == NULL)
		return NULL;

	/* The internal record is handed out through the public type; the
	 * pointer refers to storage inside the database entry itself */
	return (const struct url_data *) &entry->urld;
}


/* exported interface documented in content/urldb.h */
nsurl *urldb_get_url(nsurl *url)
{
	struct path_data *entry;

	assert(url);

	/* Hand back the database's own nsurl for this entry; no additional
	 * reference is taken here */
	entry = urldb_find_url(url);

	return (entry != NULL) ? entry->url : NULL;
}


/* exported interface documented in netsurf/url_db.h */
void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth)
{
	struct path_data *p, *pi;
	struct host_part *h;
	struct prot_space_data *space, *space_alloc;
	char *realm_alloc, *auth_alloc;
	bool match;

	assert(url && realm && auth);

	/* add url, in case it's missing */
	urldb_add_url(url);

	p = urldb_find_url(url);

	if (!p)
		return;

	/* Search for host_part: climb to the parentless path node and
	 * reinterpret it as the host that contains it.
	 * NOTE(review): this relies on the root path_data being the first
	 * member of struct host_part - confirm against the definition. */
	for (pi = p; pi->parent != NULL; pi = pi->parent)
		;
	h = (struct host_part *)pi;

	/* Search if given URL belongs to a protection space we already know of. */
	for (space = h->prot_space; space; space = space->next) {
		if (!strcmp(space->realm, realm) &&
		    lwc_string_isequal(space->scheme, p->scheme,
				       &match) == lwc_error_ok &&
		    match == true &&
		    space->port == p->port)
			break;
	}

	if (space != NULL) {
		/* Overrule existing auth. The replacement is duplicated
		 * before the old value is released, so that an allocation
		 * failure leaves the stored credentials intact instead of
		 * replacing them with NULL. */
		auth_alloc = strdup(auth);
		if (auth_alloc == NULL)
			return;

		free(space->auth);
		space->auth = auth_alloc;
	} else {
		/* Create a new protection space. */
		space = space_alloc = malloc(sizeof(struct prot_space_data));
		realm_alloc = strdup(realm);
		auth_alloc = strdup(auth);

		/* All three allocations must succeed; otherwise undo them */
		if (!space_alloc || !realm_alloc || !auth_alloc) {
			free(space_alloc);
			free(realm_alloc);
			free(auth_alloc);
			return;
		}

		space->scheme = lwc_string_ref(p->scheme);
		space->port = p->port;
		space->realm = realm_alloc;
		space->auth = auth_alloc;
		/* push onto the front of the host's list */
		space->next = h->prot_space;
		h->prot_space = space;
	}

	/* Remember which protection space this URL belongs to */
	p->prot_space = space;
}


/* exported interface documented in netsurf/url_db.h */
const char *urldb_get_auth_details(nsurl *url, const char *realm)
{
	struct path_data *p, *node, *top;
	const struct host_part *h;
	const struct prot_space_data *space;
	bool match;

	assert(url);

	/* The URL has to be in the database for the lookup below to work */
	urldb_add_url(url);

	p = urldb_find_url(url);
	if (p == NULL)
		return NULL;

	/* Walk from the URL's node up to the top of its path tree. The
	 * first node with a protection space attached supplies the answer;
	 * the last node visited is remembered as the top. */
	top = p;
	node = p;
	while (node != NULL) {
		if (node->prot_space) {
			return node->prot_space->auth;
		}
		top = node;
		node = node->parent;
	}

	/* Without a realm the protection space cannot be identified */
	if (realm == NULL)
		return NULL;

	/* The top path node is treated as the host that contains it */
	h = (const struct host_part *)top;

	/* Look for a space with the same realm, scheme and port */
	for (space = h->prot_space; space != NULL; space = space->next) {
		if (strcmp(space->realm, realm) != 0)
			continue;

		if (lwc_string_isequal(space->scheme, p->scheme,
				       &match) != lwc_error_ok ||
		    match != true)
			continue;

		if (space->port != p->port)
			continue;

		/* Cache the association on the URL's node for next time */
		p->prot_space = space;
		return p->prot_space->auth;
	}

	return NULL;
}


/* exported interface documented in netsurf/url_db.h */
void urldb_set_cert_permissions(nsurl *url, bool permit)
{
	struct path_data *node;
	struct host_part *host;

	assert(url);

	/* Make sure the URL is present before looking it up */
	urldb_add_url(url);

	node = urldb_find_url(url);
	if (node == NULL)
		return;

	/* Climb to the parentless path node */
	while (node->parent != NULL)
		node = node->parent;
	assert(node);

	/* That node is treated as the host containing it; the permission is
	 * recorded per host */
	host = (struct host_part *)node;
	host->permit_invalid_certs = permit;
}


/* exported interface documented in content/urldb.h */
bool urldb_get_cert_permissions(nsurl *url)
{
	struct path_data *node;
	const struct host_part *host;

	assert(url);

	/* Unknown URLs never permit invalid certificates */
	node = urldb_find_url(url);
	if (node == NULL)
		return false;

	/* Climb to the parentless path node */
	while (node->parent != NULL)
		node = node->parent;
	assert(node);

	/* That node is treated as the host containing it */
	host = (const struct host_part *)node;

	return host->permit_invalid_certs;
}


/* exported interface documented in content/urldb.h */
bool urldb_set_hsts_policy(struct nsurl *url, const char *header)
{
	struct path_data *node;
	struct host_part *hp;
	lwc_string *host;
	time_t now = time(NULL);
	http_strict_transport_security *sts;
	uint32_t max_age = 0;
	nserror error;
	bool ignore_host;

	assert(url);

	host = nsurl_get_component(url, NSURL_HOST);
	if (host == NULL) {
		/* No host part: ignore */
		return true;
	}

	/* IP-address hosts and blank hosts are ignored as well */
	ignore_host = urldb__host_is_ip_address(lwc_string_data(host)) ||
		      lwc_string_length(host) == 0;
	lwc_string_unref(host);
	if (ignore_host)
		return true;

	/* add url, in case it's missing */
	urldb_add_url(url);

	node = urldb_find_url(url);
	if (node == NULL)
		return false;

	/* Climb to the parentless path node, which is treated as the host */
	while (node->parent != NULL)
		node = node->parent;
	assert(node);

	hp = (struct host_part *)node;
	if (hp->permit_invalid_certs) {
		/* Transport is tainted: ignore */
		return true;
	}

	error = http_parse_strict_transport_security(header, &sts);
	if (error != NSERROR_OK) {
		/* Parse failed: ignore */
		return true;
	}

	hp->hsts.include_sub_domains =
		http_strict_transport_security_include_subdomains(sts);

	max_age = http_strict_transport_security_max_age(sts);
	if (max_age != 0) {
		/* The expiry is only ever moved later, never earlier */
		if ((time_t) (now + max_age) > hp->hsts.expires) {
			hp->hsts.expires = now + max_age;
		}
	} else {
		/* A max-age of zero clears the stored policy */
		hp->hsts.expires = 0;
		hp->hsts.include_sub_domains = false;
	}

	http_strict_transport_security_destroy(sts);

	return true;
}


/* exported interface documented in content/urldb.h */
bool urldb_get_hsts_enabled(struct nsurl *url)
{
	struct path_data *node;
	const struct host_part *hp;
	lwc_string *host;
	time_t now = time(NULL);
	bool unusable_host;

	assert(url);

	host = nsurl_get_component(url, NSURL_HOST);
	if (host == NULL) {
		/* No host part: not enabled */
		return false;
	}

	/* Nor is it enabled for IP-address hosts or blank hosts */
	unusable_host = urldb__host_is_ip_address(lwc_string_data(host)) ||
			lwc_string_length(host) == 0;
	lwc_string_unref(host);
	if (unusable_host)
		return false;

	/* The URL must exist in the db in order to find HSTS policy, since
	 * we search up the tree from the URL node, and policy from further
	 * up may also apply. */
	urldb_add_url(url);

	node = urldb_find_url(url);
	if (node == NULL)
		return false;

	/* Climb to the parentless path node, which is treated as the host */
	while (node->parent != NULL)
		node = node->parent;
	assert(node);

	hp = (const struct host_part *)node;

	/* The host's own record applies as long as it has not expired */
	if (hp->hsts.expires > now)
		return true;

	/* A parent domain's record applies only if it is unexpired and
	 * covers subdomains; the database root is not a real host */
	hp = hp->parent;
	while (hp != NULL && hp != &db_root) {
		if (hp->hsts.expires > now && hp->hsts.include_sub_domains)
			return true;

		hp = hp->parent;
	}

	return false;
}


/* exported interface documented in netsurf/url_db.h */
void
urldb_iterate_partial(const char *prefix,
		      bool (*callback)(nsurl *url, const struct url_data *data))
{
	char host[256];
	char buf[260]; /* max domain + "www." */
	const char *slash, *scheme_sep;
	struct search_node *tree;
	const struct host_part *h;
	int len;

	assert(prefix && callback);

	/* Drop any leading "scheme://" */
	scheme_sep = strstr(prefix, "://");
	if (scheme_sep != NULL)
		prefix = scheme_sep + 3;

	slash = strchr(prefix, '/');
	tree = urldb_get_search_tree(prefix);

	if (slash == NULL) {
		/* No slash: the prefix is a partial host name */
		len = strlen(prefix);

		if (!urldb_iterate_partial_host(tree, prefix, callback))
			return;

		/* Already starts with "www.": nothing more to try */
		if (len > 3 && strncasecmp(prefix, "www.", 4) == 0)
			return;

		/* now look for www.prefix */
		snprintf(buf, sizeof buf, "www.%s", prefix);
		(void) urldb_iterate_partial_host(
				search_trees[ST_DN + 'w' - 'a'],
				buf, callback);
		return;
	}

	/* A slash in the input means a complete host followed by a partial
	 * path: isolate the host part */
	snprintf(host, sizeof host, "%.*s", (int) (slash - prefix), prefix);

	h = urldb_search_find(tree, host);
	if (h == NULL) {
		len = slash - prefix;

		/* Host unknown as typed. Retry with "www." prepended, unless
		 * the host already starts with it. */
		if (len > 3 && strncasecmp(host, "www.", 4) == 0)
			return;

		snprintf(buf, sizeof buf, "www.%s", host);
		h = urldb_search_find(search_trees[ST_DN + 'w' - 'a'], buf);
		if (h == NULL)
			return;
	}

	if (h->paths.children) {
		/* Have paths, iterate them */
		urldb_iterate_partial_path(&h->paths, slash + 1, callback);
	}
}


/* exported interface documented in netsurf/url_db.h */
void
urldb_iterate_entries(bool (*callback)(nsurl *url, const struct url_data *data))
{
	int tree = 0;

	assert(callback);

	/* Visit the search trees in order, stopping as soon as the
	 * iteration of one of them reports that it should not continue */
	while (tree < NUM_SEARCH_TREES &&
	       urldb_iterate_entries_host(search_trees[tree],
					  callback,
					  NULL)) {
		tree++;
	}
}


/* exported interface documented in content/urldb.h */
void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data))
{
	int tree = 0;

	assert(callback);

	/* Same walk as for URL entries, but passing only the cookie callback;
	 * stops as soon as one tree's iteration reports not to continue */
	while (tree < NUM_SEARCH_TREES &&
	       urldb_iterate_entries_host(search_trees[tree],
					  NULL, callback)) {
		tree++;
	}
}


/* exported interface documented in content/urldb.h */
bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
{
	const char *cur = header, *end;
	lwc_string *path, *host, *scheme;
	nsurl *urlt;
	bool match;
	size_t header_len;

	assert(url && header);

	/* Get defragmented URL, as 'urlt' */
	if (nsurl_defragment(url, &urlt) != NSERROR_OK)
		return false;

	/* The scheme, path and host of the URL are all required; each
	 * failure path releases whatever has been acquired so far */
	scheme = nsurl_get_component(url, NSURL_SCHEME);
	if (scheme == NULL) {
		nsurl_unref(urlt);
		return false;
	}

	path = nsurl_get_component(url, NSURL_PATH);
	if (path == NULL) {
		lwc_string_unref(scheme);
		nsurl_unref(urlt);
		return false;
	}

	host = nsurl_get_component(url, NSURL_HOST);
	if (host == NULL) {
		lwc_string_unref(path);
		lwc_string_unref(scheme);
		nsurl_unref(urlt);
		return false;
	}

	if (referer) {
		lwc_string *rhost;

		/* Ensure that url's host name domain matches
		 * referer's (4.3.5) */
		rhost = nsurl_get_component(referer, NSURL_HOST);
		if (rhost == NULL) {
			goto error;
		}

		/* Domain match host names */
		if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok &&
		    match == false) {
			const char *hptr;
			const char *rptr;
			const char *dot;
			const char *host_data = lwc_string_data(host);
			const char *rhost_data = lwc_string_data(rhost);

			/* Ensure neither host nor rhost are IP addresses */
			if (urldb__host_is_ip_address(host_data) ||
			    urldb__host_is_ip_address(rhost_data)) {
				/* IP address, so no partial match */
				lwc_string_unref(rhost);
				goto error;
			}

			/* Not exact match, so try the following:
			 *
			 * 1) Find the longest common suffix of host and rhost
			 *    (may be all of host/rhost)
			 * 2) Discard characters from the start of the suffix
			 *    until the suffix starts with a dot
			 *    (prevents foobar.com matching bar.com)
			 * 3) Ensure the suffix is non-empty and contains
			 *    embedded dots (to avoid permitting .com as a
			 *    suffix)
			 *
			 * Note that the above in no way resembles the
			 * domain matching algorithm found in RFC2109.
			 * It does, however, model the real world rather
			 * more accurately.
			 */

			/** \todo In future, we should consult a TLD service
			 * instead of just looking for embedded dots.
			 */

			hptr = host_data + lwc_string_length(host) - 1;
			rptr = rhost_data + lwc_string_length(rhost) - 1;

			/* 1 */
			while (hptr >= host_data && rptr >= rhost_data) {
				if (*hptr != *rptr)
					break;
				hptr--;
				rptr--;
			}
			/* Ensure we end up pointing at the start of the
			 * common suffix. The above loop will exit pointing
			 * to the byte before the start of the suffix. */
			hptr++;

			/* 2 */
			while (*hptr != '\0' && *hptr != '.')
				hptr++;

			/* 3 */
			if (*hptr == '\0' ||
			    (dot = strchr(hptr + 1, '.')) == NULL ||
			    *(dot + 1) == '\0') {
				lwc_string_unref(rhost);
				goto error;
			}
		}

		lwc_string_unref(rhost);
	}

	/* Parsing stops two bytes short of the end (trailing CRLF). A header
	 * shorter than that must not move 'end' to before the start of the
	 * buffer; the loop below still runs exactly once in that case. */
	header_len = strlen(cur);
	end = cur + (header_len >= 2 ? header_len - 2 : 0);

	do {
		struct cookie_internal_data *c;
		char *dot;
		size_t len;
#ifdef WITH_NSPSL
		const char *suffix;
#endif

		/* urldb_parse_cookie advances 'cur' past what it consumed */
		c = urldb_parse_cookie(url, &cur);
		if (!c) {
			/* failed => stop parsing */
			goto error;
		}

		/* validate cookie */

		/* 4.2.2:i Cookie must have NAME and VALUE */
		if (!c->name || !c->value) {
			urldb_free_cookie(c);
			goto error;
		}

		/* 4.3.2:i Cookie path must be a prefix of URL path */
		len = strlen(c->path);
		if (len > lwc_string_length(path) ||
		    strncmp(c->path, lwc_string_data(path),
			    len) != 0) {
			urldb_free_cookie(c);
			goto error;
		}

#ifdef WITH_NSPSL
		/* check domain is not a public suffix */
		dot = c->domain;
		if (*dot == '.') {
			dot++;
		}
		suffix = nspsl_getpublicsuffix(dot);
		if (suffix == NULL) {
			NSLOG(netsurf, INFO,
			      "domain %s was a public suffix domain", dot);
			urldb_free_cookie(c);
			goto error;
		}
#else
		/* 4.3.2:ii Cookie domain must contain embedded dots */
		dot = strchr(c->domain + 1, '.');
		if (!dot || *(dot + 1) == '\0') {
			/* no embedded dots */
			urldb_free_cookie(c);
			goto error;
		}
#endif

		/* Domain match fetch host with cookie domain */
		if (strcasecmp(lwc_string_data(host), c->domain) != 0) {
			int hlen, dlen;
			char *domain = c->domain;

			/* c->domain must be a domain cookie here because:
			 * c->domain is either:
			 *   + specified in the header as a domain cookie
			 *     (non-domain cookies in the header are ignored
			 *      by urldb_parse_cookie / urldb_parse_avpair)
			 *   + defaulted to the URL's host part
			 *     (by urldb_parse_cookie if no valid domain was
			 *      specified in the header)
			 *
			 * The latter will pass the strcasecmp above, which
			 * leaves the former (i.e. a domain cookie)
			 */
			assert(c->domain[0] == '.');

			/* 4.3.2:iii */
			if (urldb__host_is_ip_address(lwc_string_data(host))) {
				/* IP address, so no partial match */
				urldb_free_cookie(c);
				goto error;
			}

			hlen = lwc_string_length(host);
			dlen = strlen(c->domain);

			if (hlen <= dlen && hlen != dlen - 1) {
				/* Partial match not possible */
				urldb_free_cookie(c);
				goto error;
			}

			if (hlen == dlen - 1) {
				/* Relax matching to allow
				 * host a.com to match .a.com */
				domain++;
				dlen--;
			}

			/* The tail of the host must equal the cookie domain */
			if (strcasecmp(lwc_string_data(host) + (hlen - dlen),
				       domain)) {
				urldb_free_cookie(c);
				goto error;
			}

			/* 4.3.2:iv Ensure H contains no dots
			 *
			 * If you believe the spec, H should contain no
			 * dots in _any_ cookie. Unfortunately, however,
			 * reality differs in that many sites send domain
			 * cookies of the form .foo.com from hosts such
			 * as bar.bat.foo.com and then expect domain
			 * matching to work. Thus we have to do what they
			 * expect, regardless of any potential security
			 * implications.
			 *
			 * This is what code conforming to the spec would
			 * look like:
			 *
			 * for (int i = 0; i < (hlen - dlen); i++) {
			 *	if (host[i] == '.') {
			 *		urldb_free_cookie(c);
			 *		goto error;
			 *	}
			 * }
			 */
		}

		/* Now insert into database.
		 * NOTE(review): the cookie is not freed here on failure;
		 * presumably urldb_insert_cookie disposes of it - confirm. */
		if (!urldb_insert_cookie(c, scheme, urlt))
			goto error;
	} while (cur < end);

	lwc_string_unref(host);
	lwc_string_unref(path);
	lwc_string_unref(scheme);
	nsurl_unref(urlt);

	return true;

error:
	lwc_string_unref(host);
	lwc_string_unref(path);
	lwc_string_unref(scheme);
	nsurl_unref(urlt);

	return false;
}


/* exported interface documented in content/urldb.h */
char *urldb_get_cookie(nsurl *url, bool include_http_only)
{
	const struct path_data *p, *q;
	const struct host_part *h;
	lwc_string *path_lwc;
	struct cookie_internal_data *c;
	int count = 0, version = COOKIE_RFC2965;
	struct cookie_internal_data **matched_cookies;
	int matched_cookies_size = 20;
	int ret_alloc = 4096, ret_used = 1;
	const char *path;
	char *ret;
	lwc_string *scheme;
	time_t now;
	int i;
	bool match;

	assert(url != NULL);

	/* The URL must exist in the db in order to find relevant cookies, since
	 * we search up the tree from the URL node, and cookies from further
	 * up also apply. */
	urldb_add_url(url);

	p = urldb_find_url(url);
	if (!p)
		return NULL;

	scheme = p->scheme;

	matched_cookies = malloc(matched_cookies_size *
				 sizeof(struct cookie_internal_data *));
	if (!matched_cookies)
		return NULL;

#define GROW_MATCHED_COOKIES						\
	do {								\
		if (count == matched_cookies_size) {			\
			struct cookie_internal_data **temp;		\
			temp = realloc(matched_cookies,			\
				       (matched_cookies_size + 20) *	\
				       sizeof(struct cookie_internal_data *)); \
									\
			if (temp == NULL) {				\
				free(ret);				\
				free(matched_cookies);			\
				return NULL;				\
			}						\
									\
			matched_cookies = temp;				\
			matched_cookies_size += 20;			\
		}							\
	} while(0)

	ret = malloc(ret_alloc);
	if (!ret) {
		free(matched_cookies);
		return NULL;
	}

	ret[0] = '\0';

	path_lwc = nsurl_get_component(url, NSURL_PATH);
	if (path_lwc == NULL) {
		free(ret);
		free(matched_cookies);
		return NULL;
	}
	path = lwc_string_data(path_lwc);
	lwc_string_unref(path_lwc);

	now = time(NULL);

	if (*(p->segment) != '\0') {
		/* Match exact path, unless directory, when prefix matching
		 * will handle this case for us. */
		for (q = p->parent->children; q; q = q->next) {
			if (strcmp(q->segment, p->segment))
				continue;

			/* Consider all cookies associated with
			 * this exact path */
			for (c = q->cookies; c; c = c->next) {
				if (c->expires != -1 && c->expires < now)
					/* cookie has expired => ignore */
					continue;

				if (c->secure && lwc_string_isequal(
					    q->scheme,
					    corestring_lwc_https,
					    &match) &&
				    match == false)
					/* secure cookie for insecure host.
					 * ignore */
					continue;

				if (c->http_only && !include_http_only)
					/* Ignore HttpOnly */
					continue;

				matched_cookies[count++] = c;

				GROW_MATCHED_COOKIES;

				if (c->version < (unsigned int)version)
					version = c->version;

				c->last_used = now;

				cookie_manager_add((struct cookie_data *)c);
			}
		}
	}

	/* Now consider cookies whose paths prefix-match ours */
	for (p = p->parent; p; p = p->parent) {
		/* Find directory's path entry(ies) */
		/* There are potentially multiple due to differing schemes */
		for (q = p->children; q; q = q->next) {
			if (*(q->segment) != '\0')
				continue;

			for (c = q->cookies; c; c = c->next) {
				if (c->expires != -1 && c->expires < now)
					/* cookie has expired => ignore */
					continue;

				if (c->secure && lwc_string_isequal(
					    q->scheme,
					    corestring_lwc_https,
					    &match) &&
				    match == false)
					/* Secure cookie for insecure server
					 * => ignore */
					continue;

				matched_cookies[count++] = c;

				GROW_MATCHED_COOKIES;

				if (c->version < (unsigned int) version)
					version = c->version;

				c->last_used = now;

				cookie_manager_add((struct cookie_data *)c);
			}
		}

		if (!p->parent) {
			/* No parent, so bail here. This can't go in
			 * the loop exit condition as we also want to
			 * process the top-level node.
			 *
			 * If p->parent is NULL then p->cookies are
			 * the domain cookies and thus we don't even
			 * try matching against them.
			 */
			break;
		}

		/* Consider p itself - may be the result of Path=/foo */
		for (c = p->cookies; c; c = c->next) {
			if (c->expires != -1 && c->expires < now)
				/* cookie has expired => ignore */
				continue;

			/* Ensure cookie path is a prefix of the resource */
			if (strncmp(c->path, path, strlen(c->path)) != 0)
				/* paths don't match => ignore */
				continue;

			if (c->secure && lwc_string_isequal(p->scheme,
							    corestring_lwc_https,
							    &match) &&
			    match == false)
				/* Secure cookie for insecure server
				 * => ignore */
				continue;

			matched_cookies[count++] = c;

			GROW_MATCHED_COOKIES;

			if (c->version < (unsigned int) version)
				version = c->version;

			c->last_used = now;

			cookie_manager_add((struct cookie_data *)c);
		}

	}

	/* Finally consider domain cookies for hosts which domain match ours */
	for (h = (const struct host_part *)p; h && h != &db_root;
	     h = h->parent) {
		for (c = h->paths.cookies; c; c = c->next) {
			if (c->expires != -1 && c->expires < now)
				/* cookie has expired => ignore */
				continue;

			/* Ensure cookie path is a prefix of the resource */
			if (strncmp(c->path, path, strlen(c->path)) != 0)
				/* paths don't match => ignore */
				continue;

			if (c->secure && lwc_string_isequal(scheme,
							    corestring_lwc_https,
							    &match) &&
			    match == false)
				/* secure cookie for insecure host. ignore */
				continue;

			matched_cookies[count++] = c;

			GROW_MATCHED_COOKIES;

			if (c->version < (unsigned int)version)
				version = c->version;

			c->last_used = now;

			cookie_manager_add((struct cookie_data *)c);
		}
	}

	if (count == 0) {
		/* No cookies found */
		free(ret);
		free(matched_cookies);
		return NULL;
	}

	/* and build output string */
	if (version > COOKIE_NETSCAPE) {
		sprintf(ret, "$Version=%d", version);
		ret_used = strlen(ret) + 1;
	}

	for (i = 0; i < count; i++) {
		if (!urldb_concat_cookie(matched_cookies[i], version,
					 &ret_used, &ret_alloc, &ret)) {
			free(ret);
			free(matched_cookies);
			return NULL;
		}
	}

	if (version == COOKIE_NETSCAPE) {
		/* Old-style cookies => no version & skip "; " */
		memmove(ret, ret + 2, ret_used - 2);
		ret_used -= 2;
	}

	/* Now, shrink the output buffer to the required size */
	{
		char *temp = realloc(ret, ret_used);
		if (!temp) {
			free(ret);
			free(matched_cookies);
			return NULL;
		}

		ret = temp;
	}

	free(matched_cookies);

	return ret;

#undef GROW_MATCHED_COOKIES
}


/* exported interface documented in content/urldb.h */
void urldb_delete_cookie(const char *domain, const char *path,
			 const char *name)
{
	/* The request is forwarded unchanged to the host-tree helper,
	 * with the database root as the starting node.
	 */
	struct host_part *tree_root = &db_root;

	urldb_delete_cookie_hosts(domain, path, name, tree_root);
}


/* exported interface documented in content/urldb.h */
/*
 * Implementation notes:
 *
 * The file is read one line at a time.  Lines starting with '#' and
 * blank lines are skipped.  Everything is ignored until a "Version:"
 * line has been seen; a version below MIN_COOKIE_FILE_VERSION stops
 * loading altogether.
 *
 * Every other line holds one cookie as tab separated fields, parsed in
 * this order: version, domain, domain-from-set flag, path,
 * path-from-set flag, secure, http-only (file version > 101 only),
 * expires, last used, no-destroy, name, value, value-was-quoted (file
 * version > 100 only), scheme, url, comment (may be empty).
 *
 * A line with too few fields is logged and skipped.  Memory exhaustion,
 * an unparseable URL or a failed database insertion stops loading.
 */
void urldb_load_cookies(const char *filename)
{
	FILE *fp;
	char s[16*1024]; /* buffer for one line of the cookie file */

	assert(filename);

	fp = fopen(filename, "r");
	if (!fp)
		return;

/* Both helpers work on the cursor `p` and the end-of-line pointer `end`
 * declared in the loop body below.  They are plain brace blocks rather
 * than do { } while (0) on purpose: the `continue` has to abandon the
 * current line of the enclosing while loop.
 *
 * FIND_T advances p to the next tab, terminates the field there and
 * steps past it.  SKIP_T steps over any run of tabs.  Either one gives
 * up on the line if the end of the input is reached.
 */
#define FIND_T {				\
		for (; *p && *p != '\t'; p++)	\
			; /* do nothing */	\
		if (p >= end) {			\
			NSLOG(netsurf, INFO, "Overran input");	\
			continue;		\
		}				\
		*p++ = '\0';			\
	}

#define SKIP_T {				\
		for (; *p && *p == '\t'; p++)	\
			; /* do nothing */	\
		if (p >= end) {			\
			NSLOG(netsurf, INFO, "Overran input");	\
			continue;		\
		}				\
	}

	while (fgets(s, sizeof s, fp)) {
		char *p = s, *end = NULL,
			*domain, *path, *name, *value, *scheme, *url,
			*comment;
		int version, domain_specified, path_specified,
			secure, http_only, no_destroy, value_quoted;
		time_t expires, last_used;
		struct cookie_internal_data *c;
		size_t len;

		if (s[0] == 0 || s[0] == '#')
			/* Skip empty input or comments */
			continue;

		/* Lose the terminating newline, but only if there is
		 * one: the last line of a file (or an over-long line)
		 * has none, and blindly chopping the final character
		 * would corrupt the last field.
		 */
		len = strlen(s);
		if (len > 0 && s[len - 1] == '\n')
			s[--len] = '\0';
		end = s + len;

		if (len == 0)
			/* Blank line: nothing to parse */
			continue;

		/* Look for file version first
		 * (all input is ignored until this is read)
		 */
		if (strncasecmp(s, "Version:", 8) == 0) {
			FIND_T; SKIP_T; loaded_cookie_file_version = atoi(p);

			if (loaded_cookie_file_version <
			    MIN_COOKIE_FILE_VERSION) {
				NSLOG(netsurf, INFO,
				      "Unsupported Cookie file version");
				break;
			}

			continue;
		} else if (loaded_cookie_file_version == 0) {
			/* Haven't yet seen version; skip this input */
			continue;
		}

		/* One cookie/line */

		/* Parse input.  Each FIND_T terminates the field that the
		 * preceding assignment/conversion refers to; the first
		 * field starts at s itself.
		 */
		FIND_T; version = atoi(s);
		SKIP_T; domain = p; FIND_T;
		SKIP_T; domain_specified = atoi(p); FIND_T;
		SKIP_T; path = p; FIND_T;
		SKIP_T; path_specified = atoi(p); FIND_T;
		SKIP_T; secure = atoi(p); FIND_T;
		if (loaded_cookie_file_version > 101) {
			/* Introduced in version 1.02 */
			SKIP_T; http_only = atoi(p); FIND_T;
		} else {
			http_only = 0;
		}
		/* Timestamps use strtol: unlike atoi it has defined
		 * behaviour when the stored number is out of range.
		 */
		SKIP_T; expires = (time_t)strtol(p, NULL, 10); FIND_T;
		SKIP_T; last_used = (time_t)strtol(p, NULL, 10); FIND_T;
		SKIP_T; no_destroy = atoi(p); FIND_T;
		SKIP_T; name = p; FIND_T;
		SKIP_T; value = p; FIND_T;
		if (loaded_cookie_file_version > 100) {
			/* Introduced in version 1.01 */
			SKIP_T;	value_quoted = atoi(p); FIND_T;
		} else {
			value_quoted = 0;
		}
		SKIP_T; scheme = p; FIND_T;
		SKIP_T; url = p; FIND_T;

		/* Comment may have no content, so don't
		 * use macros as they'll break */
		for (; *p && *p == '\t'; p++)
			; /* do nothing */
		comment = p;

		assert(p <= end);

		/* Now create cookie */
		c = malloc(sizeof *c);
		if (!c)
			break;

		c->name = strdup(name);
		c->value = strdup(value);
		c->value_was_quoted = value_quoted;
		c->comment = strdup(comment);
		c->domain_from_set = domain_specified;
		c->domain = strdup(domain);
		c->path_from_set = path_specified;
		c->path = strdup(path);
		c->expires = expires;
		c->last_used = last_used;
		c->secure = secure;
		c->http_only = http_only;
		c->version = version;
		c->no_destroy = no_destroy;

		/* All string members are assigned (possibly NULL) before
		 * this check, so the cookie can be released as a whole.
		 */
		if (!(c->name && c->value && c->comment &&
		      c->domain && c->path)) {
			urldb_free_cookie(c);
			break;
		}

		if (c->domain[0] != '.') {
			/* Host cookie: inserted together with the scheme
			 * and URL recorded on this line.
			 */
			lwc_string *scheme_lwc = NULL;
			nsurl *url_nsurl = NULL;

			if (scheme[0] == 'u') {
				/* The file is external input, so a bad
				 * line must not abort the program (and an
				 * assert would vanish under NDEBUG).
				 * NOTE(review): 'u' presumably marks an
				 * "unused" placeholder written by the
				 * save code - confirm against the saver.
				 */
				NSLOG(netsurf, INFO,
				      "Host cookie has no usable scheme; skipping");
				urldb_free_cookie(c);
				continue;
			}

			if (nsurl_create(url, &url_nsurl) != NSERROR_OK) {
				urldb_free_cookie(c);
				break;
			}
			scheme_lwc = nsurl_get_component(url_nsurl,
							 NSURL_SCHEME);

			/* And insert it into database */
			if (!urldb_insert_cookie(c, scheme_lwc, url_nsurl)) {
				/* Cookie freed for us */
				nsurl_unref(url_nsurl);
				lwc_string_unref(scheme_lwc);
				break;
			}
			nsurl_unref(url_nsurl);
			lwc_string_unref(scheme_lwc);

		} else {
			/* Domain cookie: no scheme or URL is supplied */
			if (!urldb_insert_cookie(c, NULL, NULL)) {
				/* Cookie freed for us */
				break;
			}
		}
	}

#undef SKIP_T
#undef FIND_T

	fclose(fp);
}


/* exported interface documented in content/urldb.h */
void urldb_save_cookies(const char *filename)
{
	int cookie_file_version;
	FILE *out;

	assert(filename);

	/* Write the newer of the current file format version and the
	 * version of the file that was loaded.
	 */
	cookie_file_version = COOKIE_FILE_VERSION;
	if (loaded_cookie_file_version > cookie_file_version)
		cookie_file_version = loaded_cookie_file_version;

	out = fopen(filename, "w");
	if (out == NULL)
		return;

	/* Explanatory header; every line is a '#' comment */
	fprintf(out, "# NetSurf cookies file.\n"
		"#\n"
		"# Lines starting with a '#' are comments, "
		"blank lines are ignored.\n"
		"#\n"
		"# All lines prior to \"Version:\t%d\" are discarded.\n"
		"#\n"
		"# Version\tDomain\tDomain from Set-Cookie\tPath\t"
		"Path from Set-Cookie\tSecure\tHTTP-Only\tExpires\tLast used\t"
		"No destroy\tName\tValue\tValue was quoted\tScheme\t"
		"URL\tComment\n",
		cookie_file_version);

	/* The version line itself, then the cookies of the whole tree */
	fprintf(out, "Version:\t%d\n", cookie_file_version);
	urldb_save_cookie_hosts(out, &db_root);

	fclose(out);
}


/* exported interface documented in netsurf/url_db.h */
void urldb_dump(void)
{
	int tree = 0;

	/* Host tree first, starting from the database root */
	urldb_dump_hosts(&db_root);

	/* Then every search tree in index order; the second argument
	 * is always passed as 0.
	 */
	while (tree != NUM_SEARCH_TREES) {
		urldb_dump_search(search_trees[tree], 0);
		tree++;
	}
}