netsurf/content/urldb.c
2022-10-23 17:22:56 +01:00

4500 lines
99 KiB
C

/*
* Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk>
* Copyright 2009 John Tytgat <joty@netsurf-browser.org>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file
* Unified URL information database implementation
*
* URLs are stored in a tree-based structure as follows:
*
* The host component is extracted from each URL and, if a FQDN, split on
* every '.'. The tree is constructed by inserting each FQDN segment in
* reverse order. Duplicate nodes are merged.
*
* If the host part of an URL is an IP address, then this is added to the
* tree verbatim (as if it were a TLD).
*
* This provides something looking like:
*
*                       root (a sentinel)
*                               |
*       -------------------------------------------------
*       |       |       |       |       |       |       |
*      com     edu     gov  127.0.0.1  net     org     uk     TLDs
*       |       |       |               |       |       |
*    google    ...     ...             ...     ...     co     2LDs
*       |                                               |
*      www                                             bbc     Hosts/Subdomains
*                                                       |
*                                                      www     ...
*
* Each of the nodes in this tree is a struct host_part. This stores the
* FQDN segment (or IP address) with which the node is concerned. Each node
* may contain further information about paths on a host (struct path_data)
* or SSL certificate processing on a host-wide basis
* (host_part::permit_invalid_certs).
*
* Path data is concerned with storing various metadata about the path in
* question. This includes global history data, HTTP authentication details
* and any associated HTTP cookies. This is stored as a tree of path segments
* hanging off the relevant host_part node.
*
* Therefore, to find the last visited time of the URL
* http://www.example.com/path/to/resource.html, the FQDN tree would be
* traversed in the order root -> "com" -> "example" -> "www". The "www"
* node would have attached to it a tree of struct path_data:
*
*        (sentinel)
*            |
*           path
*            |
*           to
*            |
*      resource.html
*
* This represents the absolute path "/path/to/resource.html". The leaf node
* "resource.html" contains the last visited time of the resource.
*
* The mechanism described above is, however, not particularly conducive to
* fast searching of the database for a given URL (or URLs beginning with a
* given prefix). Therefore, an ancillary data structure is used to enable
* fast searching. This structure simply reflects the contents of the
* database, with entries being added/removed at the same time as for the
* core database. In order to ensure that degenerate cases are kept to a
* minimum, we use an AAtree. This is an approximation of a Red-Black tree
* with similar performance characteristics, but with a significantly
* simpler implementation. Entries in this tree comprise pointers to the
* leaf nodes of the host tree described above.
*
* REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of
* non-normalised URLs with urldb will result in undefined behaviour and
* potential crashes.
*/
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#ifdef WITH_NSPSL
#include <nspsl.h>
#endif
#include "utils/inet.h"
#include "utils/nsoption.h"
#include "utils/log.h"
#include "utils/corestrings.h"
#include "utils/url.h"
#include "utils/utils.h"
#include "utils/bloom.h"
#include "utils/time.h"
#include "utils/nsurl.h"
#include "utils/ascii.h"
#include "utils/http.h"
#include "netsurf/bitmap.h"
#include "desktop/cookie_manager.h"
#include "content/content.h"
#include "content/urldb.h"
#ifdef WITH_AMISSL
/* AmiSSL needs everything to be using bsdsocket directly to avoid conflicts */
#include <proto/bsdsocket.h>
#endif
/**
* Internal representation of a single cookie.
*
* Cookies form a doubly linked list hanging off a struct path_data
* (path_data::cookies / path_data::cookies_end).
*
* \warning This *must* be kept in sync with the public interface in
* netsurf/cookie_db.h. Pointers to this structure are cast to
* struct cookie_data before being handed to cookie callbacks, so
* the member order and types have to match exactly.
*/
struct cookie_internal_data {
struct cookie_internal_data *prev; /**< Previous in list */
struct cookie_internal_data *next; /**< Next in list */
char *name; /**< Cookie name */
char *value; /**< Cookie value */
bool value_was_quoted; /**< Value was quoted in Set-Cookie: */
char *comment; /**< Cookie comment */
bool domain_from_set; /**< Domain came from Set-Cookie: header */
char *domain; /**< Domain */
bool path_from_set; /**< Path came from Set-Cookie: header */
char *path; /**< Path */
time_t expires; /**< Expiry timestamp, or -1 for session */
time_t last_used; /**< Last used time */
bool secure; /**< Only send for HTTPS requests */
bool http_only; /**< Only expose to HTTP(S) requests */
enum cookie_version version; /**< Specification compliance */
bool no_destroy; /**< Never destroy this cookie,
* unless it's expired */
};
/**
* A protection space
*
* A protection space is the tuple (canonical root URL, realm). Entries
* live in a singly linked list owned by a leaf struct host_part, which
* only supplies the host name; the scheme and port stored here complete
* the canonical root URL.
*/
struct prot_space_data {
/**
* URL scheme of canonical hostname of this protection space.
*/
lwc_string *scheme;
/**
* Port number of canonical hostname of this protection
* space. When 0, it means the default port for given scheme,
* i.e. 80 (http), 443 (https).
*/
unsigned int port;
/** Protection realm */
char *realm;
/**
* Authentication details for this protection space in the form
* username:password
*/
char *auth;
/** Next sibling in the host's list, or NULL at the end */
struct prot_space_data *next;
};
/**
* Metadata about a URL
*
* \warning must be kept in sync with url_data structure in netsurf/url_db.h.
* Pointers to this structure are cast to struct url_data before
* being passed to URL callbacks, so the layouts have to match.
*/
struct url_internal_data {
char *title; /**< Resource title */
unsigned int visits; /**< Visit count */
time_t last_visit; /**< Last visit time */
content_type type; /**< Type of resource */
};
/**
* Data entry for a URL: one node (path segment) in a host's path tree
*
* Siblings are linked through next/prev, the parent's first and last
* children through children/last, and each node points back at its parent.
*/
struct path_data {
nsurl *url; /**< Full URL */
lwc_string *scheme; /**< URL scheme for data */
unsigned int port; /**< Port number for data. When 0, it means
* the default port for given scheme, i.e.
* 80 (http), 443 (https). */
char *segment; /**< Path segment for this node */
unsigned int frag_cnt; /**< Number of entries in path_data::fragment */
char **fragment; /**< Array of fragments */
bool persistent; /**< This entry should persist */
struct url_internal_data urld; /**< URL data for resource */
/**
* Protection space to which this resource belongs. Can be
* NULL when it does not belong to a protection space or when
* it is not known. Not owned by this node (ownership is with
* struct host_part::prot_space).
*/
const struct prot_space_data *prot_space;
/** Cookies associated with resource */
struct cookie_internal_data *cookies;
/** Last cookie in list */
struct cookie_internal_data *cookies_end;
struct path_data *next; /**< Next sibling */
struct path_data *prev; /**< Previous sibling */
struct path_data *parent; /**< Parent path segment */
struct path_data *children; /**< Child path segments */
struct path_data *last; /**< Last child */
};
/**
* HSTS state recorded for a host (see host_part::hsts)
*/
struct hsts_data {
time_t expires; /**< Expiry time */
bool include_sub_domains; /**< Whether to include subdomains */
};
/**
* One node of the host tree: a single FQDN segment, or a whole IP address
*/
struct host_part {
/**
* Known paths on this host. This _must_ be first so that
* struct host_part *h = (struct host_part *)mypath; works
*/
struct path_data paths;
/**
* Allow access to SSL protected resources on this host
* without verifying certificate authenticity
*/
bool permit_invalid_certs;
/* HSTS data */
struct hsts_data hsts;
/**
* Part of host string
*/
char *part;
/**
* Linked list of all protection spaces known for this
* host and all its schemes and ports.
*/
struct prot_space_data *prot_space;
struct host_part *next; /**< Next sibling */
struct host_part *prev; /**< Previous sibling */
struct host_part *parent; /**< Parent host part */
struct host_part *children; /**< Child host parts */
};
/**
* Search index node
*
* Node of one of the balanced search trees that index the host tree.
* Leaves are represented by the shared sentinel node `empty`, never by NULL.
*/
struct search_node {
const struct host_part *data; /**< Host tree entry */
unsigned int level; /**< Node level */
struct search_node *left; /**< Left subtree */
struct search_node *right; /**< Right subtree */
};
/** Root database handle (sentinel at the top of the host tree) */
static struct host_part db_root;
/** Search trees - one per letter + 1 for IPs + 1 for Everything Else */
#define NUM_SEARCH_TREES 28
/** Index of the search tree holding IP address hosts */
#define ST_IP 0
/** Index of the search tree for hosts that are neither IPs nor start with a letter */
#define ST_EE 1
/** Index of the first per-letter tree ('a'); 'b' is ST_DN + 1, and so on */
#define ST_DN 2
/**
* Sentinel leaf shared by all search trees: no data, level 0, and both
* children pointing back at itself. Traversals stop when they reach it.
*/
static struct search_node empty = { 0, 0, &empty, &empty };
/** The search trees themselves; every tree starts out as the sentinel */
static struct search_node *search_trees[NUM_SEARCH_TREES] = {
&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
&empty, &empty, &empty, &empty
};
/** Minimum cookie database file version */
#define MIN_COOKIE_FILE_VERSION 100
/** Current cookie database file version */
#define COOKIE_FILE_VERSION 102
/** loaded cookie file version */
static int loaded_cookie_file_version;
/** Minimum URL database file version */
#define MIN_URL_FILE_VERSION 106
/** Current URL database file version */
#define URL_FILE_VERSION 107
/**
* filter for url presence in database
*
* Bloom filter used for short-circuiting the false case of "is this
* URL in the database?". BLOOM_SIZE controls how large the filter is
* in bytes. Primitive experimentation shows that for a filter of X
* bytes filled with X items, searching for X items not in the filter
* has a 5% false-positive rate. We set it to 32kB, which should be
* enough for all but the largest databases, while not being
* shockingly wasteful on memory.
*/
static struct bloom_filter *url_bloom;
/**
* Size of url filter
*/
#define BLOOM_SIZE (1024 * 32)
/**
 * Emit a time_t as one line of text in a portable representation
 *
 * The value is formatted with nsc_sntimet(); should that yield no
 * characters, the value is written as a plain integer instead.
 *
 * \param fp   Stream to write to
 * \param val  Unix time value to output
 * \return NSERROR_OK in all cases
 */
static nserror urldb_write_timet(FILE *fp, time_t val)
{
    char text[32];
    int text_len = nsc_sntimet(text, sizeof(text), &val);

    if (text_len != 0) {
        fprintf(fp, "%.*s\n", text_len, text);
    } else {
        /* nothing was formatted: fall back to an integer */
        fprintf(fp, "%i\n", (int)val);
    }

    return NSERROR_OK;
}
/**
* Write paths associated with a host
*
* Walks the path tree rooted at \a parent without recursion, keeping the
* path of the node currently being visited in the caller-owned buffer
* \a *path. Each leaf that is persistent, or that has been visited at
* least once and more recently than \a expiry, is emitted as a record of
* eight lines: scheme, port (blank when 0), path, visit count, last visit
* time, content type, a blank line, and the title (blank when there is
* none).
*
* \param parent Root of (sub)tree to write
* \param host Current host name (not referenced by this function)
* \param fp File to write to
* \param path Current path string; may be reallocated
* \param path_alloc Allocated size of path
* \param path_used Used size of path, counting the terminating NUL
* \param expiry Expiry time of URLs
*
* \note If the path buffer cannot be grown the function returns
* immediately, without writing the remaining entries.
* \note Titles are sanitised in place before being written, so the
* stored title string is modified.
*/
static void
urldb_write_paths(const struct path_data *parent,
const char *host,
FILE *fp,
char **path,
int *path_alloc,
int *path_used,
time_t expiry)
{
const struct path_data *p = parent;
int i;
do {
/* Room needed: current contents + this segment + '/' (or NUL) */
int seglen = p->segment != NULL ? strlen(p->segment) : 0;
int len = *path_used + seglen + 1;
if (*path_alloc < len) {
char *temp;
temp = realloc(*path,
(len > 64) ? len : *path_alloc + 64);
if (!temp) {
return;
}
*path = temp;
*path_alloc = (len > 64) ? len : *path_alloc + 64;
}
/* Append the segment over the existing terminator */
if (p->segment != NULL) {
memcpy(*path + *path_used - 1, p->segment, seglen);
}
if (p->children != NULL) {
/* Interior node: the segment is a directory, add '/' */
(*path)[*path_used + seglen - 1] = '/';
(*path)[*path_used + seglen] = '\0';
} else {
/* Leaf: just terminate; no separator was added */
(*path)[*path_used + seglen - 1] = '\0';
len -= 1;
}
*path_used = len;
if (p->children != NULL) {
/* Drill down into children */
p = p->children;
} else {
/* leaf node */
if (p->persistent ||
((p->urld.last_visit > expiry) &&
(p->urld.visits > 0))) {
fprintf(fp, "%s\n", lwc_string_data(p->scheme));
if (p->port) {
fprintf(fp,"%d\n", p->port);
} else {
fprintf(fp, "\n");
}
fprintf(fp, "%s\n", *path);
/** \todo handle fragments? */
/* number of visits */
fprintf(fp, "%i\n", p->urld.visits);
/* time entry was last used */
urldb_write_timet(fp, p->urld.last_visit);
/* entry type */
fprintf(fp, "%i\n", (int)p->urld.type);
fprintf(fp, "\n");
if (p->urld.title) {
/* Replace control characters with spaces, then
* strip trailing spaces, so that the title
* occupies exactly one line of the file */
uint8_t *s = (uint8_t *) p->urld.title;
for (i = 0; s[i] != '\0'; i++)
if (s[i] < 32)
s[i] = ' ';
for (--i; ((i > 0) && (s[i] == ' '));
i--)
s[i] = '\0';
fprintf(fp, "%s\n", p->urld.title);
} else {
fprintf(fp, "\n");
}
}
/* Now, find next node to process. */
while (p != parent) {
int seglen = p->segment != NULL
? strlen(p->segment) : 0;
/* Remove our segment from the path */
*path_used -= seglen;
(*path)[*path_used - 1] = '\0';
if (p->next != NULL) {
/* Have a sibling, process that */
p = p->next;
break;
}
/* Going up, so remove '/' */
*path_used -= 1;
(*path)[*path_used - 1] = '\0';
/* Ascend tree */
p = p->parent;
}
}
} while (p != parent);
}
/**
 * Count the URLs below a path tree root that would be saved
 *
 * A leaf is counted when it is marked persistent, or when it has been
 * visited at least once and its last visit is later than \a expiry.
 * The tree is walked iteratively; \a *count is incremented, not reset.
 *
 * \param root    Root of path data tree
 * \param expiry  Expiry time for URLs
 * \param count   Counter to increment
 */
static void
urldb_count_urls(const struct path_data *root,
        time_t expiry,
        unsigned int *count)
{
    const struct path_data *node = root;

    do {
        bool keep;

        if (node->children != NULL) {
            /* interior node: descend to its first child */
            node = node->children;
            continue;
        }

        /* leaf node: decide whether it is worth counting */
        keep = node->persistent ||
                ((node->urld.last_visit > expiry) &&
                 (node->urld.visits > 0));
        if (keep) {
            (*count)++;
        }

        /* move to the next sibling, climbing while there is none */
        for (; node != root; node = node->parent) {
            if (node->next != NULL) {
                node = node->next;
                break;
            }
        }
    } while (node != root);
}
/**
 * Save a search (sub)tree
 *
 * Performs an in-order walk of the search tree. For every node the host
 * name is rebuilt by following the host tree from the node's entry up to
 * the root, and a header line "<host> <hsts-subdomains> <hsts-expiry>"
 * followed by the number of saved URLs is written, then the URLs
 * themselves. Hosts without URLs worth saving are written only when they
 * carry unexpired HSTS data.
 *
 * Host names that do not fit the local buffer are written truncated.
 *
 * \param parent  root node of search tree to save.
 * \param fp      File to write to
 */
static void urldb_save_search_tree(struct search_node *parent, FILE *fp)
{
    char host[256];
    const struct host_part *h;
    unsigned int path_count = 0;
    char *path, *p, *end;
    int path_alloc = 64, path_used = 1;
    time_t expiry, hsts_expiry = 0;
    int hsts_include_subdomains = 0;

    if (parent == &empty)
        return;

    expiry = time(NULL) - ((60 * 60 * 24) * nsoption_int(expire_url));

    urldb_save_search_tree(parent->left, fp);

    path = malloc(path_alloc);
    if (!path)
        return;

    path[0] = '\0';

    /* Make sure host is a valid string even if nothing gets appended */
    host[0] = '\0';

    for (h = parent->data, p = host, end = host + sizeof host;
            h && h != &db_root && p < end; h = h->parent) {
        int written = snprintf(p, end - p, "%s%s", h->part,
                (h->parent && h->parent->parent) ? "." : "");
        if (written < 0) {
            free(path);
            return;
        }
        if (written >= end - p) {
            /* Output was truncated (and is still terminated).
             * snprintf reports the length it wanted to write, so
             * advancing by it would step outside the buffer. */
            break;
        }
        p += written;
    }

    h = parent->data;
    if (h && h->hsts.expires > expiry) {
        hsts_expiry = h->hsts.expires;
        hsts_include_subdomains = h->hsts.include_sub_domains;
    }

    urldb_count_urls(&parent->data->paths, expiry, &path_count);

    if (path_count > 0) {
        fprintf(fp, "%s %i ", host, hsts_include_subdomains);
        urldb_write_timet(fp, hsts_expiry);
        fprintf(fp, "%i\n", path_count);

        urldb_write_paths(&parent->data->paths, host, fp,
                &path, &path_alloc, &path_used, expiry);
    } else if (hsts_expiry) {
        /* No URLs to save, but the HSTS state must survive */
        fprintf(fp, "%s %i ", host, hsts_include_subdomains);
        urldb_write_timet(fp, hsts_expiry);
        fprintf(fp, "0\n");
    }

    free(path);

    urldb_save_search_tree(parent->right, fp);
}
/**
 * Path data iterator (internal)
 *
 * Visits every leaf below \a parent. When \a url_callback is given it is
 * invoked with the leaf's URL and URL data; otherwise \a cookie_callback
 * is invoked once for each cookie attached to the leaf. Iteration stops
 * as soon as a callback returns false.
 *
 * \param parent           Root of subtree to iterate over
 * \param url_callback     Callback for URLs, or NULL
 * \param cookie_callback  Callback for cookies, used when url_callback
 *                         is NULL
 * \return true to continue, false otherwise
 */
static bool
urldb_iterate_entries_path(const struct path_data *parent,
        bool (*url_callback)(nsurl *url, const struct url_data *data),
        bool (*cookie_callback)(const struct cookie_data *data))
{
    const struct path_data *node = parent;

    do {
        if (node->children != NULL) {
            /* interior node: descend to its first child */
            node = node->children;
            continue;
        }

        /* Every leaf of the path tree is expected to carry a URL or
         * cookies; anything else points at a bug in the loader or
         * the insertion code, hence the assertions. */
        assert(url_callback || cookie_callback);

        /** \todo handle fragments? */
        if (url_callback) {
            assert(node->url);

            if (!url_callback(node->url,
                    (const struct url_data *) &node->urld)) {
                return false;
            }
        } else {
            const struct cookie_data *cookie =
                    (const struct cookie_data *) node->cookies;

            while (cookie != NULL) {
                if (!cookie_callback(cookie)) {
                    return false;
                }
                cookie = cookie->next;
            }
        }

        /* move to the next sibling, climbing while there is none */
        for (; node != parent; node = node->parent) {
            if (node->next != NULL) {
                node = node->next;
                break;
            }
        }
    } while (node != parent);

    return true;
}
/**
 * Check whether a host string is an IP address.
 *
 * This call detects IPv4 addresses (dotted-quad only, decimal or
 * hexadecimal notation) and, unless NO_IPV6 is defined, bracketed IPv6
 * addresses (including those containing embedded IPv4 addresses.)
 *
 * If \a host contains a '/', only the part before the first '/' is
 * examined (see the FIXME in the body).
 *
 * \param host  a hostname terminated by '\0'
 * \return true if the hostname is an IP address, false otherwise
 *         (including when memory for a temporary copy is unavailable)
 */
static bool urldb__host_is_ip_address(const char *host)
{
    struct in_addr ipv4;
    size_t host_len = strlen(host);
    const char *sane_host = host;
    char *host_copy = NULL;
    const char *slash;
    bool is_ip = false;
#ifndef NO_IPV6
    struct in6_addr ipv6;
    char ipv6_addr[64];
    size_t ipv6_addr_len;
#endif

    /**
     * @todo FIXME Some parts of urldb.c make confusions between hosts
     * and "prefixes", we can sometimes be erroneously passed more than
     * just a host. Sometimes we may be passed trailing slashes, or even
     * whole path segments. A specific criminal in this class is
     * urldb_iterate_partial, which takes a prefix to search for, but
     * passes that prefix to functions that expect only hosts.
     *
     * For the time being, we will accept such calls; we check if there
     * is a / in the host parameter, and if there is, we take a copy of
     * the part before it. This is not a permanent solution; we
     * should search through NetSurf and find all the callers that are
     * in error and fix them. When doing this task, it might be wise
     * to replace the hideousness below with code that doesn't have to do
     * this, and add assert(strchr(host, '/') == NULL); somewhere.
     * -- rjek - 2010-11-04
     */
    slash = strchr(host, '/');
    if (slash != NULL) {
        host_len = slash - host;

        host_copy = malloc(host_len + 1);
        if (host_copy == NULL) {
            /* Cannot isolate the host part, so cannot classify it */
            return false;
        }
        memcpy(host_copy, host, host_len);
        host_copy[host_len] = '\0';
        sane_host = host_copy;

        NSLOG(netsurf, INFO, "WARNING: called with non-host '%s'",
              host);
    }

    /* Cheap rejection: anything outside this alphabet is not an IP */
    if (strspn(sane_host, "0123456789abcdefABCDEF[].:") < host_len)
        goto out;

    if (inet_aton(sane_host, &ipv4) != 0) {
        /* This can only be a sane IPv4 address if it contains 3 dots.
         * Helpfully, inet_aton is happy to treat "a", "a.b", "a.b.c",
         * and "a.b.c.d" as valid IPv4 address strings where we only
         * support the full, dotted-quad, form.
         */
        int num_dots = 0;
        size_t index;

        for (index = 0; index < host_len; index++) {
            if (sane_host[index] == '.')
                num_dots++;
        }

        is_ip = (num_dots == 3);
        goto out;
    }

#ifndef NO_IPV6
    /* IPv6 literals must be bracketed: "[" address "]" */
    if ((host_len < 6) ||
        (sane_host[0] != '[') ||
        (sane_host[host_len - 1] != ']')) {
        goto out;
    }

    ipv6_addr_len = host_len - 2;
    if (ipv6_addr_len >= sizeof(ipv6_addr)) {
        /* Longer than any textual IPv6 address can be */
        goto out;
    }

    memcpy(ipv6_addr, sane_host + 1, ipv6_addr_len);
    ipv6_addr[ipv6_addr_len] = '\0';

    if (inet_pton(AF_INET6, ipv6_addr, &ipv6) == 1)
        is_ip = true;
#endif

out:
    free(host_copy);
    return is_ip;
}
/**
* Compare host_part with prefix
*
* The prefix is consumed one '.'-separated segment at a time, starting at
* its beginning, while the host is followed from \a a towards the root of
* the host tree through the parent links. Segments of the prefix that are
* followed by a '.' have to match the corresponding host part in full
* (ignoring case); the final segment only has to match the start of its
* host part.
*
* If \a b is recognised as an IP address it is instead compared, ignoring
* case, against the start of a->part only.
*
* \param a host part
* \param b prefix
* \return 0 if match, non-zero, otherwise
*/
static int urldb_search_match_prefix(const struct host_part *a, const char *b)
{
const char *end, *dot;
int plen, ret;
assert(a && a != &db_root && b);
if (urldb__host_is_ip_address(b)) {
/* IP address */
return strncasecmp(a->part, b, strlen(b));
}
/* end is one past the terminating NUL, so that b >= end means the
* whole prefix, including its last segment, has been consumed */
end = b + strlen(b) + 1;
while (b < end && a && a != &db_root) {
dot = strchr(b, '.');
if (!dot) {
/* last segment */
dot = end - 1;
}
/* Compare strings (length limited) */
if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
/* didn't match => return difference */
return ret;
/* The strings matched */
if (dot < end - 1) {
/* Consider segment lengths only in the case
* where the prefix contains segments */
plen = strlen(a->part);
if (plen > dot - b) {
/* len(a) > len(b) */
return 1;
} else if (plen < dot - b) {
/* len(a) < len(b) */
return -1;
}
}
/* Step to the next prefix segment and the parent host part */
b = dot + 1;
a = a->parent;
}
/* If we get here then either:
* a) The path lengths differ
* or b) The hosts are identical
*/
if (a && a != &db_root && b >= end) {
/* len(a) > len(b) => prefix matches */
return 0;
} else if ((!a || a == &db_root) && b < end) {
/* len(a) < len(b) => prefix does not match */
return -1;
}
/* Identical */
return 0;
}
/**
 * Partial host iterator (internal)
 *
 * Walks a search tree, using urldb_search_match_prefix() to decide which
 * way to go. For every host matching \a prefix, all URLs stored below
 * that host are passed to \a callback.
 *
 * \param root      Root of (sub)tree to traverse
 * \param prefix    Prefix to match
 * \param callback  Callback function
 * \return true to continue, false otherwise
 */
static bool
urldb_iterate_partial_host(struct search_node *root,
        const char *prefix,
        bool (*callback)(nsurl *url, const struct url_data *data))
{
    int cmp;

    assert(root && prefix && callback);

    if (root == &empty) {
        return true;
    }

    cmp = urldb_search_match_prefix(root->data, prefix);

    if (cmp > 0) {
        /* no match here: only the left subtree can contain matches */
        return urldb_iterate_partial_host(root->left, prefix, callback);
    }

    if (cmp < 0) {
        /* no match here: only the right subtree can contain matches */
        return urldb_iterate_partial_host(root->right, prefix, callback);
    }

    /* This node matches: handle left subtree, this host, right subtree */
    if (!urldb_iterate_partial_host(root->left, prefix, callback)) {
        return false;
    }

    if (root->data->paths.children != NULL &&
        !urldb_iterate_entries_path(&root->data->paths, callback, NULL)) {
        return false;
    }

    return urldb_iterate_partial_host(root->right, prefix, callback);
}
/**
 * Partial path iterator (internal)
 *
 * Given: http://www.example.org/a/b/c/d//e
 * and assuming a path tree:
 *
 *          ^
 *         / \
 *       a1   b1
 *      /  \
 *    a2    b2
 *         /|\
 *        a b c
 *        3 3 |
 *            d
 *            |
 *            e
 *           / \
 *          f   g
 *
 * Prefix will be:      p will be:
 *
 * a/b/c/d//e           a1
 *   b/c/d//e           a2
 *   b/c/d//e           b3
 *     c/d//e           a3
 *     c/d//e           b3
 *     c/d//e           c
 *       d//e           d
 *         /e           e       (skip /)
 *          e           e
 *
 * The siblings at one level are examined in order. An intermediate
 * segment of the prefix (one that is followed by '/') must equal a
 * sibling's segment, ignoring case, and the walk then descends into that
 * sibling's children. The final segment only needs to match the start
 * of a sibling's segment; every entry at or below each such sibling is
 * reported through \a callback.
 *
 * \param parent    Root of (sub)tree to traverse; may have no children
 * \param prefix    Prefix to match
 * \param callback  Callback function
 * \return true to continue, false otherwise
 */
static bool
urldb_iterate_partial_path(const struct path_data *parent,
        const char *prefix,
        bool (*callback)(nsurl *url, const struct url_data *data))
{
    const struct path_data *p = parent->children;
    const char *slash, *end = prefix + strlen(prefix);

    /* Test before the first use of p: parent may have no children */
    while (p != NULL) {
        size_t seg_len;

        slash = strchr(prefix, '/');
        if (!slash) {
            slash = end;
        }

        if (slash == prefix && *prefix == '/') {
            /* Ignore "//" */
            prefix++;
            continue;
        }

        seg_len = slash - prefix;

        if (strncasecmp(p->segment, prefix, seg_len) != 0) {
            /* Doesn't match this segment, try next sibling */
            p = p->next;
        } else if (slash == end) {
            /* we've run out of prefix, so all
             * paths below this one match */
            if (!urldb_iterate_entries_path(p, callback, NULL)) {
                return false;
            }
            /* Progress to next sibling */
            p = p->next;
        } else if (p->segment[seg_len] == '\0') {
            /* Whole intermediate segment matched: skip over it
             * and continue with this node's children */
            prefix = slash + 1;
            p = p->children;
        } else {
            /* p->segment only starts with the intermediate segment
             * (e.g. "abc" for prefix "ab/..."): a different
             * directory, so try next sibling */
            p = p->next;
        }
    }

    return true;
}
/**
 * Host data iterator (internal)
 *
 * In-order walk over a search tree. Hosts that have paths, or that have
 * cookies attached directly to the host while a cookie callback is
 * supplied, have their entries iterated.
 *
 * \param parent           Root of subtree to iterate over
 * \param url_callback     Callback function
 * \param cookie_callback  Callback function
 * \return true to continue, false otherwise
 */
static bool
urldb_iterate_entries_host(struct search_node *parent,
        bool (*url_callback)(nsurl *url, const struct url_data *data),
        bool (*cookie_callback)(const struct cookie_data *data))
{
    bool has_entries;

    if (parent == &empty) {
        return true;
    }

    if (!urldb_iterate_entries_host(parent->left,
            url_callback, cookie_callback)) {
        return false;
    }

    /* paths, or domain cookies that somebody is interested in */
    has_entries = (parent->data->paths.children != NULL) ||
            ((cookie_callback != NULL) &&
             (parent->data->paths.cookies != NULL));

    if (has_entries &&
        !urldb_iterate_entries_path(&parent->data->paths,
            url_callback, cookie_callback)) {
        return false;
    }

    return urldb_iterate_entries_host(parent->right,
            url_callback, cookie_callback);
}
/**
 * Add a host node to the tree
 *
 * The new node receives its own copy of \a part and is linked in at the
 * head of the parent's list of children.
 *
 * \param part    Host segment to add (or whole IP address) (copied)
 * \param parent  Parent node to add to
 * \return Pointer to added node, or NULL on memory exhaustion
 */
static struct host_part *
urldb_add_host_node(const char *part, struct host_part *parent)
{
    struct host_part *node;

    assert(part && parent);

    node = calloc(1, sizeof(*node));
    if (node == NULL) {
        return NULL;
    }

    node->part = strdup(part);
    if (node->part == NULL) {
        free(node);
        return NULL;
    }

    /* push onto the front of the parent's child list */
    node->parent = parent;
    node->next = parent->children;
    if (node->next != NULL) {
        node->next->prev = node;
    }
    parent->children = node;

    return node;
}
/**
 * Fragment comparator callback for qsort
 *
 * Both arguments point at elements of an array of C strings; the strings
 * are ordered without regard to case.
 *
 * \param a  pointer to first string pointer
 * \param b  pointer to second string pointer
 * \return 0 for equal else positive or negative value on comparison
 */
static int urldb_add_path_fragment_cmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcasecmp(*lhs, *rhs);
}
/**
 * Add a fragment to a path segment
 *
 * The fragment text is copied into the segment's fragment array, which
 * is then re-sorted case-insensitively.
 *
 * \param segment   Path segment to add to
 * \param fragment  Fragment to add (copied), or NULL
 * \return segment or NULL on memory exhaustion
 */
static struct path_data *
urldb_add_path_fragment(struct path_data *segment, lwc_string *fragment)
{
    char **grown;
    char *copy;

    assert(segment);

    /* Accepting a missing fragment (and doing nothing) keeps the
     * callers free of special cases */
    if (fragment == NULL) {
        return segment;
    }

    grown = realloc(segment->fragment,
            (segment->frag_cnt + 1) * sizeof(char *));
    if (grown == NULL) {
        return NULL;
    }
    segment->fragment = grown;

    copy = strdup(lwc_string_data(fragment));
    segment->fragment[segment->frag_cnt] = copy;
    if (copy == NULL) {
        /* the enlarged array stays with the segment; do not free it */
        return NULL;
    }

    segment->frag_cnt++;

    /* Fragments are kept in alphabetical order. Inserting at the right
     * position might prove better than sorting every time. */
    qsort(segment->fragment,
          segment->frag_cnt,
          sizeof (char *),
          urldb_add_path_fragment_cmp);

    return segment;
}
/**
 * Add a path node to the tree
 *
 * The node is inserted among the children of \a parent so that siblings
 * stay ordered by segment (strcmp order); a node whose segment equals
 * that of an existing sibling is placed after it.
 *
 * \param scheme    URL scheme associated with path (copied)
 * \param port      Port number on host associated with path
 * \param segment   Path segment to add (copied)
 * \param fragment  URL fragment (copied), or NULL
 * \param parent    Parent node to add to
 * \return Pointer to added node, or NULL on memory exhaustion
 */
static struct path_data *
urldb_add_path_node(lwc_string *scheme,
        unsigned int port,
        const char *segment,
        lwc_string *fragment,
        struct path_data *parent)
{
    struct path_data *d, *e;

    assert(scheme && segment && parent);

    d = calloc(1, sizeof(struct path_data));
    if (!d)
        return NULL;

    d->scheme = lwc_string_ref(scheme);
    d->port = port;

    d->segment = strdup(segment);
    if (!d->segment) {
        lwc_string_unref(d->scheme);
        free(d);
        return NULL;
    }

    if (fragment) {
        if (!urldb_add_path_fragment(d, fragment)) {
            /* The fragment array may already have been allocated
             * before the failure occurred; release it as well */
            free(d->fragment);
            free(d->segment);
            lwc_string_unref(d->scheme);
            free(d);
            return NULL;
        }
    }

    /* Find the first sibling that sorts after the new segment */
    for (e = parent->children; e; e = e->next) {
        if (strcmp(e->segment, d->segment) > 0)
            break;
    }

    if (e) {
        /* Insert in front of e */
        d->prev = e->prev;
        d->next = e;
        if (e->prev)
            e->prev->next = d;
        else
            parent->children = d;
        e->prev = d;
    } else if (!parent->children) {
        /* First child of this parent */
        d->prev = d->next = NULL;
        parent->children = parent->last = d;
    } else {
        /* Sorts after every existing sibling: append */
        d->next = NULL;
        d->prev = parent->last;
        parent->last->next = d;
        parent->last = d;
    }

    d->parent = parent;

    return d;
}
/**
 * Get the slot holding the search tree for a particular host
 *
 * IP addresses share one tree, hosts starting with an ASCII letter use
 * the tree for that letter (ignoring case), and everything else goes to
 * the catch-all tree.
 *
 * \param host  the host to lookup
 * \return pointer to the corresponding entry of the search tree table
 */
static struct search_node **urldb_get_search_tree_direct(const char *host)
{
    int index = ST_EE;

    assert(host);

    if (urldb__host_is_ip_address(host)) {
        index = ST_IP;
    } else if (ascii_is_alpha(*host)) {
        index = ST_DN + ascii_to_lower(*host) - 'a';
    }

    return &search_trees[index];
}
/**
 * Get the search tree for a particular host
 *
 * \param host  the host to lookup
 * \return root node of the corresponding search tree
 */
static struct search_node *urldb_get_search_tree(const char *host)
{
    struct search_node **slot = urldb_get_search_tree_direct(host);

    return *slot;
}
/**
 * Compare host part with a string
 *
 * The string is consumed one '.'-separated segment at a time from its
 * start while the host is followed from \a a towards the root of the host
 * tree. Each segment has to equal the corresponding host part, ignoring
 * case. A string recognised as an IP address is compared against
 * a->part as a whole.
 *
 * \param a  host part
 * \param b  string to compare
 * \return 0 if match, non-zero, otherwise
 */
static int urldb_search_match_string(const struct host_part *a, const char *b)
{
    const char *limit;
    bool host_left, string_left;

    assert(a && a != &db_root && b);

    if (urldb__host_is_ip_address(b)) {
        /* IP addresses are stored verbatim in a single node */
        return strcasecmp(a->part, b);
    }

    /* one past the terminator: reached only once the final segment
     * has been consumed as well */
    limit = b + strlen(b) + 1;

    for (; b < limit && a && a != &db_root; a = a->parent) {
        const char *seg_end = strchr(b, '.');
        int diff;
        int part_len;

        if (seg_end == NULL) {
            /* last segment runs up to the terminator */
            seg_end = limit - 1;
        }

        /* compare over the length of the string's segment */
        diff = strncasecmp(a->part, b, seg_end - b);
        if (diff != 0) {
            return diff;
        }

        /* equal so far; the lengths have to agree as well */
        part_len = strlen(a->part);
        if (part_len > seg_end - b) {
            return 1;
        } else if (part_len < seg_end - b) {
            return -1;
        }

        b = seg_end + 1;
    }

    /* One side may have run out before the other */
    host_left = (a != NULL && a != &db_root);
    string_left = (b < limit);

    if (host_left && !string_left) {
        /* host has more parts than the string */
        return 1;
    }

    if (!host_left && string_left) {
        /* string has more segments than the host */
        return -1;
    }

    /* identical */
    return 0;
}
/**
 * Find a node in a search tree
 *
 * \param root  Tree to look in
 * \param host  Host to find
 * \return Pointer to host tree node, or NULL if not found
 */
static const struct host_part *
urldb_search_find(struct search_node *root, const char *host)
{
    assert(root && host);

    /* descend until the sentinel leaf is reached */
    while (root != &empty) {
        int cmp = urldb_search_match_string(root->data, host);

        if (cmp == 0) {
            return root->data;
        }

        root = (cmp > 0) ? root->left : root->right;
        assert(root);
    }

    return NULL;
}
/**
 * Match a path string
 *
 * Follows \a path, one '/'-separated segment at a time, down the path
 * tree. At each level a child is accepted only if its segment equals the
 * path segment exactly and its scheme and port equal those given.
 *
 * \param parent  Path (sub)tree to look in; must be a root (no segment)
 * \param path    The path to search for; must start with '/'
 * \param scheme  The URL scheme associated with the path
 * \param port    The port associated with the path
 * \return Pointer to path data or NULL if not found.
 */
static struct path_data *
urldb_match_path(const struct path_data *parent,
        const char *path,
        lwc_string *scheme,
        unsigned short port)
{
    const struct path_data *p;
    const char *slash;
    bool match;

    assert(parent != NULL);
    assert(parent->segment == NULL);

    if (path[0] != '/') {
        NSLOG(netsurf, INFO, "path is %s", path);
    }

    assert(path[0] == '/');

    /* Start with children, as parent has no segment */
    p = parent->children;

    while (p != NULL) {
        size_t seg_len;

        slash = strchr(path + 1, '/');
        if (!slash) {
            slash = path + strlen(path);
        }
        seg_len = slash - path - 1;

        /* strncmp() alone would accept any stored segment that merely
         * starts with the requested one ("foobar" for "/foo"), so
         * also require the stored segment to end here. Reading
         * p->segment[seg_len] is safe: the first seg_len characters
         * compared equal and the requested segment contains no NUL. */
        if (strncmp(p->segment, path + 1, seg_len) == 0 &&
            p->segment[seg_len] == '\0' &&
            lwc_string_isequal(p->scheme, scheme, &match) == lwc_error_ok &&
            match == true &&
            p->port == port) {
            if (*slash == '\0') {
                /* Complete match */
                return (struct path_data *) p;
            }

            /* Match so far, go down tree */
            p = p->children;

            path = slash;
        } else {
            /* No match, try next sibling */
            p = p->next;
        }
    }

    return NULL;
}
/**
 * Find an URL in the database
 *
 * The bloom filter, when present, is consulted first so that most URLs
 * that are not in the database are rejected cheaply. mailto: URLs are
 * never found. URLs without a host are only looked up for the file:
 * scheme, under the host name "localhost".
 *
 * \param url  Absolute URL to find
 * \return Pointer to path data, or NULL if not found
 */
static struct path_data *urldb_find_url(nsurl *url)
{
    const struct host_part *h;
    struct path_data *p = NULL;
    struct search_node *tree;
    char *plq = NULL;
    const char *host_str;
    lwc_string *scheme, *host = NULL, *port;
    size_t len = 0;
    unsigned int port_int = 0;
    bool match;

    assert(url);

    if (url_bloom != NULL) {
        if (bloom_search_hash(url_bloom, nsurl_hash(url)) == false) {
            return NULL;
        }
    }

    scheme = nsurl_get_component(url, NSURL_SCHEME);
    if (scheme == NULL)
        return NULL;

    if (lwc_string_isequal(scheme, corestring_lwc_mailto, &match) ==
            lwc_error_ok && match == true) {
        goto out;
    }

    host = nsurl_get_component(url, NSURL_HOST);
    if (host != NULL) {
        /* host_str points into the host string, so the reference is
         * kept until the lookup below has finished */
        host_str = lwc_string_data(host);
    } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
            lwc_error_ok && match == true) {
        host_str = "localhost";
    } else {
        goto out;
    }

    tree = urldb_get_search_tree(host_str);
    h = urldb_search_find(tree, host_str);
    if (!h) {
        goto out;
    }

    /* generate plq (path, leaf, query) */
    if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) != NSERROR_OK) {
        goto out;
    }

    /* Get port; 0 stands for the scheme's default port */
    port = nsurl_get_component(url, NSURL_PORT);
    if (port != NULL) {
        port_int = atoi(lwc_string_data(port));
        lwc_string_unref(port);
    }

    p = urldb_match_path(&h->paths, plq, scheme, port_int);

    free(plq);

out:
    if (host != NULL) {
        lwc_string_unref(host);
    }
    lwc_string_unref(scheme);

    return p;
}
/**
 * Dump URL database paths to the log
 *
 * Every node that has a segment is logged with its scheme, port, segment
 * and fragments. The tree is walked iteratively.
 *
 * \param parent  Parent node of tree to dump
 */
static void urldb_dump_paths(struct path_data *parent)
{
    const struct path_data *node = parent;
    unsigned int frag;

    do {
        if (node->segment != NULL) {
            NSLOG(netsurf, INFO, "\t%s : %u",
                  lwc_string_data(node->scheme), node->port);

            NSLOG(netsurf, INFO, "\t\t'%s'", node->segment);

            for (frag = 0; frag < node->frag_cnt; frag++) {
                NSLOG(netsurf, INFO, "\t\t\t#%s",
                      node->fragment[frag]);
            }
        }

        if (node->children != NULL) {
            /* descend to the first child */
            node = node->children;
            continue;
        }

        /* move to the next sibling, climbing while there is none */
        for (; node != parent; node = node->parent) {
            if (node->next != NULL) {
                node = node->next;
                break;
            }
        }
    } while (node != parent);
}
/**
 * Log a host tree via NSLOG
 *
 * Logs the host part and its invalid-certificate policy (when the node
 * has a part), then the node's paths, then recurses into each child.
 *
 * \param parent Parent node of tree to dump
 */
static void urldb_dump_hosts(struct host_part *parent)
{
	struct host_part *child;

	if (parent->part) {
		const char *policy =
			parent->permit_invalid_certs ? "Permits" : "Denies";

		NSLOG(netsurf, INFO, "%s", parent->part);

		NSLOG(netsurf, INFO, "\t%s invalid SSL certs", policy);
	}

	/* Paths belonging to this host */
	urldb_dump_paths(&parent->paths);

	/* Then every child host, depth first */
	child = parent->children;
	while (child != NULL) {
		urldb_dump_hosts(child);
		child = child->next;
	}
}
/**
 * Log a search tree via NSLOG, in order
 *
 * Each node is logged as one line: \a depth spaces of indentation
 * followed by the host parts found by walking the parent links of the
 * node's data, separated by '.'.
 *
 * \param parent Parent node of tree to dump
 * \param depth Tree depth
 */
static void urldb_dump_search(struct search_node *parent, int depth)
{
	const struct host_part *hp;
	char line[1024];
	int limit = sizeof(line) - 2;
	int pos; /* write index into line */
	int written;

	if (parent == &empty)
		return;

	urldb_dump_search(parent->left, depth + 1);

	/* Indent by depth */
	pos = 0;
	while (pos != depth) {
		line[pos] = ' ';
		pos++;
	}

	hp = parent->data;
	while (hp != NULL) {
		if (hp->part) {
			written = snprintf(&line[pos], limit - pos,
					"%s", hp->part);
			if (written < 0 || (pos + written) >= limit) {
				/* Error or truncation: stop here; the
				 * terminator below cuts the partial part */
				break;
			}
			pos += written;
		}

		/* Separator, except after the last part before the root */
		if (hp->parent && hp->parent->parent) {
			line[pos] = '.';
			pos++;
		}

		hp = hp->parent;
	}
	line[pos] = 0;

	NSLOG(netsurf, INFO, "%s", line);

	urldb_dump_search(parent->right, depth + 1);
}
/**
* Compare a pair of host parts
*
* \param a first host part
* \param b second host part
* \return 0 if match, non-zero, otherwise
*/
static int
urldb_search_match_host(const struct host_part *a, const struct host_part *b)
{
int ret;
assert(a && b);
/* traverse up tree to root, comparing parts as we go. */
for (; a && a != &db_root && b && b != &db_root;
a = a->parent, b = b->parent) {
if ((ret = strcasecmp(a->part, b->part)) != 0) {
/* They differ => return the difference here */
return ret;
}
}
/* If we get here then either:
* a) The path lengths differ
* or b) The hosts are identical
*/
if (a && a != &db_root && (!b || b == &db_root)) {
/* len(a) > len(b) */
return 1;
} else if ((!a || a == &db_root) && b && b != &db_root) {
/* len(a) < len(b) */
return -1;
}
/* identical */
return 0;
}
/**
 * Rotate a subtree right
 *
 * The rotation is only performed when the left child sits on the same
 * level as \a root; otherwise the subtree is returned untouched.
 *
 * \param root Root of subtree to rotate
 * \return new root of subtree
 */
static struct search_node *urldb_search_skew(struct search_node *root)
{
	struct search_node *left;

	assert(root);

	left = root->left;

	if (left->level != root->level) {
		/* Nothing to do */
		return root;
	}

	/* Left child becomes the new subtree root */
	root->left = left->right;
	left->right = root;

	return left;
}
/**
 * Rotate a node left, increasing the parent's level
 *
 * The rotation is only performed when the right child's right child
 * sits on the same level as \a root; otherwise the subtree is returned
 * untouched.
 *
 * \param root Root of subtree to rotate
 * \return New root of subtree
 */
static struct search_node *urldb_search_split(struct search_node *root)
{
	struct search_node *right;

	assert(root);

	right = root->right;

	if (right->right->level != root->level) {
		/* Nothing to do */
		return root;
	}

	/* Right child becomes the new subtree root, one level up */
	root->right = right->left;
	right->left = root;
	right->level++;

	return right;
}
/**
 * Insert node into search tree
 *
 * If a node comparing equal to \a n is already present, \a n is freed
 * and the tree is left as it was.
 *
 * \param root Root of (sub)tree to insert into
 * \param n Node to insert
 * \return Pointer to updated root
 */
static struct search_node *
urldb_search_insert_internal(struct search_node *root, struct search_node *n)
{
	int order;

	assert(root && n);

	if (root == &empty) {
		/* Reached a leaf position: the new node goes here */
		return n;
	}

	order = urldb_search_match_host(root->data, n->data);

	if (order == 0) {
		/* exact match: discard the duplicate */
		free(n);
		return root;
	}

	if (order > 0) {
		root->left = urldb_search_insert_internal(root->left, n);
	} else {
		root->right = urldb_search_insert_internal(root->right, n);
	}

	/* Rebalance on the way back up */
	return urldb_search_split(urldb_search_skew(root));
}
/**
 * Insert a node into the search tree
 *
 * A new level-1 node wrapping \a data is allocated and handed to
 * urldb_search_insert_internal().
 *
 * \param root Root of tree to insert into
 * \param data User data to insert
 * \return Pointer to updated root, or NULL if failed
 */
static struct search_node *
urldb_search_insert(struct search_node *root, const struct host_part *data)
{
	struct search_node *node;

	assert(root && data);

	node = malloc(sizeof *node);
	if (node == NULL) {
		return NULL;
	}

	node->data = data;
	node->level = 1;
	node->left = &empty;
	node->right = &empty;

	return urldb_search_insert_internal(root, node);
}
/**
 * Parse a cookie avpair
 *
 * Leading and trailing spaces/tabs are removed from both \a n and \a v
 * (the buffers are modified in place) before the pair is interpreted.
 * Recognised attribute names are matched case-insensitively; the first
 * unrecognised pair seen becomes the cookie's name and value.
 *
 * \param c Cookie struct to populate
 * \param n Name component
 * \param v Value component
 * \param was_quoted Whether \a v was quoted in the input
 * \return true on success, false on memory exhaustion
 */
static bool
urldb_parse_avpair(struct cookie_internal_data *c,
		   char *n,
		   char *v,
		   bool was_quoted)
{
	size_t len;

	assert(c && n && v);

	/* Strip whitespace from start of name */
	for (; *n; n++) {
		if (*n != ' ' && *n != '\t')
			break;
	}

	/* Strip whitespace from end of name.
	 * The last character lives at index len - 1; index len is the
	 * terminator and must not be the one examined. */
	for (len = strlen(n); len > 0; len--) {
		if (n[len - 1] != ' ' && n[len - 1] != '\t')
			break;
		n[len - 1] = '\0';
	}

	/* Strip whitespace from start of value */
	for (; *v; v++) {
		if (*v != ' ' && *v != '\t')
			break;
	}

	/* Strip whitespace from end of value */
	for (len = strlen(v); len > 0; len--) {
		if (v[len - 1] != ' ' && v[len - 1] != '\t')
			break;
		v[len - 1] = '\0';
	}

	if (!c->comment && strcasecmp(n, "Comment") == 0) {
		c->comment = strdup(v);
		if (!c->comment)
			return false;
	} else if (!c->domain && strcasecmp(n, "Domain") == 0) {
		if (v[0] == '.') {
			/* Domain must start with a dot */
			c->domain_from_set = true;
			c->domain = strdup(v);
			if (!c->domain)
				return false;
		}
	} else if (strcasecmp(n, "Max-Age") == 0) {
		int temp = atoi(v);
		if (temp == 0)
			/* Special case - 0 means delete */
			c->expires = 0;
		else
			c->expires = time(NULL) + temp;
	} else if (!c->path && strcasecmp(n, "Path") == 0) {
		c->path_from_set = true;
		c->path = strdup(v);
		if (!c->path)
			return false;
	} else if (strcasecmp(n, "Version") == 0) {
		c->version = atoi(v);
	} else if (strcasecmp(n, "Expires") == 0) {
		char *datenoday;
		time_t expires;
		nserror res;

		/* Strip dayname from date (these are hugely variable
		 * and liable to break the parser. They also serve no
		 * useful purpose) */
		for (datenoday = v;
		     *datenoday && !ascii_is_digit(*datenoday);
		     datenoday++) {
			/* do nothing */
		}

		res = nsc_strntimet(datenoday, strlen(datenoday), &expires);
		if (res != NSERROR_OK) {
			/* assume we have an unrepresentable date =>
			 * force it to the maximum possible value of a
			 * 32bit time_t (this may break in 2038. We'll
			 * deal with that once we come to it) */
			expires = (time_t)0x7fffffff;
		}
		c->expires = expires;
	} else if (strcasecmp(n, "Secure") == 0) {
		c->secure = true;
	} else if (strcasecmp(n, "HttpOnly") == 0) {
		c->http_only = true;
	} else if (!c->name) {
		/* First unrecognised pair is the cookie's NAME=VALUE */
		c->name = strdup(n);
		c->value = strdup(v);
		c->value_was_quoted = was_quoted;
		if (!c->name || !c->value) {
			/* Whichever copy succeeded stays attached to c, so
			 * freeing the cookie also releases it */
			return false;
		}
	}

	return true;
}
/**
 * Release a cookie and every string it owns
 *
 * \param c The cookie to free
 */
static void urldb_free_cookie(struct cookie_internal_data *c)
{
	assert(c);

	/* Owned strings first, then the structure itself */
	free(c->name);
	free(c->value);
	free(c->domain);
	free(c->path);
	free(c->comment);

	free(c);
}
/**
 * Parse a cookie
 *
 * Input is consumed up to a CRLF, the end of the string, or a comma
 * judged to separate two cookies. Each avpair is handed to
 * urldb_parse_avpair(). If the header supplied no Domain or Path, they
 * are defaulted from the host and path of \a url. \a *cookie is only
 * updated when a cookie is returned.
 *
 * \param url URL being fetched
 * \param cookie Pointer to cookie string (updated on exit)
 * \return Pointer to cookie structure (on heap, caller frees) or NULL
 */
static struct cookie_internal_data *
urldb_parse_cookie(nsurl *url, const char **cookie)
{
	struct cookie_internal_data *c;
	const char *cur;
	char name[1024], value[4096];
	char *n = name, *v = value;
	bool in_value = false;
	bool had_value_data = false;
	bool value_verbatim = false;
	bool quoted = false;
	bool was_quoted = false;

	assert(url && cookie && *cookie);

	c = calloc(1, sizeof(struct cookie_internal_data));
	if (c == NULL)
		return NULL;

	/* -1 marks "no expiry seen" */
	c->expires = -1;

	name[0] = '\0';
	value[0] = '\0';

	for (cur = *cookie; *cur; cur++) {
		if (*cur == '\r' && *(cur + 1) == '\n') {
			/* End of header */
			if (quoted) {
				/* Unmatched quote encountered */

				/* Match Firefox 2.0.0.11 */
				value[0] = '\0';

			}
			break;
		} else if (*cur == '\r') {
			/* Spurious linefeed */
			continue;
		} else if (*cur == '\n') {
			/* Spurious newline */
			continue;
		}

		if (in_value && !had_value_data) {
			if (*cur == ' ' || *cur == '\t') {
				/* Strip leading whitespace from value */
				continue;
			} else {
				had_value_data = true;

				/* Value is taken verbatim if first non-space
				 * character is not a " */
				if (*cur != '"') {
					value_verbatim = true;
				}
			}
		}

		if (in_value && !value_verbatim && (*cur == '"')) {
			/* Only non-verbatim values may be quoted */
			if (cur == *cookie || *(cur - 1) != '\\') {
				/* Only unescaped quotes count */
				was_quoted = quoted;
				quoted = !quoted;

				continue;
			}
		}

		if (!quoted && !in_value && *cur == '=') {
			/* First equals => attr-value separator */
			in_value = true;
			continue;
		}

		if (!quoted && (was_quoted || *cur == ';')) {
			/* Semicolon or after quoted value
			 * => end of current avpair */

			/* NUL-terminate tokens */
			*n = '\0';
			*v = '\0';

			if (!urldb_parse_avpair(c, name, value, was_quoted)) {
				/* Memory exhausted */
				urldb_free_cookie(c);
				return NULL;
			}

			/* And reset to start */
			n = name;
			v = value;
			in_value = false;
			had_value_data = false;
			value_verbatim = false;
			was_quoted = false;

			/* Now, if the current input is anything other than a
			 * semicolon, we must be sure to reprocess it */
			if (*cur != ';') {
				cur--;
			}

			continue;
		}

		/* And now handle commas. These are a pain as they may mean
		 * any of the following:
		 *
		 * + End of cookie
		 * + Day separator in Expires avpair
		 * + (Invalid) comma in unquoted value
		 *
		 * Therefore, in order to handle all 3 cases (2 and 3 are
		 * identical, the difference being that 2 is in the spec and
		 * 3 isn't), we need to determine where the comma actually
		 * lies. We use the following heuristic:
		 *
		 * Given a comma at the current input position, find the
		 * immediately following semicolon (or end of input if none
		 * found). Then, consider the input characters between
		 * these two positions. If any of these characters is an
		 * '=', we must assume that the comma signified the end of
		 * the current cookie.
		 *
		 * This holds as the first avpair of any cookie must be
		 * NAME=VALUE, so the '=' is guaranteed to appear in the
		 * case where the comma marks the end of a cookie.
		 *
		 * This will fail, however, in the case where '=' appears in
		 * the value of the current avpair after the comma or the
		 * subsequent cookie does not start with NAME=VALUE. Neither
		 * of these is particularly likely and if they do occur, the
		 * website is more broken than we can be bothered to handle.
		 */
		if (!quoted && *cur == ',') {
			/* Find semi-colon, if any */
			const char *p;
			const char *semi = strchr(cur + 1, ';');
			if (!semi) {
				/* No semicolon: scan to the end of input,
				 * leaving out a trailing CRLF only when one
				 * is really there, so the scan limit can
				 * never fall before the comma itself */
				size_t rest = strlen(cur);

				semi = cur + rest;
				if (rest >= 3 &&
				    semi[-2] == '\r' && semi[-1] == '\n') {
					semi -= 2;
				}
			}

			/* Look for equals sign between comma and semi */
			for (p = cur + 1; p < semi; p++)
				if (*p == '=')
					break;

			if (p == semi) {
				/* none found => comma internal to value */
				/* do nothing */
			} else {
				/* found one => comma marks end of cookie */
				cur++;
				break;
			}
		}

		/* Accumulate into buffers, always leaving space for a NUL */
		/** \todo is silently truncating overlong names/values wise? */
		if (!in_value) {
			if (n < name + (sizeof(name) - 1))
				*n++ = *cur;
		} else {
			if (v < value + (sizeof(value) - 1))
				*v++ = *cur;
		}
	}

	/* Parse final avpair */
	*n = '\0';
	*v = '\0';

	if (!urldb_parse_avpair(c, name, value, was_quoted)) {
		/* Memory exhausted */
		urldb_free_cookie(c);
		return NULL;
	}

	/* Now fix-up default values */
	if (c->domain == NULL) {
		lwc_string *host = nsurl_get_component(url, NSURL_HOST);
		if (host == NULL) {
			urldb_free_cookie(c);
			return NULL;
		}
		c->domain = strdup(lwc_string_data(host));
		lwc_string_unref(host);
		if (c->domain == NULL) {
			/* Memory exhausted: never hand back a cookie with
			 * no domain, as it is dereferenced on insertion */
			urldb_free_cookie(c);
			return NULL;
		}
	}

	if (c->path == NULL) {
		const char *path_data;
		char *path, *slash;
		lwc_string *path_lwc;

		path_lwc = nsurl_get_component(url, NSURL_PATH);
		if (path_lwc == NULL) {
			urldb_free_cookie(c);
			return NULL;
		}
		path_data = lwc_string_data(path_lwc);

		/* Strip leafname and trailing slash (4.3.1) */
		slash = strrchr(path_data, '/');
		if (slash != NULL) {
			/* Special case: retain first slash in path */
			if (slash == path_data)
				slash++;

			slash = strndup(path_data, slash - path_data);
			if (slash == NULL) {
				lwc_string_unref(path_lwc);
				urldb_free_cookie(c);
				return NULL;
			}

			path = slash;
			lwc_string_unref(path_lwc);
		} else {
			path = strdup(lwc_string_data(path_lwc));
			lwc_string_unref(path_lwc);
			if (path == NULL) {
				urldb_free_cookie(c);
				return NULL;
			}
		}

		c->path = path;
	}

	/* Write back current position */
	*cookie = cur;

	return c;
}
/**
 * Add a path to the database, creating any intermediate entries
 *
 * \a path_query is split in place on '/' and is always freed before
 * returning. A NULL \a path_query (for example a failed allocation in
 * the caller) is reported as failure.
 *
 * \param scheme URL scheme associated with path
 * \param port Port number on host associated with path
 * \param host Host tree node to attach to
 * \param path_query Absolute path plus query to add (freed)
 * \param fragment URL fragment, or NULL
 * \param url URL (fragment ignored)
 * \return Pointer to leaf node, or NULL on memory exhaustion
 */
static struct path_data *
urldb_add_path(lwc_string *scheme,
	       unsigned int port,
	       const struct host_part *host,
	       char *path_query,
	       lwc_string *fragment,
	       nsurl *url)
{
	struct path_data *d, *e;
	char *buf = path_query;
	char *segment, *slash;
	bool match;

	assert(scheme && host && url);

	if (path_query == NULL) {
		/* Nothing to split; treat as memory exhaustion upstream */
		return NULL;
	}

	d = (struct path_data *) &host->paths;

	/* skip leading '/' */
	segment = buf;
	if (*segment == '/')
		segment++;

	/* Process path segments */
	do {
		slash = strchr(segment, '/');
		if (!slash) {
			/* last segment */
			/* look for existing entry */
			for (e = d->children; e; e = e->next)
				if (strcmp(segment, e->segment) == 0 &&
						lwc_string_isequal(scheme,
						e->scheme, &match) ==
						lwc_error_ok &&
						match == true &&
						e->port == port)
					break;

			/* Existing leaf only gains the fragment; otherwise
			 * a new leaf is created under d */
			d = e ? urldb_add_path_fragment(e, fragment) :
					urldb_add_path_node(scheme, port,
					segment, fragment, d);
			break;
		}

		/* Terminate this segment so it can be compared/copied */
		*slash = '\0';

		/* look for existing entry */
		for (e = d->children; e; e = e->next)
			if (strcmp(segment, e->segment) == 0 &&
					lwc_string_isequal(scheme, e->scheme,
						&match) == lwc_error_ok &&
					match == true &&
					e->port == port)
				break;

		d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d);
		if (!d)
			break;

		segment = slash + 1;
	} while (1);

	free(path_query);

	if (d && !d->url) {
		/* Insert defragmented URL */
		if (nsurl_defragment(url, &d->url) != NSERROR_OK)
			return NULL;
	}

	return d;
}
/**
 * Add a host to the database, creating any intermediate entries
 *
 * IP addresses are added verbatim as a child of the database root.
 * Other host names are split on '.' and inserted right to left. The
 * resulting leaf is also inserted into the matching search tree.
 *
 * \param host Hostname to add
 * \return Pointer to leaf node, or NULL on memory exhaustion
 */
static struct host_part *urldb_add_host(const char *host)
{
	struct host_part *d = (struct host_part *) &db_root, *e;
	struct search_node *s;
	char buf[256]; /* 256 bytes is sufficient - domain names are
			* limited to 255 chars. */
	char *part;

	assert(host);

	if (urldb__host_is_ip_address(host)) {
		/* Host is an IP, so simply add as TLD */

		/* Check for existing entry */
		for (e = d->children; e; e = e->next)
			if (strcasecmp(host, e->part) == 0)
				/* found => return it */
				return e;

		d = urldb_add_host_node(host, d);
		if (d == NULL) {
			/* Node creation failed; there is nothing to put
			 * in the search tree */
			return NULL;
		}

		s = urldb_search_insert(search_trees[ST_IP], d);
		if (!s) {
			/* failed */
			d = NULL;
		} else {
			search_trees[ST_IP] = s;
		}

		return d;
	}

	/* Copy host string, so we can corrupt it */
	strncpy(buf, host, sizeof buf);
	buf[sizeof buf - 1] = '\0';

	/* Process FQDN segments backwards */
	do {
		part = strrchr(buf, '.');
		if (!part) {
			/* last segment */
			/* Check for existing entry */
			for (e = d->children; e; e = e->next)
				if (strcasecmp(buf, e->part) == 0)
					break;

			if (e) {
				d = e;
			} else {
				d = urldb_add_host_node(buf, d);
			}

			/* And insert into search tree */
			if (d) {
				struct search_node **r;

				r = urldb_get_search_tree_direct(buf);
				s = urldb_search_insert(*r, d);
				if (!s) {
					/* failed */
					d = NULL;
				} else {
					*r = s;
				}
			}
			break;
		}

		/* Check for existing entry */
		for (e = d->children; e; e = e->next)
			if (strcasecmp(part + 1, e->part) == 0)
				break;

		d = e ? e : urldb_add_host_node(part + 1, d);
		if (!d)
			break;

		/* Chop the segment just handled off the working copy */
		*part = '\0';
	} while (1);

	return d;
}
/**
 * Insert a cookie into the database
 *
 * Domain cookies (domain starting with '.') are attached to the root
 * path of the host named after the dot. Other cookies are attached to
 * the path node for the cookie's path on the host. A cookie already
 * stored with the same domain, path and name is replaced, or simply
 * removed when the new cookie has an expiry time in the past.
 *
 * \param c The cookie to insert
 * \param scheme URL scheme associated with cookie path
 * \param url URL (sans fragment) associated with cookie
 * \return true on success, false on memory exhaustion (c will be freed)
 */
static bool
urldb_insert_cookie(struct cookie_internal_data *c,
		    lwc_string *scheme,
		    nsurl *url)
{
	struct cookie_internal_data *d;
	const struct host_part *h;
	struct path_data *p;
	time_t now = time(NULL);

	assert(c);

	if (c->domain[0] == '.') {
		h = urldb_search_find(
			urldb_get_search_tree(&(c->domain[1])),
			c->domain + 1);
		if (!h) {
			h = urldb_add_host(c->domain + 1);
			if (!h) {
				urldb_free_cookie(c);
				return false;
			}
		}

		p = (struct path_data *) &h->paths;
	} else {
		char *path_copy;

		/* Need to have a URL and scheme, if it's not a domain cookie */
		assert(url != NULL);
		assert(scheme != NULL);

		h = urldb_search_find(
				urldb_get_search_tree(c->domain),
				c->domain);

		if (!h) {
			h = urldb_add_host(c->domain);
			if (!h) {
				urldb_free_cookie(c);
				return false;
			}
		}

		/* urldb_add_path() consumes its path argument, so it gets
		 * a private copy; a failed copy is memory exhaustion */
		path_copy = strdup(c->path);
		if (path_copy == NULL) {
			urldb_free_cookie(c);
			return false;
		}

		/* find path */
		p = urldb_add_path(scheme, 0, h,
				path_copy, NULL, url);
		if (!p) {
			urldb_free_cookie(c);
			return false;
		}
	}

	/* add cookie */
	for (d = p->cookies; d; d = d->next) {
		if (!strcmp(d->domain, c->domain) &&
				!strcmp(d->path, c->path) &&
				!strcmp(d->name, c->name))
			break;
	}

	if (d) {
		if (c->expires != -1 && c->expires < now) {
			/* remove cookie */
			if (d->next)
				d->next->prev = d->prev;
			else
				p->cookies_end = d->prev;
			if (d->prev)
				d->prev->next = d->next;
			else
				p->cookies = d->next;

			cookie_manager_remove((struct cookie_data *)d);

			urldb_free_cookie(d);
			urldb_free_cookie(c);
		} else {
			/* replace d with c */
			c->prev = d->prev;
			c->next = d->next;
			if (c->next)
				c->next->prev = c;
			else
				p->cookies_end = c;
			if (c->prev)
				c->prev->next = c;
			else
				p->cookies = c;

			cookie_manager_remove((struct cookie_data *)d);
			urldb_free_cookie(d);

			cookie_manager_add((struct cookie_data *)c);
		}
	} else {
		/* No existing cookie: append to the end of the list */
		c->prev = p->cookies_end;
		c->next = NULL;
		if (p->cookies_end)
			p->cookies_end->next = c;
		else
			p->cookies = c;
		p->cookies_end = c;

		cookie_manager_add((struct cookie_data *)c);
	}

	return true;
}
/**
 * Concatenate a cookie into the provided buffer
 *
 * Output is written starting at offset (*used - 1) of the buffer, so
 * \a *used presumably counts the existing NUL terminator, which is
 * overwritten. The buffer is grown in 4096 byte steps until the worst
 * case length of this cookie is guaranteed to fit.
 *
 * \param c Cookie to concatenate
 * \param version The version of the cookie string to output
 * \param used Pointer to amount of buffer used (updated)
 * \param alloc Pointer to allocated size of buffer (updated)
 * \param buf Pointer to Pointer to buffer (updated)
 * \return true on success, false on memory exhaustion
 */
static bool
urldb_concat_cookie(struct cookie_internal_data *c,
		    int version,
		    int *used,
		    int *alloc,
		    char **buf)
{
	/* Combined (A)BNF for the Cookie: request header:
	 *
	 * CHAR           = <any US-ASCII character (octets 0 - 127)>
	 * CTL            = <any US-ASCII control character
	 *                  (octets 0 - 31) and DEL (127)>
	 * CR             = <US-ASCII CR, carriage return (13)>
	 * LF             = <US-ASCII LF, linefeed (10)>
	 * SP             = <US-ASCII SP, space (32)>
	 * HT             = <US-ASCII HT, horizontal-tab (9)>
	 * <">            = <US-ASCII double-quote mark (34)>
	 *
	 * CRLF           = CR LF
	 *
	 * LWS            = [CRLF] 1*( SP | HT )
	 *
	 * TEXT           = <any OCTET except CTLs,
	 *                  but including LWS>
	 *
	 * token          = 1*<any CHAR except CTLs or separators>
	 * separators     = "(" | ")" | "<" | ">" | "@"
	 *                | "," | ";" | ":" | "\" | <">
	 *                | "/" | "[" | "]" | "?" | "="
	 *                | "{" | "}" | SP | HT
	 *
	 * quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
	 * qdtext         = <any TEXT except <">>
	 * quoted-pair    = "\" CHAR
	 *
	 * attr            = token
	 * value           = word
	 * word            = token | quoted-string
	 *
	 * cookie          = "Cookie:" cookie-version
	 *                   1*((";" | ",") cookie-value)
	 * cookie-value    = NAME "=" VALUE [";" path] [";" domain]
	 * cookie-version  = "$Version" "=" value
	 * NAME            = attr
	 * VALUE           = value
	 * path            = "$Path" "=" value
	 * domain          = "$Domain" "=" value
	 *
	 * A note on quoted-string handling:
	 *   The cookie data stored in the db is verbatim (i.e. sans enclosing
	 *   <">, if any, and with all quoted-pairs intact) thus all that we
	 *   need to do here is ensure that value strings which were quoted
	 *   in Set-Cookie or which include any of the separators are quoted
	 *   before use.
	 *
	 * A note on cookie-value separation:
	 *   We use semicolons for all separators, including between
	 *   cookie-values. This simplifies things and is backwards compatible.
	 */
	const char * const separators = "()<>@,;:\\\"/[]?={} \t";

	int max_len;

	assert(c && used && alloc && buf && *buf);

	/* "; " cookie-value
	 * We allow for the possibility that values are quoted
	 */
	max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 +
			(c->path_from_set ?
				8 + strlen(c->path) + 2 : 0) +
			(c->domain_from_set ?
				10 + strlen(c->domain) + 2 : 0);

	if (*used + max_len >= *alloc) {
		int new_alloc = *alloc;
		char *temp;

		/* A single cookie may need more than one 4096 byte step,
		 * so keep stepping until the worst case really fits */
		do {
			new_alloc += 4096;
		} while (*used + max_len >= new_alloc);

		temp = realloc(*buf, new_alloc);
		if (!temp) {
			/* *buf is untouched and still owned by the caller */
			return false;
		}
		*buf = temp;
		*alloc = new_alloc;
	}

	if (version == COOKIE_NETSCAPE) {
		/* Original Netscape cookie */
		sprintf(*buf + *used - 1, "; %s=", c->name);
		*used += 2 + strlen(c->name) + 1;

		/* The Netscape spec doesn't mention quoting of cookie values.
		 * RFC 2109 $10.1.3 indicates that values must not be quoted.
		 *
		 * However, other browsers preserve quoting, so we should, too
		 */
		if (c->value_was_quoted) {
			sprintf(*buf + *used - 1, "\"%s\"", c->value);
			*used += 1 + strlen(c->value) + 1;
		} else {
			/** \todo should we %XX-encode [;HT,SP] ? */
			/** \todo Should we strip escaping backslashes? */
			sprintf(*buf + *used - 1, "%s", c->value);
			*used += strlen(c->value);
		}

		/* We don't send path/domain information -- that's what the
		 * Netscape spec suggests we should do, anyway. */
	} else {
		/* RFC2109 or RFC2965 cookie */
		sprintf(*buf + *used - 1, "; %s=", c->name);
		*used += 2 + strlen(c->name) + 1;

		/* Value needs quoting if it contains any separator or if
		 * it needs preserving from the Set-Cookie header */
		if (c->value_was_quoted ||
				strpbrk(c->value, separators) != NULL) {
			sprintf(*buf + *used - 1, "\"%s\"", c->value);
			*used += 1 + strlen(c->value) + 1;
		} else {
			sprintf(*buf + *used - 1, "%s", c->value);
			*used += strlen(c->value);
		}

		if (c->path_from_set) {
			/* Path, quoted if necessary */
			sprintf(*buf + *used - 1, "; $Path=");
			*used += 8;

			if (strpbrk(c->path, separators) != NULL) {
				sprintf(*buf + *used - 1, "\"%s\"", c->path);
				*used += 1 + strlen(c->path) + 1;
			} else {
				sprintf(*buf + *used - 1, "%s", c->path);
				*used += strlen(c->path);
			}
		}

		if (c->domain_from_set) {
			/* Domain, quoted if necessary */
			sprintf(*buf + *used - 1, "; $Domain=");
			*used += 10;

			if (strpbrk(c->domain, separators) != NULL) {
				sprintf(*buf + *used - 1, "\"%s\"", c->domain);
				*used += 1 + strlen(c->domain) + 1;
			} else {
				sprintf(*buf + *used - 1, "%s", c->domain);
				*used += strlen(c->domain);
			}
		}
	}

	return true;
}
/**
 * Delete a cookie from a path tree
 *
 * The tree below \a parent is walked iteratively; the first cookie
 * whose domain, path and name all match is unlinked from its node's
 * list and freed, after which the search stops.
 *
 * \param domain the cookie domain
 * \param path the cookie path
 * \param name The cookie name
 * \param parent The url data of the cookie
 */
static void
urldb_delete_cookie_paths(const char *domain,
			  const char *path,
			  const char *name,
			  struct path_data *parent)
{
	struct path_data *node = parent;
	struct cookie_internal_data *ck;

	assert(parent);

	do {
		ck = node->cookies;
		while (ck != NULL) {
			if (strcmp(ck->domain, domain) != 0 ||
			    strcmp(ck->path, path) != 0 ||
			    strcmp(ck->name, name) != 0) {
				/* Not the one we want */
				ck = ck->next;
				continue;
			}

			/* Unlink from the doubly linked list, fixing up
			 * the list head/tail where ck was first/last */
			if (ck->prev != NULL) {
				ck->prev->next = ck->next;
			} else {
				node->cookies = ck->next;
			}

			if (ck->next != NULL) {
				ck->next->prev = ck->prev;
			} else {
				node->cookies_end = ck->prev;
			}

			urldb_free_cookie(ck);
			return;
		}

		if (node->children != NULL) {
			node = node->children;
			continue;
		}

		/* Leaf: climb until a node with a sibling is found, or
		 * until we are back where we started */
		while (node != parent && node->next == NULL) {
			node = node->parent;
		}

		if (node != parent) {
			node = node->next;
		}
	} while (node != parent);
}
/**
 * Delete a cookie from a host tree and its associated paths
 *
 * The deletion is attempted on the paths of \a parent and then,
 * recursively, on every child host.
 *
 * \param domain the cookie domain
 * \param path the cookie path
 * \param name The cookie name
 * \param parent The url data of the cookie
 */
static void
urldb_delete_cookie_hosts(const char *domain,
			  const char *path,
			  const char *name,
			  struct host_part *parent)
{
	struct host_part *child;

	assert(parent);

	urldb_delete_cookie_paths(domain, path, name, &parent->paths);

	child = parent->children;
	while (child != NULL) {
		urldb_delete_cookie_hosts(domain, path, name, child);
		child = child->next;
	}
}
/**
 * Save a path subtree's cookies
 *
 * One tab-separated line is written per cookie. Session cookies
 * (expiry of -1) and cookies that have already expired are skipped.
 *
 * \param fp File pointer to write to
 * \param parent Parent path
 */
static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
{
	struct path_data *node = parent;
	time_t now = time(NULL);
	struct cookie_internal_data *ck;

	assert(fp && parent);

	do {
		for (ck = node->cookies; ck != NULL; ck = ck->next) {
			const char *scheme_str;
			const char *url_str;
			const char *comment_str;

			if (ck->expires == -1 || ck->expires < now) {
				/* Skip expired & session cookies */
				continue;
			}

			/* Placeholders keep the field count constant */
			scheme_str = node->scheme ?
					lwc_string_data(node->scheme) :
					"unused";
			url_str = node->url ?
					nsurl_access(node->url) :
					"unused";
			comment_str = ck->comment ? ck->comment : "";

			fprintf(fp,
				"%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t"
				"%s\t%s\t%d\t%s\t%s\t%s\n",
				ck->version, ck->domain,
				ck->domain_from_set, ck->path,
				ck->path_from_set, ck->secure,
				ck->http_only,
				(int)ck->expires, (int)ck->last_used,
				ck->no_destroy, ck->name, ck->value,
				ck->value_was_quoted,
				scheme_str,
				url_str,
				comment_str);
		}

		if (node->children != NULL) {
			node = node->children;
			continue;
		}

		/* Leaf: climb until a node with a sibling is found, or
		 * until we are back where we started */
		while (node != parent && node->next == NULL) {
			node = node->parent;
		}

		if (node != parent) {
			node = node->next;
		}
	} while (node != parent);
}
/**
 * Save a host subtree's cookies
 *
 * Writes the cookies held on the paths of \a parent, then recurses into
 * each child host.
 *
 * \param fp File pointer to write to
 * \param parent Parent host
 */
static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
{
	struct host_part *child;

	assert(fp && parent);

	urldb_save_cookie_paths(fp, &parent->paths);

	child = parent->children;
	while (child != NULL) {
		urldb_save_cookie_hosts(fp, child);
		child = child->next;
	}
}
/**
 * Destroy a cookie node, releasing the strings it owns
 *
 * \param c Cookie to destroy
 */
static void urldb_destroy_cookie(struct cookie_internal_data *c)
{
	/* Owned strings first, then the structure itself */
	free(c->comment);
	free(c->domain);
	free(c->path);
	free(c->name);
	free(c->value);

	free(c);
}
/**
 * Destroy the contents of a path node
 *
 * Releases the node's URL and scheme references, its segment, its
 * fragments, its title and all of its cookies.
 *
 * \param node Node to destroy contents of (does not destroy node)
 */
static void urldb_destroy_path_node_content(struct path_data *node)
{
	struct cookie_internal_data *cookie, *following;
	unsigned int f;

	if (node->url != NULL) {
		nsurl_unref(node->url);
	}

	if (node->scheme != NULL) {
		lwc_string_unref(node->scheme);
	}

	free(node->segment);

	f = 0;
	while (f < node->frag_cnt) {
		free(node->fragment[f]);
		f++;
	}
	free(node->fragment);

	free(node->urld.title);

	/* Fetch the successor before the cookie is destroyed */
	cookie = node->cookies;
	while (cookie != NULL) {
		following = cookie->next;
		urldb_destroy_cookie(cookie);
		cookie = following;
	}
}
/**
 * Destroy protection space data
 *
 * Drops the scheme reference and frees the realm, the auth details and
 * the structure itself.
 *
 * \param space Protection space to destroy
 */
static void urldb_destroy_prot_space(struct prot_space_data *space)
{
	lwc_string_unref(space->scheme);

	free(space->auth);
	free(space->realm);

	free(space);
}
/**
 * Destroy a path tree
 *
 * The subtree is walked without recursion. Each node has its contents
 * destroyed and is then freed, \a root included. The siblings of
 * \a root itself are not followed.
 *
 * \param root Root node of tree to destroy
 */
static void urldb_destroy_path_tree(struct path_data *root)
{
	struct path_data *p = root;

	do {
		if (p->children != NULL) {
			/* Descend first; nodes are only released once
			 * everything beneath them has been visited */
			p = p->children;
		} else {
			/* q is the node that is finished with and is
			 * waiting to be released */
			struct path_data *q = p;

			while (p != root) {
				if (p->next != NULL) {
					/* Continue with the sibling; q is
					 * released after the loop */
					p = p->next;
					break;
				}

				/* No sibling left: read the parent link
				 * before q is freed, then release q and
				 * make the parent the pending node */
				p = p->parent;

				urldb_destroy_path_node_content(q);
				free(q);

				q = p;
			}

			/* Release the pending node; this is the root
			 * itself once p has climbed back up to it */
			urldb_destroy_path_node_content(q);
			free(q);
		}
	} while (p != root);
}
/**
 * Destroy a host tree
 *
 * Child hosts are destroyed first, followed by the path trees hanging
 * off this host, the contents of the host's own root path node, its
 * protection spaces and finally the host node itself.
 *
 * \param root Root node of tree to destroy
 */
static void urldb_destroy_host_tree(struct host_part *root)
{
	struct host_part *child, *next_child;
	struct path_data *path, *next_path;
	struct prot_space_data *space, *next_space;

	/* Destroy children */
	child = root->children;
	while (child != NULL) {
		next_child = child->next;
		urldb_destroy_host_tree(child);
		child = next_child;
	}

	/* Now clean up paths */
	path = root->paths.children;
	while (path != NULL) {
		next_path = path->next;
		urldb_destroy_path_tree(path);
		path = next_path;
	}

	/* Root path: embedded in the host, so only its contents go */
	urldb_destroy_path_node_content(&root->paths);

	/* Protection space data */
	space = root->prot_space;
	while (space != NULL) {
		next_space = space->next;
		urldb_destroy_prot_space(space);
		space = next_space;
	}

	/* And ourselves */
	free(root->part);
	free(root);
}
/**
 * Destroy a search tree
 *
 * Both subtrees are destroyed recursively before the node itself is
 * freed. The shared empty sentinel is never freed.
 *
 * \param root Root node of tree to destroy
 */
static void urldb_destroy_search_tree(struct search_node *root)
{
	struct search_node *left = root->left;
	struct search_node *right = root->right;

	if (left != &empty) {
		urldb_destroy_search_tree(left);
	}

	if (right != &empty) {
		urldb_destroy_search_tree(right);
	}

	free(root);
}
/*************** External interface ***************/
/* exported interface documented in content/urldb.h */
void urldb_destroy(void)
{
	struct host_part *host, *next_host;
	int t;

	/* Clean up search trees, resetting each to the empty sentinel */
	for (t = 0; t < NUM_SEARCH_TREES; t++) {
		if (search_trees[t] == &empty) {
			continue;
		}

		urldb_destroy_search_tree(search_trees[t]);
		search_trees[t] = &empty;
	}

	/* And database */
	host = db_root.children;
	while (host != NULL) {
		next_host = host->next;
		urldb_destroy_host_tree(host);
		host = next_host;
	}

	/* The root is not heap allocated here; just wipe it */
	memset(&db_root, 0, sizeof(db_root));

	/* And the bloom filter */
	if (url_bloom != NULL) {
		bloom_destroy(url_bloom);
		url_bloom = NULL;
	}
}
/* exported interface documented in netsurf/url_db.h */
nserror urldb_load(const char *filename)
{
#define MAXIMUM_URL_LENGTH 4096
char s[MAXIMUM_URL_LENGTH];
char host[256];
struct host_part *h;
int urls;
int i;
int version;
int length;
FILE *fp;
assert(filename);
NSLOG(netsurf, INFO, "Loading URL file %s", filename);
if (url_bloom == NULL)
url_bloom = bloom_create(BLOOM_SIZE);
fp = fopen(filename, "r");
if (!fp) {
NSLOG(netsurf, INFO, "Failed to open file '%s' for reading",
filename);
return NSERROR_NOT_FOUND;
}
if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
fclose(fp);
return NSERROR_NEED_DATA;
}
version = atoi(s);
if (version < MIN_URL_FILE_VERSION) {
NSLOG(netsurf, INFO, "Unsupported URL file version.");
fclose(fp);
return NSERROR_INVALID;
}
if (version > URL_FILE_VERSION) {
NSLOG(netsurf, INFO, "Unknown URL file version.");
fclose(fp);
return NSERROR_INVALID;
}
while (fgets(host, sizeof host, fp)) {
time_t hsts_expiry = 0;
int hsts_include_sub_domains = 0;
/* get the hostname */
length = strlen(host) - 1;
host[length] = '\0';
/* skip data that has ended up with a host of '' */
if (length == 0) {
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
break;
urls = atoi(s);
/* Eight fields/url */
for (i = 0; i < (8 * urls); i++) {
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
break;
}
continue;
}
if (version >= 107) {
char *p = host;
while (*p && *p != ' ') p++;
while (*p && *p == ' ') { *p = '\0'; p++; }
hsts_include_sub_domains = (*p == '1');
while (*p && *p != ' ') p++;
while (*p && *p == ' ') p++;
nsc_snptimet(p, strlen(p), &hsts_expiry);
}
h = urldb_add_host(host);
if (!h) {
NSLOG(netsurf, INFO, "Failed adding host: '%s'", host);
fclose(fp);
return NSERROR_NOMEM;
}
h->hsts.expires = hsts_expiry;
h->hsts.include_sub_domains = hsts_include_sub_domains;
/* read number of URLs */
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
break;
urls = atoi(s);
/* no URLs => try next host */
if (urls == 0) {
NSLOG(netsurf, INFO, "No URLs for '%s'", host);
continue;
}
/* load the non-corrupt data */
for (i = 0; i < urls; i++) {
struct path_data *p = NULL;
char scheme[64], ports[10];
char url[64 + 3 + 256 + 6 + 4096 + 1 + 1];
unsigned int port;
bool is_file = false;
nsurl *nsurl;
lwc_string *scheme_lwc, *fragment_lwc;
char *path_query;
size_t len;
if (!fgets(scheme, sizeof scheme, fp))
break;
length = strlen(scheme) - 1;
scheme[length] = '\0';
if (!fgets(ports, sizeof ports, fp))
break;
length = strlen(ports) - 1;
ports[length] = '\0';
port = atoi(ports);
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
break;
length = strlen(s) - 1;
s[length] = '\0';
if (!strcasecmp(host, "localhost") &&
!strcasecmp(scheme, "file"))
is_file = true;
snprintf(url, sizeof url, "%s://%s%s%s%s",
scheme,
/* file URLs have no host */
(is_file ? "" : host),
(port ? ":" : ""),
(port ? ports : ""),
s);
/* TODO: store URLs in pre-parsed state, and make
* a nsurl_load to generate the nsurl more
* swiftly.
* Need a nsurl_save too.
*/
if (nsurl_create(url, &nsurl) != NSERROR_OK) {
NSLOG(netsurf, INFO, "Failed inserting '%s'",
url);
fclose(fp);
return NSERROR_NOMEM;
}
if (url_bloom != NULL) {
uint32_t hash = nsurl_hash(nsurl);
bloom_insert_hash(url_bloom, hash);
}
/* Copy and merge path/query strings */
if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY,
&path_query, &len) != NSERROR_OK) {
NSLOG(netsurf, INFO, "Failed inserting '%s'",
url);
fclose(fp);
return NSERROR_NOMEM;
}
scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME);
fragment_lwc = nsurl_get_component(nsurl,
NSURL_FRAGMENT);
p = urldb_add_path(scheme_lwc, port, h, path_query,
fragment_lwc, nsurl);
if (!p) {
NSLOG(netsurf, INFO, "Failed inserting '%s'",
url);
fclose(fp);
return NSERROR_NOMEM;
}
nsurl_unref(nsurl);
lwc_string_unref(scheme_lwc);
if (fragment_lwc != NULL)
lwc_string_unref(fragment_lwc);
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
break;
if (p)
p->urld.visits = (unsigned int)atoi(s);
/* entry last use time */
if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
break;
}
if (p) {
nsc_snptimet(s, strlen(s) - 1, &p->urld.last_visit);
}
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
break;
if (p)
p->urld.type = (content_type)atoi(s);
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
break;
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
break;
length = strlen(s) - 1;
if (p && length > 0) {
s[length] = '\0';
p->urld.title = malloc(length + 1);
if (p->urld.title)
memcpy(p->urld.title, s, length + 1);
}
}
}
fclose(fp);
NSLOG(netsurf, INFO, "Successfully loaded URL file");
#undef MAXIMUM_URL_LENGTH
return NSERROR_OK;
}
/* exported interface documented in netsurf/url_db.h */
/*
 * Write the URL database to a file.
 *
 * The file begins with the format version number on a line of its own,
 * followed by whatever urldb_save_search_tree() emits for each search tree.
 *
 * \param filename  Name of the file to write; must not be NULL.
 * \return NSERROR_OK on success, NSERROR_SAVE_FAILED if the file could not
 *         be opened or an output error was detected.
 */
nserror urldb_save(const char *filename)
{
	FILE *fp;
	int i;
	bool failed;

	assert(filename);

	fp = fopen(filename, "w");
	if (!fp) {
		NSLOG(netsurf, INFO, "Failed to open file '%s' for writing",
		      filename);
		return NSERROR_SAVE_FAILED;
	}

	/* file format version number */
	fprintf(fp, "%d\n", URL_FILE_VERSION);

	for (i = 0; i != NUM_SEARCH_TREES; i++) {
		urldb_save_search_tree(search_trees[i], fp);
	}

	/* Output is buffered, so a failure may only surface when the stream
	 * is flushed: consult both the error flag and fclose()'s result. */
	failed = (ferror(fp) != 0);
	if (fclose(fp) != 0) {
		failed = true;
	}

	if (failed) {
		NSLOG(netsurf, INFO, "Failed writing file '%s'", filename);
		return NSERROR_SAVE_FAILED;
	}

	return NSERROR_OK;
}
/* exported interface documented in content/urldb.h */
/*
 * Set the persistence flag of the database entry for a URL.
 *
 * \param url      URL to look up; must not be NULL.
 * \param persist  Value to store in the entry's persistent flag.
 * \return NSERROR_OK if the entry was updated, NSERROR_NOT_FOUND if the
 *         URL has no entry in the database.
 */
nserror urldb_set_url_persistence(nsurl *url, bool persist)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry != NULL) {
		entry->persistent = persist;
		return NSERROR_OK;
	}

	return NSERROR_NOT_FOUND;
}
/* exported interface documented in content/urldb.h */
/*
 * Insert a URL into the database.
 *
 * The URL's hash is recorded in the bloom filter (created on first use),
 * then a host entry and a path entry are added for it.  A URL without a
 * host component is only accepted when its scheme is "file", in which case
 * it is filed under the host name "localhost".
 *
 * \param url  URL to insert; must not be NULL.
 * \return true if a path entry exists for the URL afterwards, false on
 *         failure.
 */
bool urldb_add_url(nsurl *url)
{
	struct host_part *h;
	struct path_data *p;
	lwc_string *scheme;
	lwc_string *port;
	lwc_string *host;
	lwc_string *fragment;
	const char *host_str;
	char *path_query = NULL;
	size_t len;
	bool match;
	unsigned int port_int;

	assert(url);

	if (url_bloom == NULL)
		url_bloom = bloom_create(BLOOM_SIZE);

	if (url_bloom != NULL) {
		uint32_t hash = nsurl_hash(url);
		bloom_insert_hash(url_bloom, hash);
	}

	/* Copy and merge path/query strings */
	if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
	    NSERROR_OK) {
		return false;
	}
	assert(path_query != NULL);

	scheme = nsurl_get_component(url, NSURL_SCHEME);
	if (scheme == NULL) {
		free(path_query);
		return false;
	}

	host = nsurl_get_component(url, NSURL_HOST);
	if (host != NULL) {
		/* Keep our reference until the host entry has been created,
		 * so host_str is guaranteed to stay valid while in use. */
		host_str = lwc_string_data(host);
	} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
		   lwc_error_ok && match == true) {
		host_str = "localhost";
	} else {
		lwc_string_unref(scheme);
		free(path_query);
		return false;
	}

	fragment = nsurl_get_component(url, NSURL_FRAGMENT);

	port = nsurl_get_component(url, NSURL_PORT);
	if (port != NULL) {
		port_int = atoi(lwc_string_data(port));
		lwc_string_unref(port);
	} else {
		port_int = 0;
	}

	/* Get host entry */
	h = urldb_add_host(host_str);

	if (host != NULL)
		lwc_string_unref(host);

	/* Get path entry */
	if (h != NULL) {
		/* NOTE(review): urldb_add_path() is assumed to take ownership
		 * of path_query, since no caller in this file frees it after
		 * the call - confirm against its definition. */
		p = urldb_add_path(scheme,
				   port_int,
				   h,
				   path_query,
				   fragment,
				   url);
	} else {
		/* urldb_add_path() is never reached on this path, so the
		 * merged path/query string must be released here. */
		free(path_query);
		p = NULL;
	}

	lwc_string_unref(scheme);
	if (fragment != NULL)
		lwc_string_unref(fragment);

	return (p != NULL);
}
/* exported interface documented in content/urldb.h */
/*
 * Replace the title stored for a URL.
 *
 * The title is duplicated; the caller keeps ownership of \a title.  Passing
 * NULL clears the stored title.  The previous title is only released once
 * the copy has been made successfully.
 *
 * \param url    URL to look up; must not be NULL.
 * \param title  New title, or NULL for none.
 * \return NSERROR_OK on success, NSERROR_NOT_FOUND if the URL has no
 *         entry, NSERROR_NOMEM if the title could not be copied.
 */
nserror urldb_set_url_title(nsurl *url, const char *title)
{
	struct path_data *entry;
	char *copy = NULL;

	assert(url);

	entry = urldb_find_url(url);
	if (entry == NULL)
		return NSERROR_NOT_FOUND;

	if (title != NULL) {
		copy = strdup(title);
		if (copy == NULL)
			return NSERROR_NOMEM;
	}

	free(entry->urld.title);
	entry->urld.title = copy;

	return NSERROR_OK;
}
/* exported interface documented in content/urldb.h */
/*
 * Record the content type associated with a URL.
 *
 * \param url   URL to look up; must not be NULL.
 * \param type  Content type to store in the entry.
 * \return NSERROR_OK if the entry was updated, NSERROR_NOT_FOUND if the
 *         URL has no entry in the database.
 */
nserror urldb_set_url_content_type(nsurl *url, content_type type)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry == NULL)
		return NSERROR_NOT_FOUND;

	entry->urld.type = type;

	return NSERROR_OK;
}
/* exported interface documented in content/urldb.h */
/*
 * Register a visit to a URL: the last-visit time becomes the current time
 * and the visit counter is incremented.
 *
 * \param url  URL to look up; must not be NULL.
 * \return NSERROR_OK if the entry was updated, NSERROR_NOT_FOUND if the
 *         URL has no entry in the database.
 */
nserror urldb_update_url_visit_data(nsurl *url)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry == NULL)
		return NSERROR_NOT_FOUND;

	entry->urld.last_visit = time(NULL);
	entry->urld.visits++;

	return NSERROR_OK;
}
/* exported interface documented in content/urldb.h */
/*
 * Clear the visit history of a URL: the last-visit time and the visit
 * counter are both set to zero.  Nothing happens if the URL has no entry.
 *
 * \param url  URL to look up; must not be NULL.
 */
void urldb_reset_url_visit_data(nsurl *url)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry != NULL) {
		entry->urld.last_visit = (time_t)0;
		entry->urld.visits = 0;
	}
}
/* exported interface documented in netsurf/url_db.h */
/*
 * Obtain the data stored for a URL.
 *
 * The returned pointer refers to the entry's internal record, viewed
 * through the public url_data type; it is not a copy.
 *
 * \param url  URL to look up; must not be NULL.
 * \return Pointer to the URL's data, or NULL if the URL has no entry.
 */
const struct url_data *urldb_get_url_data(nsurl *url)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry == NULL)
		return NULL;

	return (const struct url_data *) &entry->urld;
}
/* exported interface documented in content/urldb.h */
/*
 * Obtain the URL object held by the database for a given URL.
 *
 * No additional reference is taken on the returned object here.
 *
 * \param url  URL to look up; must not be NULL.
 * \return The stored URL, or NULL if the URL has no entry.
 */
nsurl *urldb_get_url(nsurl *url)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);

	return (entry != NULL) ? entry->url : NULL;
}
/* exported interface documented in netsurf/url_db.h */
/*
 * Store HTTP authentication details for a URL.
 *
 * The URL is added to the database if necessary.  The host's list of
 * protection spaces is searched for one with the same realm, scheme and
 * port: if found its credentials are replaced, otherwise a new protection
 * space is created and linked at the head of the list.  The URL's entry is
 * then pointed at that protection space.
 *
 * Both strings are duplicated.  On allocation failure the function returns
 * silently and leaves existing data untouched.
 *
 * \param url    URL the details apply to; must not be NULL.
 * \param realm  Authentication realm; must not be NULL.
 * \param auth   Credentials string; must not be NULL.
 */
void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth)
{
	struct path_data *p, *pi;
	struct host_part *h;
	struct prot_space_data *space, *space_alloc;
	char *realm_alloc, *auth_alloc;
	bool match;

	assert(url && realm && auth);

	/* add url, in case it's missing */
	urldb_add_url(url);

	p = urldb_find_url(url);
	if (!p)
		return;

	/* Search for host_part: climb to the top-most path node, which is
	 * then reinterpreted as its host_part.
	 * NOTE(review): relies on the root path_data being the leading
	 * member of struct host_part - confirm against the declaration. */
	for (pi = p; pi->parent != NULL; pi = pi->parent)
		;
	h = (struct host_part *)pi;

	/* Search if given URL belongs to a protection space we already know of. */
	for (space = h->prot_space; space; space = space->next) {
		if (!strcmp(space->realm, realm) &&
		    lwc_string_isequal(space->scheme, p->scheme,
				       &match) == lwc_error_ok &&
		    match == true &&
		    space->port == p->port)
			break;
	}

	if (space != NULL) {
		/* Overrule existing auth.  Duplicate before freeing so that
		 * an allocation failure cannot leave the protection space
		 * with a NULL credentials pointer. */
		auth_alloc = strdup(auth);
		if (auth_alloc == NULL)
			return;

		free(space->auth);
		space->auth = auth_alloc;
	} else {
		/* Create a new protection space. */
		space = space_alloc = malloc(sizeof(struct prot_space_data));
		realm_alloc = strdup(realm);
		auth_alloc = strdup(auth);

		if (!space_alloc || !realm_alloc || !auth_alloc) {
			free(space_alloc);
			free(realm_alloc);
			free(auth_alloc);
			return;
		}

		space->scheme = lwc_string_ref(p->scheme);
		space->port = p->port;
		space->realm = realm_alloc;
		space->auth = auth_alloc;
		space->next = h->prot_space;
		h->prot_space = space;
	}

	p->prot_space = space;
}
/* exported interface documented in netsurf/url_db.h */
/*
 * Look up the HTTP authentication details that apply to a URL.
 *
 * The URL is added to the database so the lookup can succeed.  The URL's
 * entry and each of its ancestors are checked for an attached protection
 * space; the first one found supplies the result.  Failing that, and only
 * if a realm was given, the host's protection spaces are searched for one
 * matching realm, scheme and port; a match is cached on the URL's entry.
 *
 * \param url    URL to look up; must not be NULL.
 * \param realm  Realm to match, or NULL to skip the realm-based search.
 * \return The stored credentials string, or NULL if none apply.
 */
const char *urldb_get_auth_details(nsurl *url, const char *realm)
{
	struct path_data *entry, *node, *root = NULL;
	const struct host_part *host;
	const struct prot_space_data *space;
	bool match;

	assert(url);

	/* add to the db, so our lookup will work */
	urldb_add_url(url);

	entry = urldb_find_url(url);
	if (entry == NULL)
		return NULL;

	/* Walk from the entry up through its parents, remembering the last
	 * node visited so the top of the chain is known afterwards. */
	node = entry;
	while (node != NULL) {
		if (node->prot_space)
			return node->prot_space->auth;

		root = node;
		node = node->parent;
	}

	/* Without a realm the protection space cannot be identified. */
	if (realm == NULL)
		return NULL;

	/* The top of the path chain is reinterpreted as the host entry. */
	host = (const struct host_part *)root;

	/* Search for a possible matching protection space. */
	for (space = host->prot_space; space != NULL; space = space->next) {
		if (strcmp(space->realm, realm) != 0)
			continue;

		if (lwc_string_isequal(space->scheme, entry->scheme,
				       &match) != lwc_error_ok ||
		    match != true)
			continue;

		if (space->port != entry->port)
			continue;

		entry->prot_space = space;
		return entry->prot_space->auth;
	}

	return NULL;
}
/* exported interface documented in netsurf/url_db.h */
/*
 * Set whether invalid certificates are tolerated for a URL's host.
 *
 * The URL is added to the database if missing; the flag is then stored on
 * the host entry found at the top of the URL's path chain.
 *
 * \param url     URL whose host is affected; must not be NULL.
 * \param permit  Value to store in the host's permit_invalid_certs flag.
 */
void urldb_set_cert_permissions(nsurl *url, bool permit)
{
	struct path_data *node;
	struct host_part *host;

	assert(url);

	/* add url, in case it's missing */
	urldb_add_url(url);

	node = urldb_find_url(url);
	if (node == NULL)
		return;

	/* Climb to the top-most path node */
	while (node->parent != NULL)
		node = node->parent;

	assert(node);

	host = (struct host_part *)node;
	host->permit_invalid_certs = permit;
}
/* exported interface documented in content/urldb.h */
/*
 * Report whether invalid certificates are tolerated for a URL's host.
 *
 * Unlike the setter, the URL is not added to the database: an unknown URL
 * simply yields false.
 *
 * \param url  URL to look up; must not be NULL.
 * \return The host's permit_invalid_certs flag, or false if the URL has
 *         no entry.
 */
bool urldb_get_cert_permissions(nsurl *url)
{
	struct path_data *node;
	const struct host_part *host;

	assert(url);

	node = urldb_find_url(url);
	if (node == NULL)
		return false;

	/* Climb to the top-most path node */
	while (node->parent != NULL)
		node = node->parent;

	assert(node);

	host = (const struct host_part *)node;

	return host->permit_invalid_certs;
}
/* exported interface documented in content/urldb.h */
/*
 * Update a host's HSTS policy from a Strict-Transport-Security header.
 *
 * The header is ignored (and true returned) when the URL has no host, a
 * blank host or an IP-address host, when the host permits invalid
 * certificates, or when the header fails to parse.  Otherwise the
 * include-subdomains flag is taken from the header; a max-age of zero
 * clears the policy, and a non-zero max-age only ever extends the expiry.
 *
 * \param url     URL the header was received for; must not be NULL.
 * \param header  Header value handed to the HSTS parser.
 * \return false if the URL could not be found after adding it, true
 *         otherwise.
 */
bool urldb_set_hsts_policy(struct nsurl *url, const char *header)
{
	struct path_data *node;
	struct host_part *domain;
	lwc_string *host;
	time_t now = time(NULL);
	http_strict_transport_security *sts;
	uint32_t max_age = 0;
	bool unusable_host;

	assert(url);

	host = nsurl_get_component(url, NSURL_HOST);
	if (host == NULL) {
		/* No host part: ignore */
		return true;
	}

	/* Host is IP or blank: ignore */
	unusable_host = urldb__host_is_ip_address(lwc_string_data(host)) ||
			lwc_string_length(host) == 0;
	lwc_string_unref(host);
	if (unusable_host)
		return true;

	/* add url, in case it's missing */
	urldb_add_url(url);

	node = urldb_find_url(url);
	if (node == NULL)
		return false;

	/* Climb to the top-most path node */
	while (node->parent != NULL)
		node = node->parent;

	assert(node);

	domain = (struct host_part *)node;

	if (domain->permit_invalid_certs) {
		/* Transport is tainted: ignore */
		return true;
	}

	if (http_parse_strict_transport_security(header, &sts) != NSERROR_OK) {
		/* Parse failed: ignore */
		return true;
	}

	domain->hsts.include_sub_domains =
		http_strict_transport_security_include_subdomains(sts);

	max_age = http_strict_transport_security_max_age(sts);
	if (max_age != 0) {
		/* Only ever push the expiry further into the future */
		if ((time_t) (now + max_age) > domain->hsts.expires) {
			domain->hsts.expires = now + max_age;
		}
	} else {
		/* Zero max-age withdraws the policy */
		domain->hsts.expires = 0;
		domain->hsts.include_sub_domains = false;
	}

	http_strict_transport_security_destroy(sts);

	return true;
}
/* exported interface documented in content/urldb.h */
/*
 * Determine whether an unexpired HSTS policy covers a URL's host.
 *
 * URLs without a host, with a blank host or with an IP-address host are
 * never covered.  The host's own record is consulted first, then each
 * parent domain below the database root, where a record only counts if it
 * includes subdomains.
 *
 * \param url  URL to test; must not be NULL.
 * \return true if an unexpired policy applies, false otherwise.
 */
bool urldb_get_hsts_enabled(struct nsurl *url)
{
	struct path_data *node;
	const struct host_part *domain;
	lwc_string *host;
	time_t now = time(NULL);
	bool unusable_host;

	assert(url);

	host = nsurl_get_component(url, NSURL_HOST);
	if (host == NULL) {
		/* No host part: not enabled */
		return false;
	}

	/* Host is IP or blank: not enabled */
	unusable_host = urldb__host_is_ip_address(lwc_string_data(host)) ||
			lwc_string_length(host) == 0;
	lwc_string_unref(host);
	if (unusable_host)
		return false;

	/* The URL must exist in the db in order to find HSTS policy, since
	 * we search up the tree from the URL node, and policy from further
	 * up may also apply. */
	urldb_add_url(url);

	node = urldb_find_url(url);
	if (node == NULL)
		return false;

	/* Climb to the top-most path node */
	while (node->parent != NULL)
		node = node->parent;

	assert(node);

	domain = (const struct host_part *)node;

	/* Consult record for this host */
	if (domain->hsts.expires > now) {
		/* Not expired */
		return true;
	}

	/* Consult parent domains */
	domain = domain->parent;
	while (domain != NULL && domain != &db_root) {
		if (domain->hsts.expires > now &&
		    domain->hsts.include_sub_domains) {
			/* Not expired and subdomains included */
			return true;
		}
		domain = domain->parent;
	}

	return false;
}
/* exported interface documented in netsurf/url_db.h */
/*
 * Iterate over database entries matching a partially typed URL.
 *
 * Any "scheme://" prefix is stripped first.  If the remainder contains a
 * '/', the text before it is looked up as a host name and the text after
 * it is used as a path prefix.  Otherwise the remainder is treated as a
 * host prefix.  In both cases, unless the text already begins with
 * "www.", a second attempt is made with "www." prepended.
 *
 * \param prefix    Text to match; must not be NULL.
 * \param callback  Invoked for matches; must not be NULL.
 */
void
urldb_iterate_partial(const char *prefix,
		      bool (*callback)(nsurl *url, const struct url_data *data))
{
	char host[256];
	char buf[260]; /* max domain + "www." */
	const char *slash, *scheme_sep;
	struct search_node *tree;
	const struct host_part *h;
	int len;

	assert(prefix && callback);

	/* strip scheme */
	scheme_sep = strstr(prefix, "://");
	if (scheme_sep)
		prefix = scheme_sep + 3;

	slash = strchr(prefix, '/');

	tree = urldb_get_search_tree(prefix);

	if (slash == NULL) {
		/* No slash: looking for hosts */
		len = strlen(prefix);

		if (!urldb_iterate_partial_host(tree, prefix, callback))
			return;

		if (len > 3 && strncasecmp(prefix, "www.", 4) == 0) {
			/* Already a www. prefix: nothing more to try */
			return;
		}

		/* now look for www.prefix */
		snprintf(buf, sizeof buf, "www.%s", prefix);
		urldb_iterate_partial_host(search_trees[ST_DN + 'w' - 'a'],
					   buf, callback);
		return;
	}

	/* There's a slash in the input, so assume we're looking for a path
	 * below the host named before it. */
	len = slash - prefix;
	snprintf(host, sizeof host, "%.*s", (int) (slash - prefix), prefix);

	h = urldb_search_find(tree, host);
	if (h == NULL) {
		if (len > 3 && strncasecmp(host, "www.", 4) == 0) {
			/* Already a www. host: nothing more to try */
			return;
		}

		snprintf(buf, sizeof buf, "www.%s", host);
		h = urldb_search_find(search_trees[ST_DN + 'w' - 'a'], buf);
		if (h == NULL)
			return;
	}

	if (h->paths.children) {
		/* Have paths, iterate them */
		urldb_iterate_partial_path(&h->paths, slash + 1, callback);
	}
}
/* exported interface documented in netsurf/url_db.h */
/*
 * Iterate over the URL entries of every search tree.
 *
 * Trees are visited in index order; iteration stops as soon as the
 * per-tree iterator returns false.
 *
 * \param callback  Invoked with each URL and its data; must not be NULL.
 */
void
urldb_iterate_entries(bool (*callback)(nsurl *url, const struct url_data *data))
{
	int tree = 0;

	assert(callback);

	while (tree < NUM_SEARCH_TREES) {
		if (!urldb_iterate_entries_host(search_trees[tree],
						callback,
						NULL)) {
			return;
		}
		tree++;
	}
}
/* exported interface documented in content/urldb.h */
/*
 * Iterate over the cookies of every search tree.
 *
 * Trees are visited in index order; iteration stops as soon as the
 * per-tree iterator returns false.
 *
 * \param callback  Invoked with each cookie's data; must not be NULL.
 */
void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data))
{
	int tree = 0;

	assert(callback);

	while (tree < NUM_SEARCH_TREES) {
		if (!urldb_iterate_entries_host(search_trees[tree],
						NULL, callback)) {
			return;
		}
		tree++;
	}
}
/* exported interface documented in content/urldb.h */
/*
 * Parse a Set-Cookie header and store the cookies it contains.
 *
 * When a referer is supplied, the URL's host must equal the referer's host
 * or share a dotted suffix with it (see the inline notes).  Each cookie
 * parsed from the header is then validated: it needs a name and value, its
 * path must prefix the URL's path, its domain must be acceptable, and the
 * URL's host must domain-match the cookie's domain.  Processing stops at
 * the first cookie that fails to parse or validate.
 *
 * \param header   Header value to parse; must not be NULL.
 * \param url      URL the header was received from; must not be NULL.
 * \param referer  Referring URL, or NULL to skip the referer check.
 * \return true if every cookie in the header was stored, false otherwise.
 */
bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
{
	const char *cur = header, *end;
	lwc_string *path, *host, *scheme;
	nsurl *urlt;
	bool match;
	size_t header_len;

	assert(url && header);

	/* Get defragmented URL, as 'urlt' */
	if (nsurl_defragment(url, &urlt) != NSERROR_OK)
		return false;

	scheme = nsurl_get_component(url, NSURL_SCHEME);
	if (scheme == NULL) {
		nsurl_unref(urlt);
		return false;
	}

	path = nsurl_get_component(url, NSURL_PATH);
	if (path == NULL) {
		lwc_string_unref(scheme);
		nsurl_unref(urlt);
		return false;
	}

	host = nsurl_get_component(url, NSURL_HOST);
	if (host == NULL) {
		lwc_string_unref(path);
		lwc_string_unref(scheme);
		nsurl_unref(urlt);
		return false;
	}

	if (referer) {
		lwc_string *rhost;

		/* Ensure that url's host name domain matches
		 * referer's (4.3.5) */
		rhost = nsurl_get_component(referer, NSURL_HOST);
		if (rhost == NULL) {
			goto error;
		}

		/* Domain match host names */
		if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok &&
		    match == false) {
			const char *hptr;
			const char *rptr;
			const char *dot;
			const char *host_data = lwc_string_data(host);
			const char *rhost_data = lwc_string_data(rhost);

			/* Ensure neither host nor rhost are IP addresses */
			if (urldb__host_is_ip_address(host_data) ||
			    urldb__host_is_ip_address(rhost_data)) {
				/* IP address, so no partial match */
				lwc_string_unref(rhost);
				goto error;
			}

			/* Not exact match, so try the following:
			 *
			 * 1) Find the longest common suffix of host and rhost
			 *    (may be all of host/rhost)
			 * 2) Discard characters from the start of the suffix
			 *    until the suffix starts with a dot
			 *    (prevents foobar.com matching bar.com)
			 * 3) Ensure the suffix is non-empty and contains
			 *    embedded dots (to avoid permitting .com as a
			 *    suffix)
			 *
			 * Note that the above in no way resembles the
			 * domain matching algorithm found in RFC2109.
			 * It does, however, model the real world rather
			 * more accurately.
			 */

			/** \todo In future, we should consult a TLD service
			 * instead of just looking for embedded dots.
			 */

			hptr = host_data + lwc_string_length(host) - 1;
			rptr = rhost_data + lwc_string_length(rhost) - 1;

			/* 1 */
			while (hptr >= host_data && rptr >= rhost_data) {
				if (*hptr != *rptr)
					break;
				hptr--;
				rptr--;
			}
			/* Ensure we end up pointing at the start of the
			 * common suffix. The above loop will exit pointing
			 * to the byte before the start of the suffix. */
			hptr++;

			/* 2 */
			while (*hptr != '\0' && *hptr != '.')
				hptr++;

			/* 3 */
			if (*hptr == '\0' ||
			    (dot = strchr(hptr + 1, '.')) == NULL ||
			    *(dot + 1) == '\0') {
				lwc_string_unref(rhost);
				goto error;
			}
		}

		lwc_string_unref(rhost);
	}

	/* Stop two bytes short of the end to allow for a trailing CRLF.
	 * Clamp for headers shorter than that, so the end pointer is never
	 * formed before the start of the buffer. */
	header_len = strlen(cur);
	end = cur + (header_len >= 2 ? header_len - 2 : 0);

	do {
		struct cookie_internal_data *c;
		char *dot;
		size_t len;
#ifdef WITH_NSPSL
		const char *suffix;
#endif

		c = urldb_parse_cookie(url, &cur);
		if (!c) {
			/* failed => stop parsing */
			goto error;
		}

		/* validate cookie */

		/* 4.2.2:i Cookie must have NAME and VALUE */
		if (!c->name || !c->value) {
			urldb_free_cookie(c);
			goto error;
		}

		/* 4.3.2:i Cookie path must be a prefix of URL path */
		len = strlen(c->path);
		if (len > lwc_string_length(path) ||
		    strncmp(c->path, lwc_string_data(path),
			    len) != 0) {
			urldb_free_cookie(c);
			goto error;
		}

#ifdef WITH_NSPSL
		/* check domain is not a public suffix */
		dot = c->domain;
		if (*dot == '.') {
			dot++;
		}
		suffix = nspsl_getpublicsuffix(dot);
		if (suffix == NULL) {
			NSLOG(netsurf, INFO,
			      "domain %s was a public suffix domain", dot);
			urldb_free_cookie(c);
			goto error;
		}
#else
		/* 4.3.2:ii Cookie domain must contain embedded dots */
		dot = strchr(c->domain + 1, '.');
		if (!dot || *(dot + 1) == '\0') {
			/* no embedded dots */
			urldb_free_cookie(c);
			goto error;
		}
#endif

		/* Domain match fetch host with cookie domain */
		if (strcasecmp(lwc_string_data(host), c->domain) != 0) {
			int hlen, dlen;
			char *domain = c->domain;

			/* c->domain must be a domain cookie here because:
			 * c->domain is either:
			 *   + specified in the header as a domain cookie
			 *     (non-domain cookies in the header are ignored
			 *      by urldb_parse_cookie / urldb_parse_avpair)
			 *   + defaulted to the URL's host part
			 *     (by urldb_parse_cookie if no valid domain was
			 *      specified in the header)
			 *
			 * The latter will pass the strcasecmp above, which
			 * leaves the former (i.e. a domain cookie)
			 */
			assert(c->domain[0] == '.');

			/* 4.3.2:iii */
			if (urldb__host_is_ip_address(lwc_string_data(host))) {
				/* IP address, so no partial match */
				urldb_free_cookie(c);
				goto error;
			}

			hlen = lwc_string_length(host);
			dlen = strlen(c->domain);

			if (hlen <= dlen && hlen != dlen - 1) {
				/* Partial match not possible */
				urldb_free_cookie(c);
				goto error;
			}

			if (hlen == dlen - 1) {
				/* Relax matching to allow
				 * host a.com to match .a.com */
				domain++;
				dlen--;
			}

			if (strcasecmp(lwc_string_data(host) + (hlen - dlen),
				       domain)) {
				urldb_free_cookie(c);
				goto error;
			}

			/* 4.3.2:iv Ensure H contains no dots
			 *
			 * If you believe the spec, H should contain no
			 * dots in _any_ cookie. Unfortunately, however,
			 * reality differs in that many sites send domain
			 * cookies of the form .foo.com from hosts such
			 * as bar.bat.foo.com and then expect domain
			 * matching to work. Thus we have to do what they
			 * expect, regardless of any potential security
			 * implications.
			 *
			 * This is what code conforming to the spec would
			 * look like:
			 *
			 * for (int i = 0; i < (hlen - dlen); i++) {
			 *	if (host[i] == '.') {
			 *		urldb_free_cookie(c);
			 *		goto error;
			 *	}
			 * }
			 */
		}

		/* Now insert into database.
		 * NOTE(review): the cookie is not freed here on failure; this
		 * presumes urldb_insert_cookie() releases it, as the
		 * "Cookie freed for us" remarks in urldb_load_cookies() say. */
		if (!urldb_insert_cookie(c, scheme, urlt))
			goto error;
	} while (cur < end);

	lwc_string_unref(host);
	lwc_string_unref(path);
	lwc_string_unref(scheme);
	nsurl_unref(urlt);

	return true;

error:
	lwc_string_unref(host);
	lwc_string_unref(path);
	lwc_string_unref(scheme);
	nsurl_unref(urlt);

	return false;
}
/* exported interface documented in content/urldb.h */
/*
 * Build the cookie string to send with a request for a URL.
 *
 * The URL is added to the database so that its node can be used as the
 * starting point.  Cookies are gathered, in order, from:
 *   - entries with exactly the URL's final path segment,
 *   - directory entries and path nodes above it (prefix matches),
 *   - domain cookies of the host and of its parent domains.
 * Expired cookies, Secure cookies on a non-https scheme, and (unless
 * \a include_http_only is set) HttpOnly cookies are skipped.  Every cookie
 * selected has its last-used time updated and is reported to the cookie
 * manager.
 *
 * \param url                URL being requested; must not be NULL.
 * \param include_http_only  Whether HttpOnly cookies may be returned.
 * \return String allocated with malloc/realloc holding the cookies, or
 *         NULL if no cookie applies or memory ran out.
 */
char *urldb_get_cookie(nsurl *url, bool include_http_only)
{
	const struct path_data *p, *q;
	const struct host_part *h;
	lwc_string *path_lwc;
	struct cookie_internal_data *c;
	int count = 0, version = COOKIE_RFC2965;
	struct cookie_internal_data **matched_cookies;
	int matched_cookies_size = 20;
	int ret_alloc = 4096, ret_used = 1;
	const char *path;
	char *ret;
	lwc_string *scheme;
	time_t now;
	int i;
	bool match;

	assert(url != NULL);

	/* The URL must exist in the db in order to find relevant cookies, since
	 * we search up the tree from the URL node, and cookies from further
	 * up also apply. */
	urldb_add_url(url);

	p = urldb_find_url(url);
	if (!p)
		return NULL;

	scheme = p->scheme;

	matched_cookies = malloc(matched_cookies_size *
				 sizeof(struct cookie_internal_data *));
	if (!matched_cookies)
		return NULL;

/* Enlarge the match array once it is full; on failure release everything
 * and make the enclosing function return NULL. */
#define GROW_MATCHED_COOKIES						\
	do {								\
		if (count == matched_cookies_size) {			\
			struct cookie_internal_data **temp;		\
			temp = realloc(matched_cookies,			\
				       (matched_cookies_size + 20) *	\
				       sizeof(struct cookie_internal_data *)); \
									\
			if (temp == NULL) {				\
				free(ret);				\
				free(matched_cookies);			\
				return NULL;				\
			}						\
									\
			matched_cookies = temp;				\
			matched_cookies_size += 20;			\
		}							\
	} while(0)

	ret = malloc(ret_alloc);
	if (!ret) {
		free(matched_cookies);
		return NULL;
	}

	ret[0] = '\0';

	path_lwc = nsurl_get_component(url, NSURL_PATH);
	if (path_lwc == NULL) {
		free(ret);
		free(matched_cookies);
		return NULL;
	}
	path = lwc_string_data(path_lwc);
	lwc_string_unref(path_lwc);

	now = time(NULL);

	/* In every Secure test below the result of lwc_string_isequal() is
	 * compared against lwc_error_ok, as is done elsewhere in this file;
	 * using the returned status directly as a truth value made the test
	 * false whenever the comparison succeeded.
	 *
	 * NOTE(review): the first three passes compare the scheme of the
	 * node holding the cookie rather than the scheme of the request;
	 * confirm that is intended. */

	if (*(p->segment) != '\0') {
		/* Match exact path, unless directory, when prefix matching
		 * will handle this case for us. */
		for (q = p->parent->children; q; q = q->next) {
			if (strcmp(q->segment, p->segment))
				continue;

			/* Consider all cookies associated with
			 * this exact path */
			for (c = q->cookies; c; c = c->next) {
				if (c->expires != -1 && c->expires < now)
					/* cookie has expired => ignore */
					continue;

				if (c->secure && lwc_string_isequal(
							q->scheme,
							corestring_lwc_https,
							&match) == lwc_error_ok &&
				    match == false)
					/* secure cookie for insecure host.
					 * ignore */
					continue;

				if (c->http_only && !include_http_only)
					/* Ignore HttpOnly */
					continue;

				matched_cookies[count++] = c;

				GROW_MATCHED_COOKIES;

				if (c->version < (unsigned int)version)
					version = c->version;

				c->last_used = now;

				cookie_manager_add((struct cookie_data *)c);
			}
		}
	}

	/* Now consider cookies whose paths prefix-match ours */
	for (p = p->parent; p; p = p->parent) {
		/* Find directory's path entry(ies) */
		/* There are potentially multiple due to differing schemes */
		for (q = p->children; q; q = q->next) {
			if (*(q->segment) != '\0')
				continue;

			for (c = q->cookies; c; c = c->next) {
				if (c->expires != -1 && c->expires < now)
					/* cookie has expired => ignore */
					continue;

				if (c->secure && lwc_string_isequal(
							q->scheme,
							corestring_lwc_https,
							&match) == lwc_error_ok &&
				    match == false)
					/* Secure cookie for insecure server
					 * => ignore */
					continue;

				if (c->http_only && !include_http_only)
					/* Ignore HttpOnly */
					continue;

				matched_cookies[count++] = c;

				GROW_MATCHED_COOKIES;

				if (c->version < (unsigned int) version)
					version = c->version;

				c->last_used = now;

				cookie_manager_add((struct cookie_data *)c);
			}
		}

		if (!p->parent) {
			/* No parent, so bail here. This can't go in
			 * the loop exit condition as we also want to
			 * process the top-level node.
			 *
			 * If p->parent is NULL then p->cookies are
			 * the domain cookies and thus we don't even
			 * try matching against them.
			 */
			break;
		}

		/* Consider p itself - may be the result of Path=/foo */
		for (c = p->cookies; c; c = c->next) {
			if (c->expires != -1 && c->expires < now)
				/* cookie has expired => ignore */
				continue;

			/* Ensure cookie path is a prefix of the resource */
			if (strncmp(c->path, path, strlen(c->path)) != 0)
				/* paths don't match => ignore */
				continue;

			if (c->secure && lwc_string_isequal(p->scheme,
							    corestring_lwc_https,
							    &match) == lwc_error_ok &&
			    match == false)
				/* Secure cookie for insecure server
				 * => ignore */
				continue;

			if (c->http_only && !include_http_only)
				/* Ignore HttpOnly */
				continue;

			matched_cookies[count++] = c;

			GROW_MATCHED_COOKIES;

			if (c->version < (unsigned int) version)
				version = c->version;

			c->last_used = now;

			cookie_manager_add((struct cookie_data *)c);
		}
	}

	/* Finally consider domain cookies for hosts which domain match ours */
	for (h = (const struct host_part *)p; h && h != &db_root;
	     h = h->parent) {
		for (c = h->paths.cookies; c; c = c->next) {
			if (c->expires != -1 && c->expires < now)
				/* cookie has expired => ignore */
				continue;

			/* Ensure cookie path is a prefix of the resource */
			if (strncmp(c->path, path, strlen(c->path)) != 0)
				/* paths don't match => ignore */
				continue;

			if (c->secure && lwc_string_isequal(scheme,
							    corestring_lwc_https,
							    &match) == lwc_error_ok &&
			    match == false)
				/* secure cookie for insecure host. ignore */
				continue;

			if (c->http_only && !include_http_only)
				/* Ignore HttpOnly */
				continue;

			matched_cookies[count++] = c;

			GROW_MATCHED_COOKIES;

			if (c->version < (unsigned int)version)
				version = c->version;

			c->last_used = now;

			cookie_manager_add((struct cookie_data *)c);
		}
	}

	if (count == 0) {
		/* No cookies found */
		free(ret);
		free(matched_cookies);
		return NULL;
	}

	/* and build output string */
	if (version > COOKIE_NETSCAPE) {
		sprintf(ret, "$Version=%d", version);
		ret_used = strlen(ret) + 1;
	}

	for (i = 0; i < count; i++) {
		if (!urldb_concat_cookie(matched_cookies[i], version,
					 &ret_used, &ret_alloc, &ret)) {
			free(ret);
			free(matched_cookies);
			return NULL;
		}
	}

	if (version == COOKIE_NETSCAPE) {
		/* Old-style cookies => no version & skip "; " */
		memmove(ret, ret + 2, ret_used - 2);
		ret_used -= 2;
	}

	/* Now, shrink the output buffer to the required size */
	{
		char *temp = realloc(ret, ret_used);
		if (!temp) {
			free(ret);
			free(matched_cookies);
			return NULL;
		}

		ret = temp;
	}

	free(matched_cookies);

	return ret;

#undef GROW_MATCHED_COOKIES
}
/* exported interface documented in content/urldb.h */
/*
 * Delete a cookie, searching from the root of the host tree.
 *
 * \param domain  Cookie domain, forwarded to the host walker.
 * \param path    Cookie path, forwarded to the host walker.
 * \param name    Cookie name, forwarded to the host walker.
 */
void urldb_delete_cookie(const char *domain, const char *path,
		const char *name)
{
	/* All of the work happens in the host walker, starting at the root */
	urldb_delete_cookie_hosts(domain,
				  path,
				  name,
				  &db_root);
}
/* exported interface documented in content/urldb.h */
/*
 * Load cookies from a cookie file into the database.
 *
 * The file holds one cookie per line as tab-separated fields.  Lines that
 * are empty or begin with '#' are skipped, and everything before the
 * "Version:" line is ignored.  Loading stops at the first allocation or
 * insertion failure, or if the file version is older than the minimum
 * supported.  A file that cannot be opened is silently ignored.
 *
 * \param filename  Name of the file to read; must not be NULL.
 */
void urldb_load_cookies(const char *filename)
{
	FILE *fp;
	char s[16*1024];

	assert(filename);

	fp = fopen(filename, "r");
	if (!fp)
		return;

/* Advance p to the next tab and terminate the field there; a line that
 * ends first is abandoned via the enclosing loop's continue. */
#define FIND_T {							\
		for (; *p && *p != '\t'; p++)				\
			; /* do nothing */				\
		if (p >= end) {						\
			NSLOG(netsurf, INFO, "Overran input");		\
			continue;					\
		}							\
		*p++ = '\0';						\
	}

/* Advance p past a run of tabs; a line that ends first is abandoned via
 * the enclosing loop's continue. */
#define SKIP_T {							\
		for (; *p && *p == '\t'; p++)				\
			; /* do nothing */				\
		if (p >= end) {						\
			NSLOG(netsurf, INFO, "Overran input");		\
			continue;					\
		}							\
	}

	while (fgets(s, sizeof s, fp)) {
		char *p = s, *end = NULL,
			*domain, *path, *name, *value, *scheme, *url,
			*comment;
		int version, domain_specified, path_specified,
			secure, http_only, no_destroy, value_quoted;
		time_t expires, last_used;
		struct cookie_internal_data *c;
		size_t line_len;

		if(s[0] == 0 || s[0] == '#')
			/* Skip blank lines or comments */
			continue;

		/* Lose the terminating newline, if there is one.  The final
		 * line of a file, or a line longer than the buffer, has
		 * none, and must not lose its last character instead. */
		line_len = strlen(s);
		if (line_len > 0 && s[line_len - 1] == '\n')
			s[--line_len] = '\0';
		end = s + line_len;

		/* Look for file version first
		 * (all input is ignored until this is read)
		 */
		if (strncasecmp(s, "Version:", 8) == 0) {
			FIND_T; SKIP_T; loaded_cookie_file_version = atoi(p);

			if (loaded_cookie_file_version <
			    MIN_COOKIE_FILE_VERSION) {
				NSLOG(netsurf, INFO,
				      "Unsupported Cookie file version");
				break;
			}

			continue;
		} else if (loaded_cookie_file_version == 0) {
			/* Haven't yet seen version; skip this input */
			continue;
		}

		/* One cookie/line */

		/* Parse input */
		FIND_T; version = atoi(s);
		SKIP_T; domain = p; FIND_T;
		SKIP_T; domain_specified = atoi(p); FIND_T;
		SKIP_T; path = p; FIND_T;
		SKIP_T; path_specified = atoi(p); FIND_T;
		SKIP_T; secure = atoi(p); FIND_T;
		if (loaded_cookie_file_version > 101) {
			/* Introduced in version 1.02 */
			SKIP_T; http_only = atoi(p); FIND_T;
		} else {
			http_only = 0;
		}
		SKIP_T; expires = (time_t)atoi(p); FIND_T;
		SKIP_T; last_used = (time_t)atoi(p); FIND_T;
		SKIP_T; no_destroy = atoi(p); FIND_T;
		SKIP_T; name = p; FIND_T;
		SKIP_T; value = p; FIND_T;
		if (loaded_cookie_file_version > 100) {
			/* Introduced in version 1.01 */
			SKIP_T;	value_quoted = atoi(p); FIND_T;
		} else {
			value_quoted = 0;
		}
		SKIP_T; scheme = p; FIND_T;
		SKIP_T; url = p; FIND_T;

		/* Comment may have no content, so don't
		 * use macros as they'll break */
		for (; *p && *p == '\t'; p++)
			; /* do nothing */
		comment = p;

		assert(p <= end);

		/* Now create cookie */
		c = malloc(sizeof(struct cookie_internal_data));
		if (!c)
			break;

		c->name = strdup(name);
		c->value = strdup(value);
		c->value_was_quoted = value_quoted;
		c->comment = strdup(comment);
		c->domain_from_set = domain_specified;
		c->domain = strdup(domain);
		c->path_from_set = path_specified;
		c->path = strdup(path);
		c->expires = expires;
		c->last_used = last_used;
		c->secure = secure;
		c->http_only = http_only;
		c->version = version;
		c->no_destroy = no_destroy;

		if (!(c->name && c->value && c->comment &&
		      c->domain && c->path)) {
			urldb_free_cookie(c);
			break;
		}

		if (c->domain[0] != '.') {
			lwc_string *scheme_lwc = NULL;
			nsurl *url_nsurl = NULL;

			assert(scheme[0] != 'u');

			if (nsurl_create(url, &url_nsurl) != NSERROR_OK) {
				urldb_free_cookie(c);
				break;
			}
			scheme_lwc = nsurl_get_component(url_nsurl,
							 NSURL_SCHEME);
			if (scheme_lwc == NULL) {
				/* No scheme to file the cookie under; treat
				 * as a failed insertion rather than passing
				 * NULL on to be unreferenced later. */
				urldb_free_cookie(c);
				nsurl_unref(url_nsurl);
				break;
			}

			/* And insert it into database */
			if (!urldb_insert_cookie(c, scheme_lwc, url_nsurl)) {
				/* Cookie freed for us */
				nsurl_unref(url_nsurl);
				lwc_string_unref(scheme_lwc);
				break;
			}
			nsurl_unref(url_nsurl);
			lwc_string_unref(scheme_lwc);

		} else {
			if (!urldb_insert_cookie(c, NULL, NULL)) {
				/* Cookie freed for us */
				break;
			}
		}
	}

#undef SKIP_T
#undef FIND_T

	fclose(fp);
}
/* exported interface documented in content/urldb.h */
void urldb_save_cookies(const char *filename)
{
FILE *fp;
int cookie_file_version = max(loaded_cookie_file_version,
COOKIE_FILE_VERSION);
assert(filename);
fp = fopen(filename, "w");
if (!fp)
return;
fprintf(fp, "# NetSurf cookies file.\n"
"#\n"
"# Lines starting with a '#' are comments, "
"blank lines are ignored.\n"
"#\n"
"# All lines prior to \"Version:\t%d\" are discarded.\n"
"#\n"
"# Version\tDomain\tDomain from Set-Cookie\tPath\t"
"Path from Set-Cookie\tSecure\tHTTP-Only\tExpires\tLast used\t"
"No destroy\tName\tValue\tValue was quoted\tScheme\t"
"URL\tComment\n",
cookie_file_version);
fprintf(fp, "Version:\t%d\n", cookie_file_version);
urldb_save_cookie_hosts(fp, &db_root);
fclose(fp);
}
/* exported interface documented in netsurf/url_db.h */
/*
 * Dump the database: first the host tree from its root, then every search
 * tree in index order, via the respective dump helpers.
 */
void urldb_dump(void)
{
	int tree;

	urldb_dump_hosts(&db_root);

	for (tree = 0; tree < NUM_SEARCH_TREES; tree++) {
		urldb_dump_search(search_trees[tree], 0);
	}
}