netsurf/content/urldb.c
Daniel Silverstone 6807b4208a Remove the netsurf/ from the include paths and rationalise use of <> vs "" in includes
NetSurf includes are now done with ""s and other system includes with <>s as C intended.
The scandeps tool has been updated to only look for ""ed includes, and to verify that the
files exist in the tree before adding them to the dependency lines. The depend rule has
therefore been augmented to make sure the autogenerated files are built before it is run.

This is untested under self-hosted RISC OS builds. All else tested and works.


svn path=/trunk/netsurf/; revision=3307
2007-05-30 22:39:54 +00:00

3756 lines
83 KiB
C

/*
* This file is part of NetSurf, http://netsurf-browser.org/
* Licensed under the GNU General Public License,
* http://www.opensource.org/licenses/gpl-license
* Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk>
*/
/** \file
* Unified URL information database (implementation)
*
* URLs are stored in a tree-based structure as follows:
*
* The host component is extracted from each URL and, if a FQDN, split on
* every '.'. The tree is constructed by inserting each FQDN segment in
* reverse order. Duplicate nodes are merged.
*
* If the host part of an URL is an IP address, then this is added to the
* tree verbatim (as if it were a TLD).
*
* This provides something looking like:
*
* root (a sentinel)
* |
* -------------------------------------------------
* | | | | | | |
* com edu gov 127.0.0.1 net org uk TLDs
* | | | | | |
* google ... ... ... ... co 2LDs
* | |
* www bbc Hosts/Subdomains
* |
* www ...
*
* Each of the nodes in this tree is a struct host_part. This stores the
* FQDN segment (or IP address) with which the node is concerned. Each node
* may contain further information about paths on a host (struct path_data)
* or SSL certificate processing on a host-wide basis
* (host_part::permit_invalid_certs).
*
* Path data is concerned with storing various metadata about the path in
* question. This includes global history data, HTTP authentication details
* and any associated HTTP cookies. This is stored as a tree of path segments
* hanging off the relevant host_part node.
*
* Therefore, to find the last visited time of the URL
* http://www.example.com/path/to/resource.html, the FQDN tree would be
* traversed in the order root -> "com" -> "example" -> "www". The "www"
* node would have attached to it a tree of struct path_data:
*
* (sentinel)
* |
* path
* |
* to
* |
* resource.html
*
* This represents the absolute path "/path/to/resource.html". The leaf node
* "resource.html" contains the last visited time of the resource.
*
* The mechanism described above is, however, not particularly conducive to
* fast searching of the database for a given URL (or URLs beginning with a
* given prefix). Therefore, an ancillary data structure is used to enable
* fast searching. This structure simply reflects the contents of the
* database, with entries being added/removed at the same time as for the
* core database. In order to ensure that degenerate cases are kept to a
* minimum, we use an AAtree. This is an approximation of a Red-Black tree
* with similar performance characteristics, but with a significantly
* simpler implementation. Entries in this tree comprise pointers to the
* leaf nodes of the host tree described above.
*/
#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <sys/select.h>
#include <curl/curl.h>
#include "image/bitmap.h"
#include "content/content.h"
#include "content/urldb.h"
#include "desktop/cookies.h"
#include "desktop/options.h"
#ifdef riscos
/** \todo lose this */
#include "riscos/bitmap.h"
#endif
#include "utils/log.h"
#include "utils/filename.h"
#include "utils/url.h"
#include "utils/utils.h"
/**
 * Internal representation of a single HTTP cookie.
 *
 * Instances are chained into a doubly-linked list through ::prev and
 * ::next; struct path_data holds a pointer to such a list.
 *
 * NOTE(review): elsewhere in this file a pointer to this struct is cast to
 * const struct cookie_data * before being handed to cookie callbacks, so
 * the layout presumably has to stay compatible with that public type --
 * confirm against content/urldb.h before reordering members.
 */
struct cookie_internal_data {
char *name; /**< Cookie name */
char *value; /**< Cookie value */
char *comment; /**< Cookie comment */
bool domain_from_set; /**< Domain came from Set-Cookie: header */
char *domain; /**< Domain */
bool path_from_set; /**< Path came from Set-Cookie: header */
char *path; /**< Path */
time_t expires; /**< Expiry timestamp, or 1 for session */
time_t last_used; /**< Last used time */
bool secure; /**< Only send for HTTPS requests */
cookie_version version; /**< Specification compliance */
bool no_destroy; /**< Never destroy this cookie,
* unless it's expired */
struct cookie_internal_data *prev; /**< Previous in list */
struct cookie_internal_data *next; /**< Next in list */
};
/**
 * HTTP authentication details attached to a path node.
 *
 * Both members are heap-allocated copies (see urldb_set_auth_details) and
 * are treated as valid only when both are non-NULL.
 */
struct auth_data {
char *realm; /**< Protection realm */
char *auth; /**< Authentication details in form
* username:password */
};
/**
 * Cache information attached to a path node.
 */
struct cache_internal_data {
char filename[12]; /**< Cached filename, or first byte 0 for none */
};
/**
 * Per-URL history metadata stored in each path node.
 *
 * NOTE(review): a pointer to this struct is cast to struct url_data * when
 * returned to callers or passed to URL callbacks, so the member layout
 * presumably must match that public type -- confirm against
 * content/urldb.h before changing it.
 */
struct url_internal_data {
char *title; /**< Resource title */
unsigned int visits; /**< Visit count */
time_t last_visit; /**< Last visit time */
content_type type; /**< Type of resource */
};
/**
 * A node in the per-host tree of path segments.
 *
 * Each node records one path segment plus the metadata (history data,
 * thumbnail, authentication details, cookies) associated with the URL
 * ending at that segment. Siblings are linked through ::next / ::prev and
 * the tree is navigated through ::parent / ::children.
 */
struct path_data {
char *url; /**< Full URL */
char *scheme; /**< URL scheme for data */
unsigned int port; /**< Port number for data */
char *segment; /**< Path segment for this node */
unsigned int frag_cnt; /**< Number of entries in ::fragment */
char **fragment; /**< Array of fragments */
bool persistent; /**< This entry should persist */
struct bitmap *thumb; /**< Thumbnail image of resource */
struct url_internal_data urld; /**< URL data for resource */
struct cache_internal_data cache; /**< Cache data for resource */
struct auth_data auth; /**< Authentication data for resource */
struct cookie_internal_data *cookies; /**< Cookies associated with resource */
struct path_data *next; /**< Next sibling */
struct path_data *prev; /**< Previous sibling */
struct path_data *parent; /**< Parent path segment */
struct path_data *children; /**< Child path segments */
struct path_data *last; /**< Last child */
};
/**
 * A node in the host tree: one FQDN segment (or a whole IP address).
 *
 * The root of the host's path tree is embedded as the first member so that
 * code which walks a path tree up to its root can cast that root back to
 * the owning struct host_part.
 */
struct host_part {
/** Known paths on this host. This _must_ be first so that
* struct host_part *h = (struct host_part *)mypath; works */
struct path_data paths;
bool permit_invalid_certs; /**< Allow access to SSL protected
* resources on this host without
* verifying certificate authenticity
*/
char *part; /**< Part of host string */
struct host_part *next; /**< Next sibling */
struct host_part *prev; /**< Previous sibling */
struct host_part *parent; /**< Parent host part */
struct host_part *children; /**< Child host parts */
};
/**
 * A node in one of the search trees used for fast host lookup.
 *
 * Each node refers to an entry in the host tree; the static `empty` node
 * is used in place of NULL for absent subtrees.
 */
struct search_node {
const struct host_part *data; /**< Host tree entry */
unsigned int level; /**< Node level */
struct search_node *left; /**< Left subtree */
struct search_node *right; /**< Right subtree */
};
/*
 * Forward declarations of the file-local (static) helpers, grouped by the
 * task they support. The definitions follow later in this file.
 */
/* Destruction */
static void urldb_destroy_host_tree(struct host_part *root);
static void urldb_destroy_path_tree(struct path_data *root);
static void urldb_destroy_path_node_content(struct path_data *node);
static void urldb_destroy_cookie(struct cookie_internal_data *c);
static void urldb_destroy_search_tree(struct search_node *root);
/* Saving */
static void urldb_save_search_tree(struct search_node *root, FILE *fp);
static void urldb_count_urls(const struct path_data *root, time_t expiry,
unsigned int *count);
static void urldb_write_paths(const struct path_data *parent,
const char *host, FILE *fp, char **path, int *path_alloc,
int *path_used, time_t expiry);
/* Iteration */
static bool urldb_iterate_partial_host(struct search_node *root,
const char *prefix, bool (*callback)(const char *url,
const struct url_data *data));
static bool urldb_iterate_partial_path(const struct path_data *parent,
const char *prefix, bool (*callback)(const char *url,
const struct url_data *data));
static bool urldb_iterate_entries_host(struct search_node *parent,
bool (*url_callback)(const char *url, const struct url_data *data),
bool (*cookie_callback)(const char *domain, const struct cookie_data *data));
static bool urldb_iterate_entries_path(const struct path_data *parent,
bool (*url_callback)(const char *url, const struct url_data *data),
bool (*cookie_callback)(const char *domain, const struct cookie_data *data));
/* Insertion */
static struct host_part *urldb_add_host_node(const char *part,
struct host_part *parent);
static struct host_part *urldb_add_host(const char *host);
static struct path_data *urldb_add_path_node(const char *scheme,
unsigned int port, const char *segment, const char *fragment,
struct path_data *parent);
static struct path_data *urldb_add_path(const char *scheme,
unsigned int port, const struct host_part *host,
const char *path, const char *query, const char *fragment,
const char *url);
static int urldb_add_path_fragment_cmp(const void *a, const void *b);
static struct path_data *urldb_add_path_fragment(struct path_data *segment,
const char *fragment);
/* Lookup */
static struct path_data *urldb_find_url(const char *url);
static struct path_data *urldb_match_path(const struct path_data *parent,
const char *path, const char *scheme, unsigned short port);
static struct search_node **urldb_get_search_tree_direct(const char *host);
static struct search_node *urldb_get_search_tree(const char *host);
/* Dump */
static void urldb_dump_hosts(struct host_part *parent);
static void urldb_dump_paths(struct path_data *parent);
static void urldb_dump_search(struct search_node *parent, int depth);
/* Search tree */
static struct search_node *urldb_search_insert(struct search_node *root,
const struct host_part *data);
static struct search_node *urldb_search_insert_internal(
struct search_node *root, struct search_node *n);
static struct search_node *urldb_search_remove(struct search_node *root,
const struct host_part *data);
static const struct host_part *urldb_search_find(struct search_node *root,
const char *host);
static struct search_node *urldb_search_skew(struct search_node *root);
static struct search_node *urldb_search_split(struct search_node *root);
static int urldb_search_match_host(const struct host_part *a,
const struct host_part *b);
static int urldb_search_match_string(const struct host_part *a,
const char *b);
static int urldb_search_match_prefix(const struct host_part *a,
const char *b);
/* Cookies */
static struct cookie_internal_data *urldb_parse_cookie(const char *url,
const char **cookie);
static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, char *v);
static bool urldb_insert_cookie(struct cookie_internal_data *c, const char *scheme,
const char *url);
static void urldb_free_cookie(struct cookie_internal_data *c);
static bool urldb_concat_cookie(struct cookie_internal_data *c, int *used,
int *alloc, char **buf);
static void urldb_delete_cookie_hosts(const char *domain, const char *path, const char *name, struct host_part *parent);
static void urldb_delete_cookie_paths(const char *domain, const char *path, const char *name, struct path_data *parent);
static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent);
static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent);
/** Root database handle */
static struct host_part db_root;
/** Search trees - one per letter + 1 for IPs + 1 for Everything Else */
#define NUM_SEARCH_TREES 28
/* Indices into search_trees[]. Going by the description above, ST_IP is
 * presumably the tree for IP addresses and ST_EE the "Everything Else"
 * tree (TODO confirm against urldb_get_search_tree_direct). ST_DN is the
 * index of the first per-letter tree: the tree for letter c is at
 * ST_DN + c - 'a'. */
#define ST_IP 0
#define ST_EE 1
#define ST_DN 2
/** Sentinel node standing in for an empty (sub)tree: its level is 0 and
 * both of its children point back at itself. Tree code compares against
 * &empty rather than NULL. */
static struct search_node empty = { 0, 0, &empty, &empty };
/** The search trees themselves; every tree starts out empty */
static struct search_node *search_trees[NUM_SEARCH_TREES] = {
&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
&empty, &empty, &empty, &empty
};
/* NOTE(review): presumably the version number of the cookie file format;
 * it is not referenced in the portion of the file reviewed here. */
#define COOKIE_FILE_VERSION 100
/** Version number written at the top of the URL file by urldb_save().
 * urldb_load() accepts files from version 105 up to this version. */
#define URL_FILE_VERSION 106
/**
 * Strip a single trailing newline from a line read by fgets()
 *
 * Unlike blindly overwriting the last character, this is safe for empty
 * strings and leaves lines that lack a newline (over-long lines, or a
 * final line without terminator) intact.
 *
 * \param s  NUL-terminated line buffer (modified in place)
 * \return Length of the string after stripping
 */
static size_t urldb_load_chomp(char *s)
{
	size_t len = strlen(s);

	if (len > 0 && s[len - 1] == '\n')
		s[--len] = '\0';

	return len;
}

/**
 * Import an URL database from file, replacing any existing database
 *
 * The file starts with a version line (105 up to URL_FILE_VERSION are
 * accepted). It is followed by one record per host: the host name, the
 * number of URLs, then for each URL a fixed group of lines (6 lines in
 * version 105, 8 lines in later versions).
 *
 * Memory exhaustion while inserting hosts or paths is fatal (die()).
 * A short or unreadable file simply ends the import early.
 *
 * \param filename Name of file containing data
 */
void urldb_load(const char *filename)
{
#define MAXIMUM_URL_LENGTH 4096
	char s[MAXIMUM_URL_LENGTH];
	char host[256];
	struct host_part *h;
	int urls;
	int i;
	int version;
	size_t length;
	FILE *fp;

	assert(filename);

	LOG(("Loading URL file"));

	fp = fopen(filename, "r");
	if (!fp) {
		LOG(("Failed to open file '%s' for reading", filename));
		return;
	}

	/* Every early exit below must close the file again */
	if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
		fclose(fp);
		return;
	}
	version = atoi(s);
	if (version < 105) {
		LOG(("Unsupported URL file version."));
		fclose(fp);
		return;
	}
	if (version > URL_FILE_VERSION) {
		LOG(("Unknown URL file version."));
		fclose(fp);
		return;
	}

	while (fgets(host, sizeof host, fp)) {
		/* get the hostname */
		length = urldb_load_chomp(host);

		/* skip data that has ended up with a host of '' */
		if (length == 0) {
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			urls = atoi(s);
			/* each URL occupies 6 (v105) or 8 (later) lines */
			for (i = 0; i < ((version == 105 ? 6 : 8) * urls);
					i++)
				if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
					break;
			continue;
		}

		if (version == 105) {
			/* file:/ -> localhost */
			if (strcasecmp(host, "file:/") == 0)
				snprintf(host, sizeof host, "localhost");
			else {
				/* strip any port number */
				char *colon = strrchr(host, ':');
				if (colon)
					*colon = '\0';
			}
		}

		/* read number of URLs */
		if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
			break;
		urls = atoi(s);

		/* no URLs => try next host */
		if (urls == 0) {
			LOG(("No URLs for '%s'", host));
			continue;
		}

		h = urldb_add_host(host);
		if (!h) {
			LOG(("Failed adding host: '%s'", host));
			die("Memory exhausted whilst loading URL file");
		}

		/* load the non-corrupt data */
		for (i = 0; i < urls; i++) {
			struct path_data *p = NULL;

			if (version == 105) {
				if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
					break;
				urldb_load_chomp(s);

				if (strncasecmp(s, "file:", 5) == 0) {
					/* local file, so fudge insertion */
					char url[7 + 4096];

					snprintf(url, sizeof url,
							"file://%s", s + 5);

					p = urldb_add_path("file", 0, h,
							s + 5, NULL, NULL, url);
					if (!p) {
						LOG(("Failed inserting '%s'",
								url));
						die("Memory exhausted "
							"whilst loading "
							"URL file");
					}
				} else {
					if (!urldb_add_url(s)) {
						LOG(("Failed inserting '%s'",
								s));
					}
					p = urldb_find_url(s);
				}
			} else {
				char scheme[64], ports[10];
				char url[64 + 3 + 256 + 6 + 4096 + 1];
				unsigned int port;
				bool is_file = false;

				if (!fgets(scheme, sizeof scheme, fp))
					break;
				urldb_load_chomp(scheme);

				if (!fgets(ports, sizeof ports, fp))
					break;
				urldb_load_chomp(ports);
				port = atoi(ports);

				if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
					break;
				urldb_load_chomp(s);

				if (!strcasecmp(host, "localhost") &&
						!strcasecmp(scheme, "file"))
					is_file = true;

				snprintf(url, sizeof url, "%s://%s%s%s%s",
						scheme,
						/* file URLs have no host */
						(is_file ? "" : host),
						(port ? ":" : ""),
						(port ? ports : ""),
						s);

				p = urldb_add_path(scheme, port, h, s, NULL, NULL,
						url);
				if (!p) {
					LOG(("Failed inserting '%s'", url));
					die("Memory exhausted whilst loading "
						"URL file");
				}
			}

			/* visit count */
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			if (p)
				p->urld.visits = (unsigned int)atoi(s);

			/* last visit time */
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			if (p)
				p->urld.last_visit = (time_t)atoi(s);

			/* content type */
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			if (p)
				p->urld.type = (content_type)atoi(s);

			/* thumbnail filename (newline still attached here,
			 * hence the length of 12 for an 11 character name) */
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
#ifdef riscos
			if (p && strlen(s) == 12) {
				/* ensure filename is 'XX.XX.XX.XX' */
				if ((s[2] == '.') && (s[5] == '.') &&
						(s[8] == '.')) {
					s[2] = '/';
					s[5] = '/';
					s[8] = '/';
					s[11] = '\0';
					p->thumb = bitmap_create_file(s);
				} else if ((s[2] == '/') && (s[5] == '/') &&
						(s[8] == '/')) {
					s[11] = '\0';
					p->thumb = bitmap_create_file(s);
				}
			}
#endif

			/* title */
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			length = urldb_load_chomp(s);
			if (p && length > 0) {
				char *title = malloc(length + 1);
				if (title) {
					memcpy(title, s, length + 1);
					/* don't leak a title set by an
					 * earlier record for the same URL */
					free(p->urld.title);
					p->urld.title = title;
				}
			}
		}
	}

	fclose(fp);
	LOG(("Successfully loaded URL file"));
#undef MAXIMUM_URL_LENGTH
}
/**
 * Export the current database to file
 *
 * Writes the file format version followed by the contents of every
 * search tree, in index order.
 *
 * \param filename Name of file to export to
 */
void urldb_save(const char *filename)
{
	FILE *out;
	int tree = 0;

	assert(filename);

	out = fopen(filename, "w");
	if (out == NULL) {
		LOG(("Failed to open file '%s' for writing", filename));
		return;
	}

	/* file format version number */
	fprintf(out, "%d\n", URL_FILE_VERSION);

	while (tree < NUM_SEARCH_TREES) {
		urldb_save_search_tree(search_trees[tree], out);
		tree++;
	}

	fclose(out);
}
/**
 * Save a search (sub)tree
 *
 * Performs an in-order traversal. For each node the full host name is
 * rebuilt by walking the host tree from the node's entry up to the root,
 * and the host's unexpired (or persistent) URLs are written out.
 *
 * \param parent Root of (sub)tree to save
 * \param fp File to write to
 */
void urldb_save_search_tree(struct search_node *parent, FILE *fp)
{
	char host[256];
	const struct host_part *h;
	unsigned int path_count = 0;
	char *path, *p, *end;
	int path_alloc = 64, path_used = 2;
	time_t expiry = time(NULL) - (60 * 60 * 24) * option_expire_url;

	if (parent == &empty)
		return;

	urldb_save_search_tree(parent->left, fp);

	/* path buffer starts out holding "/" (2 bytes used incl. NUL) */
	path = malloc(path_alloc);
	if (!path)
		return;

	path[0] = '/';
	path[1] = '\0';

	/* make sure host is a valid string even if the loop never runs */
	host[0] = '\0';

	for (h = parent->data, p = host, end = host + sizeof host;
			h && h != &db_root && p < end; h = h->parent) {
		int written = snprintf(p, end - p, "%s%s", h->part,
				(h->parent && h->parent->parent) ? "." : "");
		if (written < 0) {
			free(path);
			return;
		}
		if (written >= end - p) {
			/* Output was truncated: host is full (and still
			 * NUL terminated). Stop here rather than moving
			 * p beyond the end of the buffer. */
			break;
		}
		p += written;
	}

	urldb_count_urls(&parent->data->paths, expiry, &path_count);

	if (path_count > 0) {
		fprintf(fp, "%s\n%i\n", host, path_count);

		urldb_write_paths(&parent->data->paths, host, fp,
				&path, &path_alloc, &path_used, expiry);
	}

	free(path);

	urldb_save_search_tree(parent->right, fp);
}
/**
 * Count number of URLs associated with a host
 *
 * Only leaf nodes are counted, and only when they are persistent or have
 * been visited at least once since \a expiry.
 *
 * \param root Root of path data tree
 * \param expiry Expiry time for URLs
 * \param count Pointer to count (incremented, never reset)
 */
void urldb_count_urls(const struct path_data *root, time_t expiry,
		unsigned int *count)
{
	const struct path_data *child = root->children;

	if (child == NULL) {
		/* leaf: decide whether it is worth saving */
		bool recent = (root->urld.last_visit > expiry) &&
				(root->urld.visits > 0);

		if (root->persistent || recent)
			*count += 1;

		return;
	}

	while (child != NULL) {
		urldb_count_urls(child, expiry, count);
		child = child->next;
	}
}
/**
 * Write paths associated with a host
 *
 * Recursively walks the path tree. For every leaf that is persistent or
 * unexpired, one record is written: scheme, port (blank line if zero),
 * path, visit count, last visit time, content type, thumbnail filename
 * (blank unless built for RISC OS with a thumbnail) and title.
 *
 * Titles are sanitised in place before being written: control characters
 * are replaced by spaces and trailing spaces are removed.
 *
 * \param parent Root of (sub)tree to write
 * \param host Current host name (only passed on to recursive calls)
 * \param fp File to write to
 * \param path Current path string (may be reallocated)
 * \param path_alloc Allocated size of path
 * \param path_used Used size of path, including the terminator
 * \param expiry Expiry time of URLs
 */
void urldb_write_paths(const struct path_data *parent, const char *host,
		FILE *fp, char **path, int *path_alloc, int *path_used,
		time_t expiry)
{
	const struct path_data *p;
	int i;
	int pused = *path_used;

	if (!parent->children) {
		/* leaf node */
		if (!(parent->persistent ||
				((parent->urld.last_visit > expiry) &&
				(parent->urld.visits > 0))))
			/* expired */
			return;

		fprintf(fp, "%s\n", parent->scheme);

		/* port and visits are unsigned: use matching conversions */
		if (parent->port)
			fprintf(fp, "%u\n", parent->port);
		else
			fprintf(fp, "\n");

		fprintf(fp, "%s\n", *path);

		/** \todo handle fragments? */

		fprintf(fp, "%u\n%i\n%i\n", parent->urld.visits,
				(int)parent->urld.last_visit,
				(int)parent->urld.type);

#ifdef riscos
		if (parent->thumb)
			fprintf(fp, "%s\n", parent->thumb->filename);
		else
			fprintf(fp, "\n");
#else
		fprintf(fp, "\n");
#endif

		if (parent->urld.title) {
			char *s = parent->urld.title;

			/* Compare as unsigned char: where plain char is
			 * signed, bytes >= 0x80 (e.g. UTF-8 sequences) are
			 * negative and would otherwise be mistaken for
			 * control characters and blanked out. */
			for (i = 0; s[i] != '\0'; i++)
				if ((unsigned char) s[i] < 32)
					s[i] = ' ';

			/* trim trailing spaces (never the first character) */
			for (--i; ((i > 0) && (s[i] == ' ')); i--)
				s[i] = '\0';

			fprintf(fp, "%s\n", parent->urld.title);
		} else
			fprintf(fp, "\n");
	}

	for (p = parent->children; p; p = p->next) {
		/* room for the segment plus a possible trailing '/' */
		int len = *path_used + strlen(p->segment) + 1;

		if (*path_alloc < len) {
			char *temp = realloc(*path,
				(len > 64) ? len : *path_alloc + 64);

			if (!temp)
				return;

			*path = temp;
			*path_alloc = (len > 64) ? len : *path_alloc + 64;
		}

		strcat(*path, p->segment);
		if (p->children) {
			strcat(*path, "/");
		} else {
			/* no separator appended for a leaf */
			len -= 1;
		}

		*path_used = len;

		urldb_write_paths(p, host, fp, path, path_alloc, path_used,
				expiry);

		/* restore path to its state on entry to this function */
		*path_used = pused;
		(*path)[pused - 1] = '\0';
	}
}
/**
 * Set the cross-session persistence of the entry for an URL
 *
 * Does nothing if the URL is not in the database.
 *
 * \param url Absolute URL to persist
 * \param persist True to persist, false otherwise
 */
void urldb_set_url_persistence(const char *url, bool persist)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry != NULL)
		entry->persistent = persist;
}
/**
 * Insert an URL into the database
 *
 * The URL is split into components; the host is taken from the authority
 * (after any '@'), with the text after the last ':' treated as the port.
 * URLs using the "file" scheme are filed under the host "localhost".
 *
 * \param url Absolute URL to insert
 * \return true on success, false otherwise
 */
bool urldb_add_url(const char *url)
{
	struct url_components components;
	struct host_part *host_node;
	struct path_data *path_node;
	const char *host;
	char *port_sep;
	unsigned short port = 0;

	assert(url);

	/* extract url components */
	if (url_get_components(url, &components) != URL_FUNC_OK)
		return false;

	/* Ensure scheme and authority exist */
	if (components.scheme == NULL || components.authority == NULL) {
		url_destroy_components(&components);
		return false;
	}

	/* Extract host part from authority, skipping any userinfo */
	host = strchr(components.authority, '@');
	host = (host != NULL) ? host + 1 : components.authority;

	/* get port and remove from host */
	port_sep = strrchr(host, ':');
	if (port_sep != NULL) {
		*port_sep = '\0';
		port = atoi(port_sep + 1);
	}

	/* Get host entry */
	host_node = urldb_add_host(
			(strcasecmp(components.scheme, "file") == 0) ?
			"localhost" : host);
	if (host_node == NULL) {
		url_destroy_components(&components);
		return false;
	}

	/* Get path entry */
	path_node = urldb_add_path(components.scheme, port, host_node,
			components.path ? components.path : "",
			components.query, components.fragment, url);

	url_destroy_components(&components);

	return path_node != NULL;
}
/**
 * Set an URL's title string, replacing any existing one
 *
 * The existing title is left untouched if the URL is unknown or the copy
 * cannot be allocated.
 *
 * \param url The URL to look for
 * \param title The title string to use (copied)
 */
void urldb_set_url_title(const char *url, const char *title)
{
	struct path_data *entry;
	char *copy;

	assert(url && title);

	entry = urldb_find_url(url);
	if (entry == NULL)
		return;

	copy = strdup(title);
	if (copy != NULL) {
		free(entry->urld.title);
		entry->urld.title = copy;
	}
}
/**
 * Set an URL's content type
 *
 * Does nothing if the URL is not in the database.
 *
 * \param url The URL to look for
 * \param type The type to set
 */
void urldb_set_url_content_type(const char *url, content_type type)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry != NULL)
		entry->urld.type = type;
}
/**
 * Update an URL's visit data
 *
 * Stamps the entry with the current time and bumps its visit count.
 * Does nothing if the URL is not in the database.
 *
 * \param url The URL to update
 */
void urldb_update_url_visit_data(const char *url)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry != NULL) {
		entry->urld.last_visit = time(NULL);
		entry->urld.visits += 1;
	}
}
/**
 * Reset an URL's visit statistics
 *
 * Clears both the last visit time and the visit count. Does nothing if
 * the URL is not in the database.
 *
 * \param url The URL to reset
 */
void urldb_reset_url_visit_data(const char *url)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry != NULL) {
		entry->urld.visits = 0;
		entry->urld.last_visit = (time_t)0;
	}
}
/**
 * Find data for an URL.
 *
 * The result points into the database entry itself (no copy is made).
 *
 * \param url Absolute URL to look for
 * \return Pointer to result struct, or NULL
 */
const struct url_data *urldb_get_url_data(const char *url)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);

	return (entry != NULL) ? (struct url_data *)&entry->urld : NULL;
}
/**
 * Extract an URL from the db
 *
 * \param url URL to extract
 * \return Pointer to database's copy of URL or NULL if not found
 */
const char *urldb_get_url(const char *url)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);

	return (entry != NULL) ? entry->url : NULL;
}
/**
 * Look up authentication details in database
 *
 * The URL is first added to the database so that the lookup has a node to
 * start from. Details attached to the URL's own node win; otherwise each
 * node from the URL's node up to the root is examined for a child with an
 * empty segment carrying details.
 *
 * Details are only returned when both realm and credentials are present.
 *
 * \param url Absolute URL to search for
 * \return Pointer to authentication details, or NULL if not found
 */
const char *urldb_get_auth_details(const char *url)
{
	struct path_data *p, *q;

	assert(url);

	/* add to the db, so our lookup will work */
	urldb_add_url(url);

	p = urldb_find_url(url);
	if (!p)
		return NULL;

	/* Check for any auth details attached to this node */
	if (p->auth.realm && p->auth.auth)
		return p->auth.auth;

	/* Now consider ancestors */
	for (; p; p = p->parent) {
		/* The parent path entry is stored hung off the
		 * parent entry with an empty (not NULL) segment string.
		 * We look for this here.
		 */
		for (q = p->children; q; q = q->next) {
			if (q->segment[0] == '\0')
				break;
		}

		/* Return straight away on a complete match, so that a
		 * candidate with incomplete details found on the last
		 * iteration can never leak out as the result. */
		if (q && q->auth.realm && q->auth.auth)
			return q->auth.auth;
	}

	return NULL;
}
/**
 * Retrieve certificate verification permissions from database
 *
 * \param url Absolute URL to search for
 * \return true to permit connections to hosts with invalid certificates,
 *         false otherwise (including when the URL is unknown).
 */
bool urldb_get_cert_permissions(const char *url)
{
	struct path_data *node;

	assert(url);

	node = urldb_find_url(url);
	if (node == NULL)
		return false;

	/* Climb to the root of the path tree; that root is the first
	 * member of the owning struct host_part, hence the cast below. */
	while (node->parent != NULL)
		node = node->parent;

	return ((struct host_part *)node)->permit_invalid_certs;
}
/**
 * Set authentication data for an URL
 *
 * The details are stored against the URL with everything after its last
 * '/' removed; that URL is added to the database if missing. Existing
 * details on the node are only replaced once both new strings have been
 * copied successfully.
 *
 * \param url The URL to consider
 * \param realm The authentication realm
 * \param auth The authentication details (in form username:password)
 */
void urldb_set_auth_details(const char *url, const char *realm,
		const char *auth)
{
	struct path_data *entry;
	char *dir_url, *last_slash;
	char *new_realm, *new_auth;

	assert(url && realm && auth);

	dir_url = strdup(url);
	if (dir_url == NULL)
		return;

	/* strip leafname from URL */
	last_slash = strrchr(dir_url, '/');
	if (last_slash != NULL)
		last_slash[1] = '\0';

	/* add url, in case it's missing */
	urldb_add_url(dir_url);

	entry = urldb_find_url(dir_url);

	free(dir_url);

	if (entry == NULL)
		return;

	/** \todo search subtree for same realm/auth details
	 * and remove them (as the lookup routine searches up the tree) */

	new_realm = strdup(realm);
	new_auth = strdup(auth);
	if (new_realm == NULL || new_auth == NULL) {
		free(new_realm);
		free(new_auth);
		return;
	}

	free(entry->auth.realm);
	free(entry->auth.auth);

	entry->auth.realm = new_realm;
	entry->auth.auth = new_auth;
}
/**
 * Set certificate verification permissions
 *
 * The permission is recorded against the host of the URL, which is added
 * to the database first if it is missing.
 *
 * \param url URL to consider
 * \param permit Set to true to allow invalid certificates
 */
void urldb_set_cert_permissions(const char *url, bool permit)
{
	struct path_data *node;

	assert(url);

	/* add url, in case it's missing */
	urldb_add_url(url);

	node = urldb_find_url(url);
	if (node == NULL)
		return;

	/* Climb to the root of the path tree; that root is the first
	 * member of the owning struct host_part, hence the cast below. */
	while (node->parent != NULL)
		node = node->parent;

	((struct host_part *)node)->permit_invalid_certs = permit;
}
/**
 * Set thumbnail for url, replacing any existing thumbnail
 *
 * A previously stored thumbnail is destroyed unless it is the very bitmap
 * being set. Does nothing if the URL is not in the database.
 *
 * \param url Absolute URL to consider
 * \param bitmap Opaque pointer to thumbnail data, or NULL to invalidate
 */
void urldb_set_thumbnail(const char *url, struct bitmap *bitmap)
{
	struct path_data *entry;
	struct bitmap *previous;

	assert(url);

	entry = urldb_find_url(url);
	if (entry == NULL)
		return;

	previous = entry->thumb;
	if (previous != NULL && previous != bitmap)
		bitmap_destroy(previous);

	entry->thumb = bitmap;
}
/**
 * Retrieve thumbnail data for given URL
 *
 * \param url Absolute URL to search for
 * \return Pointer to thumbnail data, or NULL if not found.
 */
const struct bitmap *urldb_get_thumbnail(const char *url)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);

	return (entry != NULL) ? entry->thumb : NULL;
}
/**
 * Decide whether a host prefix should also be tried with "www." prepended
 *
 * \param host Host (prefix) string, at least \a len characters long
 * \param len  Number of characters of \a host to consider
 * \return true if \a len is non-zero and the first min(len, 3) characters
 *         are not a case-insensitive prefix of "www"; false otherwise
 */
static bool urldb_prefix_lacks_www(const char *host, size_t len)
{
	if (len == 0)
		return false;

	/* strncasecmp avoids passing possibly-negative plain chars to
	 * tolower(), which would be undefined behaviour */
	return strncasecmp(host, "www", len < 3 ? len : 3) != 0;
}

/**
 * Iterate over entries in the database which match the given prefix
 *
 * Any scheme ("...://") at the start of the prefix is ignored. If the
 * remainder contains a '/', the text before it is treated as a complete
 * host name and the text after it as a path prefix; otherwise the whole
 * remainder is treated as a host prefix. In both cases, when the host
 * does not already start with (part of) "www", the lookup is also tried
 * with "www." prepended.
 *
 * \param prefix Prefix to match
 * \param callback Callback function
 */
void urldb_iterate_partial(const char *prefix,
		bool (*callback)(const char *url,
		const struct url_data *data))
{
	char host[256];
	char buf[260]; /* max domain + "www." */
	const char *slash, *scheme_sep;
	struct search_node *tree;
	const struct host_part *h;

	assert(prefix && callback);

	/* strip scheme */
	scheme_sep = strstr(prefix, "://");
	if (scheme_sep)
		prefix = scheme_sep + 3;

	slash = strchr(prefix, '/');

	tree = urldb_get_search_tree(prefix);

	if (slash) {
		/* if there's a slash in the input, then we can
		 * assume that we're looking for a path */
		snprintf(host, sizeof host, "%.*s",
				(int) (slash - prefix), prefix);

		h = urldb_search_find(tree, host);
		if (!h) {
			/* not found as given => retry as www.host */
			if (!urldb_prefix_lacks_www(host, strlen(host)))
				return;

			snprintf(buf, sizeof buf, "www.%s", host);
			h = urldb_search_find(
					search_trees[ST_DN + 'w' - 'a'],
					buf);
			if (!h)
				return;
		}

		if (h->paths.children) {
			/* Have paths, iterate them */
			urldb_iterate_partial_path(&h->paths, slash + 1,
					callback);
		}
	} else {
		/* looking for hosts */
		if (!urldb_iterate_partial_host(tree, prefix, callback))
			return;

		if (urldb_prefix_lacks_www(prefix, strlen(prefix))) {
			/* now look for www.prefix */
			snprintf(buf, sizeof buf, "www.%s", prefix);
			if (!urldb_iterate_partial_host(
					search_trees[ST_DN + 'w' - 'a'],
					buf, callback))
				return;
		}
	}
}
/**
 * Partial host iterator (internal)
 *
 * Descends the search tree guided by the prefix comparison; on reaching a
 * matching node, visits its left subtree, the node's own paths, and then
 * its right subtree.
 *
 * \param root Root of (sub)tree to traverse
 * \param prefix Prefix to match
 * \param callback Callback function
 * \return true to continue, false otherwise
 */
bool urldb_iterate_partial_host(struct search_node *root, const char *prefix,
		bool (*callback)(const char *url,
		const struct url_data *data))
{
	int cmp;

	assert(root && prefix && callback);

	if (root == &empty)
		return true;

	cmp = urldb_search_match_prefix(root->data, prefix);

	/* No match => only one subtree can contain matches */
	if (cmp > 0)
		return urldb_iterate_partial_host(root->left, prefix,
				callback);
	if (cmp < 0)
		return urldb_iterate_partial_host(root->right, prefix,
				callback);

	/* Match => iterate over l/r subtrees & process this node */
	if (!urldb_iterate_partial_host(root->left, prefix, callback))
		return false;

	/* extract all paths attached to this host, if it has any */
	if (root->data->paths.children &&
			!urldb_iterate_entries_path(&root->data->paths,
					callback, NULL))
		return false;

	return urldb_iterate_partial_host(root->right, prefix, callback);
}
/**
 * Partial path iterator (internal)
 *
 * The first segment of the prefix (up to the next '/' or the end) is
 * compared case-insensitively against the start of each child segment.
 * Matching children are either reported in full (prefix exhausted) or
 * searched recursively with the remainder of the prefix.
 *
 * \param parent Root of (sub)tree to traverse
 * \param prefix Prefix to match
 * \param callback Callback function
 * \return true to continue, false otherwise
 */
bool urldb_iterate_partial_path(const struct path_data *parent,
		const char *prefix, bool (*callback)(const char *url,
		const struct url_data *data))
{
	const struct path_data *child;
	const char *end = prefix + strlen(prefix);
	const char *slash = strchr(prefix, '/');
	size_t seg_len;

	if (slash == NULL)
		slash = end;

	if (slash == prefix && *prefix == '/')
		/* Ignore "//" */
		return true;

	seg_len = slash - prefix;

	for (child = parent->children; child != NULL; child = child->next) {
		bool keep_going;

		/* children are not assumed to be ordered, so a mismatch
		 * just means we move on to the next one */
		if (strncasecmp(child->segment, prefix, seg_len) != 0)
			continue;

		/* prefix matches so far */
		if (slash == end) {
			/* we've run out of prefix, so all
			 * paths below this one match */
			keep_going = urldb_iterate_entries_path(child,
					callback, NULL);
		} else {
			/* more prefix to go => recurse */
			keep_going = urldb_iterate_partial_path(child,
					slash + 1, callback);
		}

		if (!keep_going)
			return false;
	}

	return true;
}
/**
 * Iterate over all entries in database
 *
 * Search trees are visited in index order; iteration stops as soon as a
 * tree reports that the callback asked to stop.
 *
 * \param callback Function to callback for each entry
 */
void urldb_iterate_entries(bool (*callback)(const char *url,
		const struct url_data *data))
{
	int tree = 0;

	assert(callback);

	while (tree < NUM_SEARCH_TREES &&
			urldb_iterate_entries_host(search_trees[tree],
					callback, NULL))
		tree++;
}
/**
 * Iterate over all cookies in database
 *
 * Search trees are visited in index order; iteration stops as soon as a
 * tree reports that the callback asked to stop.
 *
 * \param callback Function to callback for each entry
 */
void urldb_iterate_cookies(bool (*callback)(const char *domain, const struct cookie_data *data))
{
	int tree = 0;

	assert(callback);

	while (tree < NUM_SEARCH_TREES &&
			urldb_iterate_entries_host(search_trees[tree],
					NULL, callback))
		tree++;
}
/**
* Host data iterator (internal)
*
* \param parent Root of subtree to iterate over
* \param url_callback Callback function
* \param cookie_callback Callback function
* \return true to continue, false otherwise
*/
bool urldb_iterate_entries_host(struct search_node *parent,
bool (*url_callback)(const char *url,
const struct url_data *data),
bool (*cookie_callback)(const char *domain,
const struct cookie_data *data))
{
if (parent == &empty)
return true;
if (!urldb_iterate_entries_host(parent->left,
url_callback, cookie_callback))
return false;
if ((parent->data->paths.children) || ((cookie_callback) &&
(parent->data->paths.cookies))) {
/* We have paths (or domain cookies), so iterate them */
if (!urldb_iterate_entries_path(&parent->data->paths,
url_callback, cookie_callback)) {
return false;
}
}
if (!urldb_iterate_entries_host(parent->right,
url_callback, cookie_callback))
return false;
return true;
}
/**
 * Path data iterator (internal)
 *
 * Leaf nodes are reported through the URL callback if one is given,
 * otherwise (when the leaf has cookies) through the cookie callback.
 * Interior nodes are not reported; their children are visited in turn.
 *
 * \param parent Root of subtree to iterate over
 * \param url_callback Callback function
 * \param cookie_callback Callback function
 * \return true to continue, false otherwise
 */
bool urldb_iterate_entries_path(const struct path_data *parent,
		bool (*url_callback)(const char *url,
		const struct url_data *data),
		bool (*cookie_callback)(const char *domain,
		const struct cookie_data *data))
{
	const struct path_data *child = parent->children;

	if (child == NULL) {
		/* leaf node */

		/* All leaf nodes in the path tree should have an URL or
		 * cookies attached to them. If this is not the case, it
		 * indicates that there's a bug in the file loader/URL
		 * insertion code. Therefore, assert this here. */
		assert(url_callback || cookie_callback);

		/** \todo handle fragments? */

		if (url_callback) {
			assert(parent->url);
			if (!url_callback(parent->url,
					(const struct url_data *) &parent->urld))
				return false;
		} else if (parent->cookies &&
				!cookie_callback(parent->cookies->domain,
					(const struct cookie_data *) parent->cookies)) {
			return false;
		}

		/* a leaf has no children left to visit */
		return true;
	}

	for (; child != NULL; child = child->next) {
		if (!urldb_iterate_entries_path(child,
				url_callback, cookie_callback))
			return false;
	}

	return true;
}
/**
 * Add a host node to the tree
 *
 * The new node is pushed onto the front of the parent's child list.
 *
 * \param part    Host segment to add (or whole IP address) (copied)
 * \param parent  Parent node to add to
 * \return Pointer to added node, or NULL on memory exhaustion
 */
struct host_part *urldb_add_host_node(const char *part,
		struct host_part *parent)
{
	struct host_part *node;

	assert(part && parent);

	node = calloc(1, sizeof(struct host_part));
	if (node == NULL)
		return NULL;

	node->part = strdup(part);
	if (node->part == NULL) {
		free(node);
		return NULL;
	}

	/* Link in ahead of the current first child (if any) */
	node->parent = parent;
	node->next = parent->children;
	if (node->next != NULL)
		node->next->prev = node;
	parent->children = node;

	return node;
}
/**
 * Add a host to the database, creating any intermediate entries
 *
 * An IP address is stored verbatim as a single child of the root node.
 * Any other host is split on '.' and inserted one segment at a time,
 * rightmost segment first. The resulting leaf is also inserted into the
 * appropriate search tree.
 *
 * The host is copied into a 256 byte working buffer; anything longer is
 * truncated.
 *
 * \param host  Hostname to add
 * \return Pointer to leaf node, or NULL on memory exhaustion
 */
struct host_part *urldb_add_host(const char *host)
{
	struct host_part *d = (struct host_part *) &db_root, *e;
	struct search_node *s;
	char buf[256]; /* 256 bytes is sufficient - domain names are
			* limited to 255 chars. */
	char *part;

	assert(host);

	if (url_host_is_ip_address(host)) {
		/* Host is an IP, so simply add as TLD */

		/* Check for existing entry */
		for (e = d->children; e; e = e->next)
			if (strcasecmp(host, e->part) == 0)
				/* found => return it */
				return e;

		d = urldb_add_host_node(host, d);
		if (!d)
			/* Out of memory: must not hand NULL to the
			 * search tree, which asserts on its data */
			return NULL;

		s = urldb_search_insert(search_trees[ST_IP], d);
		if (!s) {
			/* failed */
			d = NULL;
		} else {
			search_trees[ST_IP] = s;
		}

		return d;
	}

	/* Copy host string, so we can corrupt it */
	strncpy(buf, host, sizeof buf);
	buf[sizeof buf - 1] = '\0';

	/* Process FQDN segments backwards */
	do {
		part = strrchr(buf, '.');
		if (!part) {
			/* last segment */
			/* Check for existing entry */
			for (e = d->children; e; e = e->next)
				if (strcasecmp(buf, e->part) == 0)
					break;

			if (e) {
				d = e;
			} else {
				d = urldb_add_host_node(buf, d);
			}

			/* And insert into search tree */
			if (d) {
				struct search_node **r;

				r = urldb_get_search_tree_direct(buf);
				s = urldb_search_insert(*r, d);
				if (!s) {
					/* failed */
					d = NULL;
				} else {
					*r = s;
				}
			}
			break;
		}

		/* Check for existing entry */
		for (e = d->children; e; e = e->next)
			if (strcasecmp(part + 1, e->part) == 0)
				break;

		d = e ? e : urldb_add_host_node(part + 1, d);
		if (!d)
			break;

		/* Chop the segment just handled off the working copy */
		*part = '\0';
	} while (1);

	return d;
}
/**
 * Add a path node to the tree
 *
 * The node is inserted into the parent's child list so that the list stays
 * ordered by segment (as compared with strcmp).
 *
 * \param scheme    URL scheme associated with path (copied)
 * \param port      Port number on host associated with path
 * \param segment   Path segment to add (copied)
 * \param fragment  URL fragment (copied), or NULL
 * \param parent    Parent node to add to
 * \return Pointer to added node, or NULL on memory exhaustion
 */
struct path_data *urldb_add_path_node(const char *scheme, unsigned int port,
		const char *segment, const char *fragment,
		struct path_data *parent)
{
	struct path_data *d, *e;

	assert(scheme && segment && parent);

	d = calloc(1, sizeof(struct path_data));
	if (!d)
		return NULL;

	d->scheme = strdup(scheme);
	if (!d->scheme) {
		free(d);
		return NULL;
	}

	d->port = port;

	d->segment = strdup(segment);
	if (!d->segment) {
		free(d->scheme);
		free(d);
		return NULL;
	}

	if (fragment) {
		if (!urldb_add_path_fragment(d, fragment)) {
			/* The fragment array may already have been
			 * allocated even though copying the fragment
			 * itself failed, so release it as well.
			 * (No fragment strings are owned at this point,
			 * as the count is only bumped on success.) */
			free(d->fragment);
			free(d->segment);
			free(d->scheme);
			free(d);
			return NULL;
		}
	}

	/* Find the first sibling which sorts after the new segment */
	for (e = parent->children; e; e = e->next)
		if (strcmp(e->segment, d->segment) > 0)
			break;

	if (e) {
		/* Insert immediately before e */
		d->prev = e->prev;
		d->next = e;
		if (e->prev)
			e->prev->next = d;
		else
			parent->children = d;
		e->prev = d;
	} else if (!parent->children) {
		/* First child of this parent */
		d->prev = d->next = NULL;
		parent->children = parent->last = d;
	} else {
		/* Sorts after everything present: append */
		d->next = NULL;
		d->prev = parent->last;
		parent->last->next = d;
		parent->last = d;
	}

	d->parent = parent;

	return d;
}
/**
 * Add a path to the database, creating any intermediate entries
 *
 * The path and query are merged into a single "path?query" string which
 * is then split on '/' and inserted one segment at a time beneath the
 * host's path root. If the leaf has no URL recorded yet, a copy of \a url
 * (with any fragment removed) is stored on it.
 *
 * \param scheme    URL scheme associated with path
 * \param port      Port number on host associated with path
 * \param host      Host tree node to attach to
 * \param path      Absolute path to add
 * \param query     Path query to add
 * \param fragment  URL fragment, or NULL
 * \param url       URL (fragment ignored)
 * \return Pointer to leaf node, or NULL on memory exhaustion
 */
struct path_data *urldb_add_path(const char *scheme, unsigned int port,
		const struct host_part *host, const char *path,
		const char *query, const char *fragment, const char *url)
{
	struct path_data *node, *match;
	char *merged, *cursor;
	char *seg, *sep, *hash;
	int len = 0;

	assert(scheme && host && url);
	assert(path || query);

	node = (struct path_data *) &host->paths;

	/* Build a private, writable "path?query" string */
	if (path)
		len += strlen(path);
	if (query)
		len += strlen(query) + 1;

	merged = malloc(len + 1);
	if (!merged)
		return NULL;

	cursor = merged;
	if (path) {
		strcpy(cursor, path);
		cursor += strlen(path);
	}
	if (query) {
		*cursor++ = '?';
		strcpy(cursor, query);
	}

	/* Ignore a leading '/' */
	seg = merged;
	if (*seg == '/')
		seg++;

	/* Descend through the tree, one segment per iteration */
	for (;;) {
		sep = strchr(seg, '/');
		if (sep != NULL)
			*sep = '\0';

		/* Is there already a child for this segment? */
		for (match = node->children; match; match = match->next) {
			if (strcmp(seg, match->segment) == 0 &&
					strcasecmp(scheme,
						match->scheme) == 0 &&
					match->port == port)
				break;
		}

		if (sep == NULL) {
			/* Final segment: this is where the fragment
			 * (if any) gets recorded */
			if (match)
				node = urldb_add_path_fragment(match,
						fragment);
			else
				node = urldb_add_path_node(scheme, port,
						seg, fragment, node);
			break;
		}

		/* Intermediate segment */
		if (match)
			node = match;
		else
			node = urldb_add_path_node(scheme, port, seg,
					NULL, node);
		if (!node)
			break;

		seg = sep + 1;
	}

	free(merged);

	if (node && !node->url) {
		/* Record the URL on the leaf */
		node->url = strdup(url);
		if (!node->url)
			return NULL;

		/* and drop its fragment */
		hash = strrchr(node->url, '#');
		if (hash)
			*hash = '\0';
	}

	return node;
}
/**
 * Fragment comparator callback for qsort
 *
 * \param a  Pointer to first array element (itself a string pointer)
 * \param b  Pointer to second array element (itself a string pointer)
 * \return Result of a case-insensitive comparison of the two strings
 */
int urldb_add_path_fragment_cmp(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcasecmp(*lhs, *rhs);
}
/**
 * Add a fragment to a path segment
 *
 * The segment's fragment array is grown by one slot, a copy of the
 * fragment is stored in it and the array is then re-sorted.
 *
 * \param segment   Path segment to add to
 * \param fragment  Fragment to add (copied), or NULL
 * \return segment or NULL on memory exhaustion
 */
struct path_data *urldb_add_path_fragment(struct path_data *segment,
		const char *fragment)
{
	char **grown;
	char *copy;

	assert(segment);

	/* A NULL fragment makes this a no-op. Odd as that looks, it
	 * keeps the callers simpler. */
	if (fragment == NULL)
		return segment;

	grown = realloc(segment->fragment,
			(segment->frag_cnt + 1) * sizeof(char *));
	if (grown == NULL)
		return NULL;

	segment->fragment = grown;

	copy = strdup(fragment);
	segment->fragment[segment->frag_cnt] = copy;
	if (copy == NULL) {
		/* The enlarged array stays with the segment: it is
		 * the segment's buffer now, so it is not freed here */
		return NULL;
	}

	segment->frag_cnt++;

	/* Keep fragments in alphabetical order by sorting the array.
	 * Inserting in order might prove to be a better approach. */
	qsort(segment->fragment, segment->frag_cnt, sizeof (char *),
			urldb_add_path_fragment_cmp);

	return segment;
}
/**
 * Find an URL in the database
 *
 * The URL is split into components, the host is looked up in the search
 * trees and the merged "path?query" string is then matched against that
 * host's path tree.
 *
 * \param url  Absolute URL to find
 * \return Pointer to path data, or NULL if not found
 */
struct path_data *urldb_find_url(const char *url)
{
	const struct host_part *h;
	struct path_data *p;
	struct search_node *tree;
	char *plq, *copy, *colon;
	const char *host;
	unsigned short port;
	url_func_result ret;
	struct url_components components;
	int len = 0;

	assert(url);

	/* Extract url components */
	ret = url_get_components(url, &components);
	if (ret != URL_FUNC_OK)
		return NULL;

	/* Ensure scheme and authority exist */
	if (!(components.scheme && components.authority)) {
		url_destroy_components(&components);
		return NULL;
	}

	/* Extract host part from authority (skip any userinfo) */
	host = strchr(components.authority, '@');
	if (!host)
		host = components.authority;
	else
		host++;

	/* get port and remove from host */
	colon = strrchr(host, ':');
	if (!colon) {
		port = 0;
	} else {
		*colon = '\0';
		port = atoi(colon + 1);
	}

	/* file urls have no host, so manufacture one */
	if (strcasecmp(components.scheme, "file") == 0)
		host = "localhost";

	tree = urldb_get_search_tree(host);
	h = urldb_search_find(tree, host);
	if (!h) {
		url_destroy_components(&components);
		return NULL;
	}

	/* generate plq */
	if (components.path)
		len += strlen(components.path);
	if (components.query)
		len += strlen(components.query) + 1;

	plq = malloc(len + 1);
	if (!plq) {
		url_destroy_components(&components);
		return NULL;
	}

	/* Start with an empty string so that the buffer is terminated
	 * even when the URL has neither a path nor a query */
	copy = plq;
	*copy = '\0';

	if (components.path) {
		strcpy(copy, components.path);
		copy += strlen(components.path);
	}
	if (components.query) {
		*copy++ = '?';
		strcpy(copy, components.query);
	}

	p = urldb_match_path(&h->paths, plq, components.scheme, port);

	url_destroy_components(&components);
	free(plq);

	return p;
}
/**
 * Match a path string
 *
 * The first segment of \a path is looked up amongst the children of
 * \a parent; on success the remainder of the path is matched recursively
 * beneath the child found. A segment only matches a child whose segment
 * is exactly equal to it (not merely one that it is a prefix of). Schemes
 * are compared case-insensitively, as they are when paths are inserted.
 *
 * \param parent  Path (sub)tree to look in
 * \param path    The path to search for
 * \param scheme  The URL scheme associated with the path
 * \param port    The port associated with the path
 * \return Pointer to path data or NULL if not found.
 */
struct path_data *urldb_match_path(const struct path_data *parent,
		const char *path, const char *scheme, unsigned short port)
{
	struct path_data *p;
	const char *segment, *slash;
	size_t seglen;

	/* Nothing left to match: the parent is the answer */
	if (*path == '\0')
		return (struct path_data *) parent;

	/* Skip the separator, if there is one. (The insertion code
	 * only strips a leading '/', so do likewise here.) */
	segment = (*path == '/') ? path + 1 : path;

	slash = strchr(segment, '/');
	if (!slash)
		slash = segment + strlen(segment);

	seglen = slash - segment;

	for (p = parent->children; p; p = p->next) {
		/* The length-limited compare alone would accept any
		 * child that the segment is a prefix of (and anything
		 * at all for an empty segment), so also insist that
		 * the child's segment ends here. */
		if (strncmp(p->segment, segment, seglen) == 0 &&
				p->segment[seglen] == '\0' &&
				strcasecmp(p->scheme, scheme) == 0 &&
				p->port == port)
			break;
	}

	if (!p)
		return NULL;

	return urldb_match_path(p, slash, scheme, port);
}
/**
 * Get the search tree for a particular host
 *
 * IP addresses share one tree. Hosts starting with an ASCII letter use
 * the tree for that letter (case-insensitively); everything else goes
 * into a catch-all tree.
 *
 * \param host  the host to lookup
 * \return pointer to the slot holding the corresponding search tree
 */
struct search_node **urldb_get_search_tree_direct(const char *host) {
	unsigned char first;

	assert(host);

	if (url_host_is_ip_address(host))
		return &search_trees[ST_IP];

	/* Classify the first character by explicit ASCII range rather
	 * than with isalpha()/tolower(): those must not be passed a
	 * negative char, and in some locales they accept letters
	 * outside a-z, which would index beyond the per-letter trees. */
	first = (unsigned char) *host;
	if (first >= 'A' && first <= 'Z')
		first = first - 'A' + 'a';

	if (first >= 'a' && first <= 'z')
		return &search_trees[ST_DN + first - 'a'];

	return &search_trees[ST_EE];
}
/**
 * Get the search tree for a particular host
 *
 * Convenience wrapper which yields the current root of the tree rather
 * than the slot in which that root is stored.
 *
 * \param host  the host to lookup
 * \return the corresponding search tree
 */
struct search_node *urldb_get_search_tree(const char *host) {
	struct search_node **slot = urldb_get_search_tree_direct(host);

	return *slot;
}
/**
 * Dump URL database to stderr
 *
 * Outputs the host tree (starting at the root) followed by each of the
 * search trees in turn.
 */
void urldb_dump(void)
{
	int tree = 0;

	urldb_dump_hosts(&db_root);

	while (tree != NUM_SEARCH_TREES) {
		urldb_dump_search(search_trees[tree], 0);
		tree++;
	}
}
/**
 * Dump URL database hosts to stderr
 *
 * Logs the node itself (when it has a name), then its path data, then
 * recurses into each of its children.
 *
 * \param parent  Parent node of tree to dump
 */
void urldb_dump_hosts(struct host_part *parent)
{
	struct host_part *child;

	if (parent->part != NULL) {
		LOG(("%s", parent->part));

		LOG(("\t%s invalid SSL certs",
			parent->permit_invalid_certs ? "Permits" : "Denies"));
	}

	/* Paths belonging to this host */
	urldb_dump_paths(&parent->paths);

	/* Then everything beneath it */
	child = parent->children;
	while (child != NULL) {
		urldb_dump_hosts(child);
		child = child->next;
	}
}
/**
 * Dump URL database paths to stderr
 *
 * Logs the node's scheme, port, segment and fragments (when it has a
 * segment), then recurses into each of its children.
 *
 * \param parent  Parent node of tree to dump
 */
void urldb_dump_paths(struct path_data *parent)
{
	struct path_data *child;
	unsigned int frag;

	if (parent->segment != NULL) {
		LOG(("\t%s : %u", parent->scheme, parent->port));

		LOG(("\t\t'%s'", parent->segment));

		frag = 0;
		while (frag != parent->frag_cnt) {
			LOG(("\t\t\t#%s", parent->fragment[frag]));
			frag++;
		}
	}

	/* Then everything beneath it */
	child = parent->children;
	while (child != NULL) {
		urldb_dump_paths(child);
		child = child->next;
	}
}
/**
 * Dump search tree
 *
 * Prints the tree in order to stderr, one host per line, indented by one
 * space per level of tree depth. Each host is printed by walking from its
 * leaf host_part up through its parents.
 *
 * \param parent  Parent node of tree to dump
 * \param depth   Tree depth
 */
void urldb_dump_search(struct search_node *parent, int depth)
{
	const struct host_part *h;
	int i;

	if (parent == &empty)
		return;

	urldb_dump_search(parent->left, depth + 1);

	for (i = 0; i != depth; i++)
		fputc(' ', stderr);

	for (h = parent->data; h; h = h->parent) {
		/* The walk ends on the root of the host tree, which may
		 * have no name; a NULL must not be given to %s */
		if (h->part)
			fprintf(stderr, "%s", h->part);

		/* Separate segments, but no dot after the last one */
		if (h->parent && h->parent->parent)
			fputc('.', stderr);
	}

	fputc('\n', stderr);

	urldb_dump_search(parent->right, depth + 1);
}
/**
 * Insert a node into the search tree
 *
 * Allocates a fresh level 1 node for the data and hands it to
 * urldb_search_insert_internal() for placement.
 *
 * \param root  Root of tree to insert into
 * \param data  User data to insert
 * \return Pointer to updated root, or NULL if failed
 */
struct search_node *urldb_search_insert(struct search_node *root,
		const struct host_part *data)
{
	struct search_node *fresh;

	assert(root && data);

	fresh = malloc(sizeof(struct search_node));
	if (fresh == NULL)
		return NULL;

	fresh->data = data;
	fresh->level = 1;
	fresh->left = &empty;
	fresh->right = &empty;

	return urldb_search_insert_internal(root, fresh);
}
/**
 * Insert node into search tree
 *
 * If the tree already holds an entry which compares equal to the node's
 * data, the node is freed and the tree is left as it was.
 *
 * \param root  Root of (sub)tree to insert into
 * \param n     Node to insert
 * \return Pointer to updated root
 */
struct search_node *urldb_search_insert_internal(struct search_node *root,
		struct search_node *n)
{
	int order;

	assert(root && n);

	/* Reached the bottom: the new node takes this place */
	if (root == &empty)
		return n;

	order = urldb_search_match_host(root->data, n->data);
	if (order == 0) {
		/* exact match => the new node is not needed */
		free(n);
		return root;
	}

	if (order > 0)
		root->left = urldb_search_insert_internal(root->left, n);
	else
		root->right = urldb_search_insert_internal(root->right, n);

	/* Rebalance on the way back up: skew first, then split */
	return urldb_search_split(urldb_search_skew(root));
}
/**
 * Recursive worker for urldb_search_remove()
 *
 * \param root     Root of (sub)tree to remove from
 * \param data     Data to delete
 * \param last     Updated to the deepest node visited on the search path
 * \param deleted  Updated to the last node on the search path at which
 *                 the search did not turn left (the removal candidate)
 * \return Updated root of (sub)tree
 */
static struct search_node *urldb_search_remove_walk(struct search_node *root,
		const struct host_part *data,
		struct search_node **last, struct search_node **deleted)
{
	int c;

	if (root == &empty)
		return root;

	c = urldb_search_match_host(root->data, data);

	*last = root;
	if (c > 0) {
		root->left = urldb_search_remove_walk(root->left, data,
				last, deleted);
	} else {
		*deleted = root;
		root->right = urldb_search_remove_walk(root->right, data,
				last, deleted);
	}

	if (root == *last) {
		/* Bottom of the search path: if the candidate really
		 * holds the data, move this node's data into it and
		 * unlink this node instead */
		if (*deleted != &empty &&
				urldb_search_match_host((*deleted)->data,
						data) == 0) {
			(*deleted)->data = (*last)->data;
			*deleted = &empty;
			root = root->right;
			free(*last);
			/* Don't leave a pointer to freed memory around
			 * for the callers further up to compare with */
			*last = &empty;
		}
	} else {
		/* On the way back up: restore the level invariants */
		if (root->left->level < root->level - 1 ||
				root->right->level < root->level - 1) {
			if (root->right->level > --root->level)
				root->right->level = root->level;

			root = urldb_search_skew(root);
			root->right = urldb_search_skew(root->right);
			root->right->right =
				urldb_search_skew(root->right->right);
			root = urldb_search_split(root);
			root->right = urldb_search_split(root->right);
		}
	}

	return root;
}

/**
 * Delete a node from a search tree
 *
 * The bookkeeping needed during the removal is local to each call, so a
 * removal never inspects state left over from an earlier one.
 *
 * \param root  Tree to remove from
 * \param data  Data to delete
 * \return Updated root of tree
 */
struct search_node *urldb_search_remove(struct search_node *root,
		const struct host_part *data)
{
	struct search_node *last = &empty;
	struct search_node *deleted = &empty;

	assert(root && data);

	return urldb_search_remove_walk(root, data, &last, &deleted);
}
/**
 * Find a node in a search tree
 *
 * Descends from the root, going left or right according to how each
 * node's host compares with the one sought, until a match or the bottom
 * of the tree is reached.
 *
 * \param root  Tree to look in
 * \param host  Host to find
 * \return Pointer to host tree node, or NULL if not found
 */
const struct host_part *urldb_search_find(struct search_node *root,
		const char *host)
{
	struct search_node *node = root;

	assert(root && host);

	while (node != &empty) {
		int order = urldb_search_match_string(node->data, host);

		if (order == 0)
			return node->data;

		node = (order > 0) ? node->left : node->right;
		assert(node);
	}

	return NULL;
}
/**
 * Compare a pair of host_parts
 *
 * Both hosts are walked towards the root in step, comparing their parts
 * case-insensitively at each level.
 *
 * \param a  First host (leaf node)
 * \param b  Second host (leaf node)
 * \return 0 if match, non-zero, otherwise: the difference between the
 *         first parts that differ or, if one host has parts left over
 *         when the other runs out, 1 if that is \a a and -1 if it is \a b
 */
int urldb_search_match_host(const struct host_part *a,
		const struct host_part *b)
{
	bool a_remaining, b_remaining;

	assert(a && b);

	/* Climb both chains until either of them hits the root */
	while (a && a != &db_root && b && b != &db_root) {
		int diff = strcasecmp(a->part, b->part);

		if (diff != 0)
			/* They differ => return the difference here */
			return diff;

		a = a->parent;
		b = b->parent;
	}

	/* Every part compared so far was equal, so the result now
	 * depends on whether either host has parts left */
	a_remaining = a && a != &db_root;
	b_remaining = b && b != &db_root;

	if (a_remaining && !b_remaining)
		/* len(a) > len(b) */
		return 1;

	if (!a_remaining && b_remaining)
		/* len(a) < len(b) */
		return -1;

	/* identical */
	return 0;
}
/**
 * Compare host_part with a string
 *
 * An IP address is compared with the node's part as a whole. Otherwise
 * the string is consumed one dot-separated segment at a time, from the
 * left, each being compared with the next node on the way from \a a
 * towards the root.
 *
 * \param a  Host (leaf node)
 * \param b  Host name
 * \return 0 if match, non-zero, otherwise
 */
int urldb_search_match_string(const struct host_part *a,
		const char *b)
{
	const char *limit, *seg_end;
	int part_len, diff;
	bool a_remaining;

	assert(a && a != &db_root && b);

	if (url_host_is_ip_address(b)) {
		/* IP address */
		return strcasecmp(a->part, b);
	}

	/* One past the terminator, so that stepping over the final
	 * segment's (virtual) separator lands exactly on the limit */
	limit = b + strlen(b) + 1;

	while (b < limit && a && a != &db_root) {
		seg_end = strchr(b, '.');
		if (seg_end == NULL) {
			/* last segment => it runs up to the terminator */
			seg_end = limit - 1;
		}

		/* Compare strings (length limited) */
		diff = strncasecmp(a->part, b, seg_end - b);
		if (diff != 0)
			/* didn't match => return difference */
			return diff;

		/* Equal so far; the lengths have to agree as well */
		part_len = strlen(a->part);
		if (part_len > seg_end - b)
			/* len(a) > len(b) */
			return 1;
		if (part_len < seg_end - b)
			/* len(a) < len(b) */
			return -1;

		b = seg_end + 1;
		a = a->parent;
	}

	/* Either one side has segments left over, or the two hosts
	 * are identical */
	a_remaining = a && a != &db_root;

	if (a_remaining && b >= limit)
		/* len(a) > len(b) */
		return 1;

	if (!a_remaining && b < limit)
		/* len(a) < len(b) */
		return -1;

	/* Identical */
	return 0;
}
/**
 * Compare host_part with prefix
 *
 * Works like urldb_search_match_string(), except that the final segment
 * of the prefix need only match the start of the corresponding part, and
 * that the host may have further parts beyond the end of the prefix.
 *
 * \param a  Host (leaf node)
 * \param b  Prefix of a host name
 * \return 0 if match, non-zero, otherwise
 */
int urldb_search_match_prefix(const struct host_part *a,
		const char *b)
{
	const char *limit, *seg_end;
	int part_len, diff;

	assert(a && a != &db_root && b);

	if (url_host_is_ip_address(b)) {
		/* IP address */
		return strncasecmp(a->part, b, strlen(b));
	}

	/* One past the terminator, so that stepping over the final
	 * segment's (virtual) separator lands exactly on the limit */
	limit = b + strlen(b) + 1;

	while (b < limit && a && a != &db_root) {
		seg_end = strchr(b, '.');
		if (seg_end == NULL) {
			/* last segment => it runs up to the terminator */
			seg_end = limit - 1;
		}

		/* Compare strings (length limited) */
		diff = strncasecmp(a->part, b, seg_end - b);
		if (diff != 0)
			/* didn't match => return difference */
			return diff;

		/* Segment lengths only matter for complete segments,
		 * i.e. those which are followed by a dot */
		if (seg_end < limit - 1) {
			part_len = strlen(a->part);
			if (part_len > seg_end - b)
				/* len(a) > len(b) */
				return 1;
			if (part_len < seg_end - b)
				/* len(a) < len(b) */
				return -1;
		}

		b = seg_end + 1;
		a = a->parent;
	}

	/* The only failure left is the host running out of parts while
	 * some of the prefix remains. A host which is longer than the
	 * prefix, or identical to it, is a match. */
	if ((!a || a == &db_root) && b < limit)
		/* len(a) < len(b) => prefix does not match */
		return -1;

	return 0;
}
/**
 * Rotate a subtree right
 *
 * The rotation only happens when the left child is on the same level as
 * the root; otherwise the subtree is returned untouched.
 *
 * \param root  Root of subtree to rotate
 * \return new root of subtree
 */
struct search_node *urldb_search_skew(struct search_node *root)
{
	struct search_node *pivot;

	assert(root);

	pivot = root->left;
	if (pivot->level != root->level)
		return root;

	/* The left child moves up; the old root becomes its right child */
	root->left = pivot->right;
	pivot->right = root;

	return pivot;
}
/**
 * Rotate a node left, increasing the parent's level
 *
 * The rotation only happens when the right child's right child is on the
 * same level as the root; otherwise the subtree is returned untouched.
 *
 * \param root  Root of subtree to rotate
 * \return New root of subtree
 */
struct search_node *urldb_search_split(struct search_node *root)
{
	struct search_node *pivot;

	assert(root);

	if (root->right->right->level != root->level)
		return root;

	/* The right child moves up (gaining a level); the old root
	 * becomes its left child */
	pivot = root->right;
	root->right = pivot->left;
	pivot->left = root;
	pivot->level++;

	return pivot;
}
/**
 * Retrieve cookies for an URL
 *
 * The URL is first added to the database and then looked up again. Cookies
 * are then gathered from four places, in this order:
 *   1. siblings of the URL's path node which have the same segment
 *      (skipped when that segment is empty),
 *   2. empty-segment children of each ancestor path node,
 *   3. each ancestor path node itself,
 *   4. the domain cookies of the host and of each of its parent hosts.
 * Expired cookies, and secure cookies where the scheme is not https, are
 * left out. Every cookie used has its last_used time updated and is
 * reported through cookies_update().
 *
 * \param url URL being fetched
 * \return Cookies string for libcurl (on heap), or NULL on error/no cookies
 */
char *urldb_get_cookie(const char *url)
{
const struct path_data *p, *q;
const struct host_part *h;
struct cookie_internal_data *c;
/* version tracks the lowest cookie version seen so far */
int count = 0, version = COOKIE_RFC2965;
/* ret_used starts at 1 - presumably to account for the terminator */
int ret_alloc = 4096, ret_used = 1;
char *path;
char *ret;
char *scheme;
time_t now;
url_func_result res;
assert(url);
// LOG(("%s", url));
/* Add the URL first, then look it up (the result of the add is
 * not checked here; a failed lookup is handled below) */
urldb_add_url(url);
p = urldb_find_url(url);
if (!p)
return NULL;
scheme = p->scheme;
ret = malloc(ret_alloc);
if (!ret)
return NULL;
ret[0] = '\0';
res = url_path(url, &path);
if (res != URL_FUNC_OK) {
free(ret);
return NULL;
}
now = time(NULL);
/* NOTE(review): this dereferences p->segment and p->parent, so it
 * assumes that the lookup never yields a host's root path node -
 * confirm that this holds for URLs with an empty path. */
if (*(p->segment) != '\0') {
/* Match exact path, unless directory, when prefix matching
* will handle this case for us. */
for (q = p->parent->children; q; q = q->next) {
if (strcmp(q->segment, p->segment))
continue;
/* Consider all cookies associated with
* this exact path */
for (c = q->cookies; c; c = c->next) {
/* An expiry of 1 is exempt from the expiry test (it is
 * the value the cookie parser stores when no expiry
 * was given) */
if (c->expires != 1 && c->expires < now)
/* cookie has expired => ignore */
continue;
if (c->secure && strcasecmp(
q->scheme, "https"))
/* secure cookie for insecure host.
* ignore */
continue;
if (!urldb_concat_cookie(c, &ret_used,
&ret_alloc, &ret)) {
free(path);
free(ret);
return NULL;
}
if (c->version < (unsigned int)version)
version = c->version;
c->last_used = now;
cookies_update(c->domain, (struct cookie_data *)c);
count++;
}
}
}
// LOG(("%s", ret));
/* Now consider cookies whose paths prefix-match ours */
for (p = p->parent; p; p = p->parent) {
/* Find directory's path entry(ies) */
/* There are potentially multiple due to differing schemes */
for (q = p->children; q; q = q->next) {
if (*(q->segment) != '\0')
continue;
for (c = q->cookies; c; c = c->next) {
// LOG(("%p: %s=%s", c, c->name, c->value));
if (c->expires != 1 && c->expires < now)
/* cookie has expired => ignore */
continue;
if (c->secure && strcasecmp(
q->scheme, "https"))
/* Secure cookie for insecure server
* => ignore */
continue;
if (!urldb_concat_cookie(c, &ret_used,
&ret_alloc, &ret)) {
free(path);
free(ret);
return NULL;
}
if (c->version < (unsigned int) version)
version = c->version;
c->last_used = now;
cookies_update(c->domain,
(struct cookie_data *)c);
count++;
}
}
/* Consider p itself - may be the result of Path=/foo */
for (c = p->cookies; c; c = c->next) {
if (c->expires != 1 && c->expires < now)
/* cookie has expired => ignore */
continue;
/* Ensure cookie path is a prefix of the resource */
if (strncmp(c->path, path, strlen(c->path)) != 0)
/* paths don't match => ignore */
continue;
if (c->secure && strcasecmp(p->scheme, "https"))
/* Secure cookie for insecure server
* => ignore */
continue;
if (!urldb_concat_cookie(c, &ret_used,
&ret_alloc, &ret)) {
free(path);
free(ret);
return NULL;
}
if (c->version < (unsigned int) version)
version = c->version;
c->last_used = now;
cookies_update(c->domain, (struct cookie_data *)c);
count++;
}
if (!p->parent) {
/* No parent, so bail here. This can't go in the
* loop exit condition as we want to process the
* top-level node, too */
break;
}
}
// LOG(("%s", ret));
/* Finally consider domain cookies for hosts which domain match ours */
/* NOTE(review): p is the top-level path node at this point and is
 * reinterpreted as its host - presumably this relies on `paths`
 * being the first member of struct host_part; verify against the
 * structure definition. */
for (h = (const struct host_part *)p; h && h != &db_root;
h = h->parent) {
for (c = h->paths.cookies; c; c = c->next) {
if (c->expires != 1 && c->expires < now)
/* cookie has expired => ignore */
continue;
/* Ensure cookie path is a prefix of the resource */
if (strncmp(c->path, path, strlen(c->path)) != 0)
/* paths don't match => ignore */
continue;
/* Domain cookies are checked against the scheme of the URL's own
 * path node, as saved at the top of the function */
if (c->secure && strcasecmp(scheme, "https"))
/* secure cookie for insecure host. ignore */
continue;
if (!urldb_concat_cookie(c, &ret_used, &ret_alloc,
&ret)) {
free(path);
free(ret);
return NULL;
}
if (c->version < (unsigned int)version)
version = c->version;
c->last_used = now;
cookies_update(c->domain, (struct cookie_data *)c);
count++;
}
}
// LOG(("%s", ret));
if (count == 0) {
/* No cookies found */
free(path);
free(ret);
return NULL;
}
/* and build output string */
{
char *temp;
/* NOTE(review): the extra 12 bytes look like room for the
 * "$Version=" prefix plus a short version number - confirm that
 * the version can never need more digits than that leaves. */
if (version > 0)
temp = malloc(12 + ret_used);
else
temp = malloc(ret_used);
if (!temp) {
free(path);
free(ret);
return NULL;
}
if (version > 0)
sprintf(temp, "$Version=%d%s", version, ret);
else {
/* Old-style cookies => no version & skip "; " */
sprintf(temp, "%s", ret + 2);
}
free(path);
free(ret);
ret = temp;
}
return ret;
}
/**
 * Parse Set-Cookie header and insert cookie(s) into database
 *
 * When a referer is given, the host of the URL must domain-match the
 * referer's host, otherwise the header is rejected. Each cookie parsed
 * from the header is validated against the URL's path and host before it
 * is inserted. Processing stops at the first cookie which fails to parse,
 * validate or insert.
 *
 * \param header   Header to parse, with Set-Cookie: stripped
 * \param url      URL being fetched
 * \param referer  Referring resource, or 0 for verifiable transaction
 * \return true on success, false otherwise
 */
bool urldb_set_cookie(const char *header, const char *url,
		const char *referer)
{
	const char *cur = header, *end;
	char *path, *host, *scheme, *urlt;
	url_func_result res;

	assert(url && header);

//	LOG(("'%s' : '%s'", url, header));

	/* strip fragment */
	urlt = strdup(url);
	if (!urlt)
		return false;

	scheme = strchr(urlt, '#');
	if (scheme)
		*scheme = '\0';

	res = url_scheme(url, &scheme);
	if (res != URL_FUNC_OK) {
		free(urlt);
		return false;
	}

	res = url_path(url, &path);
	if (res != URL_FUNC_OK) {
		free(scheme);
		free(urlt);
		return false;
	}

	res = url_host(url, &host);
	if (res != URL_FUNC_OK) {
		free(path);
		free(scheme);
		free(urlt);
		return false;
	}

	if (referer) {
		char *rhost;

		/* Ensure that url's host name domain matches
		 * referer's (4.3.5) */
		res = url_host(referer, &rhost);
		if (res != URL_FUNC_OK) {
			goto error;
		}

		/* Domain match host names */
		if (strcasecmp(host, rhost) != 0) {
			/* Not exact match, so try the following:
			 *
			 * 1) host = A.B; rhost = B (i.e. strip first
			 *    segment from host and compare against rhost)
			 * 2) host = A.B; rhost = C.B (i.e. strip first
			 *    segment off both hosts and compare) */
			const char *dot = strchr(host, '.');
			const char *rdot = strchr(rhost, '.');

			if (!dot) {
				free(rhost);
				goto error;
			}

			/* 1 */
			if (strcasecmp(dot + 1, rhost) != 0) {
				/* 2: the referer's host needs a first
				 * segment to strip, and what remains (B)
				 * must contain embedded dots */
				if (!rdot || strchr(rdot + 1, '.') == NULL) {
					free(rhost);
					goto error;
				}

				/* The remainders must be the same. It has
				 * to be the referer's own remainder which
				 * is compared here: comparing the host's
				 * with itself would accept any referer. */
				if (strcasecmp(dot, rdot) != 0) {
					free(rhost);
					goto error;
				}
			}
		}

		free(rhost);
	}

	end = cur + strlen(cur) - 2 /* Trailing CRLF */;

	do {
		struct cookie_internal_data *c;
		char *dot;

		c = urldb_parse_cookie(url, &cur);
		if (!c) {
			/* failed => stop parsing */
			goto error;
		}

		/* validate cookie */

		/* 4.2.2:i Cookie must have NAME and VALUE */
		if (!c->name || !c->value) {
			urldb_free_cookie(c);
			goto error;
		}

		/* 4.3.2:i Cookie path must be a prefix of URL path */
		if (strncmp(c->path, path, strlen(c->path)) != 0 ||
				strlen(c->path) > strlen(path)) {
			urldb_free_cookie(c);
			goto error;
		}

		/* 4.3.2:ii Cookie domain must contain embedded dots */
		dot = strchr(c->domain + 1, '.');
		if (!dot || *(dot + 1) == '\0') {
			/* no embedded dots */
			urldb_free_cookie(c);
			goto error;
		}

		/* Domain match fetch host with cookie domain */
		if (strcasecmp(host, c->domain) != 0) {
			int hlen, dlen;
			char *domain = c->domain;

			/* 4.3.2:iii */
			if (url_host_is_ip_address(host)) {
				/* IP address, so no partial match */
				urldb_free_cookie(c);
				goto error;
			}

			hlen = strlen(host);
			dlen = strlen(c->domain);

			if (hlen <= dlen && hlen != dlen - 1) {
				/* Partial match not possible */
				urldb_free_cookie(c);
				goto error;
			}

			if (hlen == dlen - 1) {
				/* Relax matching to allow
				 * host a.com to match .a.com */
				domain++;
				dlen--;
			}

			/* The host must end with the cookie domain */
			if (strcasecmp(host + (hlen - dlen), domain)) {
				urldb_free_cookie(c);
				goto error;
			}

			/* 4.3.2:iv Ensure H contains no dots */
			for (int i = 0; i < (hlen - dlen); i++)
				if (host[i] == '.') {
					urldb_free_cookie(c);
					goto error;
				}
		}

		/* Now insert into database (this frees the cookie
		 * itself if it fails) */
		if (!urldb_insert_cookie(c, scheme, urlt))
			goto error;

		cookies_update(c->domain, (struct cookie_data *)c);
	} while (cur < end);

	free(host);
	free(path);
	free(scheme);
	free(urlt);

	return true;

error:
	free(host);
	free(path);
	free(scheme);
	free(urlt);

	return false;
}
/**
 * Parse a cookie
 *
 * Input is consumed up to the end of the current cookie (or the end of
 * the line). Each "name=value" pair met along the way is handed to
 * urldb_parse_avpair(). Once parsing is done, a missing domain or path is
 * filled in from the URL, and a cookie without an expiry has it set to 1.
 *
 * \param url     URL being fetched
 * \param cookie  Pointer to cookie string (updated on exit)
 * \return Pointer to cookie structure (on heap, caller frees) or NULL
 */
struct cookie_internal_data *urldb_parse_cookie(const char *url,
		const char **cookie)
{
	struct cookie_internal_data *c;
	const char *cur;
	char name[1024], value[4096];
	char *name_end = name, *value_end = value;
	bool had_equals = false;
	bool quoted = false;

	assert(url && cookie && *cookie);

	c = calloc(1, sizeof(struct cookie_internal_data));
	if (!c)
		return NULL;

	/* -1 marks the expiry as "not set by any avpair" */
	c->expires = -1;

	name[0] = '\0';
	value[0] = '\0';

	for (cur = *cookie; *cur && *cur != '\r' && *cur != '\n'; cur++) {
		const char ch = *cur;

		if (had_equals && (ch == '"' || ch == '\'')) {
			/* Only values may be quoted */
			quoted = !quoted;
			continue;
		}

		if (!quoted) {
			if (ch == '=' && !had_equals) {
				/* First equals => attr-value separator */
				had_equals = true;
				continue;
			}

			if (ch == ';') {
				/* Semicolon => end of current avpair */

				/* NUL-terminate tokens */
				*name_end = '\0';
				*value_end = '\0';

				if (!urldb_parse_avpair(c, name, value)) {
					/* Memory exhausted */
					urldb_free_cookie(c);
					return NULL;
				}

				/* And reset to start */
				name_end = name;
				value_end = value;
				had_equals = false;
				continue;
			}

			/* Commas are a pain, as a comma may be any of:
			 *
			 * + End of cookie
			 * + Day separator in Expires avpair
			 * + (Invalid) comma in unquoted value
			 *
			 * The last two can be treated alike (one is in
			 * the spec, the other isn't), which leaves the
			 * problem of telling them from the first. The
			 * heuristic used is:
			 *
			 * Find the semicolon which follows the comma (or
			 * the end of input if there is none) and look at
			 * the characters in between. If any of them is an
			 * '=', assume the comma ended the current cookie.
			 *
			 * This works because the first avpair of every
			 * cookie must be NAME=VALUE, so an '=' is certain
			 * to appear when the comma does end a cookie.
			 *
			 * It fails if an '=' appears in the current
			 * avpair's value after the comma, or if the next
			 * cookie doesn't begin with NAME=VALUE. Neither
			 * is particularly likely and, should either
			 * occur, the website is more broken than we can
			 * be bothered to handle.
			 */
			if (ch == ',') {
				const char *scan;
				/* Find semi-colon, if any */
				const char *semi = strchr(cur + 1, ';');

				if (!semi)
					semi = cur + strlen(cur) - 2 /* CRLF */;

				/* Look for equals sign between comma
				 * and semi */
				for (scan = cur + 1; scan < semi; scan++)
					if (*scan == '=')
						break;

				if (scan != semi) {
					/* found one => comma marks end
					 * of cookie */
					cur++;
					break;
				}

				/* none found => the comma is internal to
				 * the value, so gets accumulated below */
			}
		}

		/* Accumulate into buffers, always leaving space for a NUL */
		if (!had_equals) {
			if (name_end < name + 1023)
				*name_end++ = ch;
		} else {
			if (value_end < value + 4095)
				*value_end++ = ch;
		}
	}

	/* Parse final avpair */
	*name_end = '\0';
	*value_end = '\0';

	if (!urldb_parse_avpair(c, name, value)) {
		/* Memory exhausted */
		urldb_free_cookie(c);
		return NULL;
	}

	/* Now fix-up default values */
	if (!c->domain && url_host(url, &c->domain) != URL_FUNC_OK) {
		urldb_free_cookie(c);
		return NULL;
	}

	if (!c->path && url_path(url, &c->path) != URL_FUNC_OK) {
		urldb_free_cookie(c);
		return NULL;
	}

	if (c->expires == -1)
		c->expires = 1;

	/* Write back current position */
	*cookie = cur;

	return c;
}
/**
 * Parse a cookie avpair
 *
 * Spaces and tabs around both the name and the value are discarded (the
 * buffers passed in are modified in the process). Recognised attribute
 * names (Comment, Domain, Max-Age, Path, Version, Expires, Secure) update
 * the matching field of the cookie; any other pair supplies the cookie's
 * name and value, provided that the cookie doesn't have a name already.
 *
 * \param c  Cookie struct to populate
 * \param n  Name component
 * \param v  Value component
 * \return true on success, false on memory exhaustion
 */
bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, char *v)
{
	int vlen;

	assert(c && n && v);

	/* Strip whitespace from start of name */
	for (; *n; n++) {
		if (*n != ' ' && *n != '\t')
			break;
	}

	/* Strip whitespace from end of name. It is the character in
	 * front of the terminator that wants looking at: n[vlen] is
	 * the terminator itself and so can never be whitespace. */
	for (vlen = strlen(n); vlen > 0; vlen--) {
		if (n[vlen - 1] != ' ' && n[vlen - 1] != '\t')
			break;
		n[vlen - 1] = '\0';
	}

	/* Strip whitespace from start of value */
	for (; *v; v++) {
		if (*v != ' ' && *v != '\t')
			break;
	}

	/* Strip whitespace from end of value (as for the name) */
	for (vlen = strlen(v); vlen > 0; vlen--) {
		if (v[vlen - 1] != ' ' && v[vlen - 1] != '\t')
			break;
		v[vlen - 1] = '\0';
	}

	if (!c->comment && strcasecmp(n, "Comment") == 0) {
		c->comment = strdup(v);
		if (!c->comment)
			return false;
	} else if (!c->domain && strcasecmp(n, "Domain") == 0) {
		if (v[0] == '.') {
			/* Domain must start with a dot */
			c->domain_from_set = true;
			c->domain = strdup(v);
			if (!c->domain)
				return false;
		}
	} else if (strcasecmp(n, "Max-Age") == 0) {
		int temp = atoi(v);
		if (temp == 0)
			/* Special case - 0 means delete */
			c->expires = 0;
		else
			c->expires = time(NULL) + temp;
	} else if (!c->path && strcasecmp(n, "Path") == 0) {
		c->path_from_set = true;
		c->path = strdup(v);
		if (!c->path)
			return false;
	} else if (strcasecmp(n, "Version") == 0) {
		c->version = atoi(v);
	} else if (strcasecmp(n, "Expires") == 0) {
		char *datenoday;
		time_t expires;

		/* Strip dayname from date (these are hugely
		 * variable and liable to break the parser.
		 * They also serve no useful purpose).
		 * The cast is needed as isdigit() must not be
		 * handed a negative value. */
		for (datenoday = v; *datenoday &&
				!isdigit((unsigned char) *datenoday);
				datenoday++)
			; /* do nothing */

		expires = curl_getdate(datenoday, NULL);
		if (expires == -1) {
			/* assume we have an unrepresentable
			 * date => force it to the maximum
			 * possible value of a 32bit time_t
			 * (this may break in 2038. We'll
			 * deal with that once we come to
			 * it) */
			expires = (time_t)0x7fffffff;
		}
		c->expires = expires;
	} else if (strcasecmp(n, "Secure") == 0) {
		c->secure = true;
	} else if (!c->name) {
		c->name = strdup(n);
		c->value = strdup(v);
		if (!c->name || !c->value)
			return false;
	}

	return true;
}
/**
 * Insert a cookie into the database
 *
 * The host entry is looked up (and created if missing) from the cookie's
 * domain, ignoring a leading '.'. A cookie whose domain starts with '.'
 * is attached to the host's root path node; any other cookie is attached
 * to the path node for the cookie's own path.
 *
 * If a cookie with the same domain, path and name is already stored on
 * that node, it is replaced by \a c -- or, when \a c has an expiry of 0,
 * removed, in which case \a c is freed as well.
 *
 * \param c       The cookie to insert
 * \param scheme  URL scheme associated with cookie path
 * \param url     URL (sans fragment) associated with cookie
 * \return true on success, false on memory exhaustion (c will be freed)
 */
bool urldb_insert_cookie(struct cookie_internal_data *c, const char *scheme,
		const char *url)
{
	struct cookie_internal_data *old;
	const struct host_part *host;
	struct path_data *node;
	char *hostname;
	bool domain_cookie;

	assert(c && scheme && url);

	/* A leading dot is not part of the host name proper */
	domain_cookie = (c->domain[0] == '.');
	hostname = domain_cookie ? c->domain + 1 : c->domain;

	/* Locate the host, creating it on demand */
	host = urldb_search_find(urldb_get_search_tree(hostname), hostname);
	if (!host)
		host = urldb_add_host(hostname);
	if (!host) {
		urldb_free_cookie(c);
		return false;
	}

	if (domain_cookie) {
		/* Domain cookies live on the host's root path node */
		node = &host->paths;
	} else {
		/* find path */
		node = urldb_add_path(scheme, 0, host,
				c->path, NULL, NULL, url);
		if (!node) {
			urldb_free_cookie(c);
			return false;
		}
	}

	/* Is there already a cookie with this identity? */
	old = node->cookies;
	while (old) {
		if (strcmp(old->domain, c->domain) == 0 &&
				strcmp(old->path, c->path) == 0 &&
				strcmp(old->name, c->name) == 0)
			break;
		old = old->next;
	}

	if (!old) {
		/* New cookie: push onto the head of the list */
		c->prev = NULL;
		c->next = node->cookies;
		if (node->cookies)
			node->cookies->prev = c;
		node->cookies = c;
		return true;
	}

	if (c->expires == 0) {
		/* Deletion request: unlink the stored cookie and discard
		 * both it and the request */
		if (old->next)
			old->next->prev = old->prev;
		if (old->prev)
			old->prev->next = old->next;
		else
			node->cookies = old->next;
		urldb_free_cookie(old);
		urldb_free_cookie(c);
		return true;
	}

	/* Replacement: c takes over the stored cookie's list position */
	c->prev = old->prev;
	c->next = old->next;
	if (c->next)
		c->next->prev = c;
	if (c->prev)
		c->prev->next = c;
	else
		node->cookies = c;
	urldb_free_cookie(old);

	return true;
}
/**
 * Free a cookie
 *
 * Releases every string owned by the cookie and then the cookie itself.
 * The cookie is not unlinked from any list; that is the caller's job.
 *
 * \param c  The cookie to free (must not be NULL)
 */
void urldb_free_cookie(struct cookie_internal_data *c)
{
	assert(c);

	/* Identity */
	free(c->name);
	free(c->value);

	/* Attributes */
	free(c->domain);
	free(c->path);
	free(c->comment);

	/* And the container */
	free(c);
}
/**
 * Concatenate a cookie into the provided buffer
 *
 * Appends "; name=value", followed by "; $Path=<path>" and/or
 * "; $Domain=<domain>" when those attributes came from the Set-Cookie
 * header. The text is written starting at offset *used - 1, i.e. over the
 * byte preceding the current end (assumes *used counts the existing NUL
 * terminator, so *used must be at least 1).
 *
 * The buffer is grown in 4096 byte steps until the cookie fits; a single
 * step is not always enough for a large cookie.
 *
 * \param c      Cookie to concatenate
 * \param used   Pointer to amount of buffer used (updated)
 * \param alloc  Pointer to allocated size of buffer (updated)
 * \param buf    Pointer to Pointer to buffer (updated)
 * \return true on success, false on memory exhaustion (in which case the
 *         buffer, *used and *alloc are left untouched)
 */
bool urldb_concat_cookie(struct cookie_internal_data *c, int *used, int *alloc, char **buf)
{
	int clen;

	assert(c && used && alloc && buf && *buf);

	/* Exact length of the text emitted below:
	 * "; " + name + "=" + value [+ "; $Path=" + path]
	 * [+ "; $Domain=" + domain] */
	clen = 2 + strlen(c->name) + 1 + strlen(c->value) +
			(c->path_from_set ?
				8 + strlen(c->path) : 0) +
			(c->domain_from_set ?
				10 + strlen(c->domain) : 0);

	if (*used + clen >= *alloc) {
		int new_alloc = *alloc;
		char *temp;

		/* Keep adding blocks until the cookie fits */
		do {
			new_alloc += 4096;
		} while (*used + clen >= new_alloc);

		temp = realloc(*buf, new_alloc);
		if (!temp) {
			return false;
		}
		*buf = temp;
		*alloc = new_alloc;
	}

	/** \todo Quote value strings iff version > 0 */
	sprintf(*buf + *used - 1, "; %s=%s%s%s%s%s",
			c->name, c->value,
			(c->path_from_set ? "; $Path=" : "" ),
			(c->path_from_set ? c->path : "" ),
			(c->domain_from_set ? "; $Domain=" : ""),
			(c->domain_from_set ? c->domain : "")
		);

	*used += clen;

	return true;
}
/**
 * Load a cookie file into the database
 *
 * The file is read line by line. Blank lines and lines starting with '#'
 * are skipped, and everything is ignored until a "Version:<tab>N" line has
 * been seen; loading stops if N is not COOKIE_FILE_VERSION.
 *
 * Every following line describes one cookie as tab-separated fields, in
 * this order: version, domain, domain-from-Set-Cookie flag, path,
 * path-from-Set-Cookie flag, secure flag, expires, last used, no-destroy
 * flag, name, value, scheme, URL, comment (the comment may be empty).
 * Lines with too few fields are logged and skipped.
 *
 * Loading stops silently on memory exhaustion. A missing or unreadable
 * file is not an error.
 *
 * \param filename  File to load
 */
void urldb_load_cookies(const char *filename)
{
	FILE *fp;
	char s[16*1024];
	int file_version = 0;

	assert(filename);

	fp = fopen(filename, "r");
	if (!fp)
		return;

/* These macros `continue' the enclosing while loop on malformed input, so
 * they must remain plain brace blocks: wrapping them in do { } while (0)
 * would make the continue apply to that inner loop instead. */

/* Advance p to the next tab and terminate the current field there */
#define FIND_T { \
		for (; *p && *p != '\t'; p++) \
			; /* do nothing */ \
		if (p >= end) { \
			LOG(("Overran input")); \
			continue; \
		} \
		*p++ = '\0'; \
}

/* Advance p past any run of tabs to the start of the next field */
#define SKIP_T { \
		for (; *p && *p == '\t'; p++) \
			; /* do nothing */ \
		if (p >= end) { \
			LOG(("Overran input")); \
			continue; \
		} \
}

	while (fgets(s, sizeof s, fp)) {
		char *p = s, *end = NULL,
			*domain, *path, *name, *value, *scheme, *url,
			*comment;
		int version, domain_specified, path_specified,
			secure, no_destroy;
		time_t expires, last_used;
		struct cookie_internal_data *c;
		size_t len;

		if(s[0] == 0 || s[0] == '#')
			/* Skip blank lines or comments */
			continue;

		/* Lose the terminating newline, if there is one. The last
		 * line of a file (or an over-long line) may lack it, in
		 * which case the final character is real data. */
		len = strlen(s);
		if (s[len - 1] == '\n')
			s[--len] = '\0';
		end = s + len;

		/* Look for file version first
		 * (all input is ignored until this is read)
		 */
		if (strncasecmp(s, "Version:", 8) == 0) {
			FIND_T; SKIP_T; file_version = atoi(p);

			if (file_version != COOKIE_FILE_VERSION) {
				LOG(("Unknown Cookie file version"));
				break;
			}

			continue;
		} else if (file_version == 0) {
			/* Haven't yet seen version; skip this input */
			continue;
		}

		/* One cookie/line */

		/* Parse input */
		FIND_T; version = atoi(s);
		SKIP_T; domain = p; FIND_T;
		SKIP_T; domain_specified = atoi(p); FIND_T;
		SKIP_T; path = p; FIND_T;
		SKIP_T; path_specified = atoi(p); FIND_T;
		SKIP_T; secure = atoi(p); FIND_T;
		SKIP_T; expires = (time_t)atoi(p); FIND_T;
		SKIP_T; last_used = (time_t)atoi(p); FIND_T;
		SKIP_T; no_destroy = atoi(p); FIND_T;
		SKIP_T; name = p; FIND_T;
		SKIP_T; value = p; FIND_T;
		SKIP_T; scheme = p; FIND_T;
		SKIP_T; url = p; FIND_T;

		/* Comment may have no content, so don't
		 * use macros as they'll break */
		for (; *p && *p == '\t'; p++)
			; /* do nothing */
		comment = p;

		assert(p <= end);

		/* Now create cookie */
		c = malloc(sizeof(struct cookie_internal_data));
		if (!c)
			break;

		c->name = strdup(name);
		c->value = strdup(value);
		c->comment = strdup(comment);
		c->domain_from_set = domain_specified;
		c->domain = strdup(domain);
		c->path_from_set = path_specified;
		c->path = strdup(path);
		c->expires = expires;
		c->last_used = last_used;
		c->secure = secure;
		c->version = version;
		c->no_destroy = no_destroy;

		if (!(c->name && c->value && c->comment &&
				c->domain && c->path)) {
			urldb_free_cookie(c);
			break;
		}

		/* And insert it into database */
		if (!urldb_insert_cookie(c, scheme, url)) {
			/* Cookie freed for us */
			break;
		}
	}

#undef SKIP_T
#undef FIND_T

	fclose(fp);
}
/**
* Delete a cookie
*
* \param domain The cookie's domain
* \param path The cookie's path
* \param name The cookie's name
*/
void urldb_delete_cookie(const char *domain, const char *path, const char *name)
{
urldb_delete_cookie_hosts(domain, path, name, &db_root);
}
/**
 * Delete a cookie from a host subtree
 *
 * Searches the path tree of \a parent, then recurses into each of its
 * child hosts in turn.
 *
 * \param domain  The cookie's domain
 * \param path    The cookie's path
 * \param name    The cookie's name
 * \param parent  Host node at which to start (must not be NULL)
 */
void urldb_delete_cookie_hosts(const char *domain, const char *path, const char *name, struct host_part *parent)
{
	struct host_part *child;

	assert(parent);

	/* This host's own paths first ... */
	urldb_delete_cookie_paths(domain, path, name, &parent->paths);

	/* ... then every host below it */
	child = parent->children;
	while (child) {
		urldb_delete_cookie_hosts(domain, path, name, child);
		child = child->next;
	}
}
/**
 * Delete a cookie from a path subtree
 *
 * If \a parent holds a cookie matching domain, path and name exactly, it
 * is unlinked and freed and the search of this subtree ends there. If that
 * leaves the node with no cookies, cookies_update() is called with NULL
 * data for the domain. Otherwise the child paths are searched recursively.
 *
 * \param domain  The cookie's domain
 * \param path    The cookie's path
 * \param name    The cookie's name
 * \param parent  Path node at which to start (must not be NULL)
 */
void urldb_delete_cookie_paths(const char *domain, const char *path, const char *name, struct path_data *parent)
{
	struct cookie_internal_data *cur;
	struct path_data *child;

	assert(parent);

	for (cur = parent->cookies; cur != NULL; cur = cur->next) {
		if (strcmp(cur->domain, domain) != 0 ||
				strcmp(cur->path, path) != 0 ||
				strcmp(cur->name, name) != 0)
			continue;

		/* Found it: take it out of the list */
		if (cur->prev)
			cur->prev->next = cur->next;
		else
			parent->cookies = cur->next;
		if (cur->next)
			cur->next->prev = cur->prev;

		if (!parent->cookies)
			cookies_update(domain, NULL);

		urldb_free_cookie(cur);
		return;
	}

	/* Not on this node; try the paths below it */
	for (child = parent->children; child != NULL; child = child->next)
		urldb_delete_cookie_paths(domain, path, name, child);
}
/**
 * Save persistent cookies to file
 *
 * Writes a commented header describing the format, the "Version:" line
 * that loading keys on, and then one line per cookie for the whole host
 * tree. Nothing is written if the file cannot be opened.
 *
 * \param filename  Path to save to
 */
void urldb_save_cookies(const char *filename)
{
	FILE *fp;

	assert(filename);

	fp = fopen(filename, "w");
	if (!fp)
		return;

	/* Human-readable preamble; every line is a comment */
	fprintf(fp, "# >%s\n"
			"# NetSurf cookies file.\n"
			"#\n"
			"# Lines starting with a '#' are comments, "
			"blank lines are ignored.\n"
			"#\n"
			"# All lines prior to \"Version:\t%d\" are discarded.\n"
			"#\n"
			"# Version\tDomain\tDomain from Set-Cookie\tPath\t"
			"Path from Set-Cookie\tSecure\tExpires\tLast used\t"
			"No destroy\tName\tValue\tScheme\tURL\tComment\n",
			filename, COOKIE_FILE_VERSION);

	/* The real version marker */
	fprintf(fp, "Version:\t%d\n", COOKIE_FILE_VERSION);

	urldb_save_cookie_hosts(fp, &db_root);

	fclose(fp);
}
/**
 * Save a host subtree's cookies
 *
 * Writes the cookies held in the path tree of \a parent, then recurses
 * into each child host.
 *
 * \param fp      File pointer to write to
 * \param parent  Parent host
 */
void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
{
	struct host_part *child;

	assert(fp && parent);

	urldb_save_cookie_paths(fp, &parent->paths);

	child = parent->children;
	while (child) {
		urldb_save_cookie_hosts(fp, child);
		child = child->next;
	}
}
/**
 * Save a path subtree's cookies
 *
 * Emits one tab-separated line for every cookie on \a parent whose expiry
 * time has not yet passed, then recurses into the child paths. A missing
 * scheme or URL on the path node is written as "unused"; a missing comment
 * is written as an empty field.
 *
 * \param fp      File pointer to write to
 * \param parent  Parent path
 */
void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
{
	time_t now = time(NULL);
	struct cookie_internal_data *c;
	struct path_data *child;

	assert(fp && parent);

	for (c = parent->cookies; c != NULL; c = c->next) {
		/* Only cookies that have not expired are written */
		if (c->expires >= now) {
			fprintf(fp, "%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t"
					"%s\t%s\t%s\t%s\t%s\n",
					c->version, c->domain,
					c->domain_from_set, c->path,
					c->path_from_set, c->secure,
					(int)c->expires, (int)c->last_used,
					c->no_destroy, c->name, c->value,
					parent->scheme ? parent->scheme
							: "unused",
					parent->url ? parent->url : "unused",
					c->comment ? c->comment : "");
		}
	}

	for (child = parent->children; child != NULL; child = child->next)
		urldb_save_cookie_paths(fp, child);
}
/**
 * Sets the content data associated with a particular URL
 *
 * Looks the URL up in the database and, if its path node has no cache
 * filename yet, obtains one from filename_request() and stores it. Saving
 * the content itself is not implemented yet.
 *
 * \param url      the URL to associate content with
 * \param content  the content to associate
 * \return true on success, false otherwise (URL not in the database, or
 *         no filename could be obtained)
 */
bool urldb_set_cache_data(const char *url, const struct content *content) {
	struct path_data *p;
	const char *filename;

	assert(url && content);

	p = urldb_find_url(url);
	if (!p)
		return false;

	/* new filename needed */
	if (p->cache.filename[0] == 0) {
		filename = filename_request();
		if (!filename)
			return false;
		/* Copy the name verbatim. It must never be used as the
		 * format string itself: a '%' in it would be interpreted
		 * as a conversion.
		 * NOTE(review): presumably filename_request() never returns
		 * more than p->cache.filename can hold -- confirm. */
		sprintf(p->cache.filename, "%s", filename);
	}

	/* todo: save content, set cache data etc */
	return true;
}
/**
 * Gets a file:// URL for the cached data associated with a URL
 *
 * The cache filename stored on the URL's path node is converted with
 * filename_as_url(). Cache expiry is not handled yet.
 *
 * \param url  the URL to get the associated content for
 * \return a local URL allocated on heap, or NULL (also when the URL is
 *         unknown or has no cache file)
 */
char *urldb_get_cache_data(const char *url) {
	struct path_data *node;

	assert(url);

	node = urldb_find_url(url);

	/* Unknown URL, or known but without a file cache */
	if (node == NULL || node->cache.filename[0] == 0)
		return NULL;

	/* todo: handle cache expiry etc */
	return filename_as_url(node->cache.filename);
}
/**
 * Destroy urldb
 *
 * Frees every search tree and every host tree hanging off the database
 * root. The search tree slots and the root's child list are reset
 * afterwards, so no freed memory stays reachable through them and a
 * second call is harmless.
 */
void urldb_destroy(void)
{
	struct host_part *a, *b;

	/* Clean up search trees */
	for (int i = 0; i < NUM_SEARCH_TREES; i++) {
		if (search_trees[i] != &empty) {
			urldb_destroy_search_tree(search_trees[i]);
			/* Back to the sentinel: the old nodes are gone */
			search_trees[i] = &empty;
		}
	}

	/* And database */
	for (a = db_root.children; a; a = b) {
		/* Grab the successor before a is freed */
		b = a->next;
		urldb_destroy_host_tree(a);
	}
	db_root.children = NULL;
}
/**
 * Destroy a host tree
 *
 * Frees, in order: all child hosts (recursively), the path trees below
 * this host's root path, the contents of the root path itself, and finally
 * the host node with its name segment.
 *
 * \param root  Root node of tree to destroy
 */
void urldb_destroy_host_tree(struct host_part *root)
{
	struct host_part *host, *next_host;
	struct path_data *path, *next_path;

	/* Destroy children; the successor is read before each node goes */
	host = root->children;
	while (host) {
		next_host = host->next;
		urldb_destroy_host_tree(host);
		host = next_host;
	}

	/* Now clean up paths */
	path = root->paths.children;
	while (path) {
		next_path = path->next;
		urldb_destroy_path_tree(path);
		path = next_path;
	}

	/* Root path is embedded in the host node, so only its contents
	 * are released */
	urldb_destroy_path_node_content(&root->paths);

	/* And ourselves */
	free(root->part);
	free(root);
}
/**
 * Destroy a path tree
 *
 * Recursively frees every path below \a root, then the contents of
 * \a root and the node itself.
 *
 * \param root  Root node of tree to destroy
 */
void urldb_destroy_path_tree(struct path_data *root)
{
	struct path_data *child = root->children;

	/* Destroy children */
	while (child) {
		struct path_data *next = child->next;

		urldb_destroy_path_tree(child);
		child = next;
	}

	/* And ourselves */
	urldb_destroy_path_node_content(root);
	free(root);
}
/**
 * Destroy the contents of a path node
 *
 * Releases the node's strings, its fragment array, its thumbnail (if any),
 * its title and authentication details, and every cookie attached to it.
 *
 * \param node  Node to destroy contents of (does not destroy node)
 */
void urldb_destroy_path_node_content(struct path_data *node)
{
	struct cookie_internal_data *cookie, *next_cookie;
	unsigned int i;

	/* Strings describing the node itself */
	free(node->url);
	free(node->scheme);
	free(node->segment);

	/* Fragments: each entry, then the array holding them */
	for (i = 0; i < node->frag_cnt; i++)
		free(node->fragment[i]);
	free(node->fragment);

	if (node->thumb)
		bitmap_destroy(node->thumb);

	free(node->urld.title);
	free(node->auth.realm);
	free(node->auth.auth);

	/* Cookies; the successor is read before each one is freed */
	cookie = node->cookies;
	while (cookie) {
		next_cookie = cookie->next;
		urldb_destroy_cookie(cookie);
		cookie = next_cookie;
	}
}
/**
 * Destroy a cookie node
 *
 * Frees the cookie's strings and then the cookie itself. The cookie is
 * not unlinked from any list.
 *
 * \param c  Cookie to destroy
 */
void urldb_destroy_cookie(struct cookie_internal_data *c)
{
	/* Attributes */
	free(c->comment);
	free(c->domain);
	free(c->path);

	/* Identity */
	free(c->name);
	free(c->value);

	/* And the container */
	free(c);
}
/**
 * Destroy a search tree
 *
 * Frees the subtrees first and the root last. Links that point at the
 * shared `empty' sentinel mark the absence of a child and are not
 * followed.
 *
 * \param root  Root node of tree to destroy
 */
void urldb_destroy_search_tree(struct search_node *root)
{
	struct search_node *lhs = root->left;
	struct search_node *rhs = root->right;

	/* Destroy children */
	if (lhs != &empty)
		urldb_destroy_search_tree(lhs);

	if (rhs != &empty)
		urldb_destroy_search_tree(rhs);

	/* And destroy ourselves */
	free(root);
}
#ifdef TEST_URLDB
/* Test harness definition of the option_expire_url option, initialised
 * to 0. NOTE(review): presumably provided here because the normal options
 * code is not linked into the TEST_URLDB build -- confirm. */
int option_expire_url = 0;
/**
 * Test harness stub for cookies_update(): ignores its arguments.
 *
 * \param domain  Unused
 * \param data    Unused
 * \return always true
 */
bool cookies_update(const char *domain, const struct cookie_data *data)
{
	(void) domain;
	(void) data;

	return true;
}
/**
 * Test harness stub for die(): report a fatal error and exit.
 *
 * The message is printed to standard output, prefixed with "die: ", and
 * the process then exits with status 1.
 *
 * \param error  Message to print
 */
void die(const char *error)
{
	fprintf(stdout, "die: %s\n", error);

	exit(1);
}
/**
 * Test harness stub for warn_user(): print a warning to standard output.
 *
 * \param warning  Warning text
 * \param detail   Additional detail, printed after the warning
 */
void warn_user(const char *warning, const char *detail)
{
	fprintf(stdout, "WARNING: %s %s\n", warning, detail);
}
/**
 * Test harness stub for bitmap_destroy(): does nothing.
 *
 * \param bitmap  Unused
 */
void bitmap_destroy(struct bitmap *bitmap)
{
	(void) bitmap;
}
/**
 * Test harness stub for path_to_url(): prefix a path with "file://".
 *
 * \param path  Path to convert
 * \return newly allocated URL string which the caller must free, or NULL
 *         on memory exhaustion
 */
char *path_to_url(const char *path)
{
	/* 7 bytes for "file://", plus the terminator */
	char *r = malloc(strlen(path) + 7 + 1);

	if (!r)
		return NULL;

	strcpy(r, "file://");
	strcat(r, path);

	return r;
}
/**
 * Test driver for the URL database (TEST_URLDB builds only).
 *
 * Adds a few hosts and paths, sets and fetches some cookies, runs the
 * regression checks for the bug numbers quoted below, and finally dumps
 * the database.
 *
 * \return 0 on success, 1 if a host or path could not be added
 */
int main(void)
{
	struct host_part *h;
	struct path_data *p;

	url_init();

	h = urldb_add_host("127.0.0.1");
	if (!h) {
		LOG(("failed adding host"));
		return 1;
	}

	/* Get host entry */
	h = urldb_add_host("netsurf.strcprstskrzkrk.co.uk");
	if (!h) {
		LOG(("failed adding host"));
		return 1;
	}

	/* Get path entry: the same path three times over, differing only
	 * in the fragment */
	p = urldb_add_path("http", 80, h, "/path/to/resource.htm", "a=b", "zz",
			"http://netsurf.strcprstskrzkrk.co.uk/path/to/resource.htm?a=b");
	if (!p) {
		LOG(("failed adding path"));
		return 1;
	}

	p = urldb_add_path("http", 80, h, "/path/to/resource.htm", "a=b", "aa",
			"http://netsurf.strcprstskrzkrk.co.uk/path/to/resource.htm?a=b");
	if (!p) {
		LOG(("failed adding path"));
		return 1;
	}

	p = urldb_add_path("http", 80, h, "/path/to/resource.htm", "a=b", "yy",
			"http://netsurf.strcprstskrzkrk.co.uk/path/to/resource.htm?a=b");
	if (!p) {
		LOG(("failed adding path"));
		return 1;
	}

	urldb_set_cookie("mmblah=foo; path=/; expires=Thur, 31-Dec-2099 00:00:00 GMT\r\n", "http://www.minimarcos.org.uk/cgi-bin/forum/Blah.pl?,v=login,p=2");

	urldb_set_cookie("BlahPW=bar; path=/; expires=Thur, 31-Dec-2099 00:00:00 GMT\r\n", "http://www.minimarcos.org.uk/cgi-bin/forum/Blah.pl?,v=login,p=2");

	urldb_set_cookie("details=foo|bar|Sun, 03-Jun-2007;expires=Mon, 24-Jul-2006 09:53:45 GMT", "http://ccdb.cropcircleresearch.com/");

	urldb_set_cookie("PREF=ID=a:TM=b:LM=c:S=d; path=/; domain=.google.com", "http://www.google.com/");

	urldb_set_cookie("test=foo, bar, baz; path=/, quux=blah; path=/", "http://www.bbc.co.uk/");

//	urldb_set_cookie("a=b; path=/; domain=.a.com", "http://a.com/");

	urldb_set_cookie("foo=bar;Path=/blah;Secure", "https://www.foo.com/blah/moose");

	urldb_get_cookie("https://www.foo.com/blah/wxyzabc", "https://www.foo.com/blah/moose");

	/* 1563546 */
	assert(urldb_add_url("http:moodle.org") == false);
	assert(urldb_get_url("http:moodle.org") == NULL);

	/* also 1563546 */
	assert(urldb_add_url("http://a_a/"));
	assert(urldb_get_url("http://a_a/"));

	/* 1597646 */
	if (urldb_add_url("http://foo@moose.com/")) {
		LOG(("added http://foo@moose.com/"));
		assert(urldb_get_url("http://foo@moose.com/") != NULL);
	}

	/* 1535120 */
	assert(urldb_add_url("http://www2.2checkout.com/"));
	assert(urldb_get_url("http://www2.2checkout.com/"));

	urldb_dump();

	return 0;
}
#endif