From cf7abb4a0ad6a6de3acf3215ca6d31fdebbf4708 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Sat, 1 Nov 2014 23:27:29 +0000 Subject: [PATCH] restructure urldb source remove forward declarations and restructure. exported functions are also now documented in the urldb.h header. --- content/urldb.c | 5622 +++++++++++++++++++++++------------------------ content/urldb.h | 216 +- 2 files changed, 2941 insertions(+), 2897 deletions(-) diff --git a/content/urldb.c b/content/urldb.c index bf873c62e..8af6ae150 100644 --- a/content/urldb.c +++ b/content/urldb.c @@ -17,8 +17,9 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -/** \file - * Unified URL information database (implementation) +/** + * \file + * Unified URL information database implementation * * URLs are stored in a tree-based structure as follows: * @@ -81,8 +82,8 @@ * simpler implementation. Entries in this tree comprise pointers to the * leaf nodes of the host tree described above. * - * REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of - * non-normalised URLs with urldb will result in undefined behaviour and + * REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of + * non-normalised URLs with urldb will result in undefined behaviour and * potential crashes. 
*/ @@ -217,94 +218,6 @@ struct search_node { struct search_node *right; /**< Right subtree */ }; -/* Destruction */ -static void urldb_destroy_host_tree(struct host_part *root); -static void urldb_destroy_path_tree(struct path_data *root); -static void urldb_destroy_path_node_content(struct path_data *node); -static void urldb_destroy_cookie(struct cookie_internal_data *c); -static void urldb_destroy_prot_space(struct prot_space_data *space); -static void urldb_destroy_search_tree(struct search_node *root); - -/* Saving */ -static void urldb_save_search_tree(struct search_node *root, FILE *fp); -static void urldb_count_urls(const struct path_data *root, time_t expiry, - unsigned int *count); -static void urldb_write_paths(const struct path_data *parent, - const char *host, FILE *fp, char **path, int *path_alloc, - int *path_used, time_t expiry); - -/* Iteration */ -static bool urldb_iterate_partial_host(struct search_node *root, - const char *prefix, bool (*callback)(nsurl *url, - const struct url_data *data)); -static bool urldb_iterate_partial_path(const struct path_data *parent, - const char *prefix, bool (*callback)(nsurl *url, - const struct url_data *data)); -static bool urldb_iterate_entries_host(struct search_node *parent, - bool (*url_callback)(nsurl *url, - const struct url_data *data), - bool (*cookie_callback)(const struct cookie_data *data)); -static bool urldb_iterate_entries_path(const struct path_data *parent, - bool (*url_callback)(nsurl *url, - const struct url_data *data), - bool (*cookie_callback)(const struct cookie_data *data)); - -/* Insertion */ -static struct host_part *urldb_add_host_node(const char *part, - struct host_part *parent); -static struct path_data *urldb_add_path_node(lwc_string *scheme, - unsigned int port, const char *segment, lwc_string *fragment, - struct path_data *parent); -static int urldb_add_path_fragment_cmp(const void *a, const void *b); -static struct path_data *urldb_add_path_fragment(struct path_data *segment, - 
lwc_string *fragment); - -/* Lookup */ -static struct path_data *urldb_find_url(nsurl *url); -static struct path_data *urldb_match_path(const struct path_data *parent, - const char *path, lwc_string *scheme, unsigned short port); -static struct search_node **urldb_get_search_tree_direct(const char *host); -static struct search_node *urldb_get_search_tree(const char *host); - -/* Dump */ -static void urldb_dump_hosts(struct host_part *parent); -static void urldb_dump_paths(struct path_data *parent); -static void urldb_dump_search(struct search_node *parent, int depth); - -/* Search tree */ -static struct search_node *urldb_search_insert(struct search_node *root, - const struct host_part *data); -static struct search_node *urldb_search_insert_internal( - struct search_node *root, struct search_node *n); -/* for urldb_search_remove, see r5531 which removed it */ -static const struct host_part *urldb_search_find(struct search_node *root, - const char *host); -static struct search_node *urldb_search_skew(struct search_node *root); -static struct search_node *urldb_search_split(struct search_node *root); -static int urldb_search_match_host(const struct host_part *a, - const struct host_part *b); -static int urldb_search_match_string(const struct host_part *a, - const char *b); -static int urldb_search_match_prefix(const struct host_part *a, - const char *b); - -/* Cookies */ -static struct cookie_internal_data *urldb_parse_cookie(nsurl *url, - const char **cookie); -static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, - char *v, bool was_quoted); -static bool urldb_insert_cookie(struct cookie_internal_data *c, - lwc_string *scheme, nsurl *url); -static void urldb_free_cookie(struct cookie_internal_data *c); -static bool urldb_concat_cookie(struct cookie_internal_data *c, int version, - int *used, int *alloc, char **buf); -static void urldb_delete_cookie_hosts(const char *domain, const char *path, - const char *name, struct host_part *parent); -static 
void urldb_delete_cookie_paths(const char *domain, const char *path, - const char *name, struct path_data *parent); -static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent); -static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent); - /** Root database handle */ static struct host_part db_root; @@ -325,6 +238,8 @@ static struct search_node *search_trees[NUM_SEARCH_TREES] = { #define COOKIE_FILE_VERSION 102 static int loaded_cookie_file_version; #define MIN_URL_FILE_VERSION 106 + +/** URL database file version */ #define URL_FILE_VERSION 106 /* Bloom filter used for short-circuting the false case of "is this @@ -338,241 +253,163 @@ static int loaded_cookie_file_version; static struct bloom_filter *url_bloom; #define BLOOM_SIZE (1024 * 32) + + /** - * Import an URL database from file, replacing any existing database + * Write paths associated with a host * - * \param filename Name of file containing data + * \param parent Root of (sub)tree to write + * \param host Current host name + * \param fp File to write to + * \param path Current path string + * \param path_alloc Allocated size of path + * \param path_used Used size of path + * \param expiry Expiry time of URLs */ -nserror urldb_load(const char *filename) +static void urldb_write_paths(const struct path_data *parent, const char *host, + FILE *fp, char **path, int *path_alloc, int *path_used, + time_t expiry) { -#define MAXIMUM_URL_LENGTH 4096 - char s[MAXIMUM_URL_LENGTH]; - char host[256]; - struct host_part *h; - int urls; + const struct path_data *p = parent; int i; - int version; - int length; - FILE *fp; - assert(filename); + do { + int seglen = p->segment != NULL ? strlen(p->segment) : 0; + int len = *path_used + seglen + 1; - LOG(("Loading URL file %s", filename)); + if (*path_alloc < len) { + char *temp = realloc(*path, + (len > 64) ? len : *path_alloc + 64); + if (!temp) + return; + *path = temp; + *path_alloc = (len > 64) ? 
len : *path_alloc + 64; + } - if (url_bloom == NULL) - url_bloom = bloom_create(BLOOM_SIZE); + if (p->segment != NULL) + memcpy(*path + *path_used - 1, p->segment, seglen); - fp = fopen(filename, "r"); - if (!fp) { - LOG(("Failed to open file '%s' for reading", filename)); - return NSERROR_NOT_FOUND; - } + if (p->children != NULL) { + (*path)[*path_used + seglen - 1] = '/'; + (*path)[*path_used + seglen] = '\0'; + } else { + (*path)[*path_used + seglen - 1] = '\0'; + len -= 1; + } - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) { - fclose(fp); - return NSERROR_NEED_DATA; - } + *path_used = len; - version = atoi(s); - if (version < MIN_URL_FILE_VERSION) { - LOG(("Unsupported URL file version.")); - fclose(fp); - return NSERROR_INVALID; - } - if (version > URL_FILE_VERSION) { - LOG(("Unknown URL file version.")); - fclose(fp); - return NSERROR_INVALID; - } + if (p->children != NULL) { + /* Drill down into children */ + p = p->children; + } else { + /* leaf node */ + if (p->persistent ||((p->urld.last_visit > expiry) && + (p->urld.visits > 0))) { + fprintf(fp, "%s\n", lwc_string_data(p->scheme)); - while (fgets(host, sizeof host, fp)) { - /* get the hostname */ - length = strlen(host) - 1; - host[length] = '\0'; + if (p->port) + fprintf(fp,"%d\n", p->port); + else + fprintf(fp, "\n"); - /* skip data that has ended up with a host of '' */ - if (length == 0) { - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - urls = atoi(s); - /* Eight fields/url */ - for (i = 0; i < (8 * urls); i++) { - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + fprintf(fp, "%s\n", *path); + + /** \todo handle fragments? 
*/ + + fprintf(fp, "%i\n%i\n%i\n", p->urld.visits, + (int)p->urld.last_visit, + (int)p->urld.type); + + fprintf(fp, "\n"); + + if (p->urld.title) { + uint8_t *s = (uint8_t *) p->urld.title; + + for (i = 0; s[i] != '\0'; i++) + if (s[i] < 32) + s[i] = ' '; + for (--i; ((i > 0) && (s[i] == ' ')); + i--) + s[i] = '\0'; + fprintf(fp, "%s\n", p->urld.title); + } else + fprintf(fp, "\n"); + } + + /* Now, find next node to process. */ + while (p != parent) { + int seglen = p->segment != NULL + ? strlen(p->segment) : 0; + + /* Remove our segment from the path */ + *path_used -= seglen; + (*path)[*path_used - 1] = '\0'; + + if (p->next != NULL) { + /* Have a sibling, process that */ + p = p->next; break; - } - continue; - } + } - /* read number of URLs */ - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - urls = atoi(s); + /* Going up, so remove '/' */ + *path_used -= 1; + (*path)[*path_used - 1] = '\0'; - /* no URLs => try next host */ - if (urls == 0) { - LOG(("No URLs for '%s'", host)); - continue; - } - - h = urldb_add_host(host); - if (!h) { - LOG(("Failed adding host: '%s'", host)); - fclose(fp); - return NSERROR_NOMEM; - } - - /* load the non-corrupt data */ - for (i = 0; i < urls; i++) { - struct path_data *p = NULL; - char scheme[64], ports[10]; - char url[64 + 3 + 256 + 6 + 4096 + 1]; - unsigned int port; - bool is_file = false; - nsurl *nsurl; - lwc_string *scheme_lwc, *fragment_lwc; - char *path_query; - size_t len; - - if (!fgets(scheme, sizeof scheme, fp)) - break; - length = strlen(scheme) - 1; - scheme[length] = '\0'; - - if (!fgets(ports, sizeof ports, fp)) - break; - length = strlen(ports) - 1; - ports[length] = '\0'; - port = atoi(ports); - - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - length = strlen(s) - 1; - s[length] = '\0'; - - if (!strcasecmp(host, "localhost") && - !strcasecmp(scheme, "file")) - is_file = true; - - snprintf(url, sizeof url, "%s://%s%s%s%s", - scheme, - /* file URLs have no host */ - (is_file ? "" : host), - (port ? 
":" : ""), - (port ? ports : ""), - s); - - /* TODO: store URLs in pre-parsed state, and make - * a nsurl_load to generate the nsurl more - * swiftly. - * Need a nsurl_save too. - */ - if (nsurl_create(url, &nsurl) != NSERROR_OK) { - LOG(("Failed inserting '%s'", url)); - fclose(fp); - return NSERROR_NOMEM; - } - - if (url_bloom != NULL) { - uint32_t hash = nsurl_hash(nsurl); - bloom_insert_hash(url_bloom, hash); - } - - /* Copy and merge path/query strings */ - if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY, - &path_query, &len) != NSERROR_OK) { - LOG(("Failed inserting '%s'", url)); - fclose(fp); - return NSERROR_NOMEM; - } - - scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME); - fragment_lwc = nsurl_get_component(nsurl, - NSURL_FRAGMENT); - p = urldb_add_path(scheme_lwc, port, h, path_query, - fragment_lwc, nsurl); - if (!p) { - LOG(("Failed inserting '%s'", url)); - fclose(fp); - return NSERROR_NOMEM; - } - nsurl_unref(nsurl); - lwc_string_unref(scheme_lwc); - if (fragment_lwc != NULL) - lwc_string_unref(fragment_lwc); - - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - if (p) - p->urld.visits = (unsigned int)atoi(s); - - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - if (p) - p->urld.last_visit = (time_t)atoi(s); - - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - if (p) - p->urld.type = (content_type)atoi(s); - - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - - - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - length = strlen(s) - 1; - if (p && length > 0) { - s[length] = '\0'; - p->urld.title = malloc(length + 1); - if (p->urld.title) - memcpy(p->urld.title, s, length + 1); + /* Ascend tree */ + p = p->parent; } } - } - - fclose(fp); - LOG(("Successfully loaded URL file")); -#undef MAXIMUM_URL_LENGTH - - return NSERROR_OK; + } while (p != parent); } + /** - * Export the current database to file + * Count number of URLs associated with a host * - * \param filename Name of file to export to + * \param root Root of path data tree + * \param 
expiry Expiry time for URLs + * \param count Pointer to count */ -void urldb_save(const char *filename) +static void urldb_count_urls(const struct path_data *root, time_t expiry, + unsigned int *count) { - FILE *fp; - int i; + const struct path_data *p = root; - assert(filename); + do { + if (p->children != NULL) { + /* Drill down into children */ + p = p->children; + } else { + /* No more children, increment count if required */ + if (p->persistent || ((p->urld.last_visit > expiry) && + (p->urld.visits > 0))) { + (*count)++; + } - fp = fopen(filename, "w"); - if (!fp) { - LOG(("Failed to open file '%s' for writing", filename)); - return; - } + /* Now, find next node to process. */ + while (p != root) { + if (p->next != NULL) { + /* Have a sibling, process that */ + p = p->next; + break; + } - /* file format version number */ - fprintf(fp, "%d\n", URL_FILE_VERSION); - - for (i = 0; i != NUM_SEARCH_TREES; i++) { - urldb_save_search_tree(search_trees[i], fp); - } - - fclose(fp); + /* Ascend tree */ + p = p->parent; + } + } + } while (p != root); } + /** * Save a search (sub)tree * * \param root Root of (sub)tree to save * \param fp File to write to */ -void urldb_save_search_tree(struct search_node *parent, FILE *fp) +static void urldb_save_search_tree(struct search_node *parent, FILE *fp) { char host[256]; const struct host_part *h; @@ -619,873 +456,6 @@ void urldb_save_search_tree(struct search_node *parent, FILE *fp) urldb_save_search_tree(parent->right, fp); } -/** - * Count number of URLs associated with a host - * - * \param root Root of path data tree - * \param expiry Expiry time for URLs - * \param count Pointer to count - */ -void urldb_count_urls(const struct path_data *root, time_t expiry, - unsigned int *count) -{ - const struct path_data *p = root; - - do { - if (p->children != NULL) { - /* Drill down into children */ - p = p->children; - } else { - /* No more children, increment count if required */ - if (p->persistent || ((p->urld.last_visit > expiry) 
&& - (p->urld.visits > 0))) - (*count)++; - - /* Now, find next node to process. */ - while (p != root) { - if (p->next != NULL) { - /* Have a sibling, process that */ - p = p->next; - break; - } - - /* Ascend tree */ - p = p->parent; - } - } - } while (p != root); -} - -/** - * Write paths associated with a host - * - * \param parent Root of (sub)tree to write - * \param host Current host name - * \param fp File to write to - * \param path Current path string - * \param path_alloc Allocated size of path - * \param path_used Used size of path - * \param expiry Expiry time of URLs - */ -void urldb_write_paths(const struct path_data *parent, const char *host, - FILE *fp, char **path, int *path_alloc, int *path_used, - time_t expiry) -{ - const struct path_data *p = parent; - int i; - - do { - int seglen = p->segment != NULL ? strlen(p->segment) : 0; - int len = *path_used + seglen + 1; - - if (*path_alloc < len) { - char *temp = realloc(*path, - (len > 64) ? len : *path_alloc + 64); - if (!temp) - return; - *path = temp; - *path_alloc = (len > 64) ? len : *path_alloc + 64; - } - - if (p->segment != NULL) - memcpy(*path + *path_used - 1, p->segment, seglen); - - if (p->children != NULL) { - (*path)[*path_used + seglen - 1] = '/'; - (*path)[*path_used + seglen] = '\0'; - } else { - (*path)[*path_used + seglen - 1] = '\0'; - len -= 1; - } - - *path_used = len; - - if (p->children != NULL) { - /* Drill down into children */ - p = p->children; - } else { - /* leaf node */ - if (p->persistent ||((p->urld.last_visit > expiry) && - (p->urld.visits > 0))) { - fprintf(fp, "%s\n", lwc_string_data(p->scheme)); - - if (p->port) - fprintf(fp,"%d\n", p->port); - else - fprintf(fp, "\n"); - - fprintf(fp, "%s\n", *path); - - /** \todo handle fragments? 
*/ - - fprintf(fp, "%i\n%i\n%i\n", p->urld.visits, - (int)p->urld.last_visit, - (int)p->urld.type); - - fprintf(fp, "\n"); - - if (p->urld.title) { - uint8_t *s = (uint8_t *) p->urld.title; - - for (i = 0; s[i] != '\0'; i++) - if (s[i] < 32) - s[i] = ' '; - for (--i; ((i > 0) && (s[i] == ' ')); - i--) - s[i] = '\0'; - fprintf(fp, "%s\n", p->urld.title); - } else - fprintf(fp, "\n"); - } - - /* Now, find next node to process. */ - while (p != parent) { - int seglen = p->segment != NULL - ? strlen(p->segment) : 0; - - /* Remove our segment from the path */ - *path_used -= seglen; - (*path)[*path_used - 1] = '\0'; - - if (p->next != NULL) { - /* Have a sibling, process that */ - p = p->next; - break; - } - - /* Going up, so remove '/' */ - *path_used -= 1; - (*path)[*path_used - 1] = '\0'; - - /* Ascend tree */ - p = p->parent; - } - } - } while (p != parent); -} - -/** - * Set the cross-session persistence of the entry for an URL - * - * \param url Absolute URL to persist - * \param persist True to persist, false otherwise - */ -void urldb_set_url_persistence(nsurl *url, bool persist) -{ - struct path_data *p; - - assert(url); - - p = urldb_find_url(url); - if (!p) - return; - - p->persistent = persist; -} - -/** - * Insert an URL into the database - * - * \param url Absolute URL to insert - * \return true on success, false otherwise - */ -bool urldb_add_url(nsurl *url) -{ - struct host_part *h; - struct path_data *p; - lwc_string *scheme; - lwc_string *port; - lwc_string *host; - lwc_string *fragment; - const char *host_str; - char *path_query = NULL; - size_t len; - bool match; - unsigned int port_int; - - assert(url); - - if (url_bloom == NULL) - url_bloom = bloom_create(BLOOM_SIZE); - - if (url_bloom != NULL) { - uint32_t hash = nsurl_hash(url); - bloom_insert_hash(url_bloom, hash); - } - - /* Copy and merge path/query strings */ - if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) != - NSERROR_OK) { - return false; - } - assert(path_query != NULL); 
- - scheme = nsurl_get_component(url, NSURL_SCHEME); - if (scheme == NULL) { - free(path_query); - return false; - } - - host = nsurl_get_component(url, NSURL_HOST); - if (host != NULL) { - host_str = lwc_string_data(host); - lwc_string_unref(host); - - } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) == - lwc_error_ok && match == true) { - host_str = "localhost"; - - } else { - lwc_string_unref(scheme); - free(path_query); - return false; - } - - fragment = nsurl_get_component(url, NSURL_FRAGMENT); - - port = nsurl_get_component(url, NSURL_PORT); - if (port != NULL) { - port_int = atoi(lwc_string_data(port)); - lwc_string_unref(port); - } else { - port_int = 0; - } - - /* Get host entry */ - h = urldb_add_host(host_str); - - /* Get path entry */ - p = (h != NULL) ? urldb_add_path(scheme, port_int, h, path_query, - fragment, url) : NULL; - - lwc_string_unref(scheme); - if (fragment != NULL) - lwc_string_unref(fragment); - - return (p != NULL); -} - -/** - * Set an URL's title string, replacing any existing one - * - * \param url The URL to look for - * \param title The title string to use (copied) - */ -void urldb_set_url_title(nsurl *url, const char *title) -{ - struct path_data *p; - char *temp; - - assert(url && title); - - p = urldb_find_url(url); - if (!p) - return; - - temp = strdup(title); - if (!temp) - return; - - free(p->urld.title); - p->urld.title = temp; -} - -/** - * Set an URL's content type - * - * \param url The URL to look for - * \param type The type to set - */ -void urldb_set_url_content_type(nsurl *url, content_type type) -{ - struct path_data *p; - - assert(url); - - p = urldb_find_url(url); - if (!p) - return; - - p->urld.type = type; -} - -/** - * Update an URL's visit data - * - * \param url The URL to update - */ -void urldb_update_url_visit_data(nsurl *url) -{ - struct path_data *p; - - assert(url); - - p = urldb_find_url(url); - if (!p) - return; - - p->urld.last_visit = time(NULL); - p->urld.visits++; -} - -/** - * 
Reset an URL's visit statistics - * - * \param url The URL to reset - */ -void urldb_reset_url_visit_data(nsurl *url) -{ - struct path_data *p; - - assert(url); - - p = urldb_find_url(url); - if (!p) - return; - - p->urld.last_visit = (time_t)0; - p->urld.visits = 0; -} - - -/** - * Find data for an URL. - * - * \param url Absolute URL to look for - * \return Pointer to result struct, or NULL - */ -const struct url_data *urldb_get_url_data(nsurl *url) -{ - struct path_data *p; - struct url_internal_data *u; - - assert(url); - - p = urldb_find_url(url); - if (!p) - return NULL; - - u = &p->urld; - - return (const struct url_data *) u; -} - -/** - * Extract an URL from the db - * - * \param url URL to extract - * \return Pointer to database's copy of URL or NULL if not found - */ -nsurl *urldb_get_url(nsurl *url) -{ - struct path_data *p; - - assert(url); - - p = urldb_find_url(url); - if (!p) - return NULL; - - return p->url; -} - -/** - * Look up authentication details in database - * - * \param url Absolute URL to search for - * \param realm When non-NULL, it is realm which can be used to determine - * the protection space when that's not been done before for given URL. - * \return Pointer to authentication details, or NULL if not found - */ -const char *urldb_get_auth_details(nsurl *url, const char *realm) -{ - struct path_data *p, *p_cur, *p_top; - - assert(url); - - /* add to the db, so our lookup will work */ - urldb_add_url(url); - - p = urldb_find_url(url); - if (!p) - return NULL; - - /* Check for any auth details attached to the path_data node or any of - * its parents. */ - for (p_cur = p; p_cur != NULL; p_top = p_cur, p_cur = p_cur->parent) { - if (p_cur->prot_space) { - return p_cur->prot_space->auth; - } - } - - /* Only when we have a realm (and canonical root of given URL), we can - * uniquely locate the protection space. 
*/ - if (realm != NULL) { - const struct host_part *h = (const struct host_part *)p_top; - const struct prot_space_data *space; - bool match; - - /* Search for a possible matching protection space. */ - for (space = h->prot_space; space != NULL; - space = space->next) { - if (!strcmp(space->realm, realm) && - lwc_string_isequal(space->scheme, - p->scheme, &match) == - lwc_error_ok && - match == true && - space->port == p->port) { - p->prot_space = space; - return p->prot_space->auth; - } - } - } - - return NULL; -} - -/** - * Retrieve certificate verification permissions from database - * - * \param url Absolute URL to search for - * \return true to permit connections to hosts with invalid certificates, - * false otherwise. - */ -bool urldb_get_cert_permissions(nsurl *url) -{ - struct path_data *p; - const struct host_part *h; - - assert(url); - - p = urldb_find_url(url); - if (!p) - return false; - - for (; p && p->parent; p = p->parent) - /* do nothing */; - assert(p); - - h = (const struct host_part *)p; - - return h->permit_invalid_certs; -} - -/** - * Set authentication data for an URL - * - * \param url The URL to consider - * \param realm The authentication realm - * \param auth The authentication details (in form username:password) - */ -void urldb_set_auth_details(nsurl *url, const char *realm, - const char *auth) -{ - struct path_data *p, *pi; - struct host_part *h; - struct prot_space_data *space, *space_alloc; - char *realm_alloc, *auth_alloc; - bool match; - - assert(url && realm && auth); - - /* add url, in case it's missing */ - urldb_add_url(url); - - p = urldb_find_url(url); - - if (!p) - return; - - /* Search for host_part */ - for (pi = p; pi->parent != NULL; pi = pi->parent) - ; - h = (struct host_part *)pi; - - /* Search if given URL belongs to a protection space we already know of. 
*/ - for (space = h->prot_space; space; space = space->next) { - if (!strcmp(space->realm, realm) && - lwc_string_isequal(space->scheme, p->scheme, - &match) == lwc_error_ok && - match == true && - space->port == p->port) - break; - } - - if (space != NULL) { - /* Overrule existing auth. */ - free(space->auth); - space->auth = strdup(auth); - } else { - /* Create a new protection space. */ - space = space_alloc = malloc(sizeof(struct prot_space_data)); - realm_alloc = strdup(realm); - auth_alloc = strdup(auth); - - if (!space_alloc || !realm_alloc || !auth_alloc) { - free(space_alloc); - free(realm_alloc); - free(auth_alloc); - return; - } - - space->scheme = lwc_string_ref(p->scheme); - space->port = p->port; - space->realm = realm_alloc; - space->auth = auth_alloc; - space->next = h->prot_space; - h->prot_space = space; - } - - p->prot_space = space; -} - -/** - * Set certificate verification permissions - * - * \param url URL to consider - * \param permit Set to true to allow invalid certificates - */ -void urldb_set_cert_permissions(nsurl *url, bool permit) -{ - struct path_data *p; - struct host_part *h; - - assert(url); - - /* add url, in case it's missing */ - urldb_add_url(url); - - p = urldb_find_url(url); - if (!p) - return; - - for (; p && p->parent; p = p->parent) - /* do nothing */; - assert(p); - - h = (struct host_part *)p; - - h->permit_invalid_certs = permit; -} - -/** - * Set thumbnail for url, replacing any existing thumbnail - * - * \param url Absolute URL to consider - * \param bitmap Opaque pointer to thumbnail data, or NULL to invalidate - */ -void urldb_set_thumbnail(nsurl *url, struct bitmap *bitmap) -{ - struct path_data *p; - - assert(url); - - p = urldb_find_url(url); - if (!p) - return; - - if (p->thumb && p->thumb != bitmap) - bitmap_destroy(p->thumb); - - p->thumb = bitmap; -} - -/** - * Retrieve thumbnail data for given URL - * - * \param url Absolute URL to search for - * \return Pointer to thumbnail data, or NULL if not found. 
- */ -struct bitmap *urldb_get_thumbnail(nsurl *url) -{ - struct path_data *p; - - assert(url); - - p = urldb_find_url(url); - if (!p) - return NULL; - - return p->thumb; -} - -/** - * Iterate over entries in the database which match the given prefix - * - * \param prefix Prefix to match - * \param callback Callback function - */ -void urldb_iterate_partial(const char *prefix, - bool (*callback)(nsurl *url, - const struct url_data *data)) -{ - char host[256]; - char buf[260]; /* max domain + "www." */ - const char *slash, *scheme_sep; - struct search_node *tree; - const struct host_part *h; - - assert(prefix && callback); - - /* strip scheme */ - scheme_sep = strstr(prefix, "://"); - if (scheme_sep) - prefix = scheme_sep + 3; - - slash = strchr(prefix, '/'); - tree = urldb_get_search_tree(prefix); - - if (slash) { - /* if there's a slash in the input, then we can - * assume that we're looking for a path */ - snprintf(host, sizeof host, "%.*s", - (int) (slash - prefix), prefix); - - h = urldb_search_find(tree, host); - if (!h) { - int len = slash - prefix; - - if (len <= 3 || strncasecmp(host, "www.", 4) != 0) { - snprintf(buf, sizeof buf, "www.%s", host); - h = urldb_search_find( - search_trees[ST_DN + 'w' - 'a'], - buf); - if (!h) - return; - } else - return; - } - - if (h->paths.children) { - /* Have paths, iterate them */ - urldb_iterate_partial_path(&h->paths, slash + 1, - callback); - } - - } else { - int len = strlen(prefix); - - /* looking for hosts */ - if (!urldb_iterate_partial_host(tree, prefix, callback)) - return; - - if (len <= 3 || strncasecmp(prefix, "www.", 4) != 0) { - /* now look for www.prefix */ - snprintf(buf, sizeof buf, "www.%s", prefix); - if(!urldb_iterate_partial_host( - search_trees[ST_DN + 'w' - 'a'], - buf, callback)) - return; - } - } -} - -/** - * Partial host iterator (internal) - * - * \param root Root of (sub)tree to traverse - * \param prefix Prefix to match - * \param callback Callback function - * \return true to continue, 
false otherwise - */ -bool urldb_iterate_partial_host(struct search_node *root, const char *prefix, - bool (*callback)(nsurl *url, const struct url_data *data)) -{ - int c; - - assert(root && prefix && callback); - - if (root == &empty) - return true; - - c = urldb_search_match_prefix(root->data, prefix); - - if (c > 0) - /* No match => look in left subtree */ - return urldb_iterate_partial_host(root->left, prefix, - callback); - else if (c < 0) - /* No match => look in right subtree */ - return urldb_iterate_partial_host(root->right, prefix, - callback); - else { - /* Match => iterate over l/r subtrees & process this node */ - if (!urldb_iterate_partial_host(root->left, prefix, - callback)) - return false; - - if (root->data->paths.children) { - /* and extract all paths attached to this host */ - if (!urldb_iterate_entries_path(&root->data->paths, - callback, NULL)) { - return false; - } - } - - if (!urldb_iterate_partial_host(root->right, prefix, - callback)) - return false; - } - - return true; -} - -/** - * Partial path iterator (internal) - * - * \param parent Root of (sub)tree to traverse - * \param prefix Prefix to match - * \param callback Callback function - * \return true to continue, false otherwise - */ -bool urldb_iterate_partial_path(const struct path_data *parent, - const char *prefix, bool (*callback)(nsurl *url, - const struct url_data *data)) -{ - const struct path_data *p = parent->children; - const char *slash, *end = prefix + strlen(prefix); - - /* - * Given: http://www.example.org/a/b/c/d//e - * and assuming a path tree: - * . - * / \ - * a1 b1 - * / \ - * a2 b2 - * /|\ - * a b c - * 3 3 | - * d - * | - * e - * / \ - * f g - * - * Prefix will be: p will be: - * - * a/b/c/d//e a1 - * b/c/d//e a2 - * b/c/d//e b3 - * c/d//e a3 - * c/d//e b3 - * c/d//e c - * d//e d - * /e e (skip /) - * e e - * - * I.E. we perform a breadth-first search of the tree. 
- */ - - do { - slash = strchr(prefix, '/'); - if (!slash) - slash = end; - - if (slash == prefix && *prefix == '/') { - /* Ignore "//" */ - prefix++; - continue; - } - - if (strncasecmp(p->segment, prefix, slash - prefix) == 0) { - /* prefix matches so far */ - if (slash == end) { - /* we've run out of prefix, so all - * paths below this one match */ - if (!urldb_iterate_entries_path(p, callback, - NULL)) - return false; - - /* Progress to next sibling */ - p = p->next; - } else { - /* Skip over this segment */ - prefix = slash + 1; - - p = p->children; - } - } else { - /* Doesn't match this segment, try next sibling */ - p = p->next; - } - } while (p != NULL); - - return true; -} - -/** - * Iterate over all entries in database - * - * \param callback Function to callback for each entry - */ -void urldb_iterate_entries(bool (*callback)(nsurl *url, - const struct url_data *data)) -{ - int i; - - assert(callback); - - for (i = 0; i < NUM_SEARCH_TREES; i++) { - if (!urldb_iterate_entries_host(search_trees[i], - callback, NULL)) - break; - } -} - -/** - * Iterate over all cookies in database - * - * \param callback Function to callback for each entry - */ -void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data)) -{ - int i; - - assert(callback); - - for (i = 0; i < NUM_SEARCH_TREES; i++) { - if (!urldb_iterate_entries_host(search_trees[i], - NULL, callback)) - break; - } -} - -/** - * Host data iterator (internal) - * - * \param parent Root of subtree to iterate over - * \param url_callback Callback function - * \param cookie_callback Callback function - * \return true to continue, false otherwise - */ -bool urldb_iterate_entries_host(struct search_node *parent, - bool (*url_callback)(nsurl *url, - const struct url_data *data), - bool (*cookie_callback)(const struct cookie_data *data)) -{ - if (parent == &empty) - return true; - - if (!urldb_iterate_entries_host(parent->left, - url_callback, cookie_callback)) - return false; - - if 
((parent->data->paths.children) || ((cookie_callback) && - (parent->data->paths.cookies))) { - /* We have paths (or domain cookies), so iterate them */ - if (!urldb_iterate_entries_path(&parent->data->paths, - url_callback, cookie_callback)) { - return false; - } - } - - if (!urldb_iterate_entries_host(parent->right, - url_callback, cookie_callback)) - return false; - - return true; -} /** * Path data iterator (internal) @@ -1495,14 +465,13 @@ bool urldb_iterate_entries_host(struct search_node *parent, * \param cookie_callback Callback function * \return true to continue, false otherwise */ -bool urldb_iterate_entries_path(const struct path_data *parent, - bool (*url_callback)(nsurl *url, - const struct url_data *data), +static bool urldb_iterate_entries_path(const struct path_data *parent, + bool (*url_callback)(nsurl *url, const struct url_data *data), bool (*cookie_callback)(const struct cookie_data *data)) { const struct path_data *p = parent; const struct cookie_data *c; - + do { if (p->children != NULL) { /* Drill down into children */ @@ -1525,9 +494,10 @@ bool urldb_iterate_entries_path(const struct path_data *parent, return false; } else { c = (const struct cookie_data *)p->cookies; - for (; c != NULL; c = c->next) + for (; c != NULL; c = c->next) { if (!cookie_callback(c)) return false; + } } /* Now, find next node to process. 
*/ @@ -1547,39 +517,6 @@ bool urldb_iterate_entries_path(const struct path_data *parent, return true; } -/** - * Add a host node to the tree - * - * \param part Host segment to add (or whole IP address) (copied) - * \param parent Parent node to add to - * \return Pointer to added node, or NULL on memory exhaustion - */ -struct host_part *urldb_add_host_node(const char *part, - struct host_part *parent) -{ - struct host_part *d; - - assert(part && parent); - - d = calloc(1, sizeof(struct host_part)); - if (!d) - return NULL; - - d->part = strdup(part); - if (!d->part) { - free(d); - return NULL; - } - - d->next = parent->children; - if (parent->children) - parent->children->prev = d; - d->parent = parent; - parent->children = d; - - return d; -} - /** * Check whether a host string is an IP address. @@ -1673,740 +610,6 @@ out_true: } -/** - * Add a host to the database, creating any intermediate entries - * - * \param host Hostname to add - * \return Pointer to leaf node, or NULL on memory exhaustion - */ -struct host_part *urldb_add_host(const char *host) -{ - struct host_part *d = (struct host_part *) &db_root, *e; - struct search_node *s; - char buf[256]; /* 256 bytes is sufficient - domain names are - * limited to 255 chars. 
*/ - char *part; - - assert(host); - - if (urldb__host_is_ip_address(host)) { - /* Host is an IP, so simply add as TLD */ - - /* Check for existing entry */ - for (e = d->children; e; e = e->next) - if (strcasecmp(host, e->part) == 0) - /* found => return it */ - return e; - - d = urldb_add_host_node(host, d); - - s = urldb_search_insert(search_trees[ST_IP], d); - if (!s) { - /* failed */ - d = NULL; - } else { - search_trees[ST_IP] = s; - } - - return d; - } - - /* Copy host string, so we can corrupt it */ - strncpy(buf, host, sizeof buf); - buf[sizeof buf - 1] = '\0'; - - /* Process FQDN segments backwards */ - do { - part = strrchr(buf, '.'); - if (!part) { - /* last segment */ - /* Check for existing entry */ - for (e = d->children; e; e = e->next) - if (strcasecmp(buf, e->part) == 0) - break; - - if (e) { - d = e; - } else { - d = urldb_add_host_node(buf, d); - } - - /* And insert into search tree */ - if (d) { - struct search_node **r; - - r = urldb_get_search_tree_direct(buf); - s = urldb_search_insert(*r, d); - if (!s) { - /* failed */ - d = NULL; - } else { - *r = s; - } - } - break; - } - - /* Check for existing entry */ - for (e = d->children; e; e = e->next) - if (strcasecmp(part + 1, e->part) == 0) - break; - - d = e ? 
e : urldb_add_host_node(part + 1, d); - if (!d) - break; - - *part = '\0'; - } while (1); - - return d; -} - -/** - * Add a path node to the tree - * - * \param scheme URL scheme associated with path (copied) - * \param port Port number on host associated with path - * \param segment Path segment to add (copied) - * \param fragment URL fragment (copied), or NULL - * \param parent Parent node to add to - * \return Pointer to added node, or NULL on memory exhaustion - */ -struct path_data *urldb_add_path_node(lwc_string *scheme, unsigned int port, - const char *segment, lwc_string *fragment, - struct path_data *parent) -{ - struct path_data *d, *e; - - assert(scheme && segment && parent); - - d = calloc(1, sizeof(struct path_data)); - if (!d) - return NULL; - - d->scheme = lwc_string_ref(scheme); - - d->port = port; - - d->segment = strdup(segment); - if (!d->segment) { - lwc_string_unref(d->scheme); - free(d); - return NULL; - } - - if (fragment) { - if (!urldb_add_path_fragment(d, fragment)) { - free(d->segment); - lwc_string_unref(d->scheme); - free(d); - return NULL; - } - } - - for (e = parent->children; e; e = e->next) - if (strcmp(e->segment, d->segment) > 0) - break; - - if (e) { - d->prev = e->prev; - d->next = e; - if (e->prev) - e->prev->next = d; - else - parent->children = d; - e->prev = d; - } else if (!parent->children) { - d->prev = d->next = NULL; - parent->children = parent->last = d; - } else { - d->next = NULL; - d->prev = parent->last; - parent->last->next = d; - parent->last = d; - } - d->parent = parent; - - return d; -} - -/** - * Add a path to the database, creating any intermediate entries - * - * \param scheme URL scheme associated with path - * \param port Port number on host associated with path - * \param host Host tree node to attach to - * \param path_query Absolute path plus query to add (freed) - * \param fragment URL fragment, or NULL - * \param url URL (fragment ignored) - * \return Pointer to leaf node, or NULL on memory 
exhaustion - */ -struct path_data *urldb_add_path(lwc_string *scheme, unsigned int port, - const struct host_part *host, char *path_query, - lwc_string *fragment, nsurl *url) -{ - struct path_data *d, *e; - char *buf = path_query; - char *segment, *slash; - bool match; - - assert(scheme && host && url); - - d = (struct path_data *) &host->paths; - - /* skip leading '/' */ - segment = buf; - if (*segment == '/') - segment++; - - /* Process path segments */ - do { - slash = strchr(segment, '/'); - if (!slash) { - /* last segment */ - /* look for existing entry */ - for (e = d->children; e; e = e->next) - if (strcmp(segment, e->segment) == 0 && - lwc_string_isequal(scheme, - e->scheme, &match) == - lwc_error_ok && - match == true && - e->port == port) - break; - - d = e ? urldb_add_path_fragment(e, fragment) : - urldb_add_path_node(scheme, port, - segment, fragment, d); - break; - } - - *slash = '\0'; - - /* look for existing entry */ - for (e = d->children; e; e = e->next) - if (strcmp(segment, e->segment) == 0 && - lwc_string_isequal(scheme, e->scheme, - &match) == lwc_error_ok && - match == true && - e->port == port) - break; - - d = e ? 
e : urldb_add_path_node(scheme, port, segment, NULL, d); - if (!d) - break; - - segment = slash + 1; - } while (1); - - free(path_query); - - if (d && !d->url) { - /* Insert defragmented URL */ - if (nsurl_defragment(url, &d->url) != NSERROR_OK) - return NULL; - } - - return d; -} - -/** - * Fragment comparator callback for qsort - */ -int urldb_add_path_fragment_cmp(const void *a, const void *b) -{ - return strcasecmp(*((const char **) a), *((const char **) b)); -} - -/** - * Add a fragment to a path segment - * - * \param segment Path segment to add to - * \param fragment Fragment to add (copied), or NULL - * \return segment or NULL on memory exhaustion - */ -struct path_data *urldb_add_path_fragment(struct path_data *segment, - lwc_string *fragment) -{ - char **temp; - - assert(segment); - - /* If no fragment, this function is a NOP - * This may seem strange, but it makes the rest - * of the code cleaner */ - if (!fragment) - return segment; - - temp = realloc(segment->fragment, - (segment->frag_cnt + 1) * sizeof(char *)); - if (!temp) - return NULL; - - segment->fragment = temp; - segment->fragment[segment->frag_cnt] = - strdup(lwc_string_data(fragment)); - if (!segment->fragment[segment->frag_cnt]) { - /* Don't free temp - it's now our buffer */ - return NULL; - } - - segment->frag_cnt++; - - /* We want fragments in alphabetical order, so sort them - * It may prove better to insert in alphabetical order instead */ - qsort(segment->fragment, segment->frag_cnt, sizeof (char *), - urldb_add_path_fragment_cmp); - - return segment; -} - -/** - * Find an URL in the database - * - * \param url Absolute URL to find - * \return Pointer to path data, or NULL if not found - */ -struct path_data *urldb_find_url(nsurl *url) -{ - const struct host_part *h; - struct path_data *p; - struct search_node *tree; - char *plq; - const char *host_str; - lwc_string *scheme, *host, *port; - size_t len = 0; - unsigned int port_int; - bool match; - - assert(url); - - if (url_bloom != 
NULL) { - if (bloom_search_hash(url_bloom, - nsurl_hash(url)) == false) { - return NULL; - } - } - - scheme = nsurl_get_component(url, NSURL_SCHEME); - if (scheme == NULL) - return NULL; - - host = nsurl_get_component(url, NSURL_HOST); - if (host != NULL) { - host_str = lwc_string_data(host); - lwc_string_unref(host); - - } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) == - lwc_error_ok && match == true) { - host_str = "localhost"; - - } else { - lwc_string_unref(scheme); - return NULL; - } - - tree = urldb_get_search_tree(host_str); - h = urldb_search_find(tree, host_str); - if (!h) { - lwc_string_unref(scheme); - return NULL; - } - - /* generate plq (path, leaf, query) */ - if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) != - NSERROR_OK) { - lwc_string_unref(scheme); - return NULL; - } - - /* Get port */ - port = nsurl_get_component(url, NSURL_PORT); - if (port != NULL) { - port_int = atoi(lwc_string_data(port)); - lwc_string_unref(port); - } else { - port_int = 0; - } - - p = urldb_match_path(&h->paths, plq, scheme, port_int); - - free(plq); - lwc_string_unref(scheme); - - return p; -} - -/** - * Match a path string - * - * \param parent Path (sub)tree to look in - * \param path The path to search for - * \param scheme The URL scheme associated with the path - * \param port The port associated with the path - * \return Pointer to path data or NULL if not found. 
- */ -struct path_data *urldb_match_path(const struct path_data *parent, - const char *path, lwc_string *scheme, unsigned short port) -{ - const struct path_data *p; - const char *slash; - bool match; - - assert(parent != NULL); - assert(parent->segment == NULL); - assert(path[0] == '/'); - - /* Start with children, as parent has no segment */ - p = parent->children; - - while (p != NULL) { - slash = strchr(path + 1, '/'); - if (!slash) - slash = path + strlen(path); - - if (strncmp(p->segment, path + 1, slash - path - 1) == 0 && - lwc_string_isequal(p->scheme, scheme, &match) == - lwc_error_ok && - match == true && - p->port == port) { - if (*slash == '\0') { - /* Complete match */ - return (struct path_data *) p; - } - - /* Match so far, go down tree */ - p = p->children; - - path = slash; - } else { - /* No match, try next sibling */ - p = p->next; - } - } - - return NULL; -} - -/** - * Get the search tree for a particular host - * - * \param host the host to lookup - * \return the corresponding search tree - */ -struct search_node **urldb_get_search_tree_direct(const char *host) { - assert(host); - - if (urldb__host_is_ip_address(host)) - return &search_trees[ST_IP]; - else if (isalpha(*host)) - return &search_trees[ST_DN + tolower(*host) - 'a']; - return &search_trees[ST_EE]; -} - -/** - * Get the search tree for a particular host - * - * \param host the host to lookup - * \return the corresponding search tree - */ -struct search_node *urldb_get_search_tree(const char *host) { - return *urldb_get_search_tree_direct(host); -} - -/** - * Dump URL database to stderr - */ -void urldb_dump(void) -{ - int i; - - urldb_dump_hosts(&db_root); - - for (i = 0; i != NUM_SEARCH_TREES; i++) - urldb_dump_search(search_trees[i], 0); -} - -/** - * Dump URL database hosts to stderr - * - * \param parent Parent node of tree to dump - */ -void urldb_dump_hosts(struct host_part *parent) -{ - struct host_part *h; - - if (parent->part) { - LOG(("%s", parent->part)); - - LOG(("\t%s 
invalid SSL certs", - parent->permit_invalid_certs ? "Permits" : "Denies")); - } - - /* Dump path data */ - urldb_dump_paths(&parent->paths); - - /* and recurse */ - for (h = parent->children; h; h = h->next) - urldb_dump_hosts(h); -} - -/** - * Dump URL database paths to stderr - * - * \param parent Parent node of tree to dump - */ -void urldb_dump_paths(struct path_data *parent) -{ - const struct path_data *p = parent; - unsigned int i; - - do { - if (p->segment != NULL) { - LOG(("\t%s : %u", lwc_string_data(p->scheme), p->port)); - - LOG(("\t\t'%s'", p->segment)); - - for (i = 0; i != p->frag_cnt; i++) - LOG(("\t\t\t#%s", p->fragment[i])); - } - - if (p->children != NULL) { - p = p->children; - } else { - while (p != parent) { - if (p->next != NULL) { - p = p->next; - break; - } - - p = p->parent; - } - } - } while (p != parent); -} - -/** - * Dump search tree - * - * \param parent Parent node of tree to dump - * \param depth Tree depth - */ -void urldb_dump_search(struct search_node *parent, int depth) -{ - const struct host_part *h; - int i; - - if (parent == &empty) - return; - - urldb_dump_search(parent->left, depth + 1); - - for (i = 0; i != depth; i++) - fputc(' ', stderr); - - for (h = parent->data; h; h = h->parent) { - if (h->part) - fprintf(stderr, "%s", h->part); - - if (h->parent && h->parent->parent) - fputc('.', stderr); - } - - fputc('\n', stderr); - - urldb_dump_search(parent->right, depth + 1); -} - -/** - * Insert a node into the search tree - * - * \param root Root of tree to insert into - * \param data User data to insert - * \return Pointer to updated root, or NULL if failed - */ -struct search_node *urldb_search_insert(struct search_node *root, - const struct host_part *data) -{ - struct search_node *n; - - assert(root && data); - - n = malloc(sizeof(struct search_node)); - if (!n) - return NULL; - - n->level = 1; - n->data = data; - n->left = n->right = ∅ - - root = urldb_search_insert_internal(root, n); - - return root; -} - -/** - * 
Insert node into search tree - * - * \param root Root of (sub)tree to insert into - * \param n Node to insert - * \return Pointer to updated root - */ -struct search_node *urldb_search_insert_internal(struct search_node *root, - struct search_node *n) -{ - assert(root && n); - - if (root == &empty) { - root = n; - } else { - int c = urldb_search_match_host(root->data, n->data); - - if (c > 0) { - root->left = urldb_search_insert_internal( - root->left, n); - } else if (c < 0) { - root->right = urldb_search_insert_internal( - root->right, n); - } else { - /* exact match */ - free(n); - return root; - } - - root = urldb_search_skew(root); - root = urldb_search_split(root); - } - - return root; -} - -/** - * Find a node in a search tree - * - * \param root Tree to look in - * \param host Host to find - * \return Pointer to host tree node, or NULL if not found - */ -const struct host_part *urldb_search_find(struct search_node *root, - const char *host) -{ - int c; - - assert(root && host); - - if (root == &empty) { - return NULL; - } - - c = urldb_search_match_string(root->data, host); - - if (c > 0) - return urldb_search_find(root->left, host); - else if (c < 0) - return urldb_search_find(root->right, host); - else - return root->data; -} - -/** - * Compare a pair of host_parts - * - * \param a - * \param b - * \return 0 if match, non-zero, otherwise - */ -int urldb_search_match_host(const struct host_part *a, - const struct host_part *b) -{ - int ret; - - assert(a && b); - - /* traverse up tree to root, comparing parts as we go. 
*/ - for (; a && a != &db_root && b && b != &db_root; - a = a->parent, b = b->parent) - if ((ret = strcasecmp(a->part, b->part)) != 0) - /* They differ => return the difference here */ - return ret; - - /* If we get here then either: - * a) The path lengths differ - * or b) The hosts are identical - */ - if (a && a != &db_root && (!b || b == &db_root)) - /* len(a) > len(b) */ - return 1; - else if ((!a || a == &db_root) && b && b != &db_root) - /* len(a) < len(b) */ - return -1; - - /* identical */ - return 0; -} - -/** - * Compare host_part with a string - * - * \param a - * \param b - * \return 0 if match, non-zero, otherwise - */ -int urldb_search_match_string(const struct host_part *a, - const char *b) -{ - const char *end, *dot; - int plen, ret; - - assert(a && a != &db_root && b); - - if (urldb__host_is_ip_address(b)) { - /* IP address */ - return strcasecmp(a->part, b); - } - - end = b + strlen(b) + 1; - - while (b < end && a && a != &db_root) { - dot = strchr(b, '.'); - if (!dot) { - /* last segment */ - dot = end - 1; - } - - /* Compare strings (length limited) */ - if ((ret = strncasecmp(a->part, b, dot - b)) != 0) - /* didn't match => return difference */ - return ret; - - /* The strings matched, now check that the lengths do, too */ - plen = strlen(a->part); - - if (plen > dot - b) - /* len(a) > len(b) */ - return 1; - else if (plen < dot - b) - /* len(a) < len(b) */ - return -1; - - b = dot + 1; - a = a->parent; - } - - /* If we get here then either: - * a) The path lengths differ - * or b) The hosts are identical - */ - if (a && a != &db_root && b >= end) - /* len(a) > len(b) */ - return 1; - else if ((!a || a == &db_root) && b < end) - /* len(a) < len(b) */ - return -1; - - /* Identical */ - return 0; -} - /** * Compare host_part with prefix * @@ -2414,8 +617,7 @@ int urldb_search_match_string(const struct host_part *a, * \param b * \return 0 if match, non-zero, otherwise */ -int urldb_search_match_prefix(const struct host_part *a, - const char *b) 
+static int urldb_search_match_prefix(const struct host_part *a, const char *b) { const char *end, *dot; int plen, ret; @@ -2473,13 +675,734 @@ int urldb_search_match_prefix(const struct host_part *a, return 0; } + +/** + * Partial host iterator (internal) + * + * \param root Root of (sub)tree to traverse + * \param prefix Prefix to match + * \param callback Callback function + * \return true to continue, false otherwise + */ +static bool +urldb_iterate_partial_host(struct search_node *root, + const char *prefix, + bool (*callback)(nsurl *url, const struct url_data *data)) +{ + int c; + + assert(root && prefix && callback); + + if (root == &empty) + return true; + + c = urldb_search_match_prefix(root->data, prefix); + + if (c > 0) + /* No match => look in left subtree */ + return urldb_iterate_partial_host(root->left, prefix, + callback); + else if (c < 0) + /* No match => look in right subtree */ + return urldb_iterate_partial_host(root->right, prefix, + callback); + else { + /* Match => iterate over l/r subtrees & process this node */ + if (!urldb_iterate_partial_host(root->left, prefix, + callback)) + return false; + + if (root->data->paths.children) { + /* and extract all paths attached to this host */ + if (!urldb_iterate_entries_path(&root->data->paths, + callback, NULL)) { + return false; + } + } + + if (!urldb_iterate_partial_host(root->right, prefix, + callback)) + return false; + } + + return true; +} + + +/** + * Partial path iterator (internal) + * + * \param parent Root of (sub)tree to traverse + * \param prefix Prefix to match + * \param callback Callback function + * \return true to continue, false otherwise + */ +static bool urldb_iterate_partial_path(const struct path_data *parent, + const char *prefix, bool (*callback)(nsurl *url, + const struct url_data *data)) +{ + const struct path_data *p = parent->children; + const char *slash, *end = prefix + strlen(prefix); + + /* + * Given: http://www.example.org/a/b/c/d//e + * and assuming a path tree: + 
* . + * / \ + * a1 b1 + * / \ + * a2 b2 + * /|\ + * a b c + * 3 3 | + * d + * | + * e + * / \ + * f g + * + * Prefix will be: p will be: + * + * a/b/c/d//e a1 + * b/c/d//e a2 + * b/c/d//e b3 + * c/d//e a3 + * c/d//e b3 + * c/d//e c + * d//e d + * /e e (skip /) + * e e + * + * I.E. we perform a breadth-first search of the tree. + */ + + do { + slash = strchr(prefix, '/'); + if (!slash) + slash = end; + + if (slash == prefix && *prefix == '/') { + /* Ignore "//" */ + prefix++; + continue; + } + + if (strncasecmp(p->segment, prefix, slash - prefix) == 0) { + /* prefix matches so far */ + if (slash == end) { + /* we've run out of prefix, so all + * paths below this one match */ + if (!urldb_iterate_entries_path(p, callback, + NULL)) + return false; + + /* Progress to next sibling */ + p = p->next; + } else { + /* Skip over this segment */ + prefix = slash + 1; + + p = p->children; + } + } else { + /* Doesn't match this segment, try next sibling */ + p = p->next; + } + } while (p != NULL); + + return true; +} + + +/** + * Host data iterator (internal) + * + * \param parent Root of subtree to iterate over + * \param url_callback Callback function + * \param cookie_callback Callback function + * \return true to continue, false otherwise + */ +static bool urldb_iterate_entries_host(struct search_node *parent, + bool (*url_callback)(nsurl *url, + const struct url_data *data), + bool (*cookie_callback)(const struct cookie_data *data)) +{ + if (parent == &empty) + return true; + + if (!urldb_iterate_entries_host(parent->left, + url_callback, cookie_callback)) + return false; + + if ((parent->data->paths.children) || ((cookie_callback) && + (parent->data->paths.cookies))) { + /* We have paths (or domain cookies), so iterate them */ + if (!urldb_iterate_entries_path(&parent->data->paths, + url_callback, cookie_callback)) { + return false; + } + } + + if (!urldb_iterate_entries_host(parent->right, + url_callback, cookie_callback)) + return false; + + return true; +} + + +/** + * 
Add a host node to the tree + * + * \param part Host segment to add (or whole IP address) (copied) + * \param parent Parent node to add to + * \return Pointer to added node, or NULL on memory exhaustion + */ +static struct host_part *urldb_add_host_node(const char *part, + struct host_part *parent) +{ + struct host_part *d; + + assert(part && parent); + + d = calloc(1, sizeof(struct host_part)); + if (!d) + return NULL; + + d->part = strdup(part); + if (!d->part) { + free(d); + return NULL; + } + + d->next = parent->children; + if (parent->children) + parent->children->prev = d; + d->parent = parent; + parent->children = d; + + return d; +} + + +/** + * Fragment comparator callback for qsort + */ +static int urldb_add_path_fragment_cmp(const void *a, const void *b) +{ + return strcasecmp(*((const char **) a), *((const char **) b)); +} + + +/** + * Add a fragment to a path segment + * + * \param segment Path segment to add to + * \param fragment Fragment to add (copied), or NULL + * \return segment or NULL on memory exhaustion + */ +static struct path_data * +urldb_add_path_fragment(struct path_data *segment, lwc_string *fragment) +{ + char **temp; + + assert(segment); + + /* If no fragment, this function is a NOP + * This may seem strange, but it makes the rest + * of the code cleaner */ + if (!fragment) + return segment; + + temp = realloc(segment->fragment, + (segment->frag_cnt + 1) * sizeof(char *)); + if (!temp) + return NULL; + + segment->fragment = temp; + segment->fragment[segment->frag_cnt] = + strdup(lwc_string_data(fragment)); + if (!segment->fragment[segment->frag_cnt]) { + /* Don't free temp - it's now our buffer */ + return NULL; + } + + segment->frag_cnt++; + + /* We want fragments in alphabetical order, so sort them + * It may prove better to insert in alphabetical order instead */ + qsort(segment->fragment, segment->frag_cnt, sizeof (char *), + urldb_add_path_fragment_cmp); + + return segment; +} + + +/** + * Add a path node to the tree + * + * 
\param scheme URL scheme associated with path (copied) + * \param port Port number on host associated with path + * \param segment Path segment to add (copied) + * \param fragment URL fragment (copied), or NULL + * \param parent Parent node to add to + * \return Pointer to added node, or NULL on memory exhaustion + */ +static struct path_data * +urldb_add_path_node(lwc_string *scheme, unsigned int port, + const char *segment, lwc_string *fragment, + struct path_data *parent) +{ + struct path_data *d, *e; + + assert(scheme && segment && parent); + + d = calloc(1, sizeof(struct path_data)); + if (!d) + return NULL; + + d->scheme = lwc_string_ref(scheme); + + d->port = port; + + d->segment = strdup(segment); + if (!d->segment) { + lwc_string_unref(d->scheme); + free(d); + return NULL; + } + + if (fragment) { + if (!urldb_add_path_fragment(d, fragment)) { + free(d->segment); + lwc_string_unref(d->scheme); + free(d); + return NULL; + } + } + + for (e = parent->children; e; e = e->next) { + if (strcmp(e->segment, d->segment) > 0) + break; + } + + if (e) { + d->prev = e->prev; + d->next = e; + if (e->prev) + e->prev->next = d; + else + parent->children = d; + e->prev = d; + } else if (!parent->children) { + d->prev = d->next = NULL; + parent->children = parent->last = d; + } else { + d->next = NULL; + d->prev = parent->last; + parent->last->next = d; + parent->last = d; + } + d->parent = parent; + + return d; +} + + +/** + * Get the search tree for a particular host + * + * \param host the host to lookup + * \return the corresponding search tree + */ +static struct search_node **urldb_get_search_tree_direct(const char *host) +{ + assert(host); + + if (urldb__host_is_ip_address(host)) + return &search_trees[ST_IP]; + else if (isalpha(*host)) + return &search_trees[ST_DN + tolower(*host) - 'a']; + return &search_trees[ST_EE]; +} + + +/** + * Get the search tree for a particular host + * + * \param host the host to lookup + * \return the corresponding search tree + */ 
+static struct search_node *urldb_get_search_tree(const char *host) +{ + return *urldb_get_search_tree_direct(host); +} + + +/** + * Compare host_part with a string + * + * \param a + * \param b + * \return 0 if match, non-zero, otherwise + */ +static int urldb_search_match_string(const struct host_part *a, const char *b) +{ + const char *end, *dot; + int plen, ret; + + assert(a && a != &db_root && b); + + if (urldb__host_is_ip_address(b)) { + /* IP address */ + return strcasecmp(a->part, b); + } + + end = b + strlen(b) + 1; + + while (b < end && a && a != &db_root) { + dot = strchr(b, '.'); + if (!dot) { + /* last segment */ + dot = end - 1; + } + + /* Compare strings (length limited) */ + if ((ret = strncasecmp(a->part, b, dot - b)) != 0) + /* didn't match => return difference */ + return ret; + + /* The strings matched, now check that the lengths do, too */ + plen = strlen(a->part); + + if (plen > dot - b) + /* len(a) > len(b) */ + return 1; + else if (plen < dot - b) + /* len(a) < len(b) */ + return -1; + + b = dot + 1; + a = a->parent; + } + + /* If we get here then either: + * a) The path lengths differ + * or b) The hosts are identical + */ + if (a && a != &db_root && b >= end) + /* len(a) > len(b) */ + return 1; + else if ((!a || a == &db_root) && b < end) + /* len(a) < len(b) */ + return -1; + + /* Identical */ + return 0; +} + + +/** + * Find a node in a search tree + * + * \param root Tree to look in + * \param host Host to find + * \return Pointer to host tree node, or NULL if not found + */ +static const struct host_part * +urldb_search_find(struct search_node *root, const char *host) +{ + int c; + + assert(root && host); + + if (root == &empty) { + return NULL; + } + + c = urldb_search_match_string(root->data, host); + + if (c > 0) + return urldb_search_find(root->left, host); + else if (c < 0) + return urldb_search_find(root->right, host); + else + return root->data; +} + + +/** + * Match a path string + * + * \param parent Path (sub)tree to look in 
+ * \param path The path to search for + * \param scheme The URL scheme associated with the path + * \param port The port associated with the path + * \return Pointer to path data or NULL if not found. + */ +static struct path_data *urldb_match_path(const struct path_data *parent, + const char *path, lwc_string *scheme, unsigned short port) +{ + const struct path_data *p; + const char *slash; + bool match; + + assert(parent != NULL); + assert(parent->segment == NULL); + + if (path[0] != '/') { + LOG(("path is %s", path)); + } + + assert(path[0] == '/'); + + /* Start with children, as parent has no segment */ + p = parent->children; + + while (p != NULL) { + slash = strchr(path + 1, '/'); + if (!slash) + slash = path + strlen(path); + + if (strncmp(p->segment, path + 1, slash - path - 1) == 0 && + lwc_string_isequal(p->scheme, scheme, &match) == + lwc_error_ok && + match == true && + p->port == port) { + if (*slash == '\0') { + /* Complete match */ + return (struct path_data *) p; + } + + /* Match so far, go down tree */ + p = p->children; + + path = slash; + } else { + /* No match, try next sibling */ + p = p->next; + } + } + + return NULL; +} + + +/** + * Find an URL in the database + * + * \param url Absolute URL to find + * \return Pointer to path data, or NULL if not found + */ +static struct path_data *urldb_find_url(nsurl *url) +{ + const struct host_part *h; + struct path_data *p; + struct search_node *tree; + char *plq; + const char *host_str; + lwc_string *scheme, *host, *port; + size_t len = 0; + unsigned int port_int; + bool match; + + assert(url); + + if (url_bloom != NULL) { + if (bloom_search_hash(url_bloom, + nsurl_hash(url)) == false) { + return NULL; + } + } + + scheme = nsurl_get_component(url, NSURL_SCHEME); + if (scheme == NULL) + return NULL; + + host = nsurl_get_component(url, NSURL_HOST); + if (host != NULL) { + host_str = lwc_string_data(host); + lwc_string_unref(host); + + } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) 
== + lwc_error_ok && match == true) { + host_str = "localhost"; + + } else { + lwc_string_unref(scheme); + return NULL; + } + + tree = urldb_get_search_tree(host_str); + h = urldb_search_find(tree, host_str); + if (!h) { + lwc_string_unref(scheme); + return NULL; + } + + /* generate plq (path, leaf, query) */ + if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) != + NSERROR_OK) { + lwc_string_unref(scheme); + return NULL; + } + + /* Get port */ + port = nsurl_get_component(url, NSURL_PORT); + if (port != NULL) { + port_int = atoi(lwc_string_data(port)); + lwc_string_unref(port); + } else { + port_int = 0; + } + + p = urldb_match_path(&h->paths, plq, scheme, port_int); + + free(plq); + lwc_string_unref(scheme); + + return p; +} + + +/** + * Dump URL database paths to stderr + * + * \param parent Parent node of tree to dump + */ +static void urldb_dump_paths(struct path_data *parent) +{ + const struct path_data *p = parent; + unsigned int i; + + do { + if (p->segment != NULL) { + LOG(("\t%s : %u", lwc_string_data(p->scheme), p->port)); + + LOG(("\t\t'%s'", p->segment)); + + for (i = 0; i != p->frag_cnt; i++) + LOG(("\t\t\t#%s", p->fragment[i])); + } + + if (p->children != NULL) { + p = p->children; + } else { + while (p != parent) { + if (p->next != NULL) { + p = p->next; + break; + } + + p = p->parent; + } + } + } while (p != parent); +} + + +/** + * Dump URL database hosts to stderr + * + * \param parent Parent node of tree to dump + */ +static void urldb_dump_hosts(struct host_part *parent) +{ + struct host_part *h; + + if (parent->part) { + LOG(("%s", parent->part)); + + LOG(("\t%s invalid SSL certs", + parent->permit_invalid_certs ? 
"Permits" : "Denies")); + } + + /* Dump path data */ + urldb_dump_paths(&parent->paths); + + /* and recurse */ + for (h = parent->children; h; h = h->next) + urldb_dump_hosts(h); +} + + +/** + * Dump search tree + * + * \param parent Parent node of tree to dump + * \param depth Tree depth + */ +static void urldb_dump_search(struct search_node *parent, int depth) +{ + const struct host_part *h; + int i; + + if (parent == &empty) + return; + + urldb_dump_search(parent->left, depth + 1); + + for (i = 0; i != depth; i++) + fputc(' ', stderr); + + for (h = parent->data; h; h = h->parent) { + if (h->part) + fprintf(stderr, "%s", h->part); + + if (h->parent && h->parent->parent) + fputc('.', stderr); + } + + fputc('\n', stderr); + + urldb_dump_search(parent->right, depth + 1); +} + + +/** + * Compare a pair of host_parts + * + * \param a + * \param b + * \return 0 if match, non-zero, otherwise + */ +static int +urldb_search_match_host(const struct host_part *a, const struct host_part *b) +{ + int ret; + + assert(a && b); + + /* traverse up tree to root, comparing parts as we go. 
*/ + for (; a && a != &db_root && b && b != &db_root; + a = a->parent, b = b->parent) + if ((ret = strcasecmp(a->part, b->part)) != 0) + /* They differ => return the difference here */ + return ret; + + /* If we get here then either: + * a) The path lengths differ + * or b) The hosts are identical + */ + if (a && a != &db_root && (!b || b == &db_root)) + /* len(a) > len(b) */ + return 1; + else if ((!a || a == &db_root) && b && b != &db_root) + /* len(a) < len(b) */ + return -1; + + /* identical */ + return 0; +} + + /** * Rotate a subtree right * * \param root Root of subtree to rotate * \return new root of subtree */ -struct search_node *urldb_search_skew(struct search_node *root) +static struct search_node *urldb_search_skew(struct search_node *root) { struct search_node *temp; @@ -2495,13 +1418,14 @@ struct search_node *urldb_search_skew(struct search_node *root) return root; } + /** * Rotate a node left, increasing the parent's level * * \param root Root of subtree to rotate * \return New root of subtree */ -struct search_node *urldb_search_split(struct search_node *root) +static struct search_node *urldb_search_split(struct search_node *root) { struct search_node *temp; @@ -2519,538 +1443,198 @@ struct search_node *urldb_search_split(struct search_node *root) return root; } + /** - * Retrieve cookies for an URL + * Insert node into search tree * - * \param url URL being fetched - * \param include_http_only Whether to include HTTP(S) only cookies. 
- * \return Cookies string for libcurl (on heap), or NULL on error/no cookies + * \param root Root of (sub)tree to insert into + * \param n Node to insert + * \return Pointer to updated root */ -char *urldb_get_cookie(nsurl *url, bool include_http_only) +static struct search_node * +urldb_search_insert_internal(struct search_node *root, struct search_node *n) { - const struct path_data *p, *q; - const struct host_part *h; - lwc_string *path_lwc; - struct cookie_internal_data *c; - int count = 0, version = COOKIE_RFC2965; - struct cookie_internal_data **matched_cookies; - int matched_cookies_size = 20; - int ret_alloc = 4096, ret_used = 1; - const char *path; - char *ret; - lwc_string *scheme; - time_t now; - int i; - bool match; + assert(root && n); - assert(url != NULL); + if (root == &empty) { + root = n; + } else { + int c = urldb_search_match_host(root->data, n->data); - /* The URL must exist in the db in order to find relevant cookies, since - * we search up the tree from the URL node, and cookies from further - * up also apply. 
*/ - urldb_add_url(url); - - p = urldb_find_url(url); - if (!p) - return NULL; - - scheme = p->scheme; - - matched_cookies = malloc(matched_cookies_size * - sizeof(struct cookie_internal_data *)); - if (!matched_cookies) - return NULL; - -#define GROW_MATCHED_COOKIES \ - do { \ - if (count == matched_cookies_size) { \ - struct cookie_internal_data **temp; \ - temp = realloc(matched_cookies, \ - (matched_cookies_size + 20) * \ - sizeof(struct cookie_internal_data *)); \ - \ - if (temp == NULL) { \ - free(ret); \ - free(matched_cookies); \ - return NULL; \ - } \ - \ - matched_cookies = temp; \ - matched_cookies_size += 20; \ - } \ - } while(0) - - ret = malloc(ret_alloc); - if (!ret) { - free(matched_cookies); - return NULL; - } - - ret[0] = '\0'; - - path_lwc = nsurl_get_component(url, NSURL_PATH); - if (path_lwc == NULL) { - free(ret); - free(matched_cookies); - return NULL; - } - path = lwc_string_data(path_lwc); - lwc_string_unref(path_lwc); - - now = time(NULL); - - if (*(p->segment) != '\0') { - /* Match exact path, unless directory, when prefix matching - * will handle this case for us. */ - for (q = p->parent->children; q; q = q->next) { - if (strcmp(q->segment, p->segment)) - continue; - - /* Consider all cookies associated with - * this exact path */ - for (c = q->cookies; c; c = c->next) { - if (c->expires != -1 && c->expires < now) - /* cookie has expired => ignore */ - continue; - - if (c->secure && lwc_string_isequal( - q->scheme, - corestring_lwc_https, - &match) && - match == false) - /* secure cookie for insecure host. 
- * ignore */ - continue; - - if (c->http_only && !include_http_only) - /* Ignore HttpOnly */ - continue; - - matched_cookies[count++] = c; - - GROW_MATCHED_COOKIES; - - if (c->version < (unsigned int)version) - version = c->version; - - c->last_used = now; - - cookie_manager_add((struct cookie_data *)c); - } - } - } - - /* Now consider cookies whose paths prefix-match ours */ - for (p = p->parent; p; p = p->parent) { - /* Find directory's path entry(ies) */ - /* There are potentially multiple due to differing schemes */ - for (q = p->children; q; q = q->next) { - if (*(q->segment) != '\0') - continue; - - for (c = q->cookies; c; c = c->next) { - if (c->expires != -1 && c->expires < now) - /* cookie has expired => ignore */ - continue; - - if (c->secure && lwc_string_isequal( - q->scheme, - corestring_lwc_https, - &match) && - match == false) - /* Secure cookie for insecure server - * => ignore */ - continue; - - matched_cookies[count++] = c; - - GROW_MATCHED_COOKIES; - - if (c->version < (unsigned int) version) - version = c->version; - - c->last_used = now; - - cookie_manager_add((struct cookie_data *)c); - } - } - - if (!p->parent) { - /* No parent, so bail here. This can't go in - * the loop exit condition as we also want to - * process the top-level node. - * - * If p->parent is NULL then p->cookies are - * the domain cookies and thus we don't even - * try matching against them. 
- */ - break; - } - - /* Consider p itself - may be the result of Path=/foo */ - for (c = p->cookies; c; c = c->next) { - if (c->expires != -1 && c->expires < now) - /* cookie has expired => ignore */ - continue; - - /* Ensure cookie path is a prefix of the resource */ - if (strncmp(c->path, path, strlen(c->path)) != 0) - /* paths don't match => ignore */ - continue; - - if (c->secure && lwc_string_isequal(p->scheme, - corestring_lwc_https, - &match) && - match == false) - /* Secure cookie for insecure server - * => ignore */ - continue; - - matched_cookies[count++] = c; - - GROW_MATCHED_COOKIES; - - if (c->version < (unsigned int) version) - version = c->version; - - c->last_used = now; - - cookie_manager_add((struct cookie_data *)c); + if (c > 0) { + root->left = urldb_search_insert_internal( + root->left, n); + } else if (c < 0) { + root->right = urldb_search_insert_internal( + root->right, n); + } else { + /* exact match */ + free(n); + return root; } + root = urldb_search_skew(root); + root = urldb_search_split(root); } - /* Finally consider domain cookies for hosts which domain match ours */ - for (h = (const struct host_part *)p; h && h != &db_root; - h = h->parent) { - for (c = h->paths.cookies; c; c = c->next) { - if (c->expires != -1 && c->expires < now) - /* cookie has expired => ignore */ - continue; - - /* Ensure cookie path is a prefix of the resource */ - if (strncmp(c->path, path, strlen(c->path)) != 0) - /* paths don't match => ignore */ - continue; - - if (c->secure && lwc_string_isequal(scheme, - corestring_lwc_https, - &match) && - match == false) - /* secure cookie for insecure host. 
ignore */ - continue; - - matched_cookies[count++] = c; - - GROW_MATCHED_COOKIES; - - if (c->version < (unsigned int)version) - version = c->version; - - c->last_used = now; - - cookie_manager_add((struct cookie_data *)c); - } - } - - if (count == 0) { - /* No cookies found */ - free(ret); - free(matched_cookies); - return NULL; - } - - /* and build output string */ - if (version > COOKIE_NETSCAPE) { - sprintf(ret, "$Version=%d", version); - ret_used = strlen(ret) + 1; - } - - for (i = 0; i < count; i++) { - if (!urldb_concat_cookie(matched_cookies[i], version, - &ret_used, &ret_alloc, &ret)) { - free(ret); - free(matched_cookies); - return NULL; - } - } - - if (version == COOKIE_NETSCAPE) { - /* Old-style cookies => no version & skip "; " */ - memmove(ret, ret + 2, ret_used - 2); - ret_used -= 2; - } - - /* Now, shrink the output buffer to the required size */ - { - char *temp = realloc(ret, ret_used); - if (!temp) { - free(ret); - free(matched_cookies); - return NULL; - } - - ret = temp; - } - - free(matched_cookies); - - return ret; - -#undef GROW_MATCHED_COOKIES + return root; } + /** - * Parse Set-Cookie header and insert cookie(s) into database + * Insert a node into the search tree * - * \param header Header to parse, with Set-Cookie: stripped - * \param url URL being fetched - * \param referer Referring resource, or 0 for verifiable transaction - * \return true on success, false otherwise + * \param root Root of tree to insert into + * \param data User data to insert + * \return Pointer to updated root, or NULL if failed */ -bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer) +static struct search_node * +urldb_search_insert(struct search_node *root, const struct host_part *data) { - const char *cur = header, *end; - lwc_string *path, *host, *scheme; - nsurl *urlt; - bool match; + struct search_node *n; - assert(url && header); + assert(root && data); - /* Get defragmented URL, as 'urlt' */ - if (nsurl_defragment(url, &urlt) != NSERROR_OK) 
+ n = malloc(sizeof(struct search_node)); + if (!n) return NULL; - scheme = nsurl_get_component(url, NSURL_SCHEME); - if (scheme == NULL) { - nsurl_unref(urlt); - return false; + n->level = 1; + n->data = data; + n->left = n->right = ∅ + + root = urldb_search_insert_internal(root, n); + + return root; +} + + +/** + * Parse a cookie avpair + * + * \param c Cookie struct to populate + * \param n Name component + * \param v Value component + * \param was_quoted Whether ::v was quoted in the input + * \return true on success, false on memory exhaustion + */ +static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, + char *v, bool was_quoted) +{ + int vlen; + + assert(c && n && v); + + /* Strip whitespace from start of name */ + for (; *n; n++) { + if (*n != ' ' && *n != '\t') + break; } - path = nsurl_get_component(url, NSURL_PATH); - if (path == NULL) { - lwc_string_unref(scheme); - nsurl_unref(urlt); - return false; + /* Strip whitespace from end of name */ + for (vlen = strlen(n); vlen; vlen--) { + if (n[vlen] == ' ' || n[vlen] == '\t') + n[vlen] = '\0'; + else + break; } - host = nsurl_get_component(url, NSURL_HOST); - if (host == NULL) { - lwc_string_unref(path); - lwc_string_unref(scheme); - nsurl_unref(urlt); - return false; + /* Strip whitespace from start of value */ + for (; *v; v++) { + if (*v != ' ' && *v != '\t') + break; } - if (referer) { - lwc_string *rhost; - - /* Ensure that url's host name domain matches - * referer's (4.3.5) */ - rhost = nsurl_get_component(referer, NSURL_HOST); - if (rhost == NULL) { - goto error; - } - - /* Domain match host names */ - if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok && - match == false) { - const char *hptr; - const char *rptr; - const char *dot; - const char *host_data = lwc_string_data(host); - const char *rhost_data = lwc_string_data(rhost); - - /* Ensure neither host nor rhost are IP addresses */ - if (urldb__host_is_ip_address(host_data) || - urldb__host_is_ip_address(rhost_data)) { 
- /* IP address, so no partial match */ - lwc_string_unref(rhost); - goto error; - } - - /* Not exact match, so try the following: - * - * 1) Find the longest common suffix of host and rhost - * (may be all of host/rhost) - * 2) Discard characters from the start of the suffix - * until the suffix starts with a dot - * (prevents foobar.com matching bar.com) - * 3) Ensure the suffix is non-empty and contains - * embedded dots (to avoid permitting .com as a - * suffix) - * - * Note that the above in no way resembles the - * domain matching algorithm found in RFC2109. - * It does, however, model the real world rather - * more accurately. - */ - - /** \todo In future, we should consult a TLD service - * instead of just looking for embedded dots. - */ - - hptr = host_data + lwc_string_length(host) - 1; - rptr = rhost_data + lwc_string_length(rhost) - 1; - - /* 1 */ - while (hptr >= host_data && rptr >= rhost_data) { - if (*hptr != *rptr) - break; - hptr--; - rptr--; - } - /* Ensure we end up pointing at the start of the - * common suffix. The above loop will exit pointing - * to the byte before the start of the suffix. 
*/ - hptr++; - - /* 2 */ - while (*hptr != '\0' && *hptr != '.') - hptr++; - - /* 3 */ - if (*hptr == '\0' || - (dot = strchr(hptr + 1, '.')) == NULL || - *(dot + 1) == '\0') { - lwc_string_unref(rhost); - goto error; - } - } - - lwc_string_unref(rhost); + /* Strip whitespace from end of value */ + for (vlen = strlen(v); vlen; vlen--) { + if (v[vlen] == ' ' || v[vlen] == '\t') + v[vlen] = '\0'; + else + break; } - end = cur + strlen(cur) - 2 /* Trailing CRLF */; - - do { - struct cookie_internal_data *c; - char *dot; - size_t len; - - c = urldb_parse_cookie(url, &cur); - if (!c) { - /* failed => stop parsing */ - goto error; + if (!c->comment && strcasecmp(n, "Comment") == 0) { + c->comment = strdup(v); + if (!c->comment) + return false; + } else if (!c->domain && strcasecmp(n, "Domain") == 0) { + if (v[0] == '.') { + /* Domain must start with a dot */ + c->domain_from_set = true; + c->domain = strdup(v); + if (!c->domain) + return false; } + } else if (strcasecmp(n, "Max-Age") == 0) { + int temp = atoi(v); + if (temp == 0) + /* Special case - 0 means delete */ + c->expires = 0; + else + c->expires = time(NULL) + temp; + } else if (!c->path && strcasecmp(n, "Path") == 0) { + c->path_from_set = true; + c->path = strdup(v); + if (!c->path) + return false; + } else if (strcasecmp(n, "Version") == 0) { + c->version = atoi(v); + } else if (strcasecmp(n, "Expires") == 0) { + char *datenoday; + time_t expires; - /* validate cookie */ + /* Strip dayname from date (these are hugely + * variable and liable to break the parser. 
+ * They also serve no useful purpose) */ + for (datenoday = v; *datenoday && !isdigit(*datenoday); + datenoday++) + ; /* do nothing */ - /* 4.2.2:i Cookie must have NAME and VALUE */ - if (!c->name || !c->value) { - urldb_free_cookie(c); - goto error; + expires = curl_getdate(datenoday, NULL); + if (expires == -1) { + /* assume we have an unrepresentable + * date => force it to the maximum + * possible value of a 32bit time_t + * (this may break in 2038. We'll + * deal with that once we come to + * it) */ + expires = (time_t)0x7fffffff; } - - /* 4.3.2:i Cookie path must be a prefix of URL path */ - len = strlen(c->path); - if (len > lwc_string_length(path) || - strncmp(c->path, lwc_string_data(path), - len) != 0) { - urldb_free_cookie(c); - goto error; - } - - /* 4.3.2:ii Cookie domain must contain embedded dots */ - dot = strchr(c->domain + 1, '.'); - if (!dot || *(dot + 1) == '\0') { - /* no embedded dots */ - urldb_free_cookie(c); - goto error; - } - - /* Domain match fetch host with cookie domain */ - if (strcasecmp(lwc_string_data(host), c->domain) != 0) { - int hlen, dlen; - char *domain = c->domain; - - /* c->domain must be a domain cookie here because: - * c->domain is either: - * + specified in the header as a domain cookie - * (non-domain cookies in the header are ignored - * by urldb_parse_cookie / urldb_parse_avpair) - * + defaulted to the URL's host part - * (by urldb_parse_cookie if no valid domain was - * specified in the header) - * - * The latter will pass the strcasecmp above, which - * leaves the former (i.e. 
a domain cookie) - */ - assert(c->domain[0] == '.'); - - /* 4.3.2:iii */ - if (urldb__host_is_ip_address(lwc_string_data(host))) { - /* IP address, so no partial match */ - urldb_free_cookie(c); - goto error; - } - - hlen = lwc_string_length(host); - dlen = strlen(c->domain); - - if (hlen <= dlen && hlen != dlen - 1) { - /* Partial match not possible */ - urldb_free_cookie(c); - goto error; - } - - if (hlen == dlen - 1) { - /* Relax matching to allow - * host a.com to match .a.com */ - domain++; - dlen--; - } - - if (strcasecmp(lwc_string_data(host) + (hlen - dlen), - domain)) { - urldb_free_cookie(c); - goto error; - } - - /* 4.3.2:iv Ensure H contains no dots - * - * If you believe the spec, H should contain no - * dots in _any_ cookie. Unfortunately, however, - * reality differs in that many sites send domain - * cookies of the form .foo.com from hosts such - * as bar.bat.foo.com and then expect domain - * matching to work. Thus we have to do what they - * expect, regardless of any potential security - * implications. 
- * - * This is what code conforming to the spec would - * look like: - * - * for (int i = 0; i < (hlen - dlen); i++) { - * if (host[i] == '.') { - * urldb_free_cookie(c); - * goto error; - * } - * } - */ - } - - /* Now insert into database */ - if (!urldb_insert_cookie(c, scheme, urlt)) - goto error; - } while (cur < end); - - lwc_string_unref(host); - lwc_string_unref(path); - lwc_string_unref(scheme); - nsurl_unref(urlt); + c->expires = expires; + } else if (strcasecmp(n, "Secure") == 0) { + c->secure = true; + } else if (strcasecmp(n, "HttpOnly") == 0) { + c->http_only = true; + } else if (!c->name) { + c->name = strdup(n); + c->value = strdup(v); + c->value_was_quoted = was_quoted; + if (!c->name || !c->value) + return false; + } return true; - -error: - lwc_string_unref(host); - lwc_string_unref(path); - lwc_string_unref(scheme); - nsurl_unref(urlt); - - return false; } + +/** + * Free a cookie + * + * \param c The cookie to free + */ +static void urldb_free_cookie(struct cookie_internal_data *c) +{ + assert(c); + + free(c->comment); + free(c->domain); + free(c->path); + free(c->name); + free(c->value); + free(c); +} + + /** * Parse a cookie * @@ -3058,8 +1642,8 @@ error: * \param cookie Pointer to cookie string (updated on exit) * \return Pointer to cookie structure (on heap, caller frees) or NULL */ -struct cookie_internal_data *urldb_parse_cookie(nsurl *url, - const char **cookie) +static struct cookie_internal_data * +urldb_parse_cookie(nsurl *url, const char **cookie) { struct cookie_internal_data *c; const char *cur; @@ -3091,34 +1675,12 @@ struct cookie_internal_data *urldb_parse_cookie(nsurl *url, /* Match Firefox 2.0.0.11 */ value[0] = '\0'; -#if 0 - /* This is what IE6/7 & Safari 3 do */ - /* Opera 9.25 discards the entire cookie */ - - /* Shuffle value up by 1 */ - memmove(value + 1, value, - min(v - value, sizeof(value) - 2)); - v++; - /* And insert " character at the start */ - value[0] = '"'; - - /* Now, run forwards through the value - * 
looking for a semicolon. If one exists, - * terminate the value at this point. */ - for (char *s = value; s < v; s++) { - if (*s == ';') { - *s = '\0'; - v = s; - break; - } - } -#endif } break; } else if (*cur == '\r') { /* Spurious linefeed */ - continue; + continue; } else if (*cur == '\n') { /* Spurious newline */ continue; @@ -3131,7 +1693,7 @@ struct cookie_internal_data *urldb_parse_cookie(nsurl *url, } else { had_value_data = true; - /* Value is taken verbatim if first non-space + /* Value is taken verbatim if first non-space * character is not a " */ if (*cur != '"') { value_verbatim = true; @@ -3157,7 +1719,7 @@ struct cookie_internal_data *urldb_parse_cookie(nsurl *url, } if (!quoted && (was_quoted || *cur == ';')) { - /* Semicolon or after quoted value + /* Semicolon or after quoted value * => end of current avpair */ /* NUL-terminate tokens */ @@ -3316,112 +1878,6 @@ struct cookie_internal_data *urldb_parse_cookie(nsurl *url, return c; } -/** - * Parse a cookie avpair - * - * \param c Cookie struct to populate - * \param n Name component - * \param v Value component - * \param was_quoted Whether ::v was quoted in the input - * \return true on success, false on memory exhaustion - */ -bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, char *v, - bool was_quoted) -{ - int vlen; - - assert(c && n && v); - - /* Strip whitespace from start of name */ - for (; *n; n++) { - if (*n != ' ' && *n != '\t') - break; - } - - /* Strip whitespace from end of name */ - for (vlen = strlen(n); vlen; vlen--) { - if (n[vlen] == ' ' || n[vlen] == '\t') - n[vlen] = '\0'; - else - break; - } - - /* Strip whitespace from start of value */ - for (; *v; v++) { - if (*v != ' ' && *v != '\t') - break; - } - - /* Strip whitespace from end of value */ - for (vlen = strlen(v); vlen; vlen--) { - if (v[vlen] == ' ' || v[vlen] == '\t') - v[vlen] = '\0'; - else - break; - } - - if (!c->comment && strcasecmp(n, "Comment") == 0) { - c->comment = strdup(v); - if (!c->comment) 
- return false; - } else if (!c->domain && strcasecmp(n, "Domain") == 0) { - if (v[0] == '.') { - /* Domain must start with a dot */ - c->domain_from_set = true; - c->domain = strdup(v); - if (!c->domain) - return false; - } - } else if (strcasecmp(n, "Max-Age") == 0) { - int temp = atoi(v); - if (temp == 0) - /* Special case - 0 means delete */ - c->expires = 0; - else - c->expires = time(NULL) + temp; - } else if (!c->path && strcasecmp(n, "Path") == 0) { - c->path_from_set = true; - c->path = strdup(v); - if (!c->path) - return false; - } else if (strcasecmp(n, "Version") == 0) { - c->version = atoi(v); - } else if (strcasecmp(n, "Expires") == 0) { - char *datenoday; - time_t expires; - - /* Strip dayname from date (these are hugely - * variable and liable to break the parser. - * They also serve no useful purpose) */ - for (datenoday = v; *datenoday && !isdigit(*datenoday); - datenoday++) - ; /* do nothing */ - - expires = curl_getdate(datenoday, NULL); - if (expires == -1) { - /* assume we have an unrepresentable - * date => force it to the maximum - * possible value of a 32bit time_t - * (this may break in 2038. 
We'll - * deal with that once we come to - * it) */ - expires = (time_t)0x7fffffff; - } - c->expires = expires; - } else if (strcasecmp(n, "Secure") == 0) { - c->secure = true; - } else if (strcasecmp(n, "HttpOnly") == 0) { - c->http_only = true; - } else if (!c->name) { - c->name = strdup(n); - c->value = strdup(v); - c->value_was_quoted = was_quoted; - if (!c->name || !c->value) - return false; - } - - return true; -} /** * Insert a cookie into the database @@ -3431,8 +1887,8 @@ bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, char *v, * \param url URL (sans fragment) associated with cookie * \return true on success, false on memory exhaustion (c will be freed) */ -bool urldb_insert_cookie(struct cookie_internal_data *c, lwc_string *scheme, - nsurl *url) +static bool urldb_insert_cookie(struct cookie_internal_data *c, + lwc_string *scheme, nsurl *url) { struct cookie_internal_data *d; const struct host_part *h; @@ -3537,22 +1993,6 @@ bool urldb_insert_cookie(struct cookie_internal_data *c, lwc_string *scheme, return true; } -/** - * Free a cookie - * - * \param c The cookie to free - */ -void urldb_free_cookie(struct cookie_internal_data *c) -{ - assert(c); - - free(c->comment); - free(c->domain); - free(c->path); - free(c->name); - free(c->value); - free(c); -} /** * Concatenate a cookie into the provided buffer @@ -3564,16 +2004,16 @@ void urldb_free_cookie(struct cookie_internal_data *c) * \param buf Pointer to Pointer to buffer (updated) * \return true on success, false on memory exhaustion */ -bool urldb_concat_cookie(struct cookie_internal_data *c, int version, +static bool urldb_concat_cookie(struct cookie_internal_data *c, int version, int *used, int *alloc, char **buf) { /* Combined (A)BNF for the Cookie: request header: - * + * * CHAR = * CTL = * CR = - * LF = + * LF = * SP = * HT = * <"> = @@ -3610,22 +2050,22 @@ bool urldb_concat_cookie(struct cookie_internal_data *c, int version, * * A note on quoted-string handling: * The cookie data 
stored in the db is verbatim (i.e. sans enclosing - * <">, if any, and with all quoted-pairs intact) thus all that we + * <">, if any, and with all quoted-pairs intact) thus all that we * need to do here is ensure that value strings which were quoted - * in Set-Cookie or which include any of the separators are quoted + * in Set-Cookie or which include any of the separators are quoted * before use. * * A note on cookie-value separation: - * We use semicolons for all separators, including between + * We use semicolons for all separators, including between * cookie-values. This simplifies things and is backwards compatible. - */ + */ const char * const separators = "()<>@,;:\\\"/[]?={} \t"; int max_len; assert(c && used && alloc && buf && *buf); - /* "; " cookie-value + /* "; " cookie-value * We allow for the possibility that values are quoted */ max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 + @@ -3663,7 +2103,7 @@ bool urldb_concat_cookie(struct cookie_internal_data *c, int version, *used += strlen(c->value); } - /* We don't send path/domain information -- that's what the + /* We don't send path/domain information -- that's what the * Netscape spec suggests we should do, anyway. */ } else { /* RFC2109 or RFC2965 cookie */ @@ -3713,11 +2153,1567 @@ bool urldb_concat_cookie(struct cookie_internal_data *c, int version, return true; } + /** - * Load a cookie file into the database - * - * \param filename File to load + * deletes paths from a cookie. 
*/ +static void urldb_delete_cookie_paths(const char *domain, const char *path, + const char *name, struct path_data *parent) +{ + struct cookie_internal_data *c; + struct path_data *p = parent; + + assert(parent); + + do { + for (c = p->cookies; c; c = c->next) { + if (strcmp(c->domain, domain) == 0 && + strcmp(c->path, path) == 0 && + strcmp(c->name, name) == 0) { + if (c->prev) + c->prev->next = c->next; + else + p->cookies = c->next; + + if (c->next) + c->next->prev = c->prev; + else + p->cookies_end = c->prev; + + urldb_free_cookie(c); + + return; + } + } + + if (p->children) { + p = p->children; + } else { + while (p != parent) { + if (p->next != NULL) { + p = p->next; + break; + } + + p = p->parent; + } + } + } while (p != parent); +} + + +/** + * Deletes cookie hosts and their assoicated paths + */ +static void urldb_delete_cookie_hosts(const char *domain, const char *path, + const char *name, struct host_part *parent) +{ + struct host_part *h; + assert(parent); + + urldb_delete_cookie_paths(domain, path, name, &parent->paths); + + for (h = parent->children; h; h = h->next) + urldb_delete_cookie_hosts(domain, path, name, h); +} + + +/** + * Save a path subtree's cookies + * + * \param fp File pointer to write to + * \param parent Parent path + */ +static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent) +{ + struct path_data *p = parent; + time_t now = time(NULL); + + assert(fp && parent); + + do { + if (p->cookies != NULL) { + struct cookie_internal_data *c; + + for (c = p->cookies; c != NULL; c = c->next) { + if (c->expires == -1 || c->expires < now) + /* Skip expired & session cookies */ + continue; + + fprintf(fp, + "%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t" + "%s\t%s\t%d\t%s\t%s\t%s\n", + c->version, c->domain, + c->domain_from_set, c->path, + c->path_from_set, c->secure, + c->http_only, + (int)c->expires, (int)c->last_used, + c->no_destroy, c->name, c->value, + c->value_was_quoted, + p->scheme ? 
lwc_string_data(p->scheme) : + "unused", + p->url ? nsurl_access(p->url) : + "unused", + c->comment ? c->comment : ""); + } + } + + if (p->children != NULL) { + p = p->children; + } else { + while (p != parent) { + if (p->next != NULL) { + p = p->next; + break; + } + + p = p->parent; + } + } + } while (p != parent); +} + + +/** + * Save a host subtree's cookies + * + * \param fp File pointer to write to + * \param parent Parent host + */ +static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent) +{ + struct host_part *h; + assert(fp && parent); + + urldb_save_cookie_paths(fp, &parent->paths); + + for (h = parent->children; h; h = h->next) + urldb_save_cookie_hosts(fp, h); +} + + +/** + * Destroy a cookie node + * + * \param c Cookie to destroy + */ +static void urldb_destroy_cookie(struct cookie_internal_data *c) +{ + free(c->name); + free(c->value); + free(c->comment); + free(c->domain); + free(c->path); + + free(c); +} + + +/** + * Destroy the contents of a path node + * + * \param node Node to destroy contents of (does not destroy node) + */ +static void urldb_destroy_path_node_content(struct path_data *node) +{ + struct cookie_internal_data *a, *b; + unsigned int i; + + if (node->url != NULL) + nsurl_unref(node->url); + + if (node->scheme != NULL) + lwc_string_unref(node->scheme); + + free(node->segment); + for (i = 0; i < node->frag_cnt; i++) + free(node->fragment[i]); + free(node->fragment); + + if (node->thumb) + bitmap_destroy(node->thumb); + + free(node->urld.title); + + for (a = node->cookies; a; a = b) { + b = a->next; + urldb_destroy_cookie(a); + } +} + + +/** + * Destroy protection space data + * + * \param space Protection space to destroy + */ +static void urldb_destroy_prot_space(struct prot_space_data *space) +{ + lwc_string_unref(space->scheme); + free(space->realm); + free(space->auth); + + free(space); +} + + +/** + * Destroy a path tree + * + * \param root Root node of tree to destroy + */ +static void 
urldb_destroy_path_tree(struct path_data *root) +{ + struct path_data *p = root; + + do { + if (p->children != NULL) { + p = p->children; + } else { + struct path_data *q = p; + + while (p != root) { + if (p->next != NULL) { + p = p->next; + break; + } + + p = p->parent; + + urldb_destroy_path_node_content(q); + free(q); + + q = p; + } + + urldb_destroy_path_node_content(q); + free(q); + } + } while (p != root); +} + + +/** + * Destroy a host tree + * + * \param root Root node of tree to destroy + */ +static void urldb_destroy_host_tree(struct host_part *root) +{ + struct host_part *a, *b; + struct path_data *p, *q; + struct prot_space_data *s, *t; + + /* Destroy children */ + for (a = root->children; a; a = b) { + b = a->next; + urldb_destroy_host_tree(a); + } + + /* Now clean up paths */ + for (p = root->paths.children; p; p = q) { + q = p->next; + urldb_destroy_path_tree(p); + } + + /* Root path */ + urldb_destroy_path_node_content(&root->paths); + + /* Proctection space data */ + for (s = root->prot_space; s; s = t) { + t = s->next; + urldb_destroy_prot_space(s); + } + + /* And ourselves */ + free(root->part); + free(root); +} + + +/** + * Destroy a search tree + * + * \param root Root node of tree to destroy + */ +static void urldb_destroy_search_tree(struct search_node *root) +{ + /* Destroy children */ + if (root->left != &empty) + urldb_destroy_search_tree(root->left); + if (root->right != &empty) + urldb_destroy_search_tree(root->right); + + /* And destroy ourselves */ + free(root); +} + + +/*************** External interface ***************/ + + +/* exported interface documented in content/urldb.h */ +void urldb_destroy(void) +{ + struct host_part *a, *b; + int i; + + /* Clean up search trees */ + for (i = 0; i < NUM_SEARCH_TREES; i++) { + if (search_trees[i] != &empty) + urldb_destroy_search_tree(search_trees[i]); + } + + /* And database */ + for (a = db_root.children; a; a = b) { + b = a->next; + urldb_destroy_host_tree(a); + } + + /* And the bloom 
filter */ + if (url_bloom != NULL) + bloom_destroy(url_bloom); +} + + +/* exported interface documented in content/urldb.h */ +nserror urldb_load(const char *filename) +{ +#define MAXIMUM_URL_LENGTH 4096 + char s[MAXIMUM_URL_LENGTH]; + char host[256]; + struct host_part *h; + int urls; + int i; + int version; + int length; + FILE *fp; + + assert(filename); + + LOG(("Loading URL file %s", filename)); + + if (url_bloom == NULL) + url_bloom = bloom_create(BLOOM_SIZE); + + fp = fopen(filename, "r"); + if (!fp) { + LOG(("Failed to open file '%s' for reading", filename)); + return NSERROR_NOT_FOUND; + } + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) { + fclose(fp); + return NSERROR_NEED_DATA; + } + + version = atoi(s); + if (version < MIN_URL_FILE_VERSION) { + LOG(("Unsupported URL file version.")); + fclose(fp); + return NSERROR_INVALID; + } + if (version > URL_FILE_VERSION) { + LOG(("Unknown URL file version.")); + fclose(fp); + return NSERROR_INVALID; + } + + while (fgets(host, sizeof host, fp)) { + /* get the hostname */ + length = strlen(host) - 1; + host[length] = '\0'; + + /* skip data that has ended up with a host of '' */ + if (length == 0) { + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + urls = atoi(s); + /* Eight fields/url */ + for (i = 0; i < (8 * urls); i++) { + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + } + continue; + } + + /* read number of URLs */ + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + urls = atoi(s); + + /* no URLs => try next host */ + if (urls == 0) { + LOG(("No URLs for '%s'", host)); + continue; + } + + h = urldb_add_host(host); + if (!h) { + LOG(("Failed adding host: '%s'", host)); + fclose(fp); + return NSERROR_NOMEM; + } + + /* load the non-corrupt data */ + for (i = 0; i < urls; i++) { + struct path_data *p = NULL; + char scheme[64], ports[10]; + char url[64 + 3 + 256 + 6 + 4096 + 1]; + unsigned int port; + bool is_file = false; + nsurl *nsurl; + lwc_string *scheme_lwc, *fragment_lwc; + char *path_query; + size_t len; + + 
if (!fgets(scheme, sizeof scheme, fp)) + break; + length = strlen(scheme) - 1; + scheme[length] = '\0'; + + if (!fgets(ports, sizeof ports, fp)) + break; + length = strlen(ports) - 1; + ports[length] = '\0'; + port = atoi(ports); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + length = strlen(s) - 1; + s[length] = '\0'; + + if (!strcasecmp(host, "localhost") && + !strcasecmp(scheme, "file")) + is_file = true; + + snprintf(url, sizeof url, "%s://%s%s%s%s", + scheme, + /* file URLs have no host */ + (is_file ? "" : host), + (port ? ":" : ""), + (port ? ports : ""), + s); + + /* TODO: store URLs in pre-parsed state, and make + * a nsurl_load to generate the nsurl more + * swiftly. + * Need a nsurl_save too. + */ + if (nsurl_create(url, &nsurl) != NSERROR_OK) { + LOG(("Failed inserting '%s'", url)); + fclose(fp); + return NSERROR_NOMEM; + } + + if (url_bloom != NULL) { + uint32_t hash = nsurl_hash(nsurl); + bloom_insert_hash(url_bloom, hash); + } + + /* Copy and merge path/query strings */ + if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY, + &path_query, &len) != NSERROR_OK) { + LOG(("Failed inserting '%s'", url)); + fclose(fp); + return NSERROR_NOMEM; + } + + scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME); + fragment_lwc = nsurl_get_component(nsurl, + NSURL_FRAGMENT); + p = urldb_add_path(scheme_lwc, port, h, path_query, + fragment_lwc, nsurl); + if (!p) { + LOG(("Failed inserting '%s'", url)); + fclose(fp); + return NSERROR_NOMEM; + } + nsurl_unref(nsurl); + lwc_string_unref(scheme_lwc); + if (fragment_lwc != NULL) + lwc_string_unref(fragment_lwc); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + if (p) + p->urld.visits = (unsigned int)atoi(s); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + if (p) + p->urld.last_visit = (time_t)atoi(s); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + if (p) + p->urld.type = (content_type)atoi(s); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + 
length = strlen(s) - 1; + if (p && length > 0) { + s[length] = '\0'; + p->urld.title = malloc(length + 1); + if (p->urld.title) + memcpy(p->urld.title, s, length + 1); + } + } + } + + fclose(fp); + LOG(("Successfully loaded URL file")); +#undef MAXIMUM_URL_LENGTH + + return NSERROR_OK; +} + +/* exported interface documented in content/urldb.h */ +nserror urldb_save(const char *filename) +{ + FILE *fp; + int i; + + assert(filename); + + fp = fopen(filename, "w"); + if (!fp) { + LOG(("Failed to open file '%s' for writing", filename)); + return NSERROR_SAVE_FAILED; + } + + /* file format version number */ + fprintf(fp, "%d\n", URL_FILE_VERSION); + + for (i = 0; i != NUM_SEARCH_TREES; i++) { + urldb_save_search_tree(search_trees[i], fp); + } + + fclose(fp); + + return NSERROR_OK; +} + + +/* exported interface documented in content/urldb.h */ +void urldb_set_url_persistence(nsurl *url, bool persist) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return; + + p->persistent = persist; +} + + +/* exported interface documented in content/urldb.h */ +bool urldb_add_url(nsurl *url) +{ + struct host_part *h; + struct path_data *p; + lwc_string *scheme; + lwc_string *port; + lwc_string *host; + lwc_string *fragment; + const char *host_str; + char *path_query = NULL; + size_t len; + bool match; + unsigned int port_int; + + assert(url); + + if (url_bloom == NULL) + url_bloom = bloom_create(BLOOM_SIZE); + + if (url_bloom != NULL) { + uint32_t hash = nsurl_hash(url); + bloom_insert_hash(url_bloom, hash); + } + + /* Copy and merge path/query strings */ + if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) != + NSERROR_OK) { + return false; + } + assert(path_query != NULL); + + scheme = nsurl_get_component(url, NSURL_SCHEME); + if (scheme == NULL) { + free(path_query); + return false; + } + + host = nsurl_get_component(url, NSURL_HOST); + if (host != NULL) { + host_str = lwc_string_data(host); + lwc_string_unref(host); + + } else if 
(lwc_string_isequal(scheme, corestring_lwc_file, &match) == + lwc_error_ok && match == true) { + host_str = "localhost"; + + } else { + lwc_string_unref(scheme); + free(path_query); + return false; + } + + fragment = nsurl_get_component(url, NSURL_FRAGMENT); + + port = nsurl_get_component(url, NSURL_PORT); + if (port != NULL) { + port_int = atoi(lwc_string_data(port)); + lwc_string_unref(port); + } else { + port_int = 0; + } + + /* Get host entry */ + h = urldb_add_host(host_str); + + /* Get path entry */ + p = (h != NULL) ? urldb_add_path(scheme, port_int, h, path_query, + fragment, url) : NULL; + + lwc_string_unref(scheme); + if (fragment != NULL) + lwc_string_unref(fragment); + + return (p != NULL); +} + + +/* exported interface documented in content/urldb.h */ +void urldb_set_url_title(nsurl *url, const char *title) +{ + struct path_data *p; + char *temp; + + assert(url && title); + + p = urldb_find_url(url); + if (!p) + return; + + temp = strdup(title); + if (!temp) + return; + + free(p->urld.title); + p->urld.title = temp; +} + + +/* exported interface documented in content/urldb.h */ +void urldb_set_url_content_type(nsurl *url, content_type type) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return; + + p->urld.type = type; +} + + +/* exported interface documented in content/urldb.h */ +void urldb_update_url_visit_data(nsurl *url) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return; + + p->urld.last_visit = time(NULL); + p->urld.visits++; +} + + +/* exported interface documented in content/urldb.h */ +void urldb_reset_url_visit_data(nsurl *url) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return; + + p->urld.last_visit = (time_t)0; + p->urld.visits = 0; +} + + +/* exported interface documented in content/urldb.h */ +const struct url_data *urldb_get_url_data(nsurl *url) +{ + struct path_data *p; + struct url_internal_data *u; + + assert(url); + 
+ p = urldb_find_url(url); + if (!p) + return NULL; + + u = &p->urld; + + return (const struct url_data *) u; +} + + +/* exported interface documented in content/urldb.h */ +nsurl *urldb_get_url(nsurl *url) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return NULL; + + return p->url; +} + + +/* exported interface documented in content/urldb.h */ +void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth) +{ + struct path_data *p, *pi; + struct host_part *h; + struct prot_space_data *space, *space_alloc; + char *realm_alloc, *auth_alloc; + bool match; + + assert(url && realm && auth); + + /* add url, in case it's missing */ + urldb_add_url(url); + + p = urldb_find_url(url); + + if (!p) + return; + + /* Search for host_part */ + for (pi = p; pi->parent != NULL; pi = pi->parent) + ; + h = (struct host_part *)pi; + + /* Search if given URL belongs to a protection space we already know of. */ + for (space = h->prot_space; space; space = space->next) { + if (!strcmp(space->realm, realm) && + lwc_string_isequal(space->scheme, p->scheme, + &match) == lwc_error_ok && + match == true && + space->port == p->port) + break; + } + + if (space != NULL) { + /* Overrule existing auth. */ + free(space->auth); + space->auth = strdup(auth); + } else { + /* Create a new protection space. 
*/ + space = space_alloc = malloc(sizeof(struct prot_space_data)); + realm_alloc = strdup(realm); + auth_alloc = strdup(auth); + + if (!space_alloc || !realm_alloc || !auth_alloc) { + free(space_alloc); + free(realm_alloc); + free(auth_alloc); + return; + } + + space->scheme = lwc_string_ref(p->scheme); + space->port = p->port; + space->realm = realm_alloc; + space->auth = auth_alloc; + space->next = h->prot_space; + h->prot_space = space; + } + + p->prot_space = space; +} + + +/* exported interface documented in content/urldb.h */ +const char *urldb_get_auth_details(nsurl *url, const char *realm) +{ + struct path_data *p, *p_cur, *p_top; + + assert(url); + + /* add to the db, so our lookup will work */ + urldb_add_url(url); + + p = urldb_find_url(url); + if (!p) + return NULL; + + /* Check for any auth details attached to the path_data node or any of + * its parents. + */ + for (p_cur = p; p_cur != NULL; p_top = p_cur, p_cur = p_cur->parent) { + if (p_cur->prot_space) { + return p_cur->prot_space->auth; + } + } + + /* Only when we have a realm (and canonical root of given URL), we can + * uniquely locate the protection space. + */ + if (realm != NULL) { + const struct host_part *h = (const struct host_part *)p_top; + const struct prot_space_data *space; + bool match; + + /* Search for a possible matching protection space. 
*/ + for (space = h->prot_space; space != NULL; + space = space->next) { + if (!strcmp(space->realm, realm) && + lwc_string_isequal(space->scheme, + p->scheme, &match) == + lwc_error_ok && + match == true && + space->port == p->port) { + p->prot_space = space; + return p->prot_space->auth; + } + } + } + + return NULL; +} + + +/* exported interface documented in content/urldb.h */ +void urldb_set_cert_permissions(nsurl *url, bool permit) +{ + struct path_data *p; + struct host_part *h; + + assert(url); + + /* add url, in case it's missing */ + urldb_add_url(url); + + p = urldb_find_url(url); + if (!p) + return; + + for (; p && p->parent; p = p->parent) + /* do nothing */; + assert(p); + + h = (struct host_part *)p; + + h->permit_invalid_certs = permit; +} + + +/* exported interface documented in content/urldb.h */ +bool urldb_get_cert_permissions(nsurl *url) +{ + struct path_data *p; + const struct host_part *h; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return false; + + for (; p && p->parent; p = p->parent) + /* do nothing */; + assert(p); + + h = (const struct host_part *)p; + + return h->permit_invalid_certs; +} + + +/* exported interface documented in content/urldb.h */ +void urldb_set_thumbnail(nsurl *url, struct bitmap *bitmap) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return; + + if (p->thumb && p->thumb != bitmap) + bitmap_destroy(p->thumb); + + p->thumb = bitmap; +} + + +/* exported interface documented in content/urldb.h */ +struct bitmap *urldb_get_thumbnail(nsurl *url) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return NULL; + + return p->thumb; +} + + +/* exported interface documented in content/urldb.h */ +void urldb_iterate_partial(const char *prefix, + bool (*callback)(nsurl *url, + const struct url_data *data)) +{ + char host[256]; + char buf[260]; /* max domain + "www." 
*/ + const char *slash, *scheme_sep; + struct search_node *tree; + const struct host_part *h; + + assert(prefix && callback); + + /* strip scheme */ + scheme_sep = strstr(prefix, "://"); + if (scheme_sep) + prefix = scheme_sep + 3; + + slash = strchr(prefix, '/'); + tree = urldb_get_search_tree(prefix); + + if (slash) { + /* if there's a slash in the input, then we can + * assume that we're looking for a path */ + snprintf(host, sizeof host, "%.*s", + (int) (slash - prefix), prefix); + + h = urldb_search_find(tree, host); + if (!h) { + int len = slash - prefix; + + if (len <= 3 || strncasecmp(host, "www.", 4) != 0) { + snprintf(buf, sizeof buf, "www.%s", host); + h = urldb_search_find( + search_trees[ST_DN + 'w' - 'a'], + buf); + if (!h) + return; + } else + return; + } + + if (h->paths.children) { + /* Have paths, iterate them */ + urldb_iterate_partial_path(&h->paths, slash + 1, + callback); + } + + } else { + int len = strlen(prefix); + + /* looking for hosts */ + if (!urldb_iterate_partial_host(tree, prefix, callback)) + return; + + if (len <= 3 || strncasecmp(prefix, "www.", 4) != 0) { + /* now look for www.prefix */ + snprintf(buf, sizeof buf, "www.%s", prefix); + if(!urldb_iterate_partial_host( + search_trees[ST_DN + 'w' - 'a'], + buf, callback)) + return; + } + } +} + + +/* exported interface documented in content/urldb.h */ +void urldb_iterate_entries(bool (*callback)(nsurl *url, + const struct url_data *data)) +{ + int i; + + assert(callback); + + for (i = 0; i < NUM_SEARCH_TREES; i++) { + if (!urldb_iterate_entries_host(search_trees[i], + callback, NULL)) + break; + } +} + + +/* exported interface documented in content/urldb.h */ +void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data)) +{ + int i; + + assert(callback); + + for (i = 0; i < NUM_SEARCH_TREES; i++) { + if (!urldb_iterate_entries_host(search_trees[i], + NULL, callback)) + break; + } +} + + +/* exported interface documented in content/urldb.h */ +bool 
urldb_set_cookie(const char *header, nsurl *url, nsurl *referer) +{ + const char *cur = header, *end; + lwc_string *path, *host, *scheme; + nsurl *urlt; + bool match; + + assert(url && header); + + /* Get defragmented URL, as 'urlt' */ + if (nsurl_defragment(url, &urlt) != NSERROR_OK) + return NULL; + + scheme = nsurl_get_component(url, NSURL_SCHEME); + if (scheme == NULL) { + nsurl_unref(urlt); + return false; + } + + path = nsurl_get_component(url, NSURL_PATH); + if (path == NULL) { + lwc_string_unref(scheme); + nsurl_unref(urlt); + return false; + } + + host = nsurl_get_component(url, NSURL_HOST); + if (host == NULL) { + lwc_string_unref(path); + lwc_string_unref(scheme); + nsurl_unref(urlt); + return false; + } + + if (referer) { + lwc_string *rhost; + + /* Ensure that url's host name domain matches + * referer's (4.3.5) */ + rhost = nsurl_get_component(referer, NSURL_HOST); + if (rhost == NULL) { + goto error; + } + + /* Domain match host names */ + if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok && + match == false) { + const char *hptr; + const char *rptr; + const char *dot; + const char *host_data = lwc_string_data(host); + const char *rhost_data = lwc_string_data(rhost); + + /* Ensure neither host nor rhost are IP addresses */ + if (urldb__host_is_ip_address(host_data) || + urldb__host_is_ip_address(rhost_data)) { + /* IP address, so no partial match */ + lwc_string_unref(rhost); + goto error; + } + + /* Not exact match, so try the following: + * + * 1) Find the longest common suffix of host and rhost + * (may be all of host/rhost) + * 2) Discard characters from the start of the suffix + * until the suffix starts with a dot + * (prevents foobar.com matching bar.com) + * 3) Ensure the suffix is non-empty and contains + * embedded dots (to avoid permitting .com as a + * suffix) + * + * Note that the above in no way resembles the + * domain matching algorithm found in RFC2109. + * It does, however, model the real world rather + * more accurately. 
+ */ + + /** \todo In future, we should consult a TLD service + * instead of just looking for embedded dots. + */ + + hptr = host_data + lwc_string_length(host) - 1; + rptr = rhost_data + lwc_string_length(rhost) - 1; + + /* 1 */ + while (hptr >= host_data && rptr >= rhost_data) { + if (*hptr != *rptr) + break; + hptr--; + rptr--; + } + /* Ensure we end up pointing at the start of the + * common suffix. The above loop will exit pointing + * to the byte before the start of the suffix. */ + hptr++; + + /* 2 */ + while (*hptr != '\0' && *hptr != '.') + hptr++; + + /* 3 */ + if (*hptr == '\0' || + (dot = strchr(hptr + 1, '.')) == NULL || + *(dot + 1) == '\0') { + lwc_string_unref(rhost); + goto error; + } + } + + lwc_string_unref(rhost); + } + + end = cur + strlen(cur) - 2 /* Trailing CRLF */; + + do { + struct cookie_internal_data *c; + char *dot; + size_t len; + + c = urldb_parse_cookie(url, &cur); + if (!c) { + /* failed => stop parsing */ + goto error; + } + + /* validate cookie */ + + /* 4.2.2:i Cookie must have NAME and VALUE */ + if (!c->name || !c->value) { + urldb_free_cookie(c); + goto error; + } + + /* 4.3.2:i Cookie path must be a prefix of URL path */ + len = strlen(c->path); + if (len > lwc_string_length(path) || + strncmp(c->path, lwc_string_data(path), + len) != 0) { + urldb_free_cookie(c); + goto error; + } + + /* 4.3.2:ii Cookie domain must contain embedded dots */ + dot = strchr(c->domain + 1, '.'); + if (!dot || *(dot + 1) == '\0') { + /* no embedded dots */ + urldb_free_cookie(c); + goto error; + } + + /* Domain match fetch host with cookie domain */ + if (strcasecmp(lwc_string_data(host), c->domain) != 0) { + int hlen, dlen; + char *domain = c->domain; + + /* c->domain must be a domain cookie here because: + * c->domain is either: + * + specified in the header as a domain cookie + * (non-domain cookies in the header are ignored + * by urldb_parse_cookie / urldb_parse_avpair) + * + defaulted to the URL's host part + * (by urldb_parse_cookie if no 
valid domain was + * specified in the header) + * + * The latter will pass the strcasecmp above, which + * leaves the former (i.e. a domain cookie) + */ + assert(c->domain[0] == '.'); + + /* 4.3.2:iii */ + if (urldb__host_is_ip_address(lwc_string_data(host))) { + /* IP address, so no partial match */ + urldb_free_cookie(c); + goto error; + } + + hlen = lwc_string_length(host); + dlen = strlen(c->domain); + + if (hlen <= dlen && hlen != dlen - 1) { + /* Partial match not possible */ + urldb_free_cookie(c); + goto error; + } + + if (hlen == dlen - 1) { + /* Relax matching to allow + * host a.com to match .a.com */ + domain++; + dlen--; + } + + if (strcasecmp(lwc_string_data(host) + (hlen - dlen), + domain)) { + urldb_free_cookie(c); + goto error; + } + + /* 4.3.2:iv Ensure H contains no dots + * + * If you believe the spec, H should contain no + * dots in _any_ cookie. Unfortunately, however, + * reality differs in that many sites send domain + * cookies of the form .foo.com from hosts such + * as bar.bat.foo.com and then expect domain + * matching to work. Thus we have to do what they + * expect, regardless of any potential security + * implications. 
+ * + * This is what code conforming to the spec would + * look like: + * + * for (int i = 0; i < (hlen - dlen); i++) { + * if (host[i] == '.') { + * urldb_free_cookie(c); + * goto error; + * } + * } + */ + } + + /* Now insert into database */ + if (!urldb_insert_cookie(c, scheme, urlt)) + goto error; + } while (cur < end); + + lwc_string_unref(host); + lwc_string_unref(path); + lwc_string_unref(scheme); + nsurl_unref(urlt); + + return true; + +error: + lwc_string_unref(host); + lwc_string_unref(path); + lwc_string_unref(scheme); + nsurl_unref(urlt); + + return false; +} + + +/* exported interface documented in content/urldb.h */ +char *urldb_get_cookie(nsurl *url, bool include_http_only) +{ + const struct path_data *p, *q; + const struct host_part *h; + lwc_string *path_lwc; + struct cookie_internal_data *c; + int count = 0, version = COOKIE_RFC2965; + struct cookie_internal_data **matched_cookies; + int matched_cookies_size = 20; + int ret_alloc = 4096, ret_used = 1; + const char *path; + char *ret; + lwc_string *scheme; + time_t now; + int i; + bool match; + + assert(url != NULL); + + /* The URL must exist in the db in order to find relevant cookies, since + * we search up the tree from the URL node, and cookies from further + * up also apply. 
*/ + urldb_add_url(url); + + p = urldb_find_url(url); + if (!p) + return NULL; + + scheme = p->scheme; + + matched_cookies = malloc(matched_cookies_size * + sizeof(struct cookie_internal_data *)); + if (!matched_cookies) + return NULL; + +#define GROW_MATCHED_COOKIES \ + do { \ + if (count == matched_cookies_size) { \ + struct cookie_internal_data **temp; \ + temp = realloc(matched_cookies, \ + (matched_cookies_size + 20) * \ + sizeof(struct cookie_internal_data *)); \ + \ + if (temp == NULL) { \ + free(ret); \ + free(matched_cookies); \ + return NULL; \ + } \ + \ + matched_cookies = temp; \ + matched_cookies_size += 20; \ + } \ + } while(0) + + ret = malloc(ret_alloc); + if (!ret) { + free(matched_cookies); + return NULL; + } + + ret[0] = '\0'; + + path_lwc = nsurl_get_component(url, NSURL_PATH); + if (path_lwc == NULL) { + free(ret); + free(matched_cookies); + return NULL; + } + path = lwc_string_data(path_lwc); + lwc_string_unref(path_lwc); + + now = time(NULL); + + if (*(p->segment) != '\0') { + /* Match exact path, unless directory, when prefix matching + * will handle this case for us. */ + for (q = p->parent->children; q; q = q->next) { + if (strcmp(q->segment, p->segment)) + continue; + + /* Consider all cookies associated with + * this exact path */ + for (c = q->cookies; c; c = c->next) { + if (c->expires != -1 && c->expires < now) + /* cookie has expired => ignore */ + continue; + + if (c->secure && lwc_string_isequal( + q->scheme, + corestring_lwc_https, + &match) && + match == false) + /* secure cookie for insecure host. 
+ * ignore */ + continue; + + if (c->http_only && !include_http_only) + /* Ignore HttpOnly */ + continue; + + matched_cookies[count++] = c; + + GROW_MATCHED_COOKIES; + + if (c->version < (unsigned int)version) + version = c->version; + + c->last_used = now; + + cookie_manager_add((struct cookie_data *)c); + } + } + } + + /* Now consider cookies whose paths prefix-match ours */ + for (p = p->parent; p; p = p->parent) { + /* Find directory's path entry(ies) */ + /* There are potentially multiple due to differing schemes */ + for (q = p->children; q; q = q->next) { + if (*(q->segment) != '\0') + continue; + + for (c = q->cookies; c; c = c->next) { + if (c->expires != -1 && c->expires < now) + /* cookie has expired => ignore */ + continue; + + if (c->secure && lwc_string_isequal( + q->scheme, + corestring_lwc_https, + &match) && + match == false) + /* Secure cookie for insecure server + * => ignore */ + continue; + + matched_cookies[count++] = c; + + GROW_MATCHED_COOKIES; + + if (c->version < (unsigned int) version) + version = c->version; + + c->last_used = now; + + cookie_manager_add((struct cookie_data *)c); + } + } + + if (!p->parent) { + /* No parent, so bail here. This can't go in + * the loop exit condition as we also want to + * process the top-level node. + * + * If p->parent is NULL then p->cookies are + * the domain cookies and thus we don't even + * try matching against them. 
+ */ + break; + } + + /* Consider p itself - may be the result of Path=/foo */ + for (c = p->cookies; c; c = c->next) { + if (c->expires != -1 && c->expires < now) + /* cookie has expired => ignore */ + continue; + + /* Ensure cookie path is a prefix of the resource */ + if (strncmp(c->path, path, strlen(c->path)) != 0) + /* paths don't match => ignore */ + continue; + + if (c->secure && lwc_string_isequal(p->scheme, + corestring_lwc_https, + &match) && + match == false) + /* Secure cookie for insecure server + * => ignore */ + continue; + + matched_cookies[count++] = c; + + GROW_MATCHED_COOKIES; + + if (c->version < (unsigned int) version) + version = c->version; + + c->last_used = now; + + cookie_manager_add((struct cookie_data *)c); + } + + } + + /* Finally consider domain cookies for hosts which domain match ours */ + for (h = (const struct host_part *)p; h && h != &db_root; + h = h->parent) { + for (c = h->paths.cookies; c; c = c->next) { + if (c->expires != -1 && c->expires < now) + /* cookie has expired => ignore */ + continue; + + /* Ensure cookie path is a prefix of the resource */ + if (strncmp(c->path, path, strlen(c->path)) != 0) + /* paths don't match => ignore */ + continue; + + if (c->secure && lwc_string_isequal(scheme, + corestring_lwc_https, + &match) && + match == false) + /* secure cookie for insecure host. 
ignore */ + continue; + + matched_cookies[count++] = c; + + GROW_MATCHED_COOKIES; + + if (c->version < (unsigned int)version) + version = c->version; + + c->last_used = now; + + cookie_manager_add((struct cookie_data *)c); + } + } + + if (count == 0) { + /* No cookies found */ + free(ret); + free(matched_cookies); + return NULL; + } + + /* and build output string */ + if (version > COOKIE_NETSCAPE) { + sprintf(ret, "$Version=%d", version); + ret_used = strlen(ret) + 1; + } + + for (i = 0; i < count; i++) { + if (!urldb_concat_cookie(matched_cookies[i], version, + &ret_used, &ret_alloc, &ret)) { + free(ret); + free(matched_cookies); + return NULL; + } + } + + if (version == COOKIE_NETSCAPE) { + /* Old-style cookies => no version & skip "; " */ + memmove(ret, ret + 2, ret_used - 2); + ret_used -= 2; + } + + /* Now, shrink the output buffer to the required size */ + { + char *temp = realloc(ret, ret_used); + if (!temp) { + free(ret); + free(matched_cookies); + return NULL; + } + + ret = temp; + } + + free(matched_cookies); + + return ret; + +#undef GROW_MATCHED_COOKIES +} + + +/* exported interface documented in content/urldb.h */ +void urldb_delete_cookie(const char *domain, const char *path, + const char *name) +{ + urldb_delete_cookie_hosts(domain, path, name, &db_root); +} + + +/* exported interface documented in content/urldb.h */ void urldb_load_cookies(const char *filename) { FILE *fp; @@ -3770,7 +3766,7 @@ void urldb_load_cookies(const char *filename) if (strncasecmp(s, "Version:", 8) == 0) { FIND_T; SKIP_T; loaded_cookie_file_version = atoi(p); - if (loaded_cookie_file_version < + if (loaded_cookie_file_version < MIN_COOKIE_FILE_VERSION) { LOG(("Unsupported Cookie file version")); break; @@ -3882,84 +3878,12 @@ void urldb_load_cookies(const char *filename) fclose(fp); } -/** - * Delete a cookie - * - * \param domain The cookie's domain - * \param path The cookie's path - * \param name The cookie's name - */ -void urldb_delete_cookie(const char *domain, const 
char *path, - const char *name) -{ - urldb_delete_cookie_hosts(domain, path, name, &db_root); -} -void urldb_delete_cookie_hosts(const char *domain, const char *path, - const char *name, struct host_part *parent) -{ - struct host_part *h; - assert(parent); - - urldb_delete_cookie_paths(domain, path, name, &parent->paths); - - for (h = parent->children; h; h = h->next) - urldb_delete_cookie_hosts(domain, path, name, h); -} - -void urldb_delete_cookie_paths(const char *domain, const char *path, - const char *name, struct path_data *parent) -{ - struct cookie_internal_data *c; - struct path_data *p = parent; - - assert(parent); - - do { - for (c = p->cookies; c; c = c->next) { - if (strcmp(c->domain, domain) == 0 && - strcmp(c->path, path) == 0 && - strcmp(c->name, name) == 0) { - if (c->prev) - c->prev->next = c->next; - else - p->cookies = c->next; - - if (c->next) - c->next->prev = c->prev; - else - p->cookies_end = c->prev; - - urldb_free_cookie(c); - - return; - } - } - - if (p->children) { - p = p->children; - } else { - while (p != parent) { - if (p->next != NULL) { - p = p->next; - break; - } - - p = p->parent; - } - } - } while(p != parent); -} - -/** - * Save persistent cookies to file - * - * \param filename Path to save to - */ +/* exported interface documented in content/urldb.h */ void urldb_save_cookies(const char *filename) { FILE *fp; - int cookie_file_version = max(loaded_cookie_file_version, + int cookie_file_version = max(loaded_cookie_file_version, COOKIE_FILE_VERSION); assert(filename); @@ -3988,253 +3912,173 @@ void urldb_save_cookies(const char *filename) fclose(fp); } -/** - * Save a host subtree's cookies - * - * \param fp File pointer to write to - * \param parent Parent host - */ -void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent) + +/* exported interface documented in content/urldb.h */ +void urldb_dump(void) { - struct host_part *h; - assert(fp && parent); - - urldb_save_cookie_paths(fp, &parent->paths); - - for (h = 
parent->children; h; h = h->next) - urldb_save_cookie_hosts(fp, h); -} - -/** - * Save a path subtree's cookies - * - * \param fp File pointer to write to - * \param parent Parent path - */ -void urldb_save_cookie_paths(FILE *fp, struct path_data *parent) -{ - struct path_data *p = parent; - time_t now = time(NULL); - - assert(fp && parent); - - do { - if (p->cookies != NULL) { - struct cookie_internal_data *c; - - for (c = p->cookies; c != NULL; c = c->next) { - if (c->expires == -1 || c->expires < now) - /* Skip expired & session cookies */ - continue; - - fprintf(fp, - "%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t" - "%s\t%s\t%d\t%s\t%s\t%s\n", - c->version, c->domain, - c->domain_from_set, c->path, - c->path_from_set, c->secure, - c->http_only, - (int)c->expires, (int)c->last_used, - c->no_destroy, c->name, c->value, - c->value_was_quoted, - p->scheme ? lwc_string_data(p->scheme) : - "unused", - p->url ? nsurl_access(p->url) : - "unused", - c->comment ? c->comment : ""); - } - } - - if (p->children != NULL) { - p = p->children; - } else { - while (p != parent) { - if (p->next != NULL) { - p = p->next; - break; - } - - p = p->parent; - } - } - } while (p != parent); -} - - -/** - * Destroy urldb - */ -void urldb_destroy(void) -{ - struct host_part *a, *b; int i; - /* Clean up search trees */ - for (i = 0; i < NUM_SEARCH_TREES; i++) { - if (search_trees[i] != &empty) - urldb_destroy_search_tree(search_trees[i]); - } + urldb_dump_hosts(&db_root); - /* And database */ - for (a = db_root.children; a; a = b) { - b = a->next; - urldb_destroy_host_tree(a); - } - - /* And the bloom filter */ - if (url_bloom != NULL) - bloom_destroy(url_bloom); + for (i = 0; i != NUM_SEARCH_TREES; i++) + urldb_dump_search(search_trees[i], 0); } -/** - * Destroy a host tree - * - * \param root Root node of tree to destroy - */ -void urldb_destroy_host_tree(struct host_part *root) + +/* exported interface documented in content/urldb.h */ +struct host_part *urldb_add_host(const char *host) { - 
struct host_part *a, *b; - struct path_data *p, *q; - struct prot_space_data *s, *t; + struct host_part *d = (struct host_part *) &db_root, *e; + struct search_node *s; + char buf[256]; /* 256 bytes is sufficient - domain names are + * limited to 255 chars. */ + char *part; - /* Destroy children */ - for (a = root->children; a; a = b) { - b = a->next; - urldb_destroy_host_tree(a); - } + assert(host); - /* Now clean up paths */ - for (p = root->paths.children; p; p = q) { - q = p->next; - urldb_destroy_path_tree(p); - } + if (urldb__host_is_ip_address(host)) { + /* Host is an IP, so simply add as TLD */ - /* Root path */ - urldb_destroy_path_node_content(&root->paths); + /* Check for existing entry */ + for (e = d->children; e; e = e->next) + if (strcasecmp(host, e->part) == 0) + /* found => return it */ + return e; - /* Proctection space data */ - for (s = root->prot_space; s; s = t) { - t = s->next; - urldb_destroy_prot_space(s); - } + d = urldb_add_host_node(host, d); - /* And ourselves */ - free(root->part); - free(root); -} - -/** - * Destroy a path tree - * - * \param root Root node of tree to destroy - */ -void urldb_destroy_path_tree(struct path_data *root) -{ - struct path_data *p = root; - - do { - if (p->children != NULL) { - p = p->children; + s = urldb_search_insert(search_trees[ST_IP], d); + if (!s) { + /* failed */ + d = NULL; } else { - struct path_data *q = p; + search_trees[ST_IP] = s; + } - while (p != root) { - if (p->next != NULL) { - p = p->next; + return d; + } + + /* Copy host string, so we can corrupt it */ + strncpy(buf, host, sizeof buf); + buf[sizeof buf - 1] = '\0'; + + /* Process FQDN segments backwards */ + do { + part = strrchr(buf, '.'); + if (!part) { + /* last segment */ + /* Check for existing entry */ + for (e = d->children; e; e = e->next) + if (strcasecmp(buf, e->part) == 0) break; - } - p = p->parent; - - urldb_destroy_path_node_content(q); - free(q); - - q = p; + if (e) { + d = e; + } else { + d = urldb_add_host_node(buf, d); 
} - urldb_destroy_path_node_content(q); - free(q); + /* And insert into search tree */ + if (d) { + struct search_node **r; + + r = urldb_get_search_tree_direct(buf); + s = urldb_search_insert(*r, d); + if (!s) { + /* failed */ + d = NULL; + } else { + *r = s; + } + } + break; } - } while (p != root); + + /* Check for existing entry */ + for (e = d->children; e; e = e->next) + if (strcasecmp(part + 1, e->part) == 0) + break; + + d = e ? e : urldb_add_host_node(part + 1, d); + if (!d) + break; + + *part = '\0'; + } while (1); + + return d; } -/** - * Destroy the contents of a path node - * - * \param node Node to destroy contents of (does not destroy node) - */ -void urldb_destroy_path_node_content(struct path_data *node) + +/* exported interface documented in content/urldb.h */ +struct path_data * +urldb_add_path(lwc_string *scheme, + unsigned int port, + const struct host_part *host, + char *path_query, + lwc_string *fragment, + nsurl *url) { - struct cookie_internal_data *a, *b; - unsigned int i; + struct path_data *d, *e; + char *buf = path_query; + char *segment, *slash; + bool match; - if (node->url != NULL) - nsurl_unref(node->url); + assert(scheme && host && url); - if (node->scheme != NULL) - lwc_string_unref(node->scheme); + d = (struct path_data *) &host->paths; - free(node->segment); - for (i = 0; i < node->frag_cnt; i++) - free(node->fragment[i]); - free(node->fragment); + /* skip leading '/' */ + segment = buf; + if (*segment == '/') + segment++; - if (node->thumb) - bitmap_destroy(node->thumb); + /* Process path segments */ + do { + slash = strchr(segment, '/'); + if (!slash) { + /* last segment */ + /* look for existing entry */ + for (e = d->children; e; e = e->next) + if (strcmp(segment, e->segment) == 0 && + lwc_string_isequal(scheme, + e->scheme, &match) == + lwc_error_ok && + match == true && + e->port == port) + break; - free(node->urld.title); + d = e ? 
urldb_add_path_fragment(e, fragment) : + urldb_add_path_node(scheme, port, + segment, fragment, d); + break; + } - for (a = node->cookies; a; a = b) { - b = a->next; - urldb_destroy_cookie(a); + *slash = '\0'; + + /* look for existing entry */ + for (e = d->children; e; e = e->next) + if (strcmp(segment, e->segment) == 0 && + lwc_string_isequal(scheme, e->scheme, + &match) == lwc_error_ok && + match == true && + e->port == port) + break; + + d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d); + if (!d) + break; + + segment = slash + 1; + } while (1); + + free(path_query); + + if (d && !d->url) { + /* Insert defragmented URL */ + if (nsurl_defragment(url, &d->url) != NSERROR_OK) + return NULL; } + + return d; } - -/** - * Destroy a cookie node - * - * \param c Cookie to destroy - */ -void urldb_destroy_cookie(struct cookie_internal_data *c) -{ - free(c->name); - free(c->value); - free(c->comment); - free(c->domain); - free(c->path); - - free(c); -} - -/** - * Destroy protection space data - * - * \param space Protection space to destroy - */ -void urldb_destroy_prot_space(struct prot_space_data *space) -{ - lwc_string_unref(space->scheme); - free(space->realm); - free(space->auth); - - free(space); -} - - -/** - * Destroy a search tree - * - * \param root Root node of tree to destroy - */ -void urldb_destroy_search_tree(struct search_node *root) -{ - /* Destroy children */ - if (root->left != &empty) - urldb_destroy_search_tree(root->left); - if (root->right != &empty) - urldb_destroy_search_tree(root->right); - - /* And destroy ourselves */ - free(root); -} - diff --git a/content/urldb.h b/content/urldb.h index c0fece24e..d7ca8b0f8 100644 --- a/content/urldb.h +++ b/content/urldb.h @@ -64,62 +64,262 @@ struct cookie_data { struct bitmap; -/* Destruction */ +/** + * Destroy urldb + */ void urldb_destroy(void); + /* Persistence support */ + +/** + * Import an URL database from file, replacing any existing database + * + * \param filename Name of file 
containing data + */ nserror urldb_load(const char *filename); -void urldb_save(const char *filename); + +/** + * Export the current database to file + * + * \param filename Name of file to export to + */ +nserror urldb_save(const char *filename); + +/** + * Set the cross-session persistence of the entry for an URL + * + * \param url Absolute URL to persist + * \param persist True to persist, false otherwise + */ void urldb_set_url_persistence(nsurl *url, bool persist); + /* URL insertion */ + +/** + * Insert an URL into the database + * + * \param url Absolute URL to insert + * \return true on success, false otherwise + */ bool urldb_add_url(nsurl *url); /* URL data modification / lookup */ + +/** + * Set an URL's title string, replacing any existing one + * + * \param url The URL to look for + * \param title The title string to use (copied) + */ void urldb_set_url_title(nsurl *url, const char *title); + +/** + * Set an URL's content type + * + * \param url The URL to look for + * \param type The type to set + */ void urldb_set_url_content_type(nsurl *url, content_type type); + +/** + * Update an URL's visit data + * + * \param url The URL to update + */ void urldb_update_url_visit_data(nsurl *url); + +/** + * Reset an URL's visit statistics + * + * \param url The URL to reset + */ void urldb_reset_url_visit_data(nsurl *url); + +/** + * Find data for an URL. 
+ * + * \param url Absolute URL to look for + * \return Pointer to result struct, or NULL + */ const struct url_data *urldb_get_url_data(nsurl *url); + +/** + * Extract an URL from the db + * + * \param url URL to extract + * \return Pointer to database's copy of URL or NULL if not found + */ nsurl *urldb_get_url(nsurl *url); + /* Authentication modification / lookup */ -void urldb_set_auth_details(nsurl *url, const char *realm, - const char *auth); + +/** + * Set authentication data for an URL + * + * \param url The URL to consider + * \param realm The authentication realm + * \param auth The authentication details (in the form username:password) + */ +void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth); + +/** + * Look up authentication details in database + * + * \param url Absolute URL to search for + * \param realm When non-NULL, the realm used to determine the protection + * space if that has not already been done for the given URL. + * \return Pointer to authentication details, or NULL if not found + */ const char *urldb_get_auth_details(nsurl *url, const char *realm); + /* SSL certificate permissions */ + +/** + * Set certificate verification permissions + * + * \param url URL to consider + * \param permit Set to true to allow invalid certificates + */ void urldb_set_cert_permissions(nsurl *url, bool permit); + +/** + * Retrieve certificate verification permissions from database + * + * \param url Absolute URL to search for + * \return true to permit connections to hosts with invalid certificates, + * false otherwise.
+ */ bool urldb_get_cert_permissions(nsurl *url); + /* Thumbnail handling */ + +/** + * Set thumbnail for URL, replacing any existing thumbnail + * + * \param url Absolute URL to consider + * \param bitmap Opaque pointer to thumbnail data, or NULL to invalidate + */ void urldb_set_thumbnail(nsurl *url, struct bitmap *bitmap); + +/** + * Retrieve thumbnail data for given URL + * + * \param url Absolute URL to search for + * \return Pointer to thumbnail data, or NULL if not found. + */ struct bitmap *urldb_get_thumbnail(nsurl *url); + /* URL completion */ + +/** + * Iterate over entries in the database which match the given prefix + * + * \param prefix Prefix to match + * \param callback Callback function + */ void urldb_iterate_partial(const char *prefix, - bool (*callback)(nsurl *url, - const struct url_data *data)); + bool (*callback)(nsurl *url, const struct url_data *data)); + /* Iteration */ + +/** + * Iterate over all entries in database + * + * \param callback Function to call back for each entry + */ void urldb_iterate_entries(bool (*callback)(nsurl *url, const struct url_data *data)); + +/** + * Iterate over all cookies in database + * + * \param callback Function to call back for each entry + */ void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *cookie)); -/* Debug */ -void urldb_dump(void); /* Cookies */ + +/** + * Parse Set-Cookie header and insert cookie(s) into database + * + * \param header Header to parse, with Set-Cookie: stripped + * \param url URL being fetched + * \param referer Referring resource, or 0 for verifiable transaction + * \return true on success, false otherwise + */ bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer); + +/** + * Retrieve cookies for an URL + * + * \param url URL being fetched + * \param include_http_only Whether to include HTTP(S) only cookies.
+ * \return Cookies string for libcurl (on heap), or NULL on error/no cookies + */ char *urldb_get_cookie(nsurl *url, bool include_http_only); + +/** + * Delete a cookie + * + * \param domain The cookie's domain + * \param path The cookie's path + * \param name The cookie's name + */ void urldb_delete_cookie(const char *domain, const char *path, const char *name); + +/** + * Load a cookie file into the database + * + * \param filename File to load + */ void urldb_load_cookies(const char *filename); + +/** + * Save persistent cookies to file + * + * \param filename Path to save to + */ void urldb_save_cookies(const char *filename); +/* Debug */ + +/** + * Dump URL database to stderr + */ +void urldb_dump(void); + + /* test harness only */ + +/** + * Add a host to the database, creating any intermediate entries + * + * \param host Hostname to add + * \return Pointer to leaf node, or NULL on memory exhaustion + */ struct host_part *urldb_add_host(const char *host); + +/** + * Add a path to the database, creating any intermediate entries + * + * \param scheme URL scheme associated with path + * \param port Port number on host associated with path + * \param host Host tree node to attach to + * \param path_query Absolute path plus query to add (freed) + * \param fragment URL fragment, or NULL + * \param url URL (fragment ignored) + * \return Pointer to leaf node, or NULL on memory exhaustion + */ struct path_data *urldb_add_path(lwc_string *scheme, unsigned int port, const struct host_part *host, char *path_query, lwc_string *fragment, nsurl *url);