2005-02-03 16:18:22 +03:00
|
|
|
/*
|
|
|
|
* This file is part of NetSurf, http://netsurf.sourceforge.net/
|
|
|
|
* Licensed under the GNU General Public License,
|
|
|
|
* http://www.opensource.org/licenses/gpl-license
|
|
|
|
* Copyright 2005 Richard Wilson <info@tinct.net>
|
|
|
|
*/
|
|
|
|
|
|
|
|
/** \file
|
|
|
|
* Central repository for URL data (implementation).
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
#include <stdbool.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2005-12-31 07:27:53 +03:00
|
|
|
#include <time.h>
|
2005-02-03 16:18:22 +03:00
|
|
|
#include "netsurf/content/url_store.h"
|
2006-01-02 16:18:32 +03:00
|
|
|
#include "netsurf/image/bitmap.h"
|
2005-12-31 07:27:53 +03:00
|
|
|
#include "netsurf/desktop/options.h"
|
2005-06-23 21:22:28 +04:00
|
|
|
#ifdef riscos
|
|
|
|
#include "netsurf/riscos/bitmap.h"
|
|
|
|
#endif
|
2005-02-03 16:18:22 +03:00
|
|
|
#include "netsurf/utils/log.h"
|
2005-12-31 07:27:53 +03:00
|
|
|
#include "netsurf/utils/url.h"
|
|
|
|
#include "netsurf/utils/utils.h"
|
2005-02-03 16:18:22 +03:00
|
|
|
|
|
|
|
|
|
|
|
/* Number of list entries whose stored length differs from the search key
 * that the lookup loops skip before forcing a full string comparison. */
#define ITERATIONS_BEFORE_TEST 32

/* Size of the line buffer used by url_store_load(); url_store_save() does
 * not write URLs that are too long for it. */
#define MAXIMUM_URL_LENGTH 1024

/* Head of the hostname list. */
struct hostname_data *url_store_hostnames = NULL;

/* Most recent hostname returned by url_store_find_hostname(); also used by
 * url_store_load() as the tail of the list while appending hostnames. */
static struct hostname_data *last_hostname_found = NULL;

/* State for an in-progress url_store_match() sequence. It is set up on the
 * first call of a sequence (when *reference is NULL) and reused afterwards. */
static char *current_match_scheme;
static int current_match_scheme_length;
static char *current_match_hostname;
static int current_match_hostname_length;
static size_t current_match_url_length;
static bool current_match_www_test;

static struct hostname_data *url_store_find_hostname(const char *url);
static struct hostname_data *url_store_match_hostname(
		struct hostname_data *previous);
|
2005-02-03 16:18:22 +03:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the hostname data for the specified URL. If no hostname
|
|
|
|
* data is currently available then it is created.
|
|
|
|
*
|
2006-01-02 16:18:32 +03:00
|
|
|
* \param url the url to find hostname data for
|
|
|
|
* \return the current hostname data, or NULL if memory exhausted
|
2005-02-03 16:18:22 +03:00
|
|
|
*/
|
2006-01-02 16:18:32 +03:00
|
|
|
struct hostname_data *url_store_find_hostname(const char *url)
|
|
|
|
{
|
2006-01-05 02:03:00 +03:00
|
|
|
struct hostname_data *first = url_store_hostnames;
|
2005-02-03 16:18:22 +03:00
|
|
|
struct hostname_data *search;
|
|
|
|
struct hostname_data *result;
|
|
|
|
url_func_result res;
|
2006-01-03 02:03:07 +03:00
|
|
|
char *hostname = NULL;
|
2005-02-03 16:18:22 +03:00
|
|
|
int hostname_length;
|
|
|
|
int compare;
|
|
|
|
int fast_exit_counter = ITERATIONS_BEFORE_TEST;
|
2006-01-05 05:05:34 +03:00
|
|
|
const char *host_test;
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
assert(url);
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2006-01-05 02:03:00 +03:00
|
|
|
/* as the URL is normalised, we optimise the hostname finding for http:// */
|
|
|
|
if (!strncmp("http://", url, 7)) {
|
2006-01-05 05:05:34 +03:00
|
|
|
/* check for duplicate hostname calls */
|
2006-01-05 02:03:00 +03:00
|
|
|
if ((last_hostname_found) &&
|
|
|
|
(!strncmp(last_hostname_found->hostname, url + 7,
|
|
|
|
last_hostname_found->hostname_length))) {
|
|
|
|
/* ensure it isn't comparing 'foo.com' to 'foo.com.au' etc */
|
|
|
|
if (url[last_hostname_found->hostname_length + 7] != '.')
|
|
|
|
return last_hostname_found;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check for a hostname match */
|
2006-01-05 05:05:34 +03:00
|
|
|
for (host_test = url + 7;
|
|
|
|
((*host_test > 32) && (*host_test != '/'));
|
|
|
|
*host_test++);
|
2006-01-05 02:03:00 +03:00
|
|
|
hostname_length = host_test - url - 7;
|
|
|
|
host_test = url + 7;
|
|
|
|
if ((last_hostname_found) &&
|
|
|
|
(strncmp(host_test,
|
|
|
|
last_hostname_found->hostname,
|
|
|
|
hostname_length) > 0))
|
|
|
|
first = last_hostname_found;
|
|
|
|
for (search = first; search; search = search->next) {
|
|
|
|
if (search->hostname_length == hostname_length) {
|
|
|
|
compare = strncmp(host_test, search->hostname,
|
|
|
|
hostname_length);
|
|
|
|
if (compare == 0) {
|
|
|
|
last_hostname_found = search;
|
|
|
|
return search;
|
|
|
|
} else if (compare < 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2006-01-05 05:05:34 +03:00
|
|
|
|
|
|
|
/* allocate a new hostname */
|
2006-01-05 02:03:00 +03:00
|
|
|
hostname = malloc(hostname_length + 1);
|
|
|
|
if (!hostname)
|
2005-06-23 21:22:28 +04:00
|
|
|
return NULL;
|
2006-01-25 02:04:07 +03:00
|
|
|
memcpy(hostname, host_test, hostname_length);
|
2006-01-05 02:03:00 +03:00
|
|
|
hostname[hostname_length] = '\0';
|
|
|
|
} else {
|
|
|
|
/* no quick match found, fallback */
|
|
|
|
res = url_host(url, &hostname);
|
|
|
|
switch (res) {
|
|
|
|
case URL_FUNC_OK:
|
|
|
|
break;
|
|
|
|
case URL_FUNC_NOMEM:
|
2006-01-03 02:03:07 +03:00
|
|
|
return NULL;
|
2006-01-05 02:03:00 +03:00
|
|
|
case URL_FUNC_FAILED:
|
|
|
|
hostname = strdup("file:/"); /* for 'file:/' */
|
|
|
|
if (!hostname)
|
|
|
|
return NULL;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(0);
|
|
|
|
}
|
|
|
|
hostname_length = strlen(hostname);
|
2005-06-23 21:22:28 +04:00
|
|
|
}
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* try to find a matching hostname fairly quickly */
|
2006-01-05 02:03:00 +03:00
|
|
|
if ((last_hostname_found) &&
|
|
|
|
(strcmp(hostname, last_hostname_found->hostname) > 0))
|
|
|
|
first = last_hostname_found;
|
|
|
|
for (search = first; search; search = search->next) {
|
2005-02-03 16:18:22 +03:00
|
|
|
if ((fast_exit_counter <= 0) ||
|
|
|
|
(search->hostname_length == hostname_length)) {
|
|
|
|
compare = strcmp(hostname, search->hostname);
|
|
|
|
if (compare == 0) {
|
|
|
|
free(hostname);
|
2006-01-03 02:03:07 +03:00
|
|
|
last_hostname_found = search;
|
2005-02-03 16:18:22 +03:00
|
|
|
return search;
|
|
|
|
} else if (compare < 0)
|
|
|
|
break;
|
|
|
|
fast_exit_counter = ITERATIONS_BEFORE_TEST;
|
|
|
|
} else {
|
|
|
|
fast_exit_counter--;
|
|
|
|
}
|
|
|
|
}
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* no hostname is available: create a new one */
|
2006-01-02 16:18:32 +03:00
|
|
|
result = malloc(sizeof *result);
|
|
|
|
if (!result) {
|
|
|
|
free(hostname);
|
2005-02-03 16:18:22 +03:00
|
|
|
return NULL;
|
2006-01-02 16:18:32 +03:00
|
|
|
}
|
2005-02-03 16:18:22 +03:00
|
|
|
result->hostname = hostname;
|
|
|
|
result->hostname_length = hostname_length;
|
2006-01-02 16:18:32 +03:00
|
|
|
result->url = 0;
|
|
|
|
result->previous = 0;
|
|
|
|
result->next = 0;
|
2006-01-03 02:03:07 +03:00
|
|
|
last_hostname_found = result;
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* simple case: no current hostnames */
|
|
|
|
if (!url_store_hostnames) {
|
|
|
|
url_store_hostnames = result;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* worst case scenario: the place we need to link is within the last
|
|
|
|
* section of the hostname list so we have no reference to work back
|
|
|
|
* from. rather than slowing with the very common case of searching,
|
|
|
|
* we take a speed hit for this case and simply move to the very end
|
|
|
|
* of the hostname list ready to work backwards. */
|
|
|
|
if (!search)
|
|
|
|
for (search = url_store_hostnames; search->next;
|
2006-01-02 16:18:32 +03:00
|
|
|
search = search->next)
|
|
|
|
;
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* we can now simply scan backwards as we know roughly where we need
|
|
|
|
* to link to (we either had an early exit from the searching so we
|
|
|
|
* know we're in the block following where we need to link, or we're
|
|
|
|
* at the very end of the list as we were in the last block.) */
|
|
|
|
while ((search) && (strcmp(hostname, search->hostname) < 0))
|
|
|
|
search = search->previous;
|
|
|
|
|
|
|
|
/* simple case: our new hostname is the first in the list */
|
|
|
|
if (!search) {
|
|
|
|
result->next = url_store_hostnames;
|
|
|
|
url_store_hostnames->previous = result;
|
|
|
|
url_store_hostnames = result;
|
|
|
|
return result;
|
|
|
|
}
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* general case: link in after the found hostname */
|
|
|
|
result->previous = search;
|
|
|
|
result->next = search->next;
|
|
|
|
if (search->next)
|
|
|
|
search->next->previous = result;
|
|
|
|
search->next = result;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the url data for the specified URL. If no url
|
|
|
|
* data is currently available then it is created.
|
|
|
|
*
|
2006-01-02 16:18:32 +03:00
|
|
|
* \param url a normalized url to find hostname data for
|
|
|
|
* \return the current hostname data, or NULL if memory exhausted
|
2005-02-03 16:18:22 +03:00
|
|
|
*/
|
|
|
|
struct url_content *url_store_find(const char *url) {
|
|
|
|
struct hostname_data *hostname_data;
|
|
|
|
struct url_data *search;
|
|
|
|
struct url_data *result;
|
2006-01-02 16:18:32 +03:00
|
|
|
size_t url_length;
|
2005-02-03 16:18:22 +03:00
|
|
|
int compare;
|
|
|
|
int fast_exit_counter = ITERATIONS_BEFORE_TEST;
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
assert(url);
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* find the corresponding hostname data */
|
|
|
|
hostname_data = url_store_find_hostname(url);
|
|
|
|
if (!hostname_data)
|
|
|
|
return NULL;
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* move to the start of the leafname */
|
|
|
|
url_length = strlen(url);
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* try to find a matching url fairly quickly */
|
|
|
|
for (search = hostname_data->url; search; search = search->next) {
|
|
|
|
if ((fast_exit_counter <= 0) ||
|
2005-02-07 17:28:43 +03:00
|
|
|
(search->data.url_length == url_length)) {
|
|
|
|
compare = strcmp(url, search->data.url);
|
2005-02-03 16:18:22 +03:00
|
|
|
if (compare == 0)
|
|
|
|
return &search->data;
|
|
|
|
else if (compare < 0)
|
|
|
|
break;
|
|
|
|
fast_exit_counter = ITERATIONS_BEFORE_TEST;
|
|
|
|
} else {
|
|
|
|
fast_exit_counter--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* no URL is available: create a new one */
|
2005-10-02 03:27:22 +04:00
|
|
|
result = calloc(1, sizeof(struct url_data));
|
2005-02-03 16:18:22 +03:00
|
|
|
if (!result)
|
|
|
|
return NULL;
|
2005-02-07 17:28:43 +03:00
|
|
|
result->data.url = malloc(url_length + 1);
|
|
|
|
if (!result->data.url) {
|
2005-02-03 16:18:22 +03:00
|
|
|
free(result);
|
|
|
|
return NULL;
|
|
|
|
}
|
2006-01-25 02:04:07 +03:00
|
|
|
memcpy(result->data.url, url, url_length + 1);
|
2005-02-07 17:28:43 +03:00
|
|
|
result->data.url_length = url_length;
|
2005-02-03 16:18:22 +03:00
|
|
|
result->parent = hostname_data;
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* simple case: no current URLs */
|
|
|
|
if (!hostname_data->url) {
|
|
|
|
hostname_data->url = result;
|
|
|
|
return &result->data;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* worst case scenario: the place we need to link is within the last
|
|
|
|
* section of the URL list so we have no reference to work back
|
|
|
|
* from. rather than slowing with the very common case of searching,
|
|
|
|
* we take a speed hit for this case and simply move to the very end
|
|
|
|
* of the URL list ready to work backwards. */
|
|
|
|
if (!search)
|
|
|
|
for (search = hostname_data->url; search->next;
|
2006-01-02 16:18:32 +03:00
|
|
|
search = search->next)
|
|
|
|
;
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* we can now simply scan backwards as we know roughly where we need
|
|
|
|
* to link to (we either had an early exit from the searching so we
|
|
|
|
* know we're in the block following where we need to link, or we're
|
|
|
|
* at the very end of the list as we were in the last block.) */
|
2005-02-07 17:28:43 +03:00
|
|
|
while ((search) && (strcmp(url, search->data.url) < 0))
|
2005-02-03 16:18:22 +03:00
|
|
|
search = search->previous;
|
|
|
|
|
|
|
|
/* simple case: our new hostname is the first in the list */
|
|
|
|
if (!search) {
|
|
|
|
result->next = hostname_data->url;
|
|
|
|
hostname_data->url->previous = result;
|
|
|
|
hostname_data->url = result;
|
|
|
|
return &result->data;
|
|
|
|
}
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* general case: link in after the found hostname */
|
|
|
|
result->previous = search;
|
|
|
|
result->next = search->next;
|
|
|
|
if (search->next)
|
|
|
|
search->next->previous = result;
|
|
|
|
search->next = result;
|
|
|
|
return &result->data;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the next hostname that matches a part of the specified URL.
|
|
|
|
*
|
2006-01-05 05:05:34 +03:00
|
|
|
* The following variables must be initialised prior to calling:
|
|
|
|
*
|
|
|
|
* - current_match_scheme
|
|
|
|
* - current_match_hostname
|
|
|
|
* - current_match_hostname_length;
|
|
|
|
*
|
2005-02-03 16:18:22 +03:00
|
|
|
* \param url a normalized url to find the next match for
|
|
|
|
* \param current the current hostname to search forward from, or NULL
|
|
|
|
* \return the next matching hostname, or NULL
|
|
|
|
*/
|
2006-01-05 05:05:34 +03:00
|
|
|
struct hostname_data *url_store_match_hostname(
|
2005-02-03 16:18:22 +03:00
|
|
|
struct hostname_data *current) {
|
|
|
|
int compare;
|
|
|
|
|
2006-01-05 05:05:34 +03:00
|
|
|
assert(current_match_hostname);
|
2005-02-03 16:18:22 +03:00
|
|
|
|
|
|
|
/* advance to the next hostname */
|
|
|
|
if (!current)
|
|
|
|
current = url_store_hostnames;
|
|
|
|
else
|
|
|
|
current = current->next;
|
|
|
|
|
|
|
|
/* skip past hostname data without URLs */
|
2006-01-05 05:05:34 +03:00
|
|
|
for (; current && (!current->url); current = current->next);
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
while (current) {
|
2006-01-05 05:05:34 +03:00
|
|
|
if (current->hostname_length >= current_match_hostname_length) {
|
|
|
|
compare = strncmp(current_match_hostname, current->hostname,
|
|
|
|
current_match_hostname_length);
|
|
|
|
if (compare == 0)
|
2005-02-03 16:18:22 +03:00
|
|
|
return current;
|
2006-01-05 05:05:34 +03:00
|
|
|
else if ((compare < 0) && !current_match_www_test)
|
2005-02-03 16:18:22 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* special case: if hostname is not www then try it */
|
2006-01-05 05:05:34 +03:00
|
|
|
if (current_match_www_test && ((current->hostname_length - 4) >=
|
|
|
|
current_match_hostname_length) &&
|
2005-02-03 16:18:22 +03:00
|
|
|
(!strncmp(current->hostname, "www.", 4)) &&
|
2006-01-05 05:05:34 +03:00
|
|
|
(!strncmp(current_match_hostname,
|
|
|
|
current->hostname + 4,
|
|
|
|
current_match_hostname_length)))
|
2005-02-03 16:18:22 +03:00
|
|
|
return current;
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* move to next hostname with URLs */
|
|
|
|
current = current->next;
|
2006-01-05 05:05:34 +03:00
|
|
|
for (; current && (!current->url); current = current->next);
|
2005-02-03 16:18:22 +03:00
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the complete URL for the next matched stored URL.
|
|
|
|
*
|
|
|
|
* \param url a normalized url to find the next match for
|
|
|
|
* \param reference internal reference (NULL for first call)
|
|
|
|
* \return the next URL that matches
|
|
|
|
*/
|
|
|
|
char *url_store_match(const char *url, struct url_data **reference) {
|
|
|
|
struct hostname_data *hostname;
|
|
|
|
struct url_data *search = NULL;
|
|
|
|
url_func_result res;
|
|
|
|
|
|
|
|
assert(url);
|
|
|
|
|
|
|
|
if (!url_store_hostnames)
|
|
|
|
return NULL;
|
2005-12-31 08:35:59 +03:00
|
|
|
|
2005-04-27 04:01:17 +04:00
|
|
|
/* find the scheme and first URL, not necessarily matching */
|
2005-02-03 16:18:22 +03:00
|
|
|
if (!*reference) {
|
2006-01-05 05:05:34 +03:00
|
|
|
/* the hostname match is constant throughout */
|
|
|
|
if (current_match_hostname)
|
|
|
|
free(current_match_hostname);
|
|
|
|
current_match_hostname = NULL;
|
|
|
|
res = url_host(url, ¤t_match_hostname);
|
|
|
|
switch (res) {
|
|
|
|
case URL_FUNC_OK:
|
|
|
|
break;
|
|
|
|
case URL_FUNC_NOMEM:
|
|
|
|
return NULL;
|
|
|
|
case URL_FUNC_FAILED:
|
|
|
|
/* for 'file:/' */
|
|
|
|
current_match_hostname = strdup("file:/");
|
|
|
|
if (!current_match_hostname)
|
|
|
|
return NULL;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(0);
|
2005-04-27 04:01:17 +04:00
|
|
|
}
|
2006-01-05 05:05:34 +03:00
|
|
|
current_match_hostname_length = strlen(current_match_hostname);
|
|
|
|
/* the scheme is constant throughout */
|
|
|
|
if (current_match_scheme)
|
|
|
|
free(current_match_scheme);
|
|
|
|
current_match_scheme = NULL;
|
|
|
|
res = url_scheme(url, ¤t_match_scheme);
|
2005-04-27 04:01:17 +04:00
|
|
|
if (res != URL_FUNC_OK)
|
|
|
|
return NULL;
|
2006-01-05 05:05:34 +03:00
|
|
|
current_match_scheme_length = strlen(current_match_scheme);
|
|
|
|
/* the url is constant throughout */
|
|
|
|
current_match_url_length = strlen(url);
|
|
|
|
current_match_www_test = (!strcmp(current_match_scheme, "http") &&
|
|
|
|
strncmp(url + 4 + 3, "www.", 4)); /* 'http' + '://' */
|
|
|
|
/* get our initial reference */
|
|
|
|
hostname = url_store_match_hostname(NULL);
|
|
|
|
if (!hostname)
|
|
|
|
return NULL;
|
2006-01-25 02:04:07 +03:00
|
|
|
} else {
|
2005-02-03 16:18:22 +03:00
|
|
|
search = *reference;
|
|
|
|
hostname = search->parent;
|
|
|
|
}
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* work through all our strings, ignoring the scheme and 'www.' */
|
|
|
|
while (hostname) {
|
|
|
|
|
|
|
|
/* get the next URL to test */
|
|
|
|
if (!search)
|
|
|
|
search = hostname->url;
|
|
|
|
else
|
|
|
|
search = search->next;
|
|
|
|
|
|
|
|
/* loop past end of list, or search */
|
|
|
|
if (!search) {
|
2006-01-05 05:05:34 +03:00
|
|
|
hostname = url_store_match_hostname(hostname);
|
2005-02-03 16:18:22 +03:00
|
|
|
if (!hostname)
|
|
|
|
return NULL;
|
2005-12-31 07:27:53 +03:00
|
|
|
} else if (search->data.visits > 0) {
|
2005-02-03 16:18:22 +03:00
|
|
|
/* straight match */
|
2006-01-05 05:05:34 +03:00
|
|
|
if ((search->data.url_length >= current_match_url_length) &&
|
2005-04-27 04:01:17 +04:00
|
|
|
(!strncmp(search->data.url, url,
|
2006-01-05 05:05:34 +03:00
|
|
|
current_match_url_length))) {
|
2005-02-03 16:18:22 +03:00
|
|
|
*reference = search;
|
2005-02-07 17:28:43 +03:00
|
|
|
return search->data.url;
|
2005-02-03 16:18:22 +03:00
|
|
|
}
|
|
|
|
/* try with 'www.' inserted after the scheme */
|
2006-01-05 05:05:34 +03:00
|
|
|
if (current_match_www_test &&
|
|
|
|
((search->data.url_length - 4) >=
|
|
|
|
current_match_url_length) &&
|
2005-04-27 04:01:17 +04:00
|
|
|
(!strncmp(search->data.url,
|
2006-01-05 05:05:34 +03:00
|
|
|
current_match_scheme,
|
|
|
|
current_match_scheme_length)) &&
|
|
|
|
(!strncmp(search->data.url +
|
|
|
|
current_match_scheme_length + 3,
|
2005-04-27 04:01:17 +04:00
|
|
|
"www.", 4)) &&
|
2006-01-05 05:05:34 +03:00
|
|
|
(!strncmp(search->data.url +
|
|
|
|
current_match_scheme_length + 7,
|
|
|
|
url +
|
|
|
|
current_match_scheme_length + 3,
|
|
|
|
current_match_url_length -
|
|
|
|
current_match_scheme_length - 3))) {
|
2005-02-03 16:18:22 +03:00
|
|
|
*reference = search;
|
2005-02-07 17:28:43 +03:00
|
|
|
return search->data.url;
|
2005-02-03 16:18:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2005-04-09 02:18:28 +04:00
|
|
|
return NULL;
|
2005-02-03 16:18:22 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts a text string into one suitable for URL matching.
|
|
|
|
*
|
|
|
|
* \param text the text to search with
|
|
|
|
* \return URL matching string allocated on heap, or NULL on error
|
|
|
|
*/
|
|
|
|
char *url_store_match_string(const char *text) {
|
|
|
|
url_func_result res;
|
|
|
|
char *url;
|
|
|
|
|
|
|
|
assert(text);
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
res = url_normalize(text, &url);
|
|
|
|
if (res != URL_FUNC_OK)
|
|
|
|
return NULL;
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2005-02-03 16:18:22 +03:00
|
|
|
/* drop the '/' from the end if it was added when normalizing */
|
|
|
|
if ((url[strlen(url) - 1] == '/') && (text[strlen(text) - 1] != '/'))
|
|
|
|
url[strlen(url) - 1] = '\0';
|
|
|
|
return url;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
2005-10-02 03:27:22 +04:00
|
|
|
* Loads the current contents of the URL store from disk
|
2005-02-03 16:18:22 +03:00
|
|
|
*
|
|
|
|
* \param file the file to load options from
|
|
|
|
*/
|
2005-04-09 02:18:28 +04:00
|
|
|
void url_store_load(const char *file) {
|
2005-02-03 16:18:22 +03:00
|
|
|
char s[MAXIMUM_URL_LENGTH];
|
2005-02-08 04:06:03 +03:00
|
|
|
struct hostname_data *hostname;
|
|
|
|
struct url_data *result;
|
|
|
|
int urls;
|
|
|
|
int i;
|
|
|
|
int version;
|
2005-12-31 07:27:53 +03:00
|
|
|
int length;
|
2005-02-03 16:18:22 +03:00
|
|
|
FILE *fp;
|
2005-12-31 08:35:59 +03:00
|
|
|
|
2005-12-31 07:27:53 +03:00
|
|
|
LOG(("Loading URL file"));
|
2005-02-03 16:18:22 +03:00
|
|
|
|
|
|
|
fp = fopen(file, "r");
|
|
|
|
if (!fp) {
|
|
|
|
LOG(("Failed to open file '%s' for reading", file));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
|
|
|
|
return;
|
2005-02-08 04:06:03 +03:00
|
|
|
version = atoi(s);
|
2005-12-31 07:27:53 +03:00
|
|
|
if (version < 102) {
|
2006-01-05 05:05:34 +03:00
|
|
|
LOG(("Unsupported URL file version."));
|
|
|
|
return;
|
2005-12-31 07:27:53 +03:00
|
|
|
}
|
2006-01-06 08:32:45 +03:00
|
|
|
if (version > 105) {
|
2006-01-05 05:05:34 +03:00
|
|
|
LOG(("Unknown URL file version."));
|
2005-02-03 16:18:22 +03:00
|
|
|
return;
|
2005-12-31 07:27:53 +03:00
|
|
|
}
|
2005-04-09 02:18:28 +04:00
|
|
|
|
2006-01-06 08:32:45 +03:00
|
|
|
last_hostname_found = NULL;
|
2005-12-31 07:27:53 +03:00
|
|
|
while (fgets(s, MAXIMUM_URL_LENGTH, fp)) {
|
2006-01-06 08:32:45 +03:00
|
|
|
/* get the hostname */
|
2006-01-25 02:04:07 +03:00
|
|
|
length = strlen(s) - 1;
|
|
|
|
s[length] = '\0';
|
2006-01-06 08:32:45 +03:00
|
|
|
|
|
|
|
/* skip data that has ended up with a host of '' */
|
|
|
|
if (length == 0) {
|
|
|
|
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
|
|
|
|
break;
|
|
|
|
urls = atoi(s);
|
|
|
|
for (i = 0; i < (6 * urls); i++)
|
|
|
|
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
|
|
|
|
break;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* add the host at the tail */
|
|
|
|
if (version == 105) {
|
|
|
|
hostname = malloc(sizeof *hostname);
|
|
|
|
if (!hostname)
|
|
|
|
die("Insufficient memory to create hostname");
|
2006-01-25 02:04:07 +03:00
|
|
|
hostname->hostname = malloc(length + 1);
|
2006-01-06 08:32:45 +03:00
|
|
|
if (!hostname->hostname)
|
|
|
|
die("Insufficient memory to create hostname");
|
2006-01-25 02:04:07 +03:00
|
|
|
memcpy(hostname->hostname, s, length + 1);
|
2006-01-06 08:32:45 +03:00
|
|
|
hostname->hostname_length = length;
|
|
|
|
hostname->url = 0;
|
|
|
|
hostname->previous = last_hostname_found;
|
|
|
|
if (!hostname->previous)
|
|
|
|
url_store_hostnames = hostname;
|
|
|
|
else
|
|
|
|
last_hostname_found->next = hostname;
|
|
|
|
hostname->next = 0;
|
|
|
|
last_hostname_found = hostname;
|
|
|
|
} else {
|
|
|
|
hostname = url_store_find_hostname(s);
|
|
|
|
if (!hostname)
|
|
|
|
break;
|
|
|
|
}
|
2005-12-31 07:27:53 +03:00
|
|
|
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
|
|
|
|
break;
|
|
|
|
urls = atoi(s);
|
2006-01-06 08:32:45 +03:00
|
|
|
|
|
|
|
/* load the non-corrupt data */
|
2005-12-31 07:27:53 +03:00
|
|
|
for (i = 0; i < urls; i++) {
|
2005-02-08 04:06:03 +03:00
|
|
|
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
|
|
|
|
break;
|
2006-01-25 02:04:07 +03:00
|
|
|
length = strlen(s) - 1;
|
|
|
|
s[length] = '\0';
|
2005-12-31 07:27:53 +03:00
|
|
|
result = calloc(1, sizeof(struct url_data));
|
|
|
|
if (!result)
|
2006-01-06 08:32:45 +03:00
|
|
|
die("Insufficient memory to create URL");
|
2005-12-31 07:27:53 +03:00
|
|
|
result->data.url_length = length;
|
2006-01-25 02:04:07 +03:00
|
|
|
result->data.url = malloc(length + 1);
|
2005-12-31 07:27:53 +03:00
|
|
|
if (!result->data.url)
|
2006-01-06 08:32:45 +03:00
|
|
|
die("Insufficient memory to create URL");
|
2006-01-25 02:04:07 +03:00
|
|
|
memcpy(result->data.url, s, length + 1);
|
2005-12-31 07:27:53 +03:00
|
|
|
result->parent = hostname;
|
|
|
|
result->next = hostname->url;
|
|
|
|
if (hostname->url)
|
|
|
|
hostname->url->previous = result;
|
|
|
|
hostname->url = result;
|
2005-02-08 04:06:03 +03:00
|
|
|
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
|
|
|
|
break;
|
2005-12-31 07:27:53 +03:00
|
|
|
result->data.visits = atoi(s);
|
|
|
|
if (version == 102) {
|
2006-01-05 05:05:34 +03:00
|
|
|
/* ignore requests */
|
2005-02-08 04:06:03 +03:00
|
|
|
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
|
|
|
|
break;
|
2005-12-31 07:27:53 +03:00
|
|
|
/* ignore thumbnail size */
|
|
|
|
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
|
2005-02-08 04:06:03 +03:00
|
|
|
break;
|
2005-12-31 07:27:53 +03:00
|
|
|
/* set last visit as today to retain */
|
|
|
|
result->data.last_visit = time(NULL);
|
|
|
|
} else {
|
2005-02-08 04:06:03 +03:00
|
|
|
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
|
|
|
|
break;
|
2005-12-31 07:27:53 +03:00
|
|
|
result->data.last_visit = atoi(s);
|
2005-02-08 04:06:03 +03:00
|
|
|
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
|
|
|
|
break;
|
2005-12-31 07:27:53 +03:00
|
|
|
result->data.type = atoi(s);
|
|
|
|
}
|
|
|
|
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
|
|
|
|
break;
|
2005-06-23 21:22:28 +04:00
|
|
|
#ifdef riscos
|
2006-01-25 02:04:07 +03:00
|
|
|
if (strlen(s) == 12) {
|
2006-01-05 02:03:00 +03:00
|
|
|
/* ensure filename is 'XX.XX.XX.XX' */
|
|
|
|
if ((s[2] == '.') && (s[5] == '.') &&
|
2006-01-25 02:04:07 +03:00
|
|
|
(s[8] == '.')) {
|
|
|
|
s[11] = '\0';
|
2006-01-05 02:03:00 +03:00
|
|
|
result->data.thumbnail =
|
|
|
|
bitmap_create_file(s);
|
2006-01-25 02:04:07 +03:00
|
|
|
}
|
2006-01-05 02:03:00 +03:00
|
|
|
}
|
2005-06-23 21:22:28 +04:00
|
|
|
#endif
|
2006-01-06 08:32:45 +03:00
|
|
|
if (version >= 104) {
|
2005-12-31 07:27:53 +03:00
|
|
|
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
|
|
|
|
break;
|
2006-01-25 02:04:07 +03:00
|
|
|
length = strlen(s) - 1;
|
|
|
|
if (length > 0) {
|
|
|
|
s[length] = '\0';
|
|
|
|
result->data.title = malloc(length + 1);
|
|
|
|
if (result->data.title)
|
|
|
|
memcpy(result->data.title, s,
|
|
|
|
length + 1);
|
|
|
|
}
|
2005-12-31 08:35:59 +03:00
|
|
|
}
|
2005-02-08 04:06:03 +03:00
|
|
|
}
|
2005-02-03 16:18:22 +03:00
|
|
|
}
|
|
|
|
fclose(fp);
|
2005-12-31 07:27:53 +03:00
|
|
|
LOG(("Successfully loaded URL file"));
|
2005-02-03 16:18:22 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Saves the current contents of the URL store to disk
|
|
|
|
*
|
|
|
|
* \param file the file to load options from
|
|
|
|
*/
|
2005-04-09 02:18:28 +04:00
|
|
|
void url_store_save(const char *file) {
|
2005-02-03 16:18:22 +03:00
|
|
|
struct hostname_data *search;
|
|
|
|
struct url_data *url;
|
2005-02-08 04:06:03 +03:00
|
|
|
int url_count;
|
2005-12-31 07:27:53 +03:00
|
|
|
const char *thumb_file;
|
|
|
|
char *s;
|
|
|
|
int i;
|
2005-02-03 16:18:22 +03:00
|
|
|
FILE *fp;
|
2005-08-21 16:04:18 +04:00
|
|
|
#ifdef riscos
|
2005-06-23 21:22:28 +04:00
|
|
|
struct bitmap *bitmap;
|
2005-08-21 16:04:18 +04:00
|
|
|
#endif
|
2006-01-02 16:18:32 +03:00
|
|
|
time_t min_date;
|
2005-12-31 07:27:53 +03:00
|
|
|
char *title;
|
2005-02-03 16:18:22 +03:00
|
|
|
|
|
|
|
fp = fopen(file, "w");
|
|
|
|
if (!fp) {
|
|
|
|
LOG(("Failed to open file '%s' for writing", file));
|
|
|
|
return;
|
|
|
|
}
|
2005-12-31 08:35:59 +03:00
|
|
|
|
2005-12-31 07:27:53 +03:00
|
|
|
/* get the minimum date for expiry */
|
|
|
|
min_date = time(NULL) - (60 * 60 * 24) * option_expire_url;
|
2005-02-03 16:18:22 +03:00
|
|
|
|
|
|
|
/* file format version number */
|
2006-01-06 08:32:45 +03:00
|
|
|
fprintf(fp, "105\n");
|
2006-01-05 02:03:00 +03:00
|
|
|
for (search = url_store_hostnames; search; search = search->next) {
|
2005-02-08 04:06:03 +03:00
|
|
|
url_count = 0;
|
2005-02-07 17:28:43 +03:00
|
|
|
for (url = search->url; url; url = url->next)
|
2005-12-31 07:27:53 +03:00
|
|
|
if ((url->data.last_visit > min_date) &&
|
|
|
|
(url->data.visits > 0) &&
|
|
|
|
(url->data.url_length <
|
2005-12-31 08:35:59 +03:00
|
|
|
MAXIMUM_URL_LENGTH)) {
|
2005-02-08 04:06:03 +03:00
|
|
|
url_count++;
|
2005-12-31 08:35:59 +03:00
|
|
|
}
|
2006-01-06 08:32:45 +03:00
|
|
|
if (url_count > 0) {
|
|
|
|
fprintf(fp, "%s\n%i\n", search->hostname, url_count);
|
2005-12-31 08:35:59 +03:00
|
|
|
for (url = search->url; url && url->next;
|
|
|
|
url = url->next);
|
2005-02-08 04:06:03 +03:00
|
|
|
for (; url; url = url->previous)
|
2005-12-31 07:27:53 +03:00
|
|
|
if ((url->data.last_visit > min_date) &&
|
|
|
|
(url->data.visits > 0) &&
|
|
|
|
(url->data.url_length <
|
|
|
|
MAXIMUM_URL_LENGTH)) {
|
|
|
|
thumb_file = "";
|
2005-06-23 21:22:28 +04:00
|
|
|
#ifdef riscos
|
|
|
|
bitmap = url->data.thumbnail;
|
2005-12-31 07:27:53 +03:00
|
|
|
if (bitmap)
|
2005-06-23 21:22:28 +04:00
|
|
|
thumb_file = bitmap->filename;
|
|
|
|
#endif
|
2005-12-31 08:35:59 +03:00
|
|
|
|
|
|
|
if (url->data.title) {
|
|
|
|
s = url->data.title;
|
|
|
|
for (i = 0; s[i] != '\0';
|
|
|
|
i++)
|
|
|
|
if (s[i] < 32)
|
|
|
|
s[i] = ' ';
|
|
|
|
for (--i;
|
|
|
|
((i > 0) &&
|
|
|
|
(s[i] == ' '));
|
|
|
|
i--)
|
|
|
|
s[i] = '\0';
|
|
|
|
|
2005-12-31 07:27:53 +03:00
|
|
|
title = url->data.title;
|
2005-12-31 08:35:59 +03:00
|
|
|
}
|
2005-12-31 07:27:53 +03:00
|
|
|
else
|
|
|
|
title = "";
|
|
|
|
fprintf(fp, "%s\n%i\n%i\n%i\n%s\n%s\n",
|
2005-04-27 04:01:17 +04:00
|
|
|
url->data.url,
|
|
|
|
url->data.visits,
|
2006-01-02 16:18:32 +03:00
|
|
|
(int) url->data.
|
|
|
|
last_visit,
|
2005-12-31 07:27:53 +03:00
|
|
|
url->data.type,
|
|
|
|
thumb_file,
|
|
|
|
title);
|
2005-06-23 21:22:28 +04:00
|
|
|
}
|
2005-02-08 04:06:03 +03:00
|
|
|
}
|
2005-02-03 16:18:22 +03:00
|
|
|
}
|
|
|
|
fclose(fp);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
2005-12-31 07:27:53 +03:00
|
|
|
* Associates a thumbnail with a specified URL.
|
2005-02-03 16:18:22 +03:00
|
|
|
*/
|
2005-06-23 21:22:28 +04:00
|
|
|
void url_store_add_thumbnail(const char *url, struct bitmap *bitmap) {
|
|
|
|
struct url_content *content;
|
2005-12-31 08:35:59 +03:00
|
|
|
|
2005-06-23 21:22:28 +04:00
|
|
|
content = url_store_find(url);
|
|
|
|
if (content) {
|
2006-01-05 05:05:34 +03:00
|
|
|
if (content->thumbnail)
|
|
|
|
bitmap_destroy(content->thumbnail);
|
|
|
|
content->thumbnail = bitmap;
|
2005-06-23 21:22:28 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-12-31 07:27:53 +03:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the thumbnail associated with a given URL.
|
|
|
|
*/
|
2005-06-23 21:22:28 +04:00
|
|
|
struct bitmap *url_store_get_thumbnail(const char *url) {
|
|
|
|
struct url_content *content;
|
2005-12-31 08:35:59 +03:00
|
|
|
|
2005-06-23 21:22:28 +04:00
|
|
|
content = url_store_find(url);
|
|
|
|
if (content)
|
|
|
|
return content->thumbnail;
|
|
|
|
return NULL;
|
|
|
|
}
|
2005-12-31 07:27:53 +03:00
|
|
|
|
|
|
|
|
|
|
|
int url_store_compare_last_visit(const void *a, const void *b) {
|
2006-01-05 05:05:34 +03:00
|
|
|
struct url_content * const *url_a = (struct url_content * const *)a;
|
|
|
|
struct url_content * const *url_b = (struct url_content * const *)b;
|
2005-12-31 07:27:53 +03:00
|
|
|
return ((*url_a)->last_visit - (*url_b)->last_visit);
|
|
|
|
}
|