/*
 * This file is part of NetSurf, http://netsurf.sourceforge.net/
 * Licensed under the GNU General Public License,
 *                http://www.opensource.org/licenses/gpl-license
 * Copyright 2004 James Bursa <bursa@users.sourceforge.net>
 */
|
|
|
|
/** \file
 * High-level fetching, caching and conversion (implementation).
 *
 * The implementation checks the cache for the requested URL. If it is not
 * present, a content is created and a fetch is initiated. As the status of the
 * fetch changes and data is received, the content is updated appropriately.
 */
|
|
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
#include <sys/types.h>
|
|
#include <regex.h>
|
|
#include "netsurf/utils/config.h"
|
|
#include "netsurf/content/cache.h"
|
|
#include "netsurf/content/content.h"
|
|
#include "netsurf/content/fetchcache.h"
|
|
#include "netsurf/content/fetch.h"
|
|
#include "netsurf/utils/log.h"
|
|
#include "netsurf/utils/messages.h"
|
|
#include "netsurf/utils/url.h"
|
|
#include "netsurf/utils/utils.h"
|
|
|
|
|
|
static char error_page[1000];
|
|
static regex_t re_content_type;
|
|
static void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size);
|
|
static char *fetchcache_parse_type(char *s, char **params[]);
|
|
static void fetchcache_error_page(struct content *c, const char *error);
|
|
|
|
|
|
/**
|
|
* Retrieve a URL or fetch, convert, and cache it.
|
|
*
|
|
* The referer may be 0.
|
|
*
|
|
* The caller must supply a callback function which is called when anything
|
|
* interesting happens to the content which is returned. See content.h.
|
|
*
|
|
* If an error occurs immediately, 0 may be returned. Later errors will be
|
|
* reported via the callback.
|
|
*
|
|
* \param url address to fetch
|
|
* \param referer url of referring page, or 0 if none
|
|
* \param callback function to call when anything interesting happens to
|
|
* the new content
|
|
* \param p1 user parameter for callback
|
|
* \param p2 user parameter for callback
|
|
* \param width available space
|
|
* \param height available space
|
|
* \param no_error_pages if an error occurs, send CONTENT_MSG_ERROR instead
|
|
* of generating an error page
|
|
* \param post_urlenc url encoded post data, or 0 if none
|
|
* \param post_multipart multipart post data, or 0 if none
|
|
* \param cookies send and accept cookies
|
|
* \return a new content, or 0 if an error occurred and no_error_pages is true
|
|
*/
|
|
|
|
struct content * fetchcache(const char *url, char *referer,
|
|
void (*callback)(content_msg msg, struct content *c, void *p1,
|
|
void *p2, union content_msg_data data),
|
|
void *p1, void *p2, unsigned long width, unsigned long height,
|
|
bool no_error_pages
|
|
#ifdef WITH_POST
|
|
, char *post_urlenc,
|
|
struct form_successful_control *post_multipart
|
|
#endif
|
|
#ifdef WITH_COOKIES
|
|
,bool cookies
|
|
#endif
|
|
)
|
|
{
|
|
struct content *c;
|
|
char *url1 = xstrdup(url);
|
|
char *hash = strchr(url1, '#');
|
|
char error_message[500];
|
|
|
|
/* strip fragment identifier */
|
|
if (hash != 0)
|
|
*hash = 0;
|
|
|
|
LOG(("url %s", url1));
|
|
|
|
#ifdef WITH_POST
|
|
if (!post_urlenc && !post_multipart)
|
|
#endif
|
|
{
|
|
c = cache_get(url1);
|
|
if (c != 0) {
|
|
free(url1);
|
|
content_add_user(c, callback, p1, p2);
|
|
return c;
|
|
}
|
|
}
|
|
|
|
c = content_create(url1);
|
|
content_add_user(c, callback, p1, p2);
|
|
|
|
#ifdef WITH_POST
|
|
if (!post_urlenc && !post_multipart)
|
|
#endif
|
|
cache_put(c);
|
|
|
|
c->width = width;
|
|
c->height = height;
|
|
c->no_error_pages = no_error_pages;
|
|
c->fetch = fetch_start(url1, referer, fetchcache_callback, c, no_error_pages
|
|
#ifdef WITH_POST
|
|
,post_urlenc, post_multipart
|
|
#endif
|
|
#ifdef WITH_COOKIES
|
|
,cookies
|
|
#endif
|
|
);
|
|
if (c->fetch == 0) {
|
|
LOG(("warning: fetch_start failed"));
|
|
if (c->cache)
|
|
cache_destroy(c);
|
|
if (no_error_pages) {
|
|
content_destroy(c);
|
|
free(url1);
|
|
return 0;
|
|
}
|
|
snprintf(error_message, sizeof error_message,
|
|
messages_get("InvalidURL"), url1);
|
|
fetchcache_error_page(c, error_message);
|
|
}
|
|
free(url1);
|
|
return c;
|
|
}
|
|
|
|
|
|
/**
|
|
* Callback function for fetch.
|
|
*
|
|
* This is called when the status of a fetch changes.
|
|
*/
|
|
|
|
void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size)
|
|
{
|
|
struct content *c = p;
|
|
content_type type;
|
|
char *mime_type, *url;
|
|
char **params;
|
|
unsigned int i;
|
|
union content_msg_data msg_data;
|
|
|
|
c->lock++;
|
|
|
|
switch (msg) {
|
|
case FETCH_TYPE:
|
|
c->total_size = size;
|
|
mime_type = fetchcache_parse_type(data, ¶ms);
|
|
type = content_lookup(mime_type);
|
|
LOG(("FETCH_TYPE, type %u", type));
|
|
content_set_type(c, type, mime_type, (const char**)params);
|
|
free(mime_type);
|
|
for (i = 0; params[i]; i++)
|
|
free(params[i]);
|
|
free(params);
|
|
if (c->cache && c->type == CONTENT_OTHER)
|
|
cache_destroy(c);
|
|
break;
|
|
|
|
case FETCH_DATA:
|
|
LOG(("FETCH_DATA"));
|
|
if (c->total_size)
|
|
sprintf(c->status_message,
|
|
messages_get("RecPercent"),
|
|
human_friendly_bytesize(c->source_size + size),
|
|
human_friendly_bytesize(c->total_size),
|
|
(unsigned int) ((c->source_size + size) * 100.0 / c->total_size));
|
|
else
|
|
sprintf(c->status_message,
|
|
messages_get("Received"),
|
|
human_friendly_bytesize(c->source_size + size));
|
|
content_broadcast(c, CONTENT_MSG_STATUS, msg_data);
|
|
content_process_data(c, data, size);
|
|
break;
|
|
|
|
case FETCH_FINISHED:
|
|
LOG(("FETCH_FINISHED"));
|
|
sprintf(c->status_message, messages_get("Converting"),
|
|
c->source_size);
|
|
c->fetch = 0;
|
|
content_broadcast(c, CONTENT_MSG_STATUS, msg_data);
|
|
content_convert(c, c->width, c->height);
|
|
break;
|
|
|
|
case FETCH_ERROR:
|
|
LOG(("FETCH_ERROR, '%s'", data));
|
|
c->fetch = 0;
|
|
if (c->cache)
|
|
cache_destroy(c);
|
|
if (c->no_error_pages) {
|
|
msg_data.error = data;
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
content_destroy(c);
|
|
} else {
|
|
content_reset(c);
|
|
fetchcache_error_page(c, data);
|
|
}
|
|
break;
|
|
|
|
case FETCH_REDIRECT:
|
|
LOG(("FETCH_REDIRECT, '%s'", data));
|
|
c->fetch = 0;
|
|
/* redirect URLs must be absolute by HTTP/1.1, but many sites send
|
|
* relative ones: treat them as relative to requested URL */
|
|
url = url_join(data, c->url);
|
|
if (url) {
|
|
msg_data.redirect = url;
|
|
content_broadcast(c, CONTENT_MSG_REDIRECT, msg_data);
|
|
free(url);
|
|
} else {
|
|
msg_data.error = messages_get("BadRedirect");
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
}
|
|
if (c->cache)
|
|
cache_destroy(c);
|
|
content_destroy(c);
|
|
break;
|
|
#ifdef WITH_AUTH
|
|
case FETCH_AUTH:
|
|
/* data -> string containing the Realm */
|
|
LOG(("FETCH_AUTH, '%s'", data));
|
|
c->fetch = 0;
|
|
msg_data.auth_realm = data;
|
|
content_broadcast(c, CONTENT_MSG_AUTH, msg_data);
|
|
cache_destroy(c);
|
|
break;
|
|
#endif
|
|
default:
|
|
assert(0);
|
|
}
|
|
|
|
if (--(c->lock) == 0 && c->destroy_pending)
|
|
content_destroy(c);
|
|
}
|
|
|
|
|
|
/**
|
|
* Initialise the fetchcache module.
|
|
*/
|
|
|
|
void fetchcache_init(void)
|
|
{
|
|
regcomp_wrapper(&re_content_type,
|
|
"^([-0-9a-zA-Z_.]+/[-0-9a-zA-Z_.]+)[ \t]*"
|
|
"(;[ \t]*([-0-9a-zA-Z_.]+)="
|
|
"([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
|
|
REG_EXTENDED);
|
|
}
|
|
|
|
|
|
/**
|
|
* Parse a Content-Type header.
|
|
*
|
|
* \param s a Content-Type header
|
|
* \param params updated to point to an array of strings, ordered attribute,
|
|
* value, attribute, ..., 0
|
|
* \return a new string containing the MIME-type
|
|
*/
|
|
|
|
#define MAX_ATTRS 10
|
|
|
|
char *fetchcache_parse_type(char *s, char **params[])
|
|
{
|
|
char *type;
|
|
unsigned int i;
|
|
int r;
|
|
regmatch_t pmatch[2 + MAX_ATTRS * 3];
|
|
*params = xcalloc(MAX_ATTRS * 2 + 2, sizeof (*params)[0]);
|
|
|
|
r = regexec(&re_content_type, s, 2 + MAX_ATTRS * 3, pmatch, 0);
|
|
if (r) {
|
|
LOG(("failed to parse content-type '%s'", s));
|
|
return xstrdup(s);
|
|
}
|
|
|
|
type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
|
|
assert(type);
|
|
|
|
/* parameters */
|
|
for (i = 0; i != MAX_ATTRS && pmatch[2 + 3 * i].rm_so != -1; i++) {
|
|
(*params)[2 * i] = strndup(s + pmatch[2 + 3 * i + 1].rm_so,
|
|
pmatch[2 + 3 * i + 1].rm_eo - pmatch[2 + 3 * i + 1].rm_so);
|
|
(*params)[2 * i + 1] = strndup(s + pmatch[2 + 3 * i + 2].rm_so,
|
|
pmatch[2 + 3 * i + 2].rm_eo - pmatch[2 + 3 * i + 2].rm_so);
|
|
assert((*params)[2 * i] && (*params)[2 * i + 1]);
|
|
}
|
|
(*params)[2 * i] = 0;
|
|
|
|
return type;
|
|
}
|
|
|
|
|
|
/**
|
|
* Generate an error page.
|
|
*
|
|
* \param c empty content to generate the page in
|
|
* \param error message to display
|
|
*/
|
|
|
|
void fetchcache_error_page(struct content *c, const char *error)
|
|
{
|
|
const char *params[] = { 0 };
|
|
snprintf(error_page, sizeof error_page, messages_get("ErrorPage"), error);
|
|
content_set_type(c, CONTENT_HTML, "text/html", params);
|
|
content_process_data(c, error_page, strlen(error_page));
|
|
content_convert(c, c->width, c->height);
|
|
}
|
|
|
|
|
|
#ifdef TEST
|
|
|
|
#include <unistd.h>
|
|
|
|
/**
 * Test harness callback: log each notification received for a test URL.
 *
 * NOTE(review): this harness looks stale. The callback type that fetchcache()
 * accepts in this file takes (content_msg, struct content *, void *, void *,
 * union content_msg_data), which does not match this signature - confirm
 * whether the TEST build is still meant to work.
 *
 * \param  msg    notification type
 * \param  c      content concerned (unused)
 * \param  p      user parameter; logged as the URL string
 * \param  error  error text, logged for FETCHCACHE_ERROR only
 */
void callback(fetchcache_msg msg, struct content *c, void *p, char *error)
{
	if (msg == FETCHCACHE_OK) {
		LOG(("FETCHCACHE_OK, url '%s'", p));
	} else if (msg == FETCHCACHE_BADTYPE) {
		LOG(("FETCHCACHE_BADTYPE, url '%s'", p));
	} else if (msg == FETCHCACHE_ERROR) {
		LOG(("FETCHCACHE_ERROR, url '%s', error '%s'", p, error));
	} else {
		/* unknown notification */
		assert(0);
	}
}
|
|
|
|
char *test[] = {"http://www.google.co.uk/", "http://www.ox.ac.uk/", "blah://blah/"};
|
|
|
|
int main(void)
|
|
{
|
|
int i;
|
|
|
|
cache_init();
|
|
fetch_init();
|
|
|
|
for (i = 0; i != sizeof(test) / sizeof(test[0]); i++)
|
|
fetchcache(test[i], 0, callback, test[i], 800, 0);
|
|
for (i = 0; i != 5; i++) {
|
|
fetch_poll();
|
|
sleep(1);
|
|
}
|
|
for (i = 0; i != sizeof(test) / sizeof(test[0]); i++)
|
|
fetchcache(test[i], 0, callback, test[i], 800, 0);
|
|
for (i = 0; i != 20; i++) {
|
|
fetch_poll();
|
|
sleep(1);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#endif
|