mirror of
https://github.com/netsurf-browser/netsurf
synced 2024-11-28 09:13:08 +03:00
cc3481a2ff
Make core fetching code be responsible for inserting cookies into the urldb Provide accessor to a fetch's parent url (this is defined as being the URL of the verifiable fetch which caused this one to occur) Make fetchcache's 3xx handling use the parent url when spawning new fetches svn path=/trunk/netsurf/; revision=3809
1004 lines
26 KiB
C
1004 lines
26 KiB
C
/*
|
|
* Copyright 2005 James Bursa <bursa@users.sourceforge.net>
|
|
*
|
|
* This file is part of NetSurf, http://www.netsurf-browser.org/
|
|
*
|
|
* NetSurf is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; version 2 of the License.
|
|
*
|
|
* NetSurf is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
/** \file
|
|
* High-level fetching, caching and conversion (implementation).
|
|
*
|
|
* The implementation checks the cache for the requested URL. If it is not
|
|
* present, a content is created and a fetch is initiated. As the status of the
|
|
* fetch changes and data is received, the content is updated appropriately.
|
|
*/
|
|
|
|
#define _GNU_SOURCE /* for strndup */
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
#include <strings.h>
|
|
#include <sys/types.h>
|
|
#include <regex.h>
|
|
#include <time.h>
|
|
#include "utils/config.h"
|
|
#include "content/content.h"
|
|
#include "content/fetchcache.h"
|
|
#include "content/fetch.h"
|
|
#include "utils/log.h"
|
|
#include "utils/messages.h"
|
|
#include "utils/talloc.h"
|
|
#include "utils/url.h"
|
|
#include "utils/utils.h"
|
|
|
|
|
|
static char error_page[1000];
|
|
static regex_t re_content_type;
|
|
static void fetchcache_callback(fetch_msg msg, void *p, const void *data,
|
|
unsigned long size);
|
|
static char *fetchcache_parse_type(const char *s, char **params[]);
|
|
static void fetchcache_error_page(struct content *c, const char *error);
|
|
static void fetchcache_cache_update(struct content *c,
|
|
const struct cache_data *data);
|
|
static void fetchcache_notmodified(struct content *c, const void *data);
|
|
static void fetchcache_redirect(struct content *c, const void *data,
|
|
unsigned long size);
|
|
|
|
|
|
/**
|
|
* Retrieve a URL or prepare to fetch, convert, and cache it.
|
|
*
|
|
* The caller must supply a callback function which is called when anything
|
|
* interesting happens to the content which is returned. See content.h.
|
|
*
|
|
* \param url address to fetch
|
|
* \param callback function to call when anything interesting happens to
|
|
* the new content
|
|
* \param p1 user parameter for callback (may be a pointer or integer)
|
|
* \param p2 user parameter for callback (may be a pointer or integer)
|
|
* \param width available space
|
|
* \param height available space
|
|
* \param no_error_pages if an error occurs, send CONTENT_MSG_ERROR instead
|
|
* of generating an error page
|
|
* \param post_urlenc url encoded post data, or 0 if none
|
|
* \param post_multipart multipart post data, or 0 if none
|
|
* \param verifiable this transaction is verifiable
|
|
* \param download download, rather than render content
|
|
* \return a new content, or 0 on memory exhaustion
|
|
*
|
|
* On success, call fetchcache_go() to start work on the new content.
|
|
*/
|
|
|
|
struct content * fetchcache(const char *url,
|
|
void (*callback)(content_msg msg, struct content *c,
|
|
intptr_t p1, intptr_t p2, union content_msg_data data),
|
|
intptr_t p1, intptr_t p2,
|
|
int width, int height,
|
|
bool no_error_pages,
|
|
char *post_urlenc,
|
|
struct form_successful_control *post_multipart,
|
|
bool verifiable,
|
|
bool download)
|
|
{
|
|
struct content *c;
|
|
char *url1;
|
|
char *hash, *query;
|
|
char *etag = 0;
|
|
time_t date = 0;
|
|
|
|
if (strncasecmp(url, "file:///", 8) &&
|
|
strncasecmp(url, "file:/", 6) == 0) {
|
|
/* Manipulate file URLs into correct format */
|
|
if (strncasecmp(url, "file://", 7) == 0) {
|
|
/* file://path */
|
|
url1 = malloc(7 + strlen(url));
|
|
if (!url1)
|
|
return NULL;
|
|
|
|
strcpy(url1, "file://");
|
|
strcat(url1 + 7, url + 6);
|
|
} else {
|
|
/* file:/... */
|
|
url1 = malloc(7 + strlen(url));
|
|
if (!url1)
|
|
return NULL;
|
|
|
|
strcpy(url1, "file://");
|
|
strcat(url1 + 7, url + 5);
|
|
}
|
|
} else {
|
|
/* simply duplicate the URL */
|
|
if ((url1 = strdup(url)) == NULL)
|
|
return NULL;
|
|
}
|
|
|
|
/* strip fragment identifier */
|
|
if ((hash = strchr(url1, '#')) != NULL)
|
|
*hash = 0;
|
|
|
|
/* look for query; we don't cache URLs with a query segment */
|
|
query = strchr(url1, '?');
|
|
|
|
LOG(("url %s", url1));
|
|
|
|
if (!post_urlenc && !post_multipart && !download && !query) {
|
|
if ((c = content_get(url1)) != NULL) {
|
|
struct cache_data *cd = c->cache_data;
|
|
int current_age, freshness_lifetime;
|
|
|
|
/* Calculate staleness of cached content as per
|
|
* RFC 2616 13.2.3/13.2.4 */
|
|
current_age = max(0, (cd->res_time - cd->date));
|
|
current_age = max(current_age,
|
|
(cd->age == INVALID_AGE) ? 0
|
|
: cd->age);
|
|
current_age += cd->res_time - cd->req_time +
|
|
time(0) - cd->res_time;
|
|
freshness_lifetime =
|
|
(cd->max_age != INVALID_AGE) ? cd->max_age :
|
|
(cd->expires != 0) ? cd->expires - cd->date :
|
|
(cd->last_modified != 0) ?
|
|
(time(0) - cd->last_modified) / 10 :
|
|
0;
|
|
|
|
if (freshness_lifetime > current_age ||
|
|
cd->date == 0) {
|
|
/* Ok, either a fresh content or we're
|
|
* currently fetching the selected content
|
|
* (therefore it must be fresh) */
|
|
free(url1);
|
|
if (!content_add_user(c, callback, p1, p2))
|
|
return NULL;
|
|
else
|
|
return c;
|
|
}
|
|
|
|
/* Ok. We have a cache entry, but it appears stale.
|
|
* Therefore, validate it. */
|
|
if (cd->last_modified)
|
|
date = cd->last_modified;
|
|
else
|
|
date = c->cache_data->date;
|
|
etag = c->cache_data->etag;
|
|
}
|
|
}
|
|
|
|
c = content_create(url1);
|
|
free(url1);
|
|
if (!c)
|
|
return NULL;
|
|
|
|
/* Fill in cache validation fields (if present) */
|
|
if (date)
|
|
c->cache_data->date = date;
|
|
if (etag) {
|
|
c->cache_data->etag = talloc_strdup(c, etag);
|
|
if (!c->cache_data->etag)
|
|
return NULL;
|
|
}
|
|
|
|
if (!content_add_user(c, callback, p1, p2)) {
|
|
return NULL;
|
|
}
|
|
|
|
if (!post_urlenc && !post_multipart && !download && !query)
|
|
c->fresh = true;
|
|
|
|
c->width = width;
|
|
c->height = height;
|
|
c->no_error_pages = no_error_pages;
|
|
c->download = download;
|
|
|
|
return c;
|
|
}
|
|
|
|
|
|
/**
|
|
* Start fetching and converting a content.
|
|
*
|
|
* \param content content to fetch, as returned by fetchcache()
|
|
* \param referer referring URL, or 0
|
|
* \param callback function to call when anything interesting happens to
|
|
* the new content
|
|
* \param p1 user parameter for callback
|
|
* \param p2 user parameter for callback
|
|
* \param width available space
|
|
* \param height available space
|
|
* \param post_urlenc url encoded post data, or 0 if none
|
|
* \param post_multipart multipart post data, or 0 if none
|
|
* \param verifiable this transaction is verifiable
|
|
* \param parent_url URL of fetch which spawned this one, or 0 if none
|
|
*
|
|
* Errors will be sent back through the callback.
|
|
*/
|
|
|
|
void fetchcache_go(struct content *content, const char *referer,
|
|
void (*callback)(content_msg msg, struct content *c,
|
|
intptr_t p1, intptr_t p2, union content_msg_data data),
|
|
intptr_t p1, intptr_t p2,
|
|
int width, int height,
|
|
char *post_urlenc,
|
|
struct form_successful_control *post_multipart,
|
|
bool verifiable, const char *parent_url)
|
|
{
|
|
char error_message[500];
|
|
union content_msg_data msg_data;
|
|
|
|
LOG(("url %s, status %s", content->url,
|
|
content_status_name[content->status]));
|
|
|
|
/* We may well have been asked to fetch an URL using a protocol
|
|
* that we can't support. Check for this here and, if we can't
|
|
* perform the fetch, notify the caller and exit */
|
|
if (!fetch_can_fetch(content->url)) {
|
|
|
|
/* The only case where this should fail is if we're a
|
|
* brand new content with no active fetch. If we're not,
|
|
* another content with the same URL somehow got through
|
|
* the fetch_can_fetch check. That should be impossible.
|
|
*/
|
|
assert(content->status == CONTENT_STATUS_TYPE_UNKNOWN &&
|
|
!content->fetch);
|
|
|
|
snprintf(error_message, sizeof error_message,
|
|
messages_get("InvalidURL"),
|
|
content->url);
|
|
|
|
if (content->no_error_pages) {
|
|
/* Mark as in error so content is destroyed
|
|
* on cache clean */
|
|
content->status = CONTENT_STATUS_ERROR;
|
|
msg_data.error = error_message;
|
|
callback(CONTENT_MSG_ERROR,
|
|
content, p1, p2, msg_data);
|
|
} else {
|
|
fetchcache_error_page(content, error_message);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
if (content->status == CONTENT_STATUS_TYPE_UNKNOWN &&
|
|
content->fetch) {
|
|
/* fetching, but not yet received any response:
|
|
* no action required */
|
|
|
|
} else if (content->status == CONTENT_STATUS_TYPE_UNKNOWN) {
|
|
/* brand new content: start fetch */
|
|
char **headers;
|
|
int i = 0;
|
|
char *etag = content->cache_data->etag;
|
|
time_t date = content->cache_data->date;
|
|
content->cache_data->etag = 0;
|
|
content->cache_data->date = 0;
|
|
headers = malloc(3 * sizeof(char *));
|
|
if (!headers) {
|
|
content->status = CONTENT_STATUS_ERROR;
|
|
msg_data.error = messages_get("NoMemory");
|
|
callback(CONTENT_MSG_ERROR, content, p1, p2,
|
|
msg_data);
|
|
return;
|
|
}
|
|
if (etag) {
|
|
headers[i] = malloc(15 + strlen(etag) + 1);
|
|
if (!headers[i]) {
|
|
free(headers);
|
|
content->status = CONTENT_STATUS_ERROR;
|
|
msg_data.error = messages_get("NoMemory");
|
|
callback(CONTENT_MSG_ERROR, content, p1, p2,
|
|
msg_data);
|
|
return;
|
|
}
|
|
sprintf(headers[i++], "If-None-Match: %s", etag);
|
|
talloc_free(etag);
|
|
}
|
|
if (date) {
|
|
headers[i] = malloc(19 + 29 + 1);
|
|
if (!headers[i]) {
|
|
while (--i >= 0) {
|
|
free(headers[i]);
|
|
}
|
|
free(headers);
|
|
content->status = CONTENT_STATUS_ERROR;
|
|
msg_data.error = messages_get("NoMemory");
|
|
callback(CONTENT_MSG_ERROR, content, p1, p2,
|
|
msg_data);
|
|
return;
|
|
}
|
|
sprintf(headers[i++], "If-Modified-Since: %s",
|
|
rfc1123_date(date));
|
|
}
|
|
headers[i] = 0;
|
|
content->fetch = fetch_start(content->url, referer,
|
|
fetchcache_callback, content,
|
|
content->no_error_pages,
|
|
post_urlenc, post_multipart, verifiable,
|
|
parent_url, headers);
|
|
for (i = 0; headers[i]; i++)
|
|
free(headers[i]);
|
|
free(headers);
|
|
if (!content->fetch) {
|
|
LOG(("warning: fetch_start failed"));
|
|
snprintf(error_message, sizeof error_message,
|
|
messages_get("InvalidURL"),
|
|
content->url);
|
|
if (content->no_error_pages) {
|
|
content->status = CONTENT_STATUS_ERROR;
|
|
msg_data.error = error_message;
|
|
content_broadcast(content, CONTENT_MSG_ERROR,
|
|
msg_data);
|
|
} else {
|
|
fetchcache_error_page(content, error_message);
|
|
}
|
|
}
|
|
|
|
/* in these remaining cases, we have to 'catch up' with the content's
|
|
* status, ie. send the same messages as if the content was
|
|
* gradually getting to the current status from TYPE_UNKNOWN */
|
|
} else if (content->status == CONTENT_STATUS_LOADING) {
|
|
callback(CONTENT_MSG_LOADING, content, p1, p2, msg_data);
|
|
|
|
} else if (content->status == CONTENT_STATUS_READY) {
|
|
callback(CONTENT_MSG_LOADING, content, p1, p2, msg_data);
|
|
if (content_find_user(content, callback, p1, p2))
|
|
callback(CONTENT_MSG_READY, content, p1, p2, msg_data);
|
|
|
|
} else if (content->status == CONTENT_STATUS_DONE) {
|
|
callback(CONTENT_MSG_LOADING, content, p1, p2, msg_data);
|
|
if (content->available_width != width)
|
|
content_reformat(content, width, height);
|
|
if (content_find_user(content, callback, p1, p2))
|
|
callback(CONTENT_MSG_READY, content, p1, p2, msg_data);
|
|
if (content_find_user(content, callback, p1, p2))
|
|
callback(CONTENT_MSG_DONE, content, p1, p2, msg_data);
|
|
|
|
} else if (content->status == CONTENT_STATUS_ERROR) {
|
|
/* shouldn't usually occur */
|
|
msg_data.error = messages_get("MiscError");
|
|
callback(CONTENT_MSG_ERROR, content, p1, p2, msg_data);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Callback function for fetch.
|
|
*
|
|
* This is called when the status of a fetch changes.
|
|
*/
|
|
|
|
void fetchcache_callback(fetch_msg msg, void *p, const void *data,
|
|
unsigned long size)
|
|
{
|
|
bool res;
|
|
struct content *c = p;
|
|
content_type type;
|
|
char *mime_type;
|
|
char **params;
|
|
unsigned int i;
|
|
union content_msg_data msg_data;
|
|
|
|
switch (msg) {
|
|
case FETCH_TYPE:
|
|
c->total_size = size;
|
|
c->http_code = fetch_http_code(c->fetch);
|
|
mime_type = fetchcache_parse_type(data, ¶ms);
|
|
if (!mime_type) {
|
|
msg_data.error = messages_get("NoMemory");
|
|
content_broadcast(c, CONTENT_MSG_ERROR,
|
|
msg_data);
|
|
fetch_abort(c->fetch);
|
|
c->fetch = 0;
|
|
return;
|
|
}
|
|
type = content_lookup(mime_type);
|
|
res = content_set_type(c,
|
|
c->download ? CONTENT_OTHER : type,
|
|
mime_type, params);
|
|
free(mime_type);
|
|
for (i = 0; params[i]; i++)
|
|
free(params[i]);
|
|
free(params);
|
|
if (!res) {
|
|
fetch_abort(c->fetch);
|
|
c->fetch = 0;
|
|
}
|
|
break;
|
|
|
|
case FETCH_PROGRESS:
|
|
if (size)
|
|
content_set_status(c,
|
|
messages_get("RecPercent"),
|
|
data, (unsigned int)size);
|
|
else
|
|
content_set_status(c,
|
|
messages_get("Received"),
|
|
data);
|
|
content_broadcast(c, CONTENT_MSG_STATUS, msg_data);
|
|
break;
|
|
|
|
case FETCH_DATA:
|
|
if (!content_process_data(c, data, size)) {
|
|
fetch_abort(c->fetch);
|
|
c->fetch = 0;
|
|
}
|
|
break;
|
|
|
|
case FETCH_FINISHED:
|
|
fetchcache_cache_update(c,
|
|
(const struct cache_data *)data);
|
|
c->fetch = 0;
|
|
content_set_status(c, messages_get("Converting"),
|
|
c->source_size);
|
|
content_broadcast(c, CONTENT_MSG_STATUS, msg_data);
|
|
content_convert(c, c->width, c->height);
|
|
break;
|
|
|
|
case FETCH_ERROR:
|
|
LOG(("FETCH_ERROR, '%s'", (const char *)data));
|
|
c->fetch = 0;
|
|
if (c->no_error_pages) {
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
msg_data.error = data;
|
|
content_broadcast(c, CONTENT_MSG_ERROR,
|
|
msg_data);
|
|
} else {
|
|
content_reset(c);
|
|
fetchcache_error_page(c, data);
|
|
}
|
|
break;
|
|
|
|
case FETCH_REDIRECT:
|
|
fetchcache_redirect(c, data, size);
|
|
break;
|
|
|
|
case FETCH_NOTMODIFIED:
|
|
fetchcache_notmodified(c, data);
|
|
break;
|
|
|
|
#ifdef WITH_AUTH
|
|
case FETCH_AUTH:
|
|
/* data -> string containing the Realm */
|
|
LOG(("FETCH_AUTH, '%s'", (const char *)data));
|
|
c->fetch = 0;
|
|
msg_data.auth_realm = data;
|
|
content_broadcast(c, CONTENT_MSG_AUTH, msg_data);
|
|
/* set the status to ERROR so that the content is
|
|
* destroyed in content_clean() */
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
break;
|
|
#endif
|
|
|
|
#ifdef WITH_SSL
|
|
case FETCH_CERT_ERR:
|
|
c->fetch = 0;
|
|
/* set the status to ERROR so that the content is
|
|
* destroyed in content_clean() */
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
|
|
msg_data.ssl.certs = data;
|
|
msg_data.ssl.num = size;
|
|
content_broadcast(c, CONTENT_MSG_SSL, msg_data);
|
|
break;
|
|
#endif
|
|
|
|
default:
|
|
assert(0);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Initialise the fetchcache module.
|
|
*/
|
|
|
|
void fetchcache_init(void)
|
|
{
|
|
regcomp_wrapper(&re_content_type,
|
|
"^([-0-9a-zA-Z_.]+/[-0-9a-zA-Z_.+]+)[ \t]*"
|
|
"(;[ \t]*([-0-9a-zA-Z_.]+)="
|
|
"([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
|
|
REG_EXTENDED);
|
|
}
|
|
|
|
|
|
/**
|
|
* Parse a Content-Type header.
|
|
*
|
|
* \param s a Content-Type header
|
|
* \param params updated to point to an array of strings, ordered attribute,
|
|
* value, attribute, ..., 0
|
|
* \return a new string containing the MIME-type, or 0 on memory exhaustion
|
|
*/
|
|
|
|
#define MAX_ATTRS 10
|
|
|
|
char *fetchcache_parse_type(const char *s, char **params[])
|
|
{
|
|
char *type = 0;
|
|
unsigned int i;
|
|
int r;
|
|
regmatch_t pmatch[2 + MAX_ATTRS * 3];
|
|
|
|
*params = malloc((MAX_ATTRS * 2 + 2) * sizeof (*params)[0]);
|
|
if (!*params)
|
|
goto no_memory;
|
|
for (i = 0; i != MAX_ATTRS * 2 + 2; i++)
|
|
(*params)[i] = 0;
|
|
|
|
r = regexec(&re_content_type, s, 2 + MAX_ATTRS * 3, pmatch, 0);
|
|
if (r) {
|
|
LOG(("failed to parse content-type '%s'", s));
|
|
/* The mime type must be first, so only copy up to the
|
|
* first semicolon in the string. This allows us to have
|
|
* a better attempt at handling pages sent with broken
|
|
* Content-Type headers. Obviously, any truly broken
|
|
* Content-Type headers will be unaffected by this heuristic
|
|
*/
|
|
char *semi = strchr(s, ';');
|
|
if (semi)
|
|
type = strndup(s, semi - s);
|
|
else
|
|
type = strdup(s);
|
|
if (!type)
|
|
goto no_memory;
|
|
return type;
|
|
}
|
|
|
|
type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
|
|
if (!type) {
|
|
free(*params);
|
|
return 0;
|
|
}
|
|
|
|
/* parameters */
|
|
for (i = 0; i != MAX_ATTRS && pmatch[2 + 3 * i].rm_so != -1; i++) {
|
|
(*params)[2 * i] = strndup(s + pmatch[2 + 3 * i + 1].rm_so,
|
|
pmatch[2 + 3 * i + 1].rm_eo -
|
|
pmatch[2 + 3 * i + 1].rm_so);
|
|
(*params)[2 * i + 1] = strndup(s + pmatch[2 + 3 * i + 2].rm_so,
|
|
pmatch[2 + 3 * i + 2].rm_eo -
|
|
pmatch[2 + 3 * i + 2].rm_so);
|
|
if (!(*params)[2 * i] || !(*params)[2 * i + 1])
|
|
goto no_memory;
|
|
}
|
|
(*params)[2 * i] = 0;
|
|
|
|
return type;
|
|
|
|
no_memory:
|
|
for (i = 0; i != MAX_ATTRS * 2 + 2; i++)
|
|
free((*params)[i]);
|
|
free(*params);
|
|
free(type);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* Generate an error page.
|
|
*
|
|
* \param c empty content to generate the page in
|
|
* \param error message to display
|
|
*/
|
|
|
|
void fetchcache_error_page(struct content *c, const char *error)
|
|
{
|
|
const char *params[] = { 0 };
|
|
int length;
|
|
|
|
if ((length = snprintf(error_page, sizeof(error_page),
|
|
messages_get("ErrorPage"), error)) < 0)
|
|
length = 0;
|
|
if (!content_set_type(c, CONTENT_HTML, "text/html", params))
|
|
return;
|
|
if (!content_process_data(c, error_page, length))
|
|
return;
|
|
content_convert(c, c->width, c->height);
|
|
}
|
|
|
|
|
|
/**
|
|
* Update a content's cache info
|
|
*
|
|
* \param The content
|
|
* \param Cache data
|
|
*/
|
|
|
|
void fetchcache_cache_update(struct content *c,
|
|
const struct cache_data *data)
|
|
{
|
|
assert(c && data);
|
|
|
|
c->cache_data->req_time = data->req_time;
|
|
c->cache_data->res_time = data->res_time;
|
|
|
|
if (data->date != 0)
|
|
c->cache_data->date = data->date;
|
|
else
|
|
c->cache_data->date = time(0);
|
|
|
|
if (data->expires != 0)
|
|
c->cache_data->expires = data->expires;
|
|
|
|
if (data->age != INVALID_AGE)
|
|
c->cache_data->age = data->age;
|
|
|
|
if (data->max_age != INVALID_AGE)
|
|
c->cache_data->max_age = data->max_age;
|
|
|
|
if (data->no_cache)
|
|
c->fresh = false;
|
|
|
|
if (data->etag) {
|
|
talloc_free(c->cache_data->etag);
|
|
c->cache_data->etag = talloc_strdup(c, data->etag);
|
|
}
|
|
|
|
if (data->last_modified)
|
|
c->cache_data->last_modified = data->last_modified;
|
|
}
|
|
|
|
|
|
/**
|
|
* Not modified callback handler
|
|
*/
|
|
|
|
void fetchcache_notmodified(struct content *c, const void *data)
|
|
{
|
|
struct content *fb;
|
|
union content_msg_data msg_data;
|
|
|
|
assert(c && data);
|
|
assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);
|
|
|
|
/* Look for cached content */
|
|
fb = content_get_ready(c->url);
|
|
|
|
if (fb) {
|
|
/* Found it */
|
|
intptr_t p1, p2;
|
|
void (*callback)(content_msg msg,
|
|
struct content *c, intptr_t p1,
|
|
intptr_t p2,
|
|
union content_msg_data data);
|
|
|
|
/* Now notify all users that we're changing content */
|
|
while (c->user_list->next) {
|
|
p1 = c->user_list->next->p1;
|
|
p2 = c->user_list->next->p2;
|
|
callback = c->user_list->next->callback;
|
|
|
|
if (!content_add_user(fb, callback, p1, p2)) {
|
|
c->type = CONTENT_UNKNOWN;
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
msg_data.error = messages_get("NoMemory");
|
|
content_broadcast(c, CONTENT_MSG_ERROR,
|
|
msg_data);
|
|
return;
|
|
}
|
|
|
|
content_remove_user(c, callback, p1, p2);
|
|
callback(CONTENT_MSG_NEWPTR, fb, p1, p2, msg_data);
|
|
|
|
/* and catch user up with fallback's state */
|
|
if (fb->status == CONTENT_STATUS_LOADING) {
|
|
callback(CONTENT_MSG_LOADING,
|
|
fb, p1, p2, msg_data);
|
|
} else if (fb->status == CONTENT_STATUS_READY) {
|
|
callback(CONTENT_MSG_LOADING,
|
|
fb, p1, p2, msg_data);
|
|
if (content_find_user(fb, callback, p1, p2))
|
|
callback(CONTENT_MSG_READY,
|
|
fb, p1, p2, msg_data);
|
|
} else if (fb->status == CONTENT_STATUS_DONE) {
|
|
callback(CONTENT_MSG_LOADING,
|
|
fb, p1, p2, msg_data);
|
|
if (content_find_user(fb, callback, p1, p2))
|
|
callback(CONTENT_MSG_READY,
|
|
fb, p1, p2, msg_data);
|
|
if (content_find_user(fb, callback, p1, p2))
|
|
callback(CONTENT_MSG_DONE,
|
|
fb, p1, p2, msg_data);
|
|
} else if (fb->status == CONTENT_STATUS_ERROR) {
|
|
/* shouldn't usually occur */
|
|
msg_data.error = messages_get("MiscError");
|
|
callback(CONTENT_MSG_ERROR, fb, p1, p2,
|
|
msg_data);
|
|
}
|
|
}
|
|
|
|
/* mark content invalid */
|
|
c->fetch = 0;
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
|
|
/* and update fallback's cache control data */
|
|
fetchcache_cache_update(fb,
|
|
(const struct cache_data *)data);
|
|
}
|
|
else {
|
|
/* No cached content, so unconditionally refetch */
|
|
struct content_user *u;
|
|
const char *ref = fetch_get_referer(c->fetch);
|
|
const char *parent = fetch_get_parent_url(c->fetch);
|
|
char *referer = NULL;
|
|
char *parent_url = NULL;
|
|
|
|
if (ref) {
|
|
referer = strdup(ref);
|
|
if (!referer) {
|
|
c->type = CONTENT_UNKNOWN;
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
msg_data.error = messages_get("NoMemory");
|
|
content_broadcast(c, CONTENT_MSG_ERROR,
|
|
msg_data);
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (parent) {
|
|
parent_url = strdup(parent);
|
|
if (!parent_url) {
|
|
c->type = CONTENT_UNKNOWN;
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
msg_data.error = messages_get("NoMemory");
|
|
content_broadcast(c, CONTENT_MSG_ERROR,
|
|
msg_data);
|
|
free(referer);
|
|
return;
|
|
}
|
|
}
|
|
|
|
fetch_abort(c->fetch);
|
|
c->fetch = 0;
|
|
|
|
c->cache_data->date = 0;
|
|
talloc_free(c->cache_data->etag);
|
|
c->cache_data->etag = 0;
|
|
|
|
for (u = c->user_list->next; u; u = u->next) {
|
|
fetchcache_go(c, referer, u->callback, u->p1, u->p2,
|
|
c->width, c->height, 0, 0,
|
|
false, parent_url);
|
|
}
|
|
|
|
free(parent_url);
|
|
free(referer);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Redirect callback handler
|
|
*/
|
|
|
|
void fetchcache_redirect(struct content *c, const void *data,
|
|
unsigned long size)
|
|
{
|
|
char *url, *url1;
|
|
char *referer, *parent_url;
|
|
long http_code = fetch_http_code(c->fetch);
|
|
const char *ref = fetch_get_referer(c->fetch);
|
|
const char *parent = fetch_get_parent_url(c->fetch);
|
|
union content_msg_data msg_data;
|
|
url_func_result result;
|
|
|
|
/* Preconditions */
|
|
assert(c && data);
|
|
assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);
|
|
/* Ensure a redirect happened */
|
|
assert(300 <= http_code && http_code <= 399);
|
|
/* 304 is handled by fetch_notmodified() */
|
|
assert(http_code != 304);
|
|
|
|
/* Extract fetch details */
|
|
http_code = fetch_http_code(c->fetch);
|
|
ref = fetch_get_referer(c->fetch);
|
|
parent = fetch_get_parent_url(c->fetch);
|
|
|
|
/* Clone referer and parent url
|
|
* originals are destroyed in fetch_abort() */
|
|
referer = ref ? strdup(ref) : NULL;
|
|
parent_url = parent ? strdup(parent) : NULL;
|
|
|
|
/* set the status to ERROR so that this content is
|
|
* destroyed in content_clean() */
|
|
fetch_abort(c->fetch);
|
|
c->fetch = 0;
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
|
|
/* Ensure that referer cloning succeeded
|
|
* _must_ be after content invalidation */
|
|
if (ref && !referer) {
|
|
LOG(("Failed cloning referer"));
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
return;
|
|
}
|
|
|
|
/* Ensure parent url cloning succeeded
|
|
* _must_ be after content invalidation */
|
|
if (parent && !parent_url) {
|
|
LOG(("Failed cloning parent url"));
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
free(referer);
|
|
return;
|
|
}
|
|
|
|
/** \todo 300, 305, 307
|
|
* More specifically:
|
|
* + 300 needs to serve up the fetch body to the user
|
|
* + 305 needs to refetch using the proxy specified in ::data
|
|
* + 307 needs to refetch.
|
|
*
|
|
* If the original request method was either GET or HEAD, then follow
|
|
* redirect unconditionally. If the original request method was neither
|
|
* GET nor HEAD, then the user MUST be asked what to do.
|
|
*
|
|
* Note:
|
|
* For backwards compatibility, all 301, 302 and 303 redirects are
|
|
* followed unconditionally with a GET request to the new location.
|
|
*/
|
|
if (http_code != 301 && http_code != 302 && http_code != 303) {
|
|
LOG(("Unsupported redirect type %ld", http_code));
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
free(parent_url);
|
|
free(referer);
|
|
return;
|
|
}
|
|
|
|
/* Forcibly stop redirecting if we've followed too many redirects */
|
|
#define REDIRECT_LIMIT 10
|
|
if (c->redirect_count > REDIRECT_LIMIT) {
|
|
LOG(("Too many nested redirects"));
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
free(parent_url);
|
|
free(referer);
|
|
return;
|
|
}
|
|
#undef REDIRECT_LIMIT
|
|
|
|
/* redirect URLs must be absolute by HTTP/1.1, but many
|
|
* sites send relative ones: treat them as relative to
|
|
* requested URL */
|
|
result = url_join(data, c->url, &url1);
|
|
if (result != URL_FUNC_OK) {
|
|
msg_data.error = messages_get("BadRedirect");
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
free(parent_url);
|
|
free(referer);
|
|
return;
|
|
}
|
|
|
|
/* Normalize redirect target -- this is vital as this URL may
|
|
* be inserted into the urldb, which expects normalized URLs */
|
|
result = url_normalize(url1, &url);
|
|
if (result != URL_FUNC_OK) {
|
|
msg_data.error = messages_get("BadRedirect");
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
free(url1);
|
|
free(parent_url);
|
|
free(referer);
|
|
return;
|
|
}
|
|
|
|
/* No longer need url1 */
|
|
free(url1);
|
|
|
|
/* Process users of this content */
|
|
while (c->user_list->next) {
|
|
intptr_t p1, p2;
|
|
void (*callback)(content_msg msg,
|
|
struct content *c, intptr_t p1,
|
|
intptr_t p2,
|
|
union content_msg_data data);
|
|
struct content *replacement;
|
|
|
|
p1 = c->user_list->next->p1;
|
|
p2 = c->user_list->next->p2;
|
|
callback = c->user_list->next->callback;
|
|
|
|
/* Remove user */
|
|
content_remove_user(c, callback, p1, p2);
|
|
|
|
/* Get replacement content -- HTTP GET request */
|
|
replacement = fetchcache(url, callback, p1, p2,
|
|
c->width, c->height, c->no_error_pages,
|
|
NULL, NULL, false, c->download);
|
|
if (!replacement) {
|
|
msg_data.error = messages_get("BadRedirect");
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
free(url);
|
|
free(parent_url);
|
|
free(referer);
|
|
return;
|
|
}
|
|
|
|
/* Set replacement's redirect count to 1 greater than ours */
|
|
replacement->redirect_count = c->redirect_count + 1;
|
|
|
|
/* Notify user that content has changed */
|
|
callback(CONTENT_MSG_NEWPTR, replacement, p1, p2, msg_data);
|
|
|
|
/* Start fetching the replacement content */
|
|
fetchcache_go(replacement, referer, callback, p1, p2,
|
|
c->width, c->height, NULL, NULL,
|
|
false, parent_url);
|
|
}
|
|
|
|
/* Clean up */
|
|
free(url);
|
|
free(parent_url);
|
|
free(referer);
|
|
}
|
|
|
|
#ifdef TEST
|
|
|
|
#include <unistd.h>
|
|
|
|
/**
 * Test harness callback: log the outcome reported for a URL.
 *
 * \param msg    outcome being reported
 * \param c      content concerned (unused)
 * \param p      the URL string that was passed to fetchcache() by main()
 * \param error  error text, logged for FETCHCACHE_ERROR only
 */
void callback(fetchcache_msg msg, struct content *c, void *p, char *error)
{
	if (msg == FETCHCACHE_OK) {
		LOG(("FETCHCACHE_OK, url '%s'", p));
	} else if (msg == FETCHCACHE_BADTYPE) {
		LOG(("FETCHCACHE_BADTYPE, url '%s'", p));
	} else if (msg == FETCHCACHE_ERROR) {
		LOG(("FETCHCACHE_ERROR, url '%s', error '%s'", p, error));
	} else {
		/* no other message is expected */
		assert(0);
	}
}
|
|
|
|
char *test[] = {"http://www.google.co.uk/", "http://www.ox.ac.uk/", "blah://blah/"};
|
|
|
|
int main(void)
|
|
{
|
|
int i;
|
|
|
|
cache_init();
|
|
fetch_init();
|
|
|
|
for (i = 0; i != sizeof(test) / sizeof(test[0]); i++)
|
|
fetchcache(test[i], 0, callback, test[i], 800, 0);
|
|
for (i = 0; i != 5; i++) {
|
|
fetch_poll();
|
|
sleep(1);
|
|
}
|
|
for (i = 0; i != sizeof(test) / sizeof(test[0]); i++)
|
|
fetchcache(test[i], 0, callback, test[i], 800, 0);
|
|
for (i = 0; i != 20; i++) {
|
|
fetch_poll();
|
|
sleep(1);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#endif
|