/*
 * Copyright 2005 James Bursa <bursa@users.sourceforge.net>
 *
 * This file is part of NetSurf, http://www.netsurf-browser.org/
 *
 * NetSurf is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * NetSurf is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/** \file
 * High-level fetching, caching and conversion (implementation).
 *
 * The implementation checks the cache for the requested URL. If it is not
 * present, a content is created and a fetch is initiated. As the status of the
 * fetch changes and data is received, the content is updated appropriately.
 */
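
/*
 * Typical client usage is sketched below.  This is an illustrative
 * outline only; my_callback and the p1/p2 values are hypothetical
 * client-side names, not part of this module.  Request the content
 * with fetchcache(), then start work on it with fetchcache_go();
 * progress is reported through the supplied callback.
 *
 *	struct content *c;
 *	c = fetchcache("http://www.example.com/", my_callback, p1, p2,
 *			width, height, false, 0, 0, true, false);
 *	if (c)
 *		fetchcache_go(c, 0, my_callback, p1, p2, width, height,
 *				0, 0, true, 0);
 */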

#define _GNU_SOURCE /* for strndup */
#include <assert.h>
#include <string.h>
#include <strings.h>
#include <sys/types.h>
#include <regex.h>
#include <time.h>
#include <curl/curl.h> /* for curl_getdate() */
#include "utils/config.h"
#include "content/content.h"
#include "content/fetchcache.h"
#include "content/fetch.h"
#include "utils/log.h"
#include "utils/messages.h"
#include "utils/talloc.h"
#include "utils/url.h"
#include "utils/utils.h"


static char error_page[1000];
static regex_t re_content_type;
static void fetchcache_callback(fetch_msg msg, void *p, const void *data,
		unsigned long size);
static char *fetchcache_parse_type(const char *s, char **params[]);
static void fetchcache_parse_header(struct content *c, const char *data,
		size_t size);
static void fetchcache_error_page(struct content *c, const char *error);
static void fetchcache_cache_update(struct content *c);
static void fetchcache_cache_clone(struct content *c,
		const struct cache_data *data);
static void fetchcache_notmodified(struct content *c, const void *data);
static void fetchcache_redirect(struct content *c, const void *data,
		unsigned long size);


/**
 * Retrieve a URL or prepare to fetch, convert, and cache it.
 *
 * The caller must supply a callback function which is called when anything
 * interesting happens to the content which is returned. See content.h.
 *
 * \param  url             address to fetch
 * \param  callback        function to call when anything interesting happens
 *                         to the new content
 * \param  p1              user parameter for callback (may be a pointer or integer)
 * \param  p2              user parameter for callback (may be a pointer or integer)
 * \param  width           available space
 * \param  height          available space
 * \param  no_error_pages  if an error occurs, send CONTENT_MSG_ERROR instead
 *                         of generating an error page
 * \param  post_urlenc     url encoded post data, or 0 if none
 * \param  post_multipart  multipart post data, or 0 if none
 * \param  verifiable      this transaction is verifiable
 * \param  download        download, rather than render content
 * \return  a new content, or 0 on memory exhaustion
 *
 * On success, call fetchcache_go() to start work on the new content.
 */

struct content * fetchcache(const char *url,
		void (*callback)(content_msg msg, struct content *c,
			intptr_t p1, intptr_t p2, union content_msg_data data),
		intptr_t p1, intptr_t p2,
		int width, int height,
		bool no_error_pages,
		char *post_urlenc,
		struct form_successful_control *post_multipart,
		bool verifiable,
		bool download)
{
	struct content *c;
	char *url1;
	char *hash, *query;
	char *etag = 0;
	time_t date = 0;

	if (strncasecmp(url, "file:///", 8) != 0 &&
			strncasecmp(url, "file:/", 6) == 0) {
		/* Manipulate file URLs into correct format */
		if (strncasecmp(url, "file://", 7) == 0) {
			/* file://path */
			url1 = malloc(7 + strlen(url));
			if (!url1)
				return NULL;

			strcpy(url1, "file://");
			strcat(url1 + 7, url + 6);
		} else {
			/* file:/... */
			url1 = malloc(7 + strlen(url));
			if (!url1)
				return NULL;

			strcpy(url1, "file://");
			strcat(url1 + 7, url + 5);
		}
	} else {
		/* simply duplicate the URL */
		if ((url1 = strdup(url)) == NULL)
			return NULL;
	}

	/* strip fragment identifier */
	if ((hash = strchr(url1, '#')) != NULL)
		*hash = 0;

	/* look for query; we don't cache URLs with a query segment */
	query = strchr(url1, '?');

	LOG(("url %s", url1));

	if (!post_urlenc && !post_multipart && !download && !query) {
		if ((c = content_get(url1)) != NULL) {
			struct cache_data *cd = &c->cache_data;
			int current_age, freshness_lifetime;

			/* Calculate staleness of cached content as per
			 * RFC 2616 13.2.3/13.2.4 */
			current_age = max(0, (cd->res_time - cd->date));
			current_age = max(current_age,
					(cd->age == INVALID_AGE) ? 0
							: cd->age);
			current_age += cd->res_time - cd->req_time +
					time(0) - cd->res_time;
			freshness_lifetime =
				(cd->max_age != INVALID_AGE) ? cd->max_age :
				(cd->expires != 0) ? cd->expires - cd->date :
				(cd->last_modified != 0) ?
					(time(0) - cd->last_modified) / 10 :
					0;
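
			/* Worked example (illustrative numbers, not from
			 * the original source): suppose the request went
			 * out at t=1000, the response arrived at t=1010
			 * carrying "Date: t=1005" and "Age: 10", and it
			 * is now t=1100.  Then:
			 *
			 *	current_age  = max(0, 1010 - 1005)           =   5
			 *	current_age  = max(5, 10)                    =  10
			 *	current_age += (1010 - 1000) + (1100 - 1010) = 110
			 *
			 * With "Cache-Control: max-age=300" the entry is
			 * still fresh (300 > 110); with no validators at
			 * all, freshness_lifetime is 0 and the entry must
			 * be revalidated below. */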

			if (freshness_lifetime > current_age ||
					cd->date == 0) {
				/* Ok, either a fresh content or we're
				 * currently fetching the selected content
				 * (therefore it must be fresh) */
				free(url1);
				if (!content_add_user(c, callback, p1, p2))
					return NULL;
				else
					return c;
			}

			/* Ok. We have a cache entry, but it appears stale.
			 * Therefore, validate it. */
			if (cd->last_modified)
				date = cd->last_modified;
			else
				date = c->cache_data.date;
			etag = c->cache_data.etag;
		}
	}

	c = content_create(url1);
	free(url1);
	if (!c)
		return NULL;

	/* Fill in cache validation fields (if present) */
	if (date)
		c->cache_data.date = date;
	if (etag) {
		c->cache_data.etag = talloc_strdup(c, etag);
		if (!c->cache_data.etag)
			return NULL;
	}

	if (!content_add_user(c, callback, p1, p2)) {
		return NULL;
	}

	if (!post_urlenc && !post_multipart && !download && !query)
		c->fresh = true;

	c->width = width;
	c->height = height;
	c->no_error_pages = no_error_pages;
	c->download = download;

	return c;
}


/**
 * Start fetching and converting a content.
 *
 * \param  content         content to fetch, as returned by fetchcache()
 * \param  referer         referring URL, or 0
 * \param  callback        function to call when anything interesting happens
 *                         to the new content
 * \param  p1              user parameter for callback
 * \param  p2              user parameter for callback
 * \param  width           available space
 * \param  height          available space
 * \param  post_urlenc     url encoded post data, or 0 if none
 * \param  post_multipart  multipart post data, or 0 if none
 * \param  verifiable      this transaction is verifiable
 * \param  parent_url      URL of fetch which spawned this one, or 0 if none
 *
 * Errors will be sent back through the callback.
 */

void fetchcache_go(struct content *content, const char *referer,
		void (*callback)(content_msg msg, struct content *c,
			intptr_t p1, intptr_t p2, union content_msg_data data),
		intptr_t p1, intptr_t p2,
		int width, int height,
		char *post_urlenc,
		struct form_successful_control *post_multipart,
		bool verifiable, const char *parent_url)
{
	char error_message[500];
	union content_msg_data msg_data;

	LOG(("url %s, status %s", content->url,
			content_status_name[content->status]));

	/* We may well have been asked to fetch a URL using a protocol
	 * that we can't support. Check for this here and, if we can't
	 * perform the fetch, notify the caller and exit */
	if (!fetch_can_fetch(content->url)) {

		/* The only case where this should fail is if we're a
		 * brand new content with no active fetch. If we're not,
		 * another content with the same URL somehow got through
		 * the fetch_can_fetch check. That should be impossible.
		 */
		assert(content->status == CONTENT_STATUS_TYPE_UNKNOWN &&
				!content->fetch);

		snprintf(error_message, sizeof error_message,
				messages_get("InvalidURL"),
				content->url);

		if (content->no_error_pages) {
			/* Mark as in error so content is destroyed
			 * on cache clean */
			content->status = CONTENT_STATUS_ERROR;
			msg_data.error = error_message;
			callback(CONTENT_MSG_ERROR,
					content, p1, p2, msg_data);
		} else {
			fetchcache_error_page(content, error_message);
		}

		return;
	}

	if (content->status == CONTENT_STATUS_TYPE_UNKNOWN &&
			content->fetch) {
		/* fetching, but not yet received any response:
		 * no action required */

	} else if (content->status == CONTENT_STATUS_TYPE_UNKNOWN) {
		/* brand new content: start fetch */
		char **headers;
		int i = 0;
		char *etag = content->cache_data.etag;
		time_t date = content->cache_data.date;

		content->cache_data.req_time = time(NULL);
		content->cache_data.res_time = 0;
		content->cache_data.date = 0;
		content->cache_data.expires = 0;
		content->cache_data.age = INVALID_AGE;
		content->cache_data.max_age = INVALID_AGE;
		content->cache_data.no_cache = false;
		content->cache_data.etag = 0;
		content->cache_data.last_modified = 0;

		headers = malloc(3 * sizeof(char *));
		if (!headers) {
			content->status = CONTENT_STATUS_ERROR;
			msg_data.error = messages_get("NoMemory");
			callback(CONTENT_MSG_ERROR, content, p1, p2,
					msg_data);
			return;
		}
		if (etag) {
			headers[i] = malloc(15 + strlen(etag) + 1);
			if (!headers[i]) {
				free(headers);
				content->status = CONTENT_STATUS_ERROR;
				msg_data.error = messages_get("NoMemory");
				callback(CONTENT_MSG_ERROR, content, p1, p2,
						msg_data);
				return;
			}
			sprintf(headers[i++], "If-None-Match: %s", etag);
			talloc_free(etag);
		}
		if (date) {
			headers[i] = malloc(19 + 29 + 1);
			if (!headers[i]) {
				while (--i >= 0) {
					free(headers[i]);
				}
				free(headers);
				content->status = CONTENT_STATUS_ERROR;
				msg_data.error = messages_get("NoMemory");
				callback(CONTENT_MSG_ERROR, content, p1, p2,
						msg_data);
				return;
			}
			sprintf(headers[i++], "If-Modified-Since: %s",
					rfc1123_date(date));
		}
		headers[i] = 0;
		content->fetch = fetch_start(content->url, referer,
				fetchcache_callback, content,
				content->no_error_pages,
				post_urlenc, post_multipart, verifiable,
				parent_url, headers);
		for (i = 0; headers[i]; i++)
			free(headers[i]);
		free(headers);
		if (!content->fetch) {
			LOG(("warning: fetch_start failed"));
			snprintf(error_message, sizeof error_message,
					messages_get("InvalidURL"),
					content->url);
			if (content->no_error_pages) {
				content->status = CONTENT_STATUS_ERROR;
				msg_data.error = error_message;
				content_broadcast(content, CONTENT_MSG_ERROR,
						msg_data);
			} else {
				fetchcache_error_page(content, error_message);
			}
		}

	/* in these remaining cases, we have to 'catch up' with the content's
	 * status, i.e. send the same messages as if the content was
	 * gradually getting to the current status from TYPE_UNKNOWN */
	} else if (content->status == CONTENT_STATUS_LOADING) {
		callback(CONTENT_MSG_LOADING, content, p1, p2, msg_data);

	} else if (content->status == CONTENT_STATUS_READY) {
		callback(CONTENT_MSG_LOADING, content, p1, p2, msg_data);
		if (content_find_user(content, callback, p1, p2))
			callback(CONTENT_MSG_READY, content, p1, p2, msg_data);

	} else if (content->status == CONTENT_STATUS_DONE) {
		callback(CONTENT_MSG_LOADING, content, p1, p2, msg_data);
		if (content->available_width != width)
			content_reformat(content, width, height);
		if (content_find_user(content, callback, p1, p2))
			callback(CONTENT_MSG_READY, content, p1, p2, msg_data);
		if (content_find_user(content, callback, p1, p2))
			callback(CONTENT_MSG_DONE, content, p1, p2, msg_data);

	} else if (content->status == CONTENT_STATUS_ERROR) {
		/* shouldn't usually occur */
		msg_data.error = messages_get("MiscError");
		callback(CONTENT_MSG_ERROR, content, p1, p2, msg_data);
	}
}


/**
 * Callback function for fetch.
 *
 * This is called when the status of a fetch changes.
 */

void fetchcache_callback(fetch_msg msg, void *p, const void *data,
		unsigned long size)
{
	bool res;
	struct content *c = p;
	content_type type;
	char *mime_type;
	char **params;
	unsigned int i;
	union content_msg_data msg_data;

	switch (msg) {
		case FETCH_TYPE:
			c->total_size = size;
			c->http_code = fetch_http_code(c->fetch);
			mime_type = fetchcache_parse_type(data, &params);
			if (!mime_type) {
				msg_data.error = messages_get("NoMemory");
				content_broadcast(c, CONTENT_MSG_ERROR,
						msg_data);
				fetch_abort(c->fetch);
				c->fetch = 0;
				return;
			}
			type = content_lookup(mime_type);
			res = content_set_type(c,
					c->download ? CONTENT_OTHER : type,
					mime_type, (const char **) params);
			free(mime_type);
			for (i = 0; params[i]; i++)
				free(params[i]);
			free(params);
			if (!res) {
				fetch_abort(c->fetch);
				c->fetch = 0;
			}

			if (c->cache_data.date || c->cache_data.etag) {
				/* We've just made a conditional request
				 * that returned with something other
				 * than 304. Therefore, there's a stale
				 * content floating around in the cache.
				 * Hunt it down and mark it as stale, so
				 * it'll get cleaned when unused. We
				 * assume it's either READY or DONE --
				 * anything else is of marginal staleness
				 * (or in error, which will cause it to
				 * be flushed from the cache, anyway)
				 */
				struct content *stale_content =
						content_get_ready(c->url);

				if (stale_content)
					stale_content->fresh = false;
			}
			break;

		case FETCH_PROGRESS:
			if (size)
				content_set_status(c,
						messages_get("RecPercent"),
						data, (unsigned int)size);
			else
				content_set_status(c,
						messages_get("Received"),
						data);
			content_broadcast(c, CONTENT_MSG_STATUS, msg_data);
			break;

		case FETCH_HEADER:
			LOG(("FETCH_HEADER \"%.*s\"",
					(int) size, (char *) data));
			fetchcache_parse_header(c, data, size);
			break;

		case FETCH_DATA:
			if (!content_process_data(c, data, size)) {
				fetch_abort(c->fetch);
				c->fetch = 0;
			}
			break;

		case FETCH_FINISHED:
			fetchcache_cache_update(c);
			c->fetch = 0;
			content_set_status(c, messages_get("Converting"),
					c->source_size);
			content_broadcast(c, CONTENT_MSG_STATUS, msg_data);
			content_convert(c, c->width, c->height);
			break;

		case FETCH_ERROR:
			LOG(("FETCH_ERROR, '%s'", (const char *)data));
			c->fetch = 0;
			if (c->no_error_pages) {
				c->status = CONTENT_STATUS_ERROR;
				msg_data.error = data;
				content_broadcast(c, CONTENT_MSG_ERROR,
						msg_data);
			} else {
				content_reset(c);
				fetchcache_error_page(c, data);
			}
			break;

		case FETCH_REDIRECT:
			fetchcache_redirect(c, data, size);
			break;

		case FETCH_NOTMODIFIED:
			fetchcache_notmodified(c, data);
			break;

#ifdef WITH_AUTH
		case FETCH_AUTH:
			/* data -> string containing the Realm */
			LOG(("FETCH_AUTH, '%s'", (const char *)data));
			c->fetch = 0;
			msg_data.auth_realm = data;
			content_broadcast(c, CONTENT_MSG_AUTH, msg_data);
			/* set the status to ERROR so that the content is
			 * destroyed in content_clean() */
			c->status = CONTENT_STATUS_ERROR;
			break;
#endif

#ifdef WITH_SSL
		case FETCH_CERT_ERR:
			c->fetch = 0;
			/* set the status to ERROR so that the content is
			 * destroyed in content_clean() */
			c->status = CONTENT_STATUS_ERROR;

			msg_data.ssl.certs = data;
			msg_data.ssl.num = size;
			content_broadcast(c, CONTENT_MSG_SSL, msg_data);
			break;
#endif

		default:
			assert(0);
	}
}


/**
 * Initialise the fetchcache module.
 */

void fetchcache_init(void)
{
	regcomp_wrapper(&re_content_type,
			"^([-0-9a-zA-Z_.]+/[-0-9a-zA-Z_.+]+)[ \t]*"
			"(;[ \t]*([-0-9a-zA-Z_.]+)="
			"([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
			REG_EXTENDED);
}


/**
 * Parse a Content-Type header.
 *
 * \param  s       a Content-Type header
 * \param  params  updated to point to an array of strings, ordered attribute,
 *                 value, attribute, ..., 0
 * \return  a new string containing the MIME-type, or 0 on memory exhaustion
 */

#define MAX_ATTRS 10
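
/*
 * Illustrative sketch (example values, not from the original source):
 * given s = "text/html; charset=ISO-8859-1", this returns a copy of
 * "text/html" and sets *params to { "charset", "ISO-8859-1", 0 }.
 * The caller is expected to free the returned string and every entry
 * of *params, as fetchcache_callback() does after content_set_type().
 */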

char *fetchcache_parse_type(const char *s, char **params[])
{
	char *type = 0;
	unsigned int i;
	int r;
	regmatch_t pmatch[2 + MAX_ATTRS * 3];

	*params = malloc((MAX_ATTRS * 2 + 2) * sizeof (*params)[0]);
	if (!*params)
		goto no_memory;
	for (i = 0; i != MAX_ATTRS * 2 + 2; i++)
		(*params)[i] = 0;

	r = regexec(&re_content_type, s, 2 + MAX_ATTRS * 3, pmatch, 0);
	if (r) {
		char *semi;
		LOG(("failed to parse content-type '%s'", s));
		/* The mime type must be first, so only copy up to the
		 * first semicolon in the string. This allows us to have
		 * a better attempt at handling pages sent with broken
		 * Content-Type headers. Obviously, any truly broken
		 * Content-Type headers will be unaffected by this heuristic
		 */
		semi = strchr(s, ';');
		if (semi)
			type = strndup(s, semi - s);
		else
			type = strdup(s);
		if (!type)
			goto no_memory;
		return type;
	}

	type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
	if (!type) {
		free(*params);
		return 0;
	}

	/* parameters */
	for (i = 0; i != MAX_ATTRS && pmatch[2 + 3 * i].rm_so != -1; i++) {
		(*params)[2 * i] = strndup(s + pmatch[2 + 3 * i + 1].rm_so,
				pmatch[2 + 3 * i + 1].rm_eo -
				pmatch[2 + 3 * i + 1].rm_so);
		(*params)[2 * i + 1] = strndup(s + pmatch[2 + 3 * i + 2].rm_so,
				pmatch[2 + 3 * i + 2].rm_eo -
				pmatch[2 + 3 * i + 2].rm_so);
		if (!(*params)[2 * i] || !(*params)[2 * i + 1])
			goto no_memory;
	}
	(*params)[2 * i] = 0;

	return type;

no_memory:
	/* guard against the initial malloc having failed, in which case
	 * there is no params array to walk */
	if (*params) {
		for (i = 0; i != MAX_ATTRS * 2 + 2; i++)
			free((*params)[i]);
		free(*params);
		*params = 0;
	}
	free(type);

	return 0;
}


/**
 * Parse an HTTP response header.
 *
 * See RFC 2616 4.2.
 */
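
/*
 * Illustrative examples (header values invented for this sketch; the
 * raw header lines passed in retain their trailing CRLF, which the
 * ETag branch below strips explicitly):
 *
 *	"Date: Thu, 01 May 2008 12:00:00 GMT\r\n" -> date (via curl_getdate())
 *	"Age: 60\r\n"                             -> age = 60
 *	"Cache-Control: no-cache\r\n"             -> no_cache = true
 *	"Cache-Control: max-age=300\r\n"          -> max_age = 300
 *	"ETag: \"abc123\"\r\n"                    -> etag = "\"abc123\""
 *	"Last-Modified: Tue, 01 Apr 2008 ...\r\n" -> last_modified
 */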

void fetchcache_parse_header(struct content *c, const char *data,
		size_t size)
{
	size_t i;

#define SKIP_ST(o) for (i = (o); i < size && (data[i] == ' ' || data[i] == '\t'); i++)

	/* Set fetch response time if not already set */
	if (c->cache_data.res_time == 0)
		c->cache_data.res_time = time(NULL);

	if (5 < size && strncasecmp(data, "Date:", 5) == 0) {
		/* extract Date header */
		SKIP_ST(5);
		if (i < size)
			c->cache_data.date = curl_getdate(&data[i], NULL);
	} else if (4 < size && strncasecmp(data, "Age:", 4) == 0) {
		/* extract Age header */
		SKIP_ST(4);
		if (i < size && '0' <= data[i] && data[i] <= '9')
			c->cache_data.age = atoi(data + i);
	} else if (8 < size && strncasecmp(data, "Expires:", 8) == 0) {
		/* extract Expires header */
		SKIP_ST(8);
		if (i < size)
			c->cache_data.expires = curl_getdate(&data[i], NULL);
	} else if (14 < size && strncasecmp(data, "Cache-Control:", 14) == 0) {
		/* extract and parse Cache-Control header */
		size_t comma;
		SKIP_ST(14);

		while (i < size) {
			for (comma = i; comma < size; comma++)
				if (data[comma] == ',')
					break;

			SKIP_ST(i);

			if (8 < comma - i &&
					(strncasecmp(data + i,
						"no-cache", 8) == 0 ||
					strncasecmp(data + i,
						"no-store", 8) == 0))
				/* When we get a disk cache we should
				 * distinguish between these two */
				c->cache_data.no_cache = true;
			else if (7 < comma - i && strncasecmp(data + i,
					"max-age", 7) == 0) {
				for (; i < comma; i++)
					if (data[i] == '=')
						break;
				SKIP_ST(i + 1);
				if (i < comma)
					c->cache_data.max_age =
							atoi(data + i);
			}

			i = comma + 1;
		}
	} else if (5 < size && strncasecmp(data, "ETag:", 5) == 0) {
		/* extract ETag header */
		talloc_free(c->cache_data.etag);
		c->cache_data.etag = talloc_array(c, char, size);
		if (!c->cache_data.etag) {
			LOG(("malloc failed"));
			return;
		}
		SKIP_ST(5);
		strncpy(c->cache_data.etag, data + i, size - i);
		c->cache_data.etag[size - i] = '\0';
		for (i = size - i - 1; ((int) i) >= 0 &&
				(c->cache_data.etag[i] == ' ' ||
				c->cache_data.etag[i] == '\t' ||
				c->cache_data.etag[i] == '\r' ||
				c->cache_data.etag[i] == '\n'); --i)
			c->cache_data.etag[i] = '\0';
	} else if (14 < size && strncasecmp(data, "Last-Modified:", 14) == 0) {
		/* extract Last-Modified header */
		SKIP_ST(14);
		if (i < size) {
			c->cache_data.last_modified =
					curl_getdate(&data[i], NULL);
		}
	}

	return;
}


/**
 * Generate an error page.
 *
 * \param  c      empty content to generate the page in
 * \param  error  message to display
 */

void fetchcache_error_page(struct content *c, const char *error)
{
	const char *params[] = { 0 };
	int length;

	if ((length = snprintf(error_page, sizeof(error_page),
			messages_get("ErrorPage"), error)) < 0)
		length = 0;
	if (!content_set_type(c, CONTENT_HTML, "text/html", params))
		return;
	if (!content_process_data(c, error_page, length))
		return;
	content_convert(c, c->width, c->height);

	/* Mark content as non-fresh, so it'll get cleaned from the
	 * cache at the earliest opportunity */
	c->fresh = false;
}


/**
 * Update a content's cache state.
 *
 * \param  c  The content
 */

void fetchcache_cache_update(struct content *c)
{
	if (c->cache_data.date == 0)
		c->cache_data.date = time(NULL);

	if (c->cache_data.no_cache)
		c->fresh = false;
}


/**
 * Clone cache info into a content.
 *
 * \param  c     The content
 * \param  data  Cache data
 */

void fetchcache_cache_clone(struct content *c,
		const struct cache_data *data)
{
	assert(c && data);

	c->cache_data.req_time = data->req_time;
	c->cache_data.res_time = data->res_time;

	if (data->date != 0)
		c->cache_data.date = data->date;

	if (data->expires != 0)
		c->cache_data.expires = data->expires;

	if (data->age != INVALID_AGE)
		c->cache_data.age = data->age;

	if (data->max_age != INVALID_AGE)
		c->cache_data.max_age = data->max_age;

	if (data->no_cache)
		c->cache_data.no_cache = data->no_cache;

	if (data->etag) {
		talloc_free(c->cache_data.etag);
		c->cache_data.etag = talloc_strdup(c, data->etag);
	}

	if (data->last_modified)
		c->cache_data.last_modified = data->last_modified;
}


/**
 * Not modified callback handler.
 *
 * The fetch has returned 304: either hand the content's users over to
 * the still-valid cached content, or, if it has gone, refetch
 * unconditionally.
 */

void fetchcache_notmodified(struct content *c, const void *data)
{
	struct content *fb;
	union content_msg_data msg_data;

	assert(c);
	assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);

	/* Look for cached content */
	fb = content_get_ready(c->url);

	if (fb) {
		/* Found it */
		intptr_t p1, p2;
		void (*callback)(content_msg msg,
				struct content *c, intptr_t p1,
				intptr_t p2,
				union content_msg_data data);

		/* Now notify all users that we're changing content */
		while (c->user_list->next) {
			p1 = c->user_list->next->p1;
			p2 = c->user_list->next->p2;
			callback = c->user_list->next->callback;

			if (!content_add_user(fb, callback, p1, p2)) {
				c->type = CONTENT_UNKNOWN;
				c->status = CONTENT_STATUS_ERROR;
				msg_data.error = messages_get("NoMemory");
				content_broadcast(c, CONTENT_MSG_ERROR,
						msg_data);
				return;
			}

			content_remove_user(c, callback, p1, p2);

			msg_data.new_url = NULL;
			callback(CONTENT_MSG_NEWPTR, fb, p1, p2, msg_data);

			/* and catch user up with fallback's state */
			if (fb->status == CONTENT_STATUS_LOADING) {
				callback(CONTENT_MSG_LOADING,
						fb, p1, p2, msg_data);
			} else if (fb->status == CONTENT_STATUS_READY) {
				callback(CONTENT_MSG_LOADING,
						fb, p1, p2, msg_data);
				if (content_find_user(fb, callback, p1, p2))
					callback(CONTENT_MSG_READY,
							fb, p1, p2, msg_data);
			} else if (fb->status == CONTENT_STATUS_DONE) {
				callback(CONTENT_MSG_LOADING,
						fb, p1, p2, msg_data);
				if (content_find_user(fb, callback, p1, p2))
					callback(CONTENT_MSG_READY,
							fb, p1, p2, msg_data);
				if (content_find_user(fb, callback, p1, p2))
					callback(CONTENT_MSG_DONE,
							fb, p1, p2, msg_data);
			} else if (fb->status == CONTENT_STATUS_ERROR) {
				/* shouldn't usually occur */
				msg_data.error = messages_get("MiscError");
				callback(CONTENT_MSG_ERROR, fb, p1, p2,
						msg_data);
			}
		}

		/* mark content invalid */
		c->fetch = 0;
		c->status = CONTENT_STATUS_ERROR;

		/* clone our cache control data into the fallback */
		fetchcache_cache_clone(fb, &c->cache_data);
		/* and update the fallback's cache state */
		fetchcache_cache_update(fb);
	} else {
		/* No cached content, so unconditionally refetch */
		struct content_user *u;
		const char *ref = fetch_get_referer(c->fetch);
		const char *parent = fetch_get_parent_url(c->fetch);
		char *referer = NULL;
		char *parent_url = NULL;

		if (ref) {
			referer = strdup(ref);
			if (!referer) {
				c->type = CONTENT_UNKNOWN;
				c->status = CONTENT_STATUS_ERROR;
				msg_data.error = messages_get("NoMemory");
				content_broadcast(c, CONTENT_MSG_ERROR,
						msg_data);
				return;
			}
		}

		if (parent) {
			parent_url = strdup(parent);
			if (!parent_url) {
				c->type = CONTENT_UNKNOWN;
				c->status = CONTENT_STATUS_ERROR;
				msg_data.error = messages_get("NoMemory");
				content_broadcast(c, CONTENT_MSG_ERROR,
						msg_data);
				free(referer);
				return;
			}
		}

		fetch_abort(c->fetch);
		c->fetch = 0;

		c->cache_data.date = 0;
		talloc_free(c->cache_data.etag);
		c->cache_data.etag = 0;

		for (u = c->user_list->next; u; u = u->next) {
			fetchcache_go(c, referer, u->callback, u->p1, u->p2,
					c->width, c->height, 0, 0,
					false, parent_url);
		}

		free(parent_url);
		free(referer);
	}
}
|
|
|
|
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
/**
|
|
|
|
* Redirect callback handler
|
|
|
|
*/
|
|
|
|
|
|
|
|
void fetchcache_redirect(struct content *c, const void *data,
|
|
|
|
unsigned long size)
|
|
|
|
{
|
2008-01-30 04:44:57 +03:00
|
|
|
char *url, *url1;
|
2008-01-30 22:56:41 +03:00
|
|
|
char *referer, *parent_url;
|
2008-02-03 15:04:48 +03:00
|
|
|
long http_code;
|
|
|
|
const char *ref;
|
|
|
|
const char *parent;
|
2008-04-19 15:07:42 +04:00
|
|
|
bool can_fetch;
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
union content_msg_data msg_data;
|
|
|
|
url_func_result result;
|
|
|
|
|
|
|
|
/* Preconditions */
|
|
|
|
assert(c && data);
|
|
|
|
assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);
|
|
|
|
|
2008-01-30 22:56:41 +03:00
|
|
|
/* Extract fetch details */
|
|
|
|
http_code = fetch_http_code(c->fetch);
|
|
|
|
ref = fetch_get_referer(c->fetch);
|
|
|
|
parent = fetch_get_parent_url(c->fetch);
|
|
|
|
|
2008-02-03 15:04:48 +03:00
|
|
|
/* Ensure a redirect happened */
|
|
|
|
assert(300 <= http_code && http_code <= 399);
|
|
|
|
/* 304 is handled by fetch_notmodified() */
|
|
|
|
assert(http_code != 304);
|
|
|
|
|
2008-01-30 22:56:41 +03:00
|
|
|
/* Clone referer and parent url
|
|
|
|
* originals are destroyed in fetch_abort() */
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
referer = ref ? strdup(ref) : NULL;
|
2008-01-30 22:56:41 +03:00
|
|
|
parent_url = parent ? strdup(parent) : NULL;
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
|
|
|
|
/* set the status to ERROR so that this content is
|
|
|
|
* destroyed in content_clean() */
|
|
|
|
fetch_abort(c->fetch);
|
|
|
|
c->fetch = 0;
|
|
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
|
|
|
|
|
|
/* Ensure that referer cloning succeeded
|
|
|
|
* _must_ be after content invalidation */
|
|
|
|
if (ref && !referer) {
|
|
|
|
LOG(("Failed cloning referer"));
|
|
|
|
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2008-01-30 22:56:41 +03:00
|
|
|
/* Ensure parent url cloning succeeded
|
|
|
|
* _must_ be after content invalidation */
|
|
|
|
if (parent && !parent_url) {
|
|
|
|
LOG(("Failed cloning parent url"));
|
|
|
|
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
|
|
|
|
free(referer);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
/** \todo 300, 305, 307
|
|
|
|
* More specifically:
|
|
|
|
* + 300 needs to serve up the fetch body to the user
|
|
|
|
* + 305 needs to refetch using the proxy specified in ::data
|
|
|
|
* + 307 needs to refetch.
|
|
|
|
*
|
|
|
|
* If the original request method was either GET or HEAD, then follow
|
|
|
|
* redirect unconditionally. If the original request method was neither
|
|
|
|
* GET nor HEAD, then the user MUST be asked what to do.
|
|
|
|
*
|
|
|
|
* Note:
|
|
|
|
* For backwards compatibility, all 301, 302 and 303 redirects are
|
|
|
|
* followed unconditionally with a GET request to the new location.
|
|
|
|
*/
|
|
|
|
if (http_code != 301 && http_code != 302 && http_code != 303) {
|
|
|
|
LOG(("Unsupported redirect type %ld", http_code));
|
|
|
|
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
|
2008-01-30 22:56:41 +03:00
|
|
|
free(parent_url);
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
free(referer);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Forcibly stop redirecting if we've followed too many redirects */
|
|
|
|
#define REDIRECT_LIMIT 10
|
|
|
|
if (c->redirect_count > REDIRECT_LIMIT) {
|
|
|
|
LOG(("Too many nested redirects"));
|
|
|
|
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
|
2008-01-30 22:56:41 +03:00
|
|
|
free(parent_url);
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
free(referer);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#undef REDIRECT_LIMIT
|
|
|
|
|
|
|
|
/* redirect URLs must be absolute by HTTP/1.1, but many
|
|
|
|
* sites send relative ones: treat them as relative to
|
|
|
|
* requested URL */
|
2008-01-30 04:44:57 +03:00
|
|
|
result = url_join(data, c->url, &url1);
|
|
|
|
if (result != URL_FUNC_OK) {
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
|
2008-01-30 22:56:41 +03:00
|
|
|
free(parent_url);
|
2008-01-30 04:44:57 +03:00
|
|
|
free(referer);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
	/* Normalize redirect target -- this is vital as this URL may
	 * be inserted into the urldb, which expects normalized URLs */
	result = url_normalize(url1, &url);
	if (result != URL_FUNC_OK) {
		msg_data.error = messages_get("BadRedirect");
		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);

		free(url1);
		free(parent_url);
		free(referer);
		return;
	}

	/* No longer need url1 */
	free(url1);

	/* Determine if we've got a fetch handler for this url */
	can_fetch = fetch_can_fetch(url);

	/* Process users of this content */
	while (c->user_list->next) {
		intptr_t p1, p2;
		void (*callback)(content_msg msg,
				struct content *c, intptr_t p1,
				intptr_t p2,
				union content_msg_data data);
		struct content *replacement;

		p1 = c->user_list->next->p1;
		p2 = c->user_list->next->p2;
		callback = c->user_list->next->callback;

		/* If we can't fetch this url, attempt to launch it */
		if (!can_fetch) {
			msg_data.launch_url = url;
			callback(CONTENT_MSG_LAUNCH, c, p1, p2, msg_data);
		}

		/* Remove user */
		content_remove_user(c, callback, p1, p2);

		if (can_fetch) {
			/* Get replacement content -- HTTP GET request */
			replacement = fetchcache(url, callback, p1, p2,
					c->width, c->height,
					c->no_error_pages,
					NULL, NULL, false, c->download);
			if (!replacement) {
				msg_data.error = messages_get("BadRedirect");
				content_broadcast(c, CONTENT_MSG_ERROR,
						msg_data);

				free(url);
				free(parent_url);
				free(referer);
				return;
			}

			/* Set replacement's redirect count to 1 greater
			 * than ours */
			replacement->redirect_count = c->redirect_count + 1;

			/* Notify user that content has changed */
			msg_data.new_url = url;
			callback(CONTENT_MSG_NEWPTR, replacement,
					p1, p2, msg_data);

			/* Start fetching the replacement content */
			fetchcache_go(replacement, referer, callback, p1, p2,
					c->width, c->height, NULL, NULL,
					false, parent_url);
		}
	}

	/* Clean up */
	free(url);
	free(parent_url);
	free(referer);
}
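
#if 0
/* Illustrative sketch only, not built: a minimal fetchcache client
 * callback under the redirect scheme implemented above. The names
 * my_fetch_state and my_callback are hypothetical. The key point is
 * CONTENT_MSG_NEWPTR: each redirect replaces the content, so a client
 * holding a content pointer must update it on that message; until
 * CONTENT_MSG_LOADING arrives, the content (and thus its URL) may
 * still change. */
struct my_fetch_state {
	struct content *content;	/* transient until LOADING */
};

static void my_callback(content_msg msg, struct content *c,
		intptr_t p1, intptr_t p2, union content_msg_data data)
{
	struct my_fetch_state *state = (struct my_fetch_state *) p1;

	(void) p2;	/* unused in this sketch */

	switch (msg) {
	case CONTENT_MSG_NEWPTR:
		/* A redirect occurred: track the replacement content */
		state->content = c;
		break;
	case CONTENT_MSG_LAUNCH:
		/* Redirect target has no fetch handler: pass the URL on */
		LOG(("launch '%s'", data.launch_url));
		break;
	case CONTENT_MSG_ERROR:
		LOG(("error '%s'", data.error));
		break;
	default:
		break;
	}
}
#endif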

#ifdef TEST

#include <unistd.h>

void callback(fetchcache_msg msg, struct content *c, void *p, char *error)
{
	switch (msg) {
	case FETCHCACHE_OK:
		LOG(("FETCHCACHE_OK, url '%s'", (char *) p));
		break;
	case FETCHCACHE_BADTYPE:
		LOG(("FETCHCACHE_BADTYPE, url '%s'", (char *) p));
		break;
	case FETCHCACHE_ERROR:
		LOG(("FETCHCACHE_ERROR, url '%s', error '%s'",
				(char *) p, error));
		break;
	default:
		assert(0);
	}
}

char *test[] = {"http://www.google.co.uk/", "http://www.ox.ac.uk/",
		"blah://blah/"};

int main(void)
{
	int i;

	cache_init();
	fetch_init();

	for (i = 0; i != sizeof(test) / sizeof(test[0]); i++)
		fetchcache(test[i], 0, callback, test[i], 800, 0);
	for (i = 0; i != 5; i++) {
		fetch_poll();
		sleep(1);
	}

	for (i = 0; i != sizeof(test) / sizeof(test[0]); i++)
		fetchcache(test[i], 0, callback, test[i], 800, 0);
	for (i = 0; i != 20; i++) {
		fetch_poll();
		sleep(1);
	}

	return 0;
}
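
/* A note on the harness above (inferred from the code, not documented
 * in the source): the first fetchcache() pass fetches each test URL
 * over the network; after the poll loop, the second pass requests the
 * same URLs again and should be satisfied from the cache. The
 * "blah://blah/" entry has no fetch handler, so it exercises the
 * error path in callback(). */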

#endif