2003-06-30 16:44:03 +04:00
|
|
|
/*
|
2005-08-21 16:04:18 +04:00
|
|
|
* Copyright 2005 James Bursa <bursa@users.sourceforge.net>
|
2009-08-05 03:02:23 +04:00
|
|
|
* Copyright 2009 John-Mark Bell <jmb@netsurf-browser.org>
|
2007-08-08 20:16:03 +04:00
|
|
|
*
|
|
|
|
* This file is part of NetSurf, http://www.netsurf-browser.org/
|
|
|
|
*
|
|
|
|
* NetSurf is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; version 2 of the License.
|
|
|
|
*
|
|
|
|
* NetSurf is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2003-02-09 15:58:15 +03:00
|
|
|
*/
|
|
|
|
|
2003-10-01 00:33:45 +04:00
|
|
|
/** \file
|
|
|
|
* High-level fetching, caching and conversion (implementation).
|
|
|
|
*
|
|
|
|
* The implementation checks the cache for the requested URL. If it is not
|
|
|
|
* present, a content is created and a fetch is initiated. As the status of the
|
|
|
|
* fetch changes and data is received, the content is updated appropriately.
|
|
|
|
*/
|
|
|
|
|
2005-01-13 23:29:24 +03:00
|
|
|
#define _GNU_SOURCE /* for strndup */
|
2003-02-09 15:58:15 +03:00
|
|
|
#include <assert.h>
|
2003-02-28 14:49:13 +03:00
|
|
|
#include <string.h>
|
2007-01-30 01:27:15 +03:00
|
|
|
#include <strings.h>
|
2003-12-27 23:15:23 +03:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <regex.h>
|
2006-02-06 03:10:09 +03:00
|
|
|
#include <time.h>
|
Merged revisions 5309-5406,5409-5422 via svnmerge from
svn://svn.netsurf-browser.org/branches/vince/netsurf-fb
........
r5309 | vince | 2008-09-13 10:59:10 +0100 (Sat, 13 Sep 2008) | 2 lines
first stab at framebuffer frontend
........
r5313 | vince | 2008-09-14 15:08:52 +0100 (Sun, 14 Sep 2008) | 2 lines
add line plotters
........
r5314 | vince | 2008-09-14 15:28:12 +0100 (Sun, 14 Sep 2008) | 2 lines
add rectangle plot to 16bpp plotters
........
r5315 | vince | 2008-09-14 19:58:57 +0100 (Sun, 14 Sep 2008) | 2 lines
improve 16bpp image plot
........
r5316 | vince | 2008-09-15 00:35:32 +0100 (Mon, 15 Sep 2008) | 2 lines
abstract the os specific framebuffer init
........
r5317 | vince | 2008-09-15 11:18:51 +0100 (Mon, 15 Sep 2008) | 2 lines
first cut of linux frontend
........
r5318 | vince | 2008-09-15 12:01:00 +0100 (Mon, 15 Sep 2008) | 2 lines
remove junk includes
........
r5319 | vince | 2008-09-15 12:09:02 +0100 (Mon, 15 Sep 2008) | 2 lines
make plotters OS agnostic again
........
r5322 | vince | 2008-09-15 15:55:01 +0100 (Mon, 15 Sep 2008) | 2 lines
Linux frontend operates
........
r5323 | vince | 2008-09-15 16:32:47 +0100 (Mon, 15 Sep 2008) | 2 lines
abstract out OS specific input
........
r5326 | vince | 2008-09-15 19:21:01 +0100 (Mon, 15 Sep 2008) | 2 lines
Improve linux mode setting
........
r5329 | vince | 2008-09-15 21:13:33 +0100 (Mon, 15 Sep 2008) | 2 lines
improve text clipping
........
r5339 | vince | 2008-09-16 00:07:57 +0100 (Tue, 16 Sep 2008) | 2 lines
possibly fix text clipping issue
........
r5342 | vince | 2008-09-16 00:39:36 +0100 (Tue, 16 Sep 2008) | 2 lines
consolidate polygon plotters
........
r5344 | dsilvers | 2008-09-16 10:21:06 +0100 (Tue, 16 Sep 2008) | 1 line
Fix up the framebuffer target makefile a bit more, add some config options for it
........
r5345 | dsilvers | 2008-09-16 10:22:19 +0100 (Tue, 16 Sep 2008) | 1 line
Ensure the appropriate frontend is selected when building framebuffer
........
r5346 | dsilvers | 2008-09-16 10:27:16 +0100 (Tue, 16 Sep 2008) | 1 line
Update build system to support targeting separate framebuffer frontends in different build trees, update executable to be nsfb-blah
........
r5350 | vince | 2008-09-16 17:15:04 +0100 (Tue, 16 Sep 2008) | 1 line
Add -g to provide symbols for framebuffer link
........
r5351 | vince | 2008-09-16 17:17:09 +0100 (Tue, 16 Sep 2008) | 1 line
framebuffer scheduler now works, plotters tweaked, gui tracks window redraw requirements better, keypresses not duplicated under linux fb
........
r5352 | dsilvers | 2008-09-16 17:38:53 +0100 (Tue, 16 Sep 2008) | 1 line
Ensure we only allow one fetcher at a time
........
r5361 | vince | 2008-09-17 11:48:44 +0100 (Wed, 17 Sep 2008) | 2 lines
initial cursor support
........
r5362 | vince | 2008-09-17 13:56:47 +0100 (Wed, 17 Sep 2008) | 2 lines
add mouse handling
........
r5363 | vince | 2008-09-17 14:14:44 +0100 (Wed, 17 Sep 2008) | 2 lines
add framebuffer resources
........
r5364 | vince | 2008-09-17 17:12:21 +0100 (Wed, 17 Sep 2008) | 2 lines
add reasonable pointer
........
r5366 | vince | 2008-09-17 17:17:25 +0100 (Wed, 17 Sep 2008) | 2 lines
fix pointer alpha
........
r5370 | vince | 2008-09-18 13:43:53 +0100 (Thu, 18 Sep 2008) | 2 lines
warning squash and cleanup ready for trunk merge
........
r5375 | vince | 2008-09-19 14:58:43 +0100 (Fri, 19 Sep 2008) | 2 lines
Working mouse navigation
........
r5377 | vince | 2008-09-20 14:06:22 +0100 (Sat, 20 Sep 2008) | 2 lines
Improve scrolling
........
r5378 | vince | 2008-09-20 14:46:46 +0100 (Sat, 20 Sep 2008) | 2 lines
fix redraw issues with scrolling
........
r5380 | vince | 2008-09-20 17:08:43 +0100 (Sat, 20 Sep 2008) | 3 lines
Alter panning to use its own flag so it doesn't cause invalid redraw
operations
........
r5381 | vince | 2008-09-20 21:52:45 +0100 (Sat, 20 Sep 2008) | 2 lines
add dummy framebuffer
........
r5383 | vince | 2008-09-21 00:00:15 +0100 (Sun, 21 Sep 2008) | 2 lines
fix segfault when cursor is off the bottom of the screen
........
r5384 | vince | 2008-09-21 00:06:08 +0100 (Sun, 21 Sep 2008) | 2 lines
fix off by one in pointer fix
........
r5385 | vince | 2008-09-21 00:25:09 +0100 (Sun, 21 Sep 2008) | 2 lines
when fixing bloody silly off by one errors remember to fix *both* references
........
r5387 | vince | 2008-09-21 00:38:13 +0100 (Sun, 21 Sep 2008) | 2 lines
last try at stopping the pointer segfault
........
r5388 | vince | 2008-09-21 16:24:18 +0100 (Sun, 21 Sep 2008) | 2 lines
improve vertical text clipping
........
r5392 | vince | 2008-09-21 23:11:51 +0100 (Sun, 21 Sep 2008) | 2 lines
Improve text plotters
........
r5393 | vince | 2008-09-21 23:34:38 +0100 (Sun, 21 Sep 2008) | 2 lines
fix 32bpp line plotting
........
r5394 | vince | 2008-09-22 00:00:03 +0100 (Mon, 22 Sep 2008) | 2 lines
Fix off by one error in line plotting clipping
........
r5397 | vince | 2008-09-22 13:46:22 +0100 (Mon, 22 Sep 2008) | 2 lines
Fix bitmap tiling
........
r5398 | vince | 2008-09-22 17:46:02 +0100 (Mon, 22 Sep 2008) | 2 lines
enable knockout renderer
........
r5399 | vince | 2008-09-22 18:43:48 +0100 (Mon, 22 Sep 2008) | 2 lines
ensure clipping region lies within window, caused by knockout renderer
........
r5400 | vince | 2008-09-22 19:20:25 +0100 (Mon, 22 Sep 2008) | 2 lines
update cursor to one swiped from X windows
........
r5405 | vince | 2008-09-23 09:09:05 +0100 (Tue, 23 Sep 2008) | 2 lines
fix vertical scroll limit
........
r5412 | dsilvers | 2008-09-23 10:53:14 +0100 (Tue, 23 Sep 2008) | 1 line
Revert noisy fetcher patch
........
r5413 | dsilvers | 2008-09-23 10:58:00 +0100 (Tue, 23 Sep 2008) | 1 line
Add header guards
........
r5414 | dsilvers | 2008-09-23 11:31:31 +0100 (Tue, 23 Sep 2008) | 1 line
Tidy the region clipping slightly
........
r5416 | dsilvers | 2008-09-23 12:05:00 +0100 (Tue, 23 Sep 2008) | 1 line
Rationalise how the framebuffer frontend finds resources and options
........
r5418 | dsilvers | 2008-09-23 13:59:00 +0100 (Tue, 23 Sep 2008) | 1 line
Ensure options are overridden after load, and squash an fb_gui.c warning
........
r5419 | dsilvers | 2008-09-23 14:20:07 +0100 (Tue, 23 Sep 2008) | 1 line
Support fb_mode and fb_device options
........
r5420 | dsilvers | 2008-09-23 14:21:48 +0100 (Tue, 23 Sep 2008) | 1 line
Support option_fb_device in the able frontend
........
r5421 | dsilvers | 2008-09-23 14:25:17 +0100 (Tue, 23 Sep 2008) | 1 line
Ensure target_blank is disabled
........
r5422 | dsilvers | 2008-09-23 14:39:00 +0100 (Tue, 23 Sep 2008) | 1 line
Rework linux fb frontend to support scanning and opening all event devices for input
........
svn path=/trunk/netsurf/; revision=5423
2008-09-23 18:00:40 +04:00
|
|
|
#include <unistd.h>
|
2008-05-30 08:11:16 +04:00
|
|
|
#include <curl/curl.h> /* for curl_getdate() */
|
2007-05-31 02:39:54 +04:00
|
|
|
#include "utils/config.h"
|
|
|
|
#include "content/content.h"
|
|
|
|
#include "content/fetchcache.h"
|
|
|
|
#include "content/fetch.h"
|
2009-08-05 03:02:23 +04:00
|
|
|
#include "content/urldb.h"
|
2007-05-31 02:39:54 +04:00
|
|
|
#include "utils/log.h"
|
|
|
|
#include "utils/messages.h"
|
|
|
|
#include "utils/talloc.h"
|
|
|
|
#include "utils/url.h"
|
|
|
|
#include "utils/utils.h"
|
2003-02-09 15:58:15 +03:00
|
|
|
|
|
|
|
|
2004-01-23 23:46:29 +03:00
|
|
|
static char error_page[1000];
|
2003-12-27 23:15:23 +03:00
|
|
|
static regex_t re_content_type;
|
2006-02-23 18:06:54 +03:00
|
|
|
static void fetchcache_callback(fetch_msg msg, void *p, const void *data,
|
2004-06-22 21:37:51 +04:00
|
|
|
unsigned long size);
|
|
|
|
static char *fetchcache_parse_type(const char *s, char **params[]);
|
2008-05-30 08:11:16 +04:00
|
|
|
static void fetchcache_parse_header(struct content *c, const char *data,
|
|
|
|
size_t size);
|
2004-01-23 23:46:29 +03:00
|
|
|
static void fetchcache_error_page(struct content *c, const char *error);
|
2008-06-07 03:51:51 +04:00
|
|
|
static void fetchcache_cache_update(struct content *c);
|
|
|
|
static void fetchcache_cache_clone(struct content *c,
|
2006-02-06 03:10:09 +03:00
|
|
|
const struct cache_data *data);
|
2006-02-23 18:06:54 +03:00
|
|
|
static void fetchcache_notmodified(struct content *c, const void *data);
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache())
2) Start fetch of content (fetchcache_go())
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
/* NOTE(review): presumably handles an HTTP redirect reported by the fetch
 * layer; `data`/`size` are passed through from the fetch callback --
 * confirm against the definition. */
static void fetchcache_redirect(struct content *c, const void *data,
		unsigned long size);

/* NOTE(review): presumably handles an authentication challenge for the
 * given realm -- confirm against the definition. */
static void fetchcache_auth(struct content *c, const char *realm);
|
2003-02-09 15:58:15 +03:00
|
|
|
|
|
|
|
|
2003-10-01 00:33:45 +04:00
|
|
|
/**
|
2004-06-11 03:55:23 +04:00
|
|
|
* Retrieve a URL or prepare to fetch, convert, and cache it.
|
2003-10-01 00:33:45 +04:00
|
|
|
*
|
|
|
|
* The caller must supply a callback function which is called when anything
|
|
|
|
* interesting happens to the content which is returned. See content.h.
|
|
|
|
*
|
2004-06-11 03:55:23 +04:00
|
|
|
* \param url address to fetch
|
|
|
|
* \param callback function to call when anything interesting happens to
|
|
|
|
* the new content
|
2005-08-21 16:04:18 +04:00
|
|
|
* \param p1 user parameter for callback (may be a pointer or integer)
|
|
|
|
* \param p2 user parameter for callback (may be a pointer or integer)
|
2004-06-11 03:55:23 +04:00
|
|
|
* \param width available space
|
|
|
|
* \param height available space
|
|
|
|
* \param no_error_pages if an error occurs, send CONTENT_MSG_ERROR instead
|
|
|
|
* of generating an error page
|
|
|
|
* \param post_urlenc url encoded post data, or 0 if none
|
|
|
|
* \param post_multipart multipart post data, or 0 if none
|
2007-01-27 23:58:20 +03:00
|
|
|
* \param verifiable this transaction is verifiable
|
2005-01-03 05:09:20 +03:00
|
|
|
* \param download download, rather than render content
|
2004-06-11 03:55:23 +04:00
|
|
|
* \return a new content, or 0 on memory exhaustion
|
2004-01-26 17:16:23 +03:00
|
|
|
*
|
2004-06-11 03:55:23 +04:00
|
|
|
* On success, call fetchcache_go() to start work on the new content.
|
2003-10-01 00:33:45 +04:00
|
|
|
*/
|
|
|
|
|
2004-06-11 03:55:23 +04:00
|
|
|
struct content * fetchcache(const char *url,
|
2005-08-21 16:04:18 +04:00
|
|
|
void (*callback)(content_msg msg, struct content *c,
|
|
|
|
intptr_t p1, intptr_t p2, union content_msg_data data),
|
|
|
|
intptr_t p1, intptr_t p2,
|
2004-06-11 03:55:23 +04:00
|
|
|
int width, int height,
|
|
|
|
bool no_error_pages,
|
|
|
|
char *post_urlenc,
|
|
|
|
struct form_successful_control *post_multipart,
|
2007-01-27 23:58:20 +03:00
|
|
|
bool verifiable,
|
2005-01-03 05:09:20 +03:00
|
|
|
bool download)
|
2003-02-09 15:58:15 +03:00
|
|
|
{
|
|
|
|
struct content *c;
|
2004-06-11 00:41:26 +04:00
|
|
|
char *url1;
|
2006-02-06 03:10:09 +03:00
|
|
|
char *hash, *query;
|
|
|
|
char *etag = 0;
|
|
|
|
time_t date = 0;
|
2005-04-01 06:25:11 +04:00
|
|
|
|
2006-04-12 12:09:27 +04:00
|
|
|
if (strncasecmp(url, "file:///", 8) &&
|
|
|
|
strncasecmp(url, "file:/", 6) == 0) {
|
|
|
|
/* Manipulate file URLs into correct format */
|
2009-03-02 23:32:05 +03:00
|
|
|
int len = strlen(url) + 1;
|
|
|
|
|
|
|
|
if (strncasecmp(url, "file://", SLEN("file://")) == 0) {
|
2006-07-02 02:27:10 +04:00
|
|
|
/* file://path */
|
2009-03-02 23:32:05 +03:00
|
|
|
url1 = malloc(len + 1 /* + '/' */);
|
2006-04-12 12:09:27 +04:00
|
|
|
if (!url1)
|
|
|
|
return NULL;
|
|
|
|
|
2009-03-02 23:32:05 +03:00
|
|
|
memcpy(url1, "file:///", SLEN("file:///"));
|
|
|
|
memcpy(url1 + SLEN("file:///"),
|
|
|
|
url + SLEN("file://"),
|
|
|
|
len - SLEN("file://"));
|
2006-04-12 12:09:27 +04:00
|
|
|
} else {
|
|
|
|
/* file:/... */
|
2009-03-02 23:32:05 +03:00
|
|
|
url1 = malloc(len + 2 /* + "//" */);
|
2006-04-12 12:09:27 +04:00
|
|
|
if (!url1)
|
|
|
|
return NULL;
|
|
|
|
|
2009-03-02 23:32:05 +03:00
|
|
|
memcpy(url1, "file:///", SLEN("file:///"));
|
|
|
|
memcpy(url1 + SLEN("file:///"),
|
|
|
|
url + SLEN("file:/"),
|
|
|
|
len - SLEN("file:/"));
|
2006-04-12 12:09:27 +04:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* simply duplicate the URL */
|
|
|
|
if ((url1 = strdup(url)) == NULL)
|
|
|
|
return NULL;
|
|
|
|
}
|
2004-06-11 00:41:26 +04:00
|
|
|
|
2003-07-01 02:21:33 +04:00
|
|
|
/* strip fragment identifier */
|
2004-08-14 18:30:12 +04:00
|
|
|
if ((hash = strchr(url1, '#')) != NULL)
|
2005-01-13 23:29:24 +03:00
|
|
|
*hash = 0;
|
2003-06-17 23:24:21 +04:00
|
|
|
|
2006-02-06 03:10:09 +03:00
|
|
|
/* look for query; we don't cache URLs with a query segment */
|
|
|
|
query = strchr(url1, '?');
|
|
|
|
|
2004-01-26 17:16:23 +03:00
|
|
|
LOG(("url %s", url1));
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2006-02-06 03:10:09 +03:00
|
|
|
if (!post_urlenc && !post_multipart && !download && !query) {
|
2004-08-14 18:30:12 +04:00
|
|
|
if ((c = content_get(url1)) != NULL) {
|
2008-06-03 05:10:46 +04:00
|
|
|
struct cache_data *cd = &c->cache_data;
|
2006-02-06 03:10:09 +03:00
|
|
|
int current_age, freshness_lifetime;
|
|
|
|
|
|
|
|
/* Calculate staleness of cached content as per
|
|
|
|
* RFC 2616 13.2.3/13.2.4 */
|
|
|
|
current_age = max(0, (cd->res_time - cd->date));
|
|
|
|
current_age = max(current_age,
|
|
|
|
(cd->age == INVALID_AGE) ? 0
|
|
|
|
: cd->age);
|
|
|
|
current_age += cd->res_time - cd->req_time +
|
|
|
|
time(0) - cd->res_time;
|
|
|
|
freshness_lifetime =
|
|
|
|
(cd->max_age != INVALID_AGE) ? cd->max_age :
|
|
|
|
(cd->expires != 0) ? cd->expires - cd->date :
|
2006-02-08 03:35:05 +03:00
|
|
|
(cd->last_modified != 0) ?
|
|
|
|
(time(0) - cd->last_modified) / 10 :
|
|
|
|
0;
|
2006-02-06 03:10:09 +03:00
|
|
|
|
|
|
|
if (freshness_lifetime > current_age ||
|
|
|
|
cd->date == 0) {
|
|
|
|
/* Ok, either a fresh content or we're
|
|
|
|
* currently fetching the selected content
|
|
|
|
* (therefore it must be fresh) */
|
|
|
|
free(url1);
|
|
|
|
if (!content_add_user(c, callback, p1, p2))
|
|
|
|
return NULL;
|
|
|
|
else
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Ok. We have a cache entry, but it appears stale.
|
|
|
|
* Therefore, validate it. */
|
2006-02-08 03:35:05 +03:00
|
|
|
if (cd->last_modified)
|
|
|
|
date = cd->last_modified;
|
|
|
|
else
|
2008-06-03 05:10:46 +04:00
|
|
|
date = c->cache_data.date;
|
|
|
|
etag = c->cache_data.etag;
|
2003-10-25 20:22:11 +04:00
|
|
|
}
|
2003-04-18 01:35:02 +04:00
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2004-01-26 17:16:23 +03:00
|
|
|
c = content_create(url1);
|
2004-07-30 20:14:44 +04:00
|
|
|
free(url1);
|
|
|
|
if (!c)
|
2004-08-14 18:30:12 +04:00
|
|
|
return NULL;
|
2006-02-06 03:10:09 +03:00
|
|
|
|
|
|
|
/* Fill in cache validation fields (if present) */
|
|
|
|
if (date)
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.date = date;
|
2006-02-06 03:10:09 +03:00
|
|
|
if (etag) {
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.etag = talloc_strdup(c, etag);
|
|
|
|
if (!c->cache_data.etag)
|
2006-02-06 03:10:09 +03:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2005-01-02 06:58:21 +03:00
|
|
|
if (!content_add_user(c, callback, p1, p2)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2004-01-20 22:08:34 +03:00
|
|
|
|
2006-02-06 03:10:09 +03:00
|
|
|
if (!post_urlenc && !post_multipart && !download && !query)
|
2004-06-21 19:09:59 +04:00
|
|
|
c->fresh = true;
|
2004-01-23 23:46:29 +03:00
|
|
|
|
2003-06-17 23:24:21 +04:00
|
|
|
c->width = width;
|
|
|
|
c->height = height;
|
2004-01-26 17:16:23 +03:00
|
|
|
c->no_error_pages = no_error_pages;
|
2005-01-03 05:09:20 +03:00
|
|
|
c->download = download;
|
2004-06-11 03:55:23 +04:00
|
|
|
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Start fetching and converting a content.
|
|
|
|
*
|
2004-06-21 19:09:59 +04:00
|
|
|
* \param content content to fetch, as returned by fetchcache()
|
2004-06-11 03:55:23 +04:00
|
|
|
* \param referer referring URL, or 0
|
|
|
|
* \param callback function to call when anything interesting happens to
|
|
|
|
* the new content
|
2005-08-21 16:04:18 +04:00
|
|
|
* \param p1 user parameter for callback
|
|
|
|
* \param p2 user parameter for callback
|
2005-01-25 02:02:37 +03:00
|
|
|
* \param width available space
|
|
|
|
* \param height available space
|
2004-06-11 03:55:23 +04:00
|
|
|
* \param post_urlenc url encoded post data, or 0 if none
|
|
|
|
* \param post_multipart multipart post data, or 0 if none
|
2007-01-27 23:58:20 +03:00
|
|
|
* \param verifiable this transaction is verifiable
|
2009-07-10 04:26:37 +04:00
|
|
|
* \param parent Content which spawned this one, or NULL if none
|
2004-06-11 03:55:23 +04:00
|
|
|
*
|
|
|
|
* Errors will be sent back through the callback.
|
|
|
|
*/
|
|
|
|
|
2007-01-27 23:58:20 +03:00
|
|
|
void fetchcache_go(struct content *content, const char *referer,
|
2005-08-21 16:04:18 +04:00
|
|
|
void (*callback)(content_msg msg, struct content *c,
|
|
|
|
intptr_t p1, intptr_t p2, union content_msg_data data),
|
|
|
|
intptr_t p1, intptr_t p2,
|
2005-01-25 02:02:37 +03:00
|
|
|
int width, int height,
|
2004-06-11 03:55:23 +04:00
|
|
|
char *post_urlenc,
|
|
|
|
struct form_successful_control *post_multipart,
|
2009-07-10 04:26:37 +04:00
|
|
|
bool verifiable, struct content *parent)
|
2004-06-11 03:55:23 +04:00
|
|
|
{
|
|
|
|
char error_message[500];
|
|
|
|
union content_msg_data msg_data;
|
|
|
|
|
|
|
|
LOG(("url %s, status %s", content->url,
|
|
|
|
content_status_name[content->status]));
|
|
|
|
|
2006-08-06 21:51:23 +04:00
|
|
|
/* We may well have been asked to fetch an URL using a protocol
|
|
|
|
* that we can't support. Check for this here and, if we can't
|
|
|
|
* perform the fetch, notify the caller and exit */
|
|
|
|
if (!fetch_can_fetch(content->url)) {
|
|
|
|
|
|
|
|
/* The only case where this should fail is if we're a
|
|
|
|
* brand new content with no active fetch. If we're not,
|
|
|
|
* another content with the same URL somehow got through
|
|
|
|
* the fetch_can_fetch check. That should be impossible.
|
|
|
|
*/
|
|
|
|
assert(content->status == CONTENT_STATUS_TYPE_UNKNOWN &&
|
|
|
|
!content->fetch);
|
|
|
|
|
|
|
|
snprintf(error_message, sizeof error_message,
|
|
|
|
messages_get("InvalidURL"),
|
|
|
|
content->url);
|
|
|
|
|
|
|
|
if (content->no_error_pages) {
|
|
|
|
/* Mark as in error so content is destroyed
|
|
|
|
* on cache clean */
|
|
|
|
content->status = CONTENT_STATUS_ERROR;
|
|
|
|
msg_data.error = error_message;
|
|
|
|
callback(CONTENT_MSG_ERROR,
|
|
|
|
content, p1, p2, msg_data);
|
|
|
|
} else {
|
|
|
|
fetchcache_error_page(content, error_message);
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (content->status == CONTENT_STATUS_TYPE_UNKNOWN &&
|
|
|
|
content->fetch) {
|
2004-06-11 03:55:23 +04:00
|
|
|
/* fetching, but not yet received any response:
|
|
|
|
* no action required */
|
|
|
|
|
2006-02-06 03:10:09 +03:00
|
|
|
} else if (content->status == CONTENT_STATUS_TYPE_UNKNOWN) {
|
|
|
|
/* brand new content: start fetch */
|
|
|
|
char **headers;
|
|
|
|
int i = 0;
|
2008-06-03 05:10:46 +04:00
|
|
|
char *etag = content->cache_data.etag;
|
|
|
|
time_t date = content->cache_data.date;
|
|
|
|
|
|
|
|
content->cache_data.req_time = time(NULL);
|
|
|
|
content->cache_data.res_time = 0;
|
|
|
|
content->cache_data.date = 0;
|
|
|
|
content->cache_data.expires = 0;
|
|
|
|
content->cache_data.age = INVALID_AGE;
|
|
|
|
content->cache_data.max_age = INVALID_AGE;
|
|
|
|
content->cache_data.no_cache = false;
|
|
|
|
content->cache_data.etag = 0;
|
|
|
|
content->cache_data.last_modified = 0;
|
2008-05-30 15:09:50 +04:00
|
|
|
|
2006-02-06 03:10:09 +03:00
|
|
|
headers = malloc(3 * sizeof(char *));
|
|
|
|
if (!headers) {
|
2006-08-06 21:51:23 +04:00
|
|
|
content->status = CONTENT_STATUS_ERROR;
|
2006-02-06 03:10:09 +03:00
|
|
|
msg_data.error = messages_get("NoMemory");
|
|
|
|
callback(CONTENT_MSG_ERROR, content, p1, p2,
|
|
|
|
msg_data);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (etag) {
|
2009-03-02 23:32:05 +03:00
|
|
|
int len = SLEN("If-None-Match: ") + strlen(etag) + 1;
|
|
|
|
|
|
|
|
headers[i] = malloc(len);
|
2006-02-06 03:10:09 +03:00
|
|
|
if (!headers[i]) {
|
|
|
|
free(headers);
|
2006-08-06 21:51:23 +04:00
|
|
|
content->status = CONTENT_STATUS_ERROR;
|
2006-02-06 03:10:09 +03:00
|
|
|
msg_data.error = messages_get("NoMemory");
|
|
|
|
callback(CONTENT_MSG_ERROR, content, p1, p2,
|
|
|
|
msg_data);
|
|
|
|
return;
|
|
|
|
}
|
2009-03-02 23:32:05 +03:00
|
|
|
snprintf(headers[i++], len, "If-None-Match: %s", etag);
|
2006-02-06 03:10:09 +03:00
|
|
|
talloc_free(etag);
|
|
|
|
}
|
|
|
|
if (date) {
|
2009-03-02 23:32:05 +03:00
|
|
|
/* Maximum length of an RFC 1123 date is 29 bytes */
|
|
|
|
int len = SLEN("If-Modified-Since: ") + 29 + 1;
|
|
|
|
|
|
|
|
headers[i] = malloc(len);
|
2006-02-06 03:10:09 +03:00
|
|
|
if (!headers[i]) {
|
|
|
|
while (--i >= 0) {
|
|
|
|
free(headers[i]);
|
|
|
|
}
|
|
|
|
free(headers);
|
2006-08-06 21:51:23 +04:00
|
|
|
content->status = CONTENT_STATUS_ERROR;
|
2006-02-06 03:10:09 +03:00
|
|
|
msg_data.error = messages_get("NoMemory");
|
|
|
|
callback(CONTENT_MSG_ERROR, content, p1, p2,
|
|
|
|
msg_data);
|
|
|
|
return;
|
|
|
|
}
|
2009-03-02 23:32:05 +03:00
|
|
|
snprintf(headers[i++], len, "If-Modified-Since: %s",
|
2006-02-06 03:10:09 +03:00
|
|
|
rfc1123_date(date));
|
|
|
|
}
|
|
|
|
headers[i] = 0;
|
2004-06-11 03:55:23 +04:00
|
|
|
content->fetch = fetch_start(content->url, referer,
|
|
|
|
fetchcache_callback, content,
|
|
|
|
content->no_error_pages,
|
2007-01-27 23:58:20 +03:00
|
|
|
post_urlenc, post_multipart, verifiable,
|
2009-07-10 04:26:37 +04:00
|
|
|
parent, headers);
|
2006-02-06 03:10:09 +03:00
|
|
|
for (i = 0; headers[i]; i++)
|
|
|
|
free(headers[i]);
|
|
|
|
free(headers);
|
2004-06-11 03:55:23 +04:00
|
|
|
if (!content->fetch) {
|
|
|
|
LOG(("warning: fetch_start failed"));
|
|
|
|
snprintf(error_message, sizeof error_message,
|
|
|
|
messages_get("InvalidURL"),
|
|
|
|
content->url);
|
|
|
|
if (content->no_error_pages) {
|
|
|
|
content->status = CONTENT_STATUS_ERROR;
|
|
|
|
msg_data.error = error_message;
|
|
|
|
content_broadcast(content, CONTENT_MSG_ERROR,
|
|
|
|
msg_data);
|
|
|
|
} else {
|
|
|
|
fetchcache_error_page(content, error_message);
|
|
|
|
}
|
2004-01-26 17:16:23 +03:00
|
|
|
}
|
2004-06-11 03:55:23 +04:00
|
|
|
|
|
|
|
/* in these remaining cases, we have to 'catch up' with the content's
|
|
|
|
* status, ie. send the same messages as if the content was
|
|
|
|
* gradually getting to the current status from TYPE_UNKNOWN */
|
|
|
|
} else if (content->status == CONTENT_STATUS_LOADING) {
|
|
|
|
callback(CONTENT_MSG_LOADING, content, p1, p2, msg_data);
|
|
|
|
|
|
|
|
} else if (content->status == CONTENT_STATUS_READY) {
|
|
|
|
callback(CONTENT_MSG_LOADING, content, p1, p2, msg_data);
|
2004-06-23 19:41:50 +04:00
|
|
|
if (content_find_user(content, callback, p1, p2))
|
|
|
|
callback(CONTENT_MSG_READY, content, p1, p2, msg_data);
|
2004-06-11 03:55:23 +04:00
|
|
|
|
|
|
|
} else if (content->status == CONTENT_STATUS_DONE) {
|
|
|
|
callback(CONTENT_MSG_LOADING, content, p1, p2, msg_data);
|
2005-01-25 02:02:37 +03:00
|
|
|
if (content->available_width != width)
|
|
|
|
content_reformat(content, width, height);
|
2004-06-23 19:41:50 +04:00
|
|
|
if (content_find_user(content, callback, p1, p2))
|
|
|
|
callback(CONTENT_MSG_READY, content, p1, p2, msg_data);
|
|
|
|
if (content_find_user(content, callback, p1, p2))
|
|
|
|
callback(CONTENT_MSG_DONE, content, p1, p2, msg_data);
|
2004-06-11 03:55:23 +04:00
|
|
|
|
|
|
|
} else if (content->status == CONTENT_STATUS_ERROR) {
|
|
|
|
/* shouldn't usually occur */
|
|
|
|
msg_data.error = messages_get("MiscError");
|
|
|
|
callback(CONTENT_MSG_ERROR, content, p1, p2, msg_data);
|
2003-07-16 21:38:46 +04:00
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-10-01 00:33:45 +04:00
|
|
|
/**
|
|
|
|
* Callback function for fetch.
|
|
|
|
*
|
|
|
|
* This is called when the status of a fetch changes.
|
|
|
|
*/
|
|
|
|
|
2006-02-23 18:06:54 +03:00
|
|
|
void fetchcache_callback(fetch_msg msg, void *p, const void *data,
|
2004-06-22 21:37:51 +04:00
|
|
|
unsigned long size)
|
2003-02-09 15:58:15 +03:00
|
|
|
{
|
2004-06-11 00:41:26 +04:00
|
|
|
bool res;
|
2003-06-17 23:24:21 +04:00
|
|
|
struct content *c = p;
|
2003-02-09 15:58:15 +03:00
|
|
|
content_type type;
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache())
2) Start fetch of content (fetchcache_go())
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
char *mime_type;
|
2003-12-27 23:15:23 +03:00
|
|
|
char **params;
|
2009-07-24 03:05:34 +04:00
|
|
|
struct content *parent;
|
2003-12-27 23:15:23 +03:00
|
|
|
unsigned int i;
|
2004-04-25 03:42:32 +04:00
|
|
|
union content_msg_data msg_data;
|
2003-04-18 01:35:02 +04:00
|
|
|
|
2003-02-09 15:58:15 +03:00
|
|
|
switch (msg) {
|
|
|
|
case FETCH_TYPE:
|
2003-08-29 00:04:35 +04:00
|
|
|
c->total_size = size;
|
2007-01-13 03:21:15 +03:00
|
|
|
c->http_code = fetch_http_code(c->fetch);
|
2003-12-27 23:15:23 +03:00
|
|
|
mime_type = fetchcache_parse_type(data, ¶ms);
|
2005-01-02 01:27:05 +03:00
|
|
|
if (!mime_type) {
|
|
|
|
msg_data.error = messages_get("NoMemory");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR,
|
|
|
|
msg_data);
|
|
|
|
fetch_abort(c->fetch);
|
|
|
|
c->fetch = 0;
|
|
|
|
return;
|
|
|
|
}
|
2003-02-26 00:00:27 +03:00
|
|
|
type = content_lookup(mime_type);
|
2009-07-24 03:05:34 +04:00
|
|
|
parent = fetch_get_parent(c->fetch);
|
2005-01-03 05:09:20 +03:00
|
|
|
res = content_set_type(c,
|
|
|
|
c->download ? CONTENT_OTHER : type,
|
2009-07-24 03:05:34 +04:00
|
|
|
mime_type, (const char **) params,
|
|
|
|
parent);
|
2003-07-08 02:10:51 +04:00
|
|
|
free(mime_type);
|
2003-12-27 23:15:23 +03:00
|
|
|
for (i = 0; params[i]; i++)
|
|
|
|
free(params[i]);
|
|
|
|
free(params);
|
2004-06-21 19:09:59 +04:00
|
|
|
if (!res) {
|
2004-06-11 00:41:26 +04:00
|
|
|
fetch_abort(c->fetch);
|
2004-06-21 19:09:59 +04:00
|
|
|
c->fetch = 0;
|
|
|
|
}
|
2008-02-27 22:13:35 +03:00
|
|
|
|
2008-06-03 05:10:46 +04:00
|
|
|
if (c->cache_data.date || c->cache_data.etag) {
|
2008-02-27 22:13:35 +03:00
|
|
|
/* We've just made a conditional request
|
|
|
|
* that returned with something other
|
|
|
|
* than 304. Therefore, there's a stale
|
|
|
|
* content floating around in the cache.
|
|
|
|
* Hunt it down and mark it as stale, so
|
|
|
|
* it'll get cleaned when unused. We
|
|
|
|
* assume it's either READY or DONE --
|
|
|
|
* anything else is of marginal staleness
|
|
|
|
* (or in error, which will cause it to
|
|
|
|
* be flushed from the cache, anyway)
|
|
|
|
*/
|
|
|
|
struct content *stale_content =
|
|
|
|
content_get_ready(c->url);
|
|
|
|
|
|
|
|
if (stale_content)
|
|
|
|
stale_content->fresh = false;
|
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
break;
|
2003-04-18 01:35:02 +04:00
|
|
|
|
2004-07-10 06:35:31 +04:00
|
|
|
case FETCH_PROGRESS:
|
|
|
|
if (size)
|
|
|
|
content_set_status(c,
|
|
|
|
messages_get("RecPercent"),
|
|
|
|
data, (unsigned int)size);
|
|
|
|
else
|
|
|
|
content_set_status(c,
|
|
|
|
messages_get("Received"),
|
|
|
|
data);
|
|
|
|
content_broadcast(c, CONTENT_MSG_STATUS, msg_data);
|
|
|
|
break;
|
|
|
|
|
2008-05-30 08:11:16 +04:00
|
|
|
case FETCH_HEADER:
|
|
|
|
fetchcache_parse_header(c, data, size);
|
|
|
|
break;
|
|
|
|
|
2003-02-09 15:58:15 +03:00
|
|
|
case FETCH_DATA:
|
2006-02-06 03:10:09 +03:00
|
|
|
if (!content_process_data(c, data, size)) {
|
2004-06-11 00:41:26 +04:00
|
|
|
fetch_abort(c->fetch);
|
2004-06-21 19:09:59 +04:00
|
|
|
c->fetch = 0;
|
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
break;
|
2003-04-18 01:35:02 +04:00
|
|
|
|
2003-02-09 15:58:15 +03:00
|
|
|
case FETCH_FINISHED:
|
2008-06-07 03:51:51 +04:00
|
|
|
fetchcache_cache_update(c);
|
2003-06-17 23:24:21 +04:00
|
|
|
c->fetch = 0;
|
2004-06-05 19:03:59 +04:00
|
|
|
content_set_status(c, messages_get("Converting"),
|
|
|
|
c->source_size);
|
2004-04-25 03:42:32 +04:00
|
|
|
content_broadcast(c, CONTENT_MSG_STATUS, msg_data);
|
2003-06-17 23:24:21 +04:00
|
|
|
content_convert(c, c->width, c->height);
|
2003-02-09 15:58:15 +03:00
|
|
|
break;
|
2003-04-18 01:35:02 +04:00
|
|
|
|
2003-02-09 15:58:15 +03:00
|
|
|
case FETCH_ERROR:
|
2006-02-23 18:06:54 +03:00
|
|
|
LOG(("FETCH_ERROR, '%s'", (const char *)data));
|
2003-06-17 23:24:21 +04:00
|
|
|
c->fetch = 0;
|
2004-01-26 17:16:23 +03:00
|
|
|
if (c->no_error_pages) {
|
2004-06-11 00:41:26 +04:00
|
|
|
c->status = CONTENT_STATUS_ERROR;
|
2004-04-25 03:42:32 +04:00
|
|
|
msg_data.error = data;
|
2004-06-11 00:41:26 +04:00
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR,
|
|
|
|
msg_data);
|
2004-01-26 17:16:23 +03:00
|
|
|
} else {
|
|
|
|
content_reset(c);
|
|
|
|
fetchcache_error_page(c, data);
|
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
break;
|
2003-04-18 01:35:02 +04:00
|
|
|
|
2003-06-26 15:41:26 +04:00
|
|
|
case FETCH_REDIRECT:
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
fetchcache_redirect(c, data, size);
|
2003-06-26 15:41:26 +04:00
|
|
|
break;
|
2006-02-06 03:10:09 +03:00
|
|
|
|
|
|
|
case FETCH_NOTMODIFIED:
|
|
|
|
fetchcache_notmodified(c, data);
|
|
|
|
break;
|
|
|
|
|
2003-10-25 23:20:13 +04:00
|
|
|
case FETCH_AUTH:
|
2009-08-05 03:02:23 +04:00
|
|
|
fetchcache_auth(c, data);
|
2004-06-09 00:25:04 +04:00
|
|
|
break;
|
2006-02-23 18:06:54 +03:00
|
|
|
|
|
|
|
case FETCH_CERT_ERR:
|
|
|
|
c->fetch = 0;
|
|
|
|
/* set the status to ERROR so that the content is
|
|
|
|
* destroyed in content_clean() */
|
|
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
|
|
|
|
|
|
msg_data.ssl.certs = data;
|
|
|
|
msg_data.ssl.num = size;
|
|
|
|
content_broadcast(c, CONTENT_MSG_SSL, msg_data);
|
|
|
|
break;
|
|
|
|
|
2003-02-09 15:58:15 +03:00
|
|
|
default:
|
|
|
|
assert(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-12-27 23:15:23 +03:00
|
|
|
/**
|
|
|
|
* Initialise the fetchcache module.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void fetchcache_init(void)
|
|
|
|
{
|
|
|
|
regcomp_wrapper(&re_content_type,
|
2005-04-01 06:25:11 +04:00
|
|
|
"^([-0-9a-zA-Z_.]+/[-0-9a-zA-Z_.+]+)[ \t]*"
|
2003-12-27 23:15:23 +03:00
|
|
|
"(;[ \t]*([-0-9a-zA-Z_.]+)="
|
|
|
|
"([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$",
|
|
|
|
REG_EXTENDED);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parse a Content-Type header.
|
|
|
|
*
|
2005-01-02 01:27:05 +03:00
|
|
|
* \param s a Content-Type header
|
|
|
|
* \param params updated to point to an array of strings, ordered attribute,
|
|
|
|
* value, attribute, ..., 0
|
|
|
|
* \return a new string containing the MIME-type, or 0 on memory exhaustion
|
2003-12-27 23:15:23 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
#define MAX_ATTRS 10
|
|
|
|
|
2004-06-22 21:37:51 +04:00
|
|
|
char *fetchcache_parse_type(const char *s, char **params[])
|
2003-12-27 23:15:23 +03:00
|
|
|
{
|
2005-01-02 01:27:05 +03:00
|
|
|
char *type = 0;
|
2003-12-27 23:15:23 +03:00
|
|
|
unsigned int i;
|
|
|
|
int r;
|
|
|
|
regmatch_t pmatch[2 + MAX_ATTRS * 3];
|
2005-01-02 01:27:05 +03:00
|
|
|
|
|
|
|
*params = malloc((MAX_ATTRS * 2 + 2) * sizeof (*params)[0]);
|
|
|
|
if (!*params)
|
|
|
|
goto no_memory;
|
|
|
|
for (i = 0; i != MAX_ATTRS * 2 + 2; i++)
|
|
|
|
(*params)[i] = 0;
|
2003-12-27 23:15:23 +03:00
|
|
|
|
|
|
|
r = regexec(&re_content_type, s, 2 + MAX_ATTRS * 3, pmatch, 0);
|
|
|
|
if (r) {
|
2008-07-29 13:05:36 +04:00
|
|
|
char *semi;
|
2003-12-27 23:15:23 +03:00
|
|
|
LOG(("failed to parse content-type '%s'", s));
|
2005-05-02 02:20:40 +04:00
|
|
|
/* The mime type must be first, so only copy up to the
|
|
|
|
* first semicolon in the string. This allows us to have
|
|
|
|
* a better attempt at handling pages sent with broken
|
|
|
|
* Content-Type headers. Obviously, any truly broken
|
|
|
|
* Content-Type headers will be unaffected by this heuristic
|
|
|
|
*/
|
2008-07-29 13:05:36 +04:00
|
|
|
semi = strchr(s, ';');
|
2005-05-02 02:20:40 +04:00
|
|
|
if (semi)
|
|
|
|
type = strndup(s, semi - s);
|
|
|
|
else
|
|
|
|
type = strdup(s);
|
2005-01-02 01:27:05 +03:00
|
|
|
if (!type)
|
|
|
|
goto no_memory;
|
|
|
|
return type;
|
2003-12-27 23:15:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
|
2005-01-02 01:27:05 +03:00
|
|
|
if (!type) {
|
|
|
|
free(*params);
|
|
|
|
return 0;
|
|
|
|
}
|
2003-12-27 23:15:23 +03:00
|
|
|
|
|
|
|
/* parameters */
|
|
|
|
for (i = 0; i != MAX_ATTRS && pmatch[2 + 3 * i].rm_so != -1; i++) {
|
|
|
|
(*params)[2 * i] = strndup(s + pmatch[2 + 3 * i + 1].rm_so,
|
2005-01-02 01:27:05 +03:00
|
|
|
pmatch[2 + 3 * i + 1].rm_eo -
|
|
|
|
pmatch[2 + 3 * i + 1].rm_so);
|
2003-12-27 23:15:23 +03:00
|
|
|
(*params)[2 * i + 1] = strndup(s + pmatch[2 + 3 * i + 2].rm_so,
|
2005-01-02 01:27:05 +03:00
|
|
|
pmatch[2 + 3 * i + 2].rm_eo -
|
|
|
|
pmatch[2 + 3 * i + 2].rm_so);
|
|
|
|
if (!(*params)[2 * i] || !(*params)[2 * i + 1])
|
|
|
|
goto no_memory;
|
2003-12-27 23:15:23 +03:00
|
|
|
}
|
|
|
|
(*params)[2 * i] = 0;
|
|
|
|
|
|
|
|
return type;
|
2005-01-02 01:27:05 +03:00
|
|
|
|
|
|
|
no_memory:
|
|
|
|
for (i = 0; i != MAX_ATTRS * 2 + 2; i++)
|
|
|
|
free((*params)[i]);
|
|
|
|
free(*params);
|
|
|
|
free(type);
|
|
|
|
|
|
|
|
return 0;
|
2003-12-27 23:15:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-05-30 08:11:16 +04:00
|
|
|
/**
|
|
|
|
* Parse an HTTP response header.
|
|
|
|
*
|
|
|
|
* See RFC 2616 4.2.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void fetchcache_parse_header(struct content *c, const char *data,
|
|
|
|
size_t size)
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
#define SKIP_ST(o) for (i = (o); i < size && (data[i] == ' ' || data[i] == '\t'); i++)
|
|
|
|
|
|
|
|
/* Set fetch response time if not already set */
|
2008-06-03 05:10:46 +04:00
|
|
|
if (c->cache_data.res_time == 0)
|
|
|
|
c->cache_data.res_time = time(NULL);
|
2008-05-30 08:11:16 +04:00
|
|
|
|
|
|
|
if (5 < size && strncasecmp(data, "Date:", 5) == 0) {
|
|
|
|
/* extract Date header */
|
|
|
|
SKIP_ST(5);
|
|
|
|
if (i < size)
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.date = curl_getdate(&data[i], NULL);
|
2008-05-30 08:11:16 +04:00
|
|
|
} else if (4 < size && strncasecmp(data, "Age:", 4) == 0) {
|
|
|
|
/* extract Age header */
|
|
|
|
SKIP_ST(4);
|
|
|
|
if (i < size && '0' <= data[i] && data[i] <= '9')
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.age = atoi(data + i);
|
2008-05-30 08:11:16 +04:00
|
|
|
} else if (8 < size && strncasecmp(data, "Expires:", 8) == 0) {
|
|
|
|
/* extract Expires header */
|
|
|
|
SKIP_ST(8);
|
|
|
|
if (i < size)
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.expires = curl_getdate(&data[i], NULL);
|
2008-05-30 08:11:16 +04:00
|
|
|
} else if (14 < size && strncasecmp(data, "Cache-Control:", 14) == 0) {
|
|
|
|
/* extract and parse Cache-Control header */
|
|
|
|
size_t comma;
|
|
|
|
SKIP_ST(14);
|
|
|
|
|
|
|
|
while (i < size) {
|
|
|
|
for (comma = i; comma < size; comma++)
|
|
|
|
if (data[comma] == ',')
|
|
|
|
break;
|
|
|
|
|
|
|
|
SKIP_ST(i);
|
|
|
|
|
|
|
|
if (8 < comma - i && (strncasecmp(data + i, "no-cache", 8) == 0 || strncasecmp(data + i, "no-store", 8) == 0))
|
|
|
|
/* When we get a disk cache we should
|
|
|
|
* distinguish between these two */
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.no_cache = true;
|
2008-05-30 08:11:16 +04:00
|
|
|
else if (7 < comma - i && strncasecmp(data + i, "max-age", 7) == 0) {
|
|
|
|
for (; i < comma; i++)
|
|
|
|
if (data[i] == '=')
|
|
|
|
break;
|
|
|
|
SKIP_ST(i+1);
|
|
|
|
if (i < comma)
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.max_age =
|
2008-05-30 08:11:16 +04:00
|
|
|
atoi(data + i);
|
|
|
|
}
|
|
|
|
|
|
|
|
i = comma + 1;
|
|
|
|
}
|
|
|
|
} else if (5 < size && strncasecmp(data, "ETag:", 5) == 0) {
|
|
|
|
/* extract ETag header */
|
2008-06-03 05:10:46 +04:00
|
|
|
talloc_free(c->cache_data.etag);
|
|
|
|
c->cache_data.etag = talloc_array(c, char, size);
|
|
|
|
if (!c->cache_data.etag) {
|
2008-05-30 08:11:16 +04:00
|
|
|
LOG(("malloc failed"));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
SKIP_ST(5);
|
2008-06-03 05:10:46 +04:00
|
|
|
strncpy(c->cache_data.etag, data + i, size - i);
|
|
|
|
c->cache_data.etag[size - i] = '\0';
|
2008-06-04 02:17:35 +04:00
|
|
|
for (i = size - i - 1; ((int) i) >= 0 &&
|
2008-06-03 05:10:46 +04:00
|
|
|
(c->cache_data.etag[i] == ' ' ||
|
|
|
|
c->cache_data.etag[i] == '\t' ||
|
|
|
|
c->cache_data.etag[i] == '\r' ||
|
|
|
|
c->cache_data.etag[i] == '\n'); --i)
|
|
|
|
c->cache_data.etag[i] = '\0';
|
2008-05-30 08:11:16 +04:00
|
|
|
} else if (14 < size && strncasecmp(data, "Last-Modified:", 14) == 0) {
|
|
|
|
/* extract Last-Modified header */
|
|
|
|
SKIP_ST(14);
|
|
|
|
if (i < size) {
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.last_modified =
|
2008-05-30 08:11:16 +04:00
|
|
|
curl_getdate(&data[i], NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-01-23 23:46:29 +03:00
|
|
|
/**
|
|
|
|
* Generate an error page.
|
|
|
|
*
|
|
|
|
* \param c empty content to generate the page in
|
|
|
|
* \param error message to display
|
|
|
|
*/
|
|
|
|
|
|
|
|
void fetchcache_error_page(struct content *c, const char *error)
|
|
|
|
{
|
|
|
|
const char *params[] = { 0 };
|
2004-06-09 00:25:04 +04:00
|
|
|
int length;
|
|
|
|
|
|
|
|
if ((length = snprintf(error_page, sizeof(error_page),
|
|
|
|
messages_get("ErrorPage"), error)) < 0)
|
|
|
|
length = 0;
|
2009-07-24 03:05:34 +04:00
|
|
|
if (!content_set_type(c, CONTENT_HTML, "text/html", params, NULL))
|
2004-06-11 03:55:23 +04:00
|
|
|
return;
|
|
|
|
if (!content_process_data(c, error_page, length))
|
|
|
|
return;
|
2004-01-23 23:46:29 +03:00
|
|
|
content_convert(c, c->width, c->height);
|
2008-02-27 21:48:35 +03:00
|
|
|
|
|
|
|
/* Mark content as non-fresh, so it'll get cleaned from the
|
|
|
|
* cache at the earliest opportunity */
|
|
|
|
c->fresh = false;
|
2004-01-23 23:46:29 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-02-06 03:10:09 +03:00
|
|
|
/**
|
2008-06-07 03:51:51 +04:00
|
|
|
* Update a content's cache state
|
2006-02-06 03:10:09 +03:00
|
|
|
*
|
2008-06-07 03:51:51 +04:00
|
|
|
* \param c The content
|
2006-02-06 03:10:09 +03:00
|
|
|
*/
|
|
|
|
|
2008-06-07 03:51:51 +04:00
|
|
|
void fetchcache_cache_update(struct content *c)
|
|
|
|
{
|
|
|
|
if (c->cache_data.date == 0)
|
|
|
|
c->cache_data.date = time(NULL);
|
|
|
|
|
|
|
|
if (c->cache_data.no_cache)
|
|
|
|
c->fresh = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Clone cache info into a content
|
|
|
|
*
|
|
|
|
* \param c The content
|
|
|
|
* \param data Cache data
|
|
|
|
*/
|
|
|
|
|
|
|
|
void fetchcache_cache_clone(struct content *c,
|
2006-02-06 03:10:09 +03:00
|
|
|
const struct cache_data *data)
|
|
|
|
{
|
|
|
|
assert(c && data);
|
|
|
|
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.req_time = data->req_time;
|
|
|
|
c->cache_data.res_time = data->res_time;
|
2006-02-06 03:10:09 +03:00
|
|
|
|
|
|
|
if (data->date != 0)
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.date = data->date;
|
2006-02-06 03:10:09 +03:00
|
|
|
|
|
|
|
if (data->expires != 0)
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.expires = data->expires;
|
2006-02-06 03:10:09 +03:00
|
|
|
|
|
|
|
if (data->age != INVALID_AGE)
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.age = data->age;
|
2006-02-06 03:10:09 +03:00
|
|
|
|
|
|
|
if (data->max_age != INVALID_AGE)
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.max_age = data->max_age;
|
2006-02-06 03:10:09 +03:00
|
|
|
|
|
|
|
if (data->no_cache)
|
2008-06-07 03:51:51 +04:00
|
|
|
c->cache_data.no_cache = data->no_cache;
|
2006-02-06 03:10:09 +03:00
|
|
|
|
|
|
|
if (data->etag) {
|
2008-06-03 05:10:46 +04:00
|
|
|
talloc_free(c->cache_data.etag);
|
|
|
|
c->cache_data.etag = talloc_strdup(c, data->etag);
|
2006-02-06 03:10:09 +03:00
|
|
|
}
|
2006-02-08 03:35:05 +03:00
|
|
|
|
|
|
|
if (data->last_modified)
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.last_modified = data->last_modified;
|
2006-02-06 03:10:09 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Not modified callback handler
|
|
|
|
*/
|
|
|
|
|
2006-02-23 18:06:54 +03:00
|
|
|
void fetchcache_notmodified(struct content *c, const void *data)
|
2006-02-06 03:10:09 +03:00
|
|
|
{
|
|
|
|
struct content *fb;
|
|
|
|
union content_msg_data msg_data;
|
|
|
|
|
2008-05-30 08:11:16 +04:00
|
|
|
assert(c);
|
2006-02-06 03:10:09 +03:00
|
|
|
assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);
|
|
|
|
|
|
|
|
/* Look for cached content */
|
|
|
|
fb = content_get_ready(c->url);
|
|
|
|
|
|
|
|
if (fb) {
|
|
|
|
/* Found it */
|
|
|
|
intptr_t p1, p2;
|
|
|
|
void (*callback)(content_msg msg,
|
|
|
|
struct content *c, intptr_t p1,
|
|
|
|
intptr_t p2,
|
|
|
|
union content_msg_data data);
|
|
|
|
|
|
|
|
/* Now notify all users that we're changing content */
|
|
|
|
while (c->user_list->next) {
|
|
|
|
p1 = c->user_list->next->p1;
|
|
|
|
p2 = c->user_list->next->p2;
|
|
|
|
callback = c->user_list->next->callback;
|
|
|
|
|
|
|
|
if (!content_add_user(fb, callback, p1, p2)) {
|
|
|
|
c->type = CONTENT_UNKNOWN;
|
|
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
|
|
msg_data.error = messages_get("NoMemory");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR,
|
|
|
|
msg_data);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
content_remove_user(c, callback, p1, p2);
|
2008-02-03 15:04:48 +03:00
|
|
|
|
|
|
|
msg_data.new_url = NULL;
|
2006-02-06 03:10:09 +03:00
|
|
|
callback(CONTENT_MSG_NEWPTR, fb, p1, p2, msg_data);
|
|
|
|
|
|
|
|
/* and catch user up with fallback's state */
|
|
|
|
if (fb->status == CONTENT_STATUS_LOADING) {
|
|
|
|
callback(CONTENT_MSG_LOADING,
|
|
|
|
fb, p1, p2, msg_data);
|
|
|
|
} else if (fb->status == CONTENT_STATUS_READY) {
|
|
|
|
callback(CONTENT_MSG_LOADING,
|
|
|
|
fb, p1, p2, msg_data);
|
|
|
|
if (content_find_user(fb, callback, p1, p2))
|
|
|
|
callback(CONTENT_MSG_READY,
|
|
|
|
fb, p1, p2, msg_data);
|
|
|
|
} else if (fb->status == CONTENT_STATUS_DONE) {
|
|
|
|
callback(CONTENT_MSG_LOADING,
|
|
|
|
fb, p1, p2, msg_data);
|
|
|
|
if (content_find_user(fb, callback, p1, p2))
|
|
|
|
callback(CONTENT_MSG_READY,
|
|
|
|
fb, p1, p2, msg_data);
|
|
|
|
if (content_find_user(fb, callback, p1, p2))
|
|
|
|
callback(CONTENT_MSG_DONE,
|
|
|
|
fb, p1, p2, msg_data);
|
|
|
|
} else if (fb->status == CONTENT_STATUS_ERROR) {
|
|
|
|
/* shouldn't usually occur */
|
|
|
|
msg_data.error = messages_get("MiscError");
|
|
|
|
callback(CONTENT_MSG_ERROR, fb, p1, p2,
|
|
|
|
msg_data);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* mark content invalid */
|
|
|
|
c->fetch = 0;
|
|
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
|
|
|
2008-06-07 03:51:51 +04:00
|
|
|
/* clone our cache control data into the fallback */
|
|
|
|
fetchcache_cache_clone(fb, &c->cache_data);
|
|
|
|
/* and update the fallback's cache state */
|
|
|
|
fetchcache_cache_update(fb);
|
2006-02-06 03:10:09 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* No cached content, so unconditionally refetch */
|
|
|
|
struct content_user *u;
|
2007-01-27 23:58:20 +03:00
|
|
|
const char *ref = fetch_get_referer(c->fetch);
|
2009-07-10 04:26:37 +04:00
|
|
|
struct content *parent = fetch_get_parent(c->fetch);
|
2007-01-27 23:58:20 +03:00
|
|
|
char *referer = NULL;
|
|
|
|
|
|
|
|
if (ref) {
|
|
|
|
referer = strdup(ref);
|
|
|
|
if (!referer) {
|
|
|
|
c->type = CONTENT_UNKNOWN;
|
|
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
|
|
msg_data.error = messages_get("NoMemory");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR,
|
|
|
|
msg_data);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
2006-02-06 03:10:09 +03:00
|
|
|
|
|
|
|
fetch_abort(c->fetch);
|
|
|
|
c->fetch = 0;
|
|
|
|
|
2008-06-03 05:10:46 +04:00
|
|
|
c->cache_data.date = 0;
|
|
|
|
talloc_free(c->cache_data.etag);
|
|
|
|
c->cache_data.etag = 0;
|
2006-02-06 03:10:09 +03:00
|
|
|
|
|
|
|
for (u = c->user_list->next; u; u = u->next) {
|
2007-01-27 23:58:20 +03:00
|
|
|
fetchcache_go(c, referer, u->callback, u->p1, u->p2,
|
|
|
|
c->width, c->height, 0, 0,
|
2009-07-10 04:26:37 +04:00
|
|
|
false, parent);
|
2006-02-06 03:10:09 +03:00
|
|
|
}
|
2007-01-27 23:58:20 +03:00
|
|
|
|
|
|
|
free(referer);
|
2006-02-06 03:10:09 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
/**
|
|
|
|
* Redirect callback handler
|
|
|
|
*/
|
|
|
|
|
|
|
|
void fetchcache_redirect(struct content *c, const void *data,
|
|
|
|
unsigned long size)
|
|
|
|
{
|
2008-01-30 04:44:57 +03:00
|
|
|
char *url, *url1;
|
2009-07-10 04:26:37 +04:00
|
|
|
char *referer;
|
2009-03-04 16:29:54 +03:00
|
|
|
char *scheme;
|
2008-02-03 15:04:48 +03:00
|
|
|
long http_code;
|
|
|
|
const char *ref;
|
2009-07-10 04:26:37 +04:00
|
|
|
struct content *parent;
|
2008-04-19 15:07:42 +04:00
|
|
|
bool can_fetch;
|
2009-02-16 23:24:54 +03:00
|
|
|
bool parent_was_verifiable;
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
union content_msg_data msg_data;
|
|
|
|
url_func_result result;
|
|
|
|
|
|
|
|
/* Preconditions */
|
|
|
|
assert(c && data);
|
|
|
|
assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);
|
|
|
|
|
2008-01-30 22:56:41 +03:00
|
|
|
/* Extract fetch details */
|
|
|
|
http_code = fetch_http_code(c->fetch);
|
|
|
|
ref = fetch_get_referer(c->fetch);
|
2009-07-10 04:26:37 +04:00
|
|
|
parent = fetch_get_parent(c->fetch);
|
2009-02-16 23:24:54 +03:00
|
|
|
parent_was_verifiable = fetch_get_verifiable(c->fetch);
|
2008-01-30 22:56:41 +03:00
|
|
|
|
2008-02-03 15:04:48 +03:00
|
|
|
/* Ensure a redirect happened */
|
|
|
|
assert(300 <= http_code && http_code <= 399);
|
|
|
|
/* 304 is handled by fetch_notmodified() */
|
|
|
|
assert(http_code != 304);
|
|
|
|
|
2009-07-10 04:26:37 +04:00
|
|
|
/* Clone referer -- original is destroyed in fetch_abort() */
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
referer = ref ? strdup(ref) : NULL;
|
|
|
|
|
|
|
|
/* set the status to ERROR so that this content is
|
|
|
|
* destroyed in content_clean() */
|
|
|
|
fetch_abort(c->fetch);
|
|
|
|
c->fetch = 0;
|
|
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
|
|
|
|
|
|
/* Ensure that referer cloning succeeded
|
|
|
|
* _must_ be after content invalidation */
|
|
|
|
if (ref && !referer) {
|
|
|
|
LOG(("Failed cloning referer"));
|
|
|
|
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** \todo 300, 305, 307
|
|
|
|
* More specifically:
|
|
|
|
* + 300 needs to serve up the fetch body to the user
|
|
|
|
* + 305 needs to refetch using the proxy specified in ::data
|
|
|
|
* + 307 needs to refetch.
|
|
|
|
*
|
|
|
|
* If the original request method was either GET or HEAD, then follow
|
|
|
|
* redirect unconditionally. If the original request method was neither
|
|
|
|
* GET nor HEAD, then the user MUST be asked what to do.
|
|
|
|
*
|
|
|
|
* Note:
|
|
|
|
* For backwards compatibility, all 301, 302 and 303 redirects are
|
|
|
|
* followed unconditionally with a GET request to the new location.
|
|
|
|
*/
|
|
|
|
if (http_code != 301 && http_code != 302 && http_code != 303) {
|
|
|
|
LOG(("Unsupported redirect type %ld", http_code));
|
|
|
|
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
|
|
|
|
free(referer);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Forcibly stop redirecting if we've followed too many redirects */
|
|
|
|
#define REDIRECT_LIMIT 10
|
|
|
|
if (c->redirect_count > REDIRECT_LIMIT) {
|
|
|
|
LOG(("Too many nested redirects"));
|
|
|
|
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
|
|
|
|
free(referer);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#undef REDIRECT_LIMIT
|
|
|
|
|
|
|
|
/* redirect URLs must be absolute by HTTP/1.1, but many
|
|
|
|
* sites send relative ones: treat them as relative to
|
|
|
|
* requested URL */
|
2008-01-30 04:44:57 +03:00
|
|
|
result = url_join(data, c->url, &url1);
|
|
|
|
if (result != URL_FUNC_OK) {
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
|
2008-01-30 04:44:57 +03:00
|
|
|
free(referer);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Normalize redirect target -- this is vital as this URL may
|
|
|
|
* be inserted into the urldb, which expects normalized URLs */
|
|
|
|
result = url_normalize(url1, &url);
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
if (result != URL_FUNC_OK) {
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
|
2008-01-30 04:44:57 +03:00
|
|
|
free(url1);
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
free(referer);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2008-01-30 04:44:57 +03:00
|
|
|
/* No longer need url1 */
|
|
|
|
free(url1);
|
|
|
|
|
2009-03-04 16:29:54 +03:00
|
|
|
/* Ensure that redirects to file:/// URLs are trapped */
|
|
|
|
result = url_scheme(url, &scheme);
|
|
|
|
if (result != URL_FUNC_OK) {
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
|
|
|
|
free(url);
|
|
|
|
free(referer);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strcasecmp(scheme, "file") == 0) {
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
|
|
|
|
free(scheme);
|
|
|
|
free(url);
|
|
|
|
free(referer);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
free(scheme);
|
|
|
|
|
2008-04-19 15:07:42 +04:00
|
|
|
/* Determine if we've got a fetch handler for this url */
|
|
|
|
can_fetch = fetch_can_fetch(url);
|
|
|
|
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
/* Process users of this content */
|
|
|
|
while (c->user_list->next) {
|
|
|
|
intptr_t p1, p2;
|
|
|
|
void (*callback)(content_msg msg,
|
|
|
|
struct content *c, intptr_t p1,
|
|
|
|
intptr_t p2,
|
|
|
|
union content_msg_data data);
|
|
|
|
struct content *replacement;
|
|
|
|
|
|
|
|
p1 = c->user_list->next->p1;
|
|
|
|
p2 = c->user_list->next->p2;
|
|
|
|
callback = c->user_list->next->callback;
|
|
|
|
|
2008-04-19 15:07:42 +04:00
|
|
|
/* If we can't fetch this url, attempt to launch it */
|
|
|
|
if (!can_fetch) {
|
|
|
|
msg_data.launch_url = url;
|
|
|
|
callback(CONTENT_MSG_LAUNCH, c, p1, p2, msg_data);
|
|
|
|
}
|
|
|
|
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
/* Remove user */
|
|
|
|
content_remove_user(c, callback, p1, p2);
|
|
|
|
|
2008-04-19 15:07:42 +04:00
|
|
|
if (can_fetch) {
|
|
|
|
/* Get replacement content -- HTTP GET request */
|
2009-02-16 23:24:54 +03:00
|
|
|
|
|
|
|
/* A note about fetch verifiability: according to
|
|
|
|
* both RFC2109 and 2965, redirects result in an
|
|
|
|
* unverifiable fetch and thus cookies must be handled
|
|
|
|
* differently. Unfortunately, however, other browsers
|
|
|
|
* do not adhere to this rule and just process cookies
|
|
|
|
* as per normal in this case. Websites have come to
|
|
|
|
* depend upon this "feature", so we must do something
|
|
|
|
* which approximates the appropriate behaviour.
|
|
|
|
*
|
|
|
|
* Therefore, a redirected fetch will preserve the
|
|
|
|
* verifiability of the origin fetch. Thus, fetches
|
|
|
|
* for embedded objects will remain unverifiable,
|
|
|
|
* as expected.
|
|
|
|
*/
|
2008-04-19 15:07:42 +04:00
|
|
|
replacement = fetchcache(url, callback, p1, p2,
|
|
|
|
c->width, c->height, c->no_error_pages,
|
2009-02-16 23:24:54 +03:00
|
|
|
NULL, NULL, parent_was_verifiable,
|
|
|
|
c->download);
|
2008-04-19 15:07:42 +04:00
|
|
|
if (!replacement) {
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR,
|
|
|
|
msg_data);
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
|
2008-04-19 15:07:42 +04:00
|
|
|
free(url);
|
|
|
|
free(referer);
|
|
|
|
return;
|
|
|
|
}
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
|
2008-04-19 15:07:42 +04:00
|
|
|
/* Set replacement's redirect count to 1 greater
|
|
|
|
* than ours */
|
|
|
|
replacement->redirect_count = c->redirect_count + 1;
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
|
2008-04-19 15:07:42 +04:00
|
|
|
/* Notify user that content has changed */
|
|
|
|
msg_data.new_url = url;
|
|
|
|
callback(CONTENT_MSG_NEWPTR, replacement,
|
|
|
|
p1, p2, msg_data);
|
|
|
|
|
|
|
|
/* Start fetching the replacement content */
|
|
|
|
fetchcache_go(replacement, referer, callback, p1, p2,
|
|
|
|
c->width, c->height, NULL, NULL,
|
2009-07-10 04:26:37 +04:00
|
|
|
parent_was_verifiable, parent);
|
2008-04-19 15:07:42 +04:00
|
|
|
}
|
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.
The new scheme works as follows:
1) Request content for URL (fetchcache()
2) Start fetch of content (fetchcache_go()
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
If redirect, receive NEWPTR for each redirect that occurs, then continue
through LOADING, READY, DONE etc. states as before.
The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.
As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.
svn path=/trunk/netsurf/; revision=3787
2008-01-28 04:35:00 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Clean up */
|
|
|
|
free(url);
|
|
|
|
free(referer);
|
|
|
|
}
|
|
|
|
|
2009-08-05 03:02:23 +04:00
|
|
|
/**
|
|
|
|
* Authentication callback handler
|
|
|
|
*/
|
|
|
|
|
|
|
|
void fetchcache_auth(struct content *c, const char *realm)
|
|
|
|
{
|
|
|
|
char *referer;
|
|
|
|
const char *ref;
|
|
|
|
const char *auth;
|
|
|
|
struct content *parent;
|
|
|
|
bool parent_was_verifiable;
|
|
|
|
union content_msg_data msg_data;
|
|
|
|
char *headers = NULL;
|
|
|
|
|
|
|
|
/* Preconditions */
|
|
|
|
assert(c && realm);
|
|
|
|
assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);
|
|
|
|
|
|
|
|
/* Extract fetch details */
|
|
|
|
ref = fetch_get_referer(c->fetch);
|
|
|
|
parent = fetch_get_parent(c->fetch);
|
|
|
|
parent_was_verifiable = fetch_get_verifiable(c->fetch);
|
|
|
|
|
|
|
|
/* Clone referer -- original is destroyed in fetch_abort() */
|
|
|
|
referer = ref ? strdup(ref) : NULL;
|
|
|
|
|
|
|
|
fetch_abort(c->fetch);
|
|
|
|
c->fetch = NULL;
|
|
|
|
|
|
|
|
/* Ensure that referer cloning succeeded
|
|
|
|
* _must_ be after content invalidation */
|
|
|
|
if (ref && !referer) {
|
|
|
|
LOG(("Failed cloning referer"));
|
|
|
|
|
|
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
|
|
msg_data.error = messages_get("BadRedirect");
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now, see if we've got some auth details */
|
|
|
|
auth = urldb_get_auth_details(c->url, realm);
|
|
|
|
|
|
|
|
if (auth == NULL || c->tried_with_auth) {
|
|
|
|
/* No authentication details or we tried what we had, so ask
|
|
|
|
* our client for them. */
|
|
|
|
c->tried_with_auth = false; /* Allow rety. */
|
|
|
|
|
|
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
|
|
msg_data.auth_realm = realm;
|
|
|
|
content_broadcast(c, CONTENT_MSG_AUTH, msg_data);
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
/* Flag we're retry fetching with auth data. Will be used to detect
|
|
|
|
* wrong auth data so that we can ask our client for better auth. */
|
|
|
|
c->tried_with_auth = true;
|
|
|
|
|
|
|
|
/* We have authentication details. Fetch with them. */
|
|
|
|
/** \todo all the useful things like headers, POST. */
|
|
|
|
c->fetch = fetch_start(c->url, referer,
|
|
|
|
fetchcache_callback, c,
|
|
|
|
c->no_error_pages,
|
|
|
|
NULL, NULL, parent_was_verifiable,
|
|
|
|
parent, &headers);
|
|
|
|
if (c->fetch == NULL) {
|
|
|
|
char error_message[500];
|
|
|
|
|
|
|
|
LOG(("warning: fetch_start failed"));
|
|
|
|
snprintf(error_message, sizeof error_message,
|
|
|
|
messages_get("InvalidURL"),
|
|
|
|
c->url);
|
|
|
|
if (c->no_error_pages) {
|
|
|
|
c->status = CONTENT_STATUS_ERROR;
|
|
|
|
msg_data.error = error_message;
|
|
|
|
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
|
|
|
} else {
|
|
|
|
fetchcache_error_page(c, error_message);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Clean up */
|
|
|
|
free(referer);
|
|
|
|
}
|
|
|
|
|
2003-02-09 15:58:15 +03:00
|
|
|
#ifdef TEST
|
|
|
|
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
/**
 * Test-harness callback: log the message received for a fetch.
 *
 * \param msg    Message code delivered by fetchcache
 * \param c      Content the message refers to (unused here)
 * \param p      Client data; logged with '%s', so expected to be the URL
 *               string that was handed to fetchcache()
 * \param error  Error text, logged only for FETCHCACHE_ERROR
 */
void callback(fetchcache_msg msg, struct content *c, void *p, char *error)
{
	if (msg == FETCHCACHE_OK) {
		LOG(("FETCHCACHE_OK, url '%s'", p));
	} else if (msg == FETCHCACHE_BADTYPE) {
		LOG(("FETCHCACHE_BADTYPE, url '%s'", p));
	} else if (msg == FETCHCACHE_ERROR) {
		LOG(("FETCHCACHE_ERROR, url '%s', error '%s'", p, error));
	} else {
		/* Any other message code is unexpected in this harness */
		assert(0);
	}
}
|
|
|
|
|
|
|
|
char *test[] = {"http://www.google.co.uk/", "http://www.ox.ac.uk/", "blah://blah/"};
|
|
|
|
|
|
|
|
int main(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
cache_init();
|
|
|
|
fetch_init();
|
|
|
|
|
|
|
|
for (i = 0; i != sizeof(test) / sizeof(test[0]); i++)
|
|
|
|
fetchcache(test[i], 0, callback, test[i], 800, 0);
|
|
|
|
for (i = 0; i != 5; i++) {
|
|
|
|
fetch_poll();
|
|
|
|
sleep(1);
|
|
|
|
}
|
|
|
|
for (i = 0; i != sizeof(test) / sizeof(test[0]); i++)
|
|
|
|
fetchcache(test[i], 0, callback, test[i], 800, 0);
|
|
|
|
for (i = 0; i != 20; i++) {
|
|
|
|
fetch_poll();
|
|
|
|
sleep(1);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|