Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache. The new scheme works as follows: 1) Request content for the URL (fetchcache()). 2) Start the fetch of the content (fetchcache_go()). 3) If there is no redirect, continue through the LOADING, READY, DONE etc. states as before. If there is a redirect, receive NEWPTR for each redirect that occurs, then continue through the LOADING, READY, DONE etc. states as before. The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring. As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before. svn path=/trunk/netsurf/; revision=3787
This commit is contained in:
parent
32fe1bd1bf
commit
78d194cb77
|
@ -424,6 +424,7 @@ struct content * content_create(const char *url)
|
|||
c->http_code = 0;
|
||||
c->no_error_pages = false;
|
||||
c->download = false;
|
||||
c->redirect_count = 0;
|
||||
c->error_count = 0;
|
||||
c->cache_data->req_time = 0;
|
||||
c->cache_data->res_time = 0;
|
||||
|
|
|
@ -82,7 +82,6 @@ typedef enum {
|
|||
CONTENT_MSG_DONE, /**< finished */
|
||||
CONTENT_MSG_ERROR, /**< error occurred */
|
||||
CONTENT_MSG_STATUS, /**< new status string */
|
||||
CONTENT_MSG_REDIRECT, /**< replacement URL */
|
||||
CONTENT_MSG_REFORMAT, /**< content_reformat done */
|
||||
CONTENT_MSG_REDRAW, /**< needs redraw (eg. new animation frame) */
|
||||
CONTENT_MSG_NEWPTR, /**< address of structure has changed */
|
||||
|
@ -227,6 +226,7 @@ struct content {
|
|||
|
||||
bool no_error_pages; /**< Used by fetchcache(). */
|
||||
bool download; /**< Used by fetchcache(). */
|
||||
unsigned int redirect_count; /**< Used by fetchcache(). */
|
||||
|
||||
/** Array of first n rendering errors or warnings. */
|
||||
struct {
|
||||
|
|
|
@ -51,6 +51,8 @@ static void fetchcache_error_page(struct content *c, const char *error);
|
|||
static void fetchcache_cache_update(struct content *c,
|
||||
const struct cache_data *data);
|
||||
static void fetchcache_notmodified(struct content *c, const void *data);
|
||||
static void fetchcache_redirect(struct content *c, const void *data,
|
||||
unsigned long size);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -380,11 +382,10 @@ void fetchcache_callback(fetch_msg msg, void *p, const void *data,
|
|||
bool res;
|
||||
struct content *c = p;
|
||||
content_type type;
|
||||
char *mime_type, *url;
|
||||
char *mime_type;
|
||||
char **params;
|
||||
unsigned int i;
|
||||
union content_msg_data msg_data;
|
||||
url_func_result result;
|
||||
|
||||
switch (msg) {
|
||||
case FETCH_TYPE:
|
||||
|
@ -457,37 +458,7 @@ void fetchcache_callback(fetch_msg msg, void *p, const void *data,
|
|||
break;
|
||||
|
||||
case FETCH_REDIRECT:
|
||||
c->fetch = 0;
|
||||
/* redirect URLs must be absolute by HTTP/1.1, but many sites send
|
||||
* relative ones: treat them as relative to requested URL */
|
||||
result = url_join(data, c->url, &url);
|
||||
/* set the status to ERROR so that the content is
|
||||
* destroyed in content_clean() */
|
||||
c->status = CONTENT_STATUS_ERROR;
|
||||
if (result == URL_FUNC_OK) {
|
||||
bool same;
|
||||
|
||||
result = url_compare(c->url, url, &same);
|
||||
|
||||
/* check that we're not attempting to
|
||||
* redirect to the same URL */
|
||||
if (result != URL_FUNC_OK || same) {
|
||||
msg_data.error =
|
||||
messages_get("BadRedirect");
|
||||
content_broadcast(c,
|
||||
CONTENT_MSG_ERROR, msg_data);
|
||||
}
|
||||
else {
|
||||
msg_data.redirect = url;
|
||||
content_broadcast(c,
|
||||
CONTENT_MSG_REDIRECT,
|
||||
msg_data);
|
||||
}
|
||||
free(url);
|
||||
} else {
|
||||
msg_data.error = messages_get("BadRedirect");
|
||||
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
||||
}
|
||||
fetchcache_redirect(c, data, size);
|
||||
break;
|
||||
|
||||
case FETCH_NOTMODIFIED:
|
||||
|
@ -790,6 +761,144 @@ void fetchcache_notmodified(struct content *c, const void *data)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Redirect callback handler
|
||||
*/
|
||||
|
||||
void fetchcache_redirect(struct content *c, const void *data,
|
||||
unsigned long size)
|
||||
{
|
||||
char *url;
|
||||
char *referer;
|
||||
long http_code = fetch_http_code(c->fetch);
|
||||
const char *ref = fetch_get_referer(c->fetch);
|
||||
union content_msg_data msg_data;
|
||||
url_func_result result;
|
||||
|
||||
/* Preconditions */
|
||||
assert(c && data);
|
||||
assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);
|
||||
/* Ensure a redirect happened */
|
||||
assert(300 <= http_code && http_code <= 399);
|
||||
/* 304 is handled by fetch_notmodified() */
|
||||
assert(http_code != 304);
|
||||
|
||||
/* Clone referer -- original is destroyed in fetch_abort() */
|
||||
referer = ref ? strdup(ref) : NULL;
|
||||
|
||||
/* set the status to ERROR so that this content is
|
||||
* destroyed in content_clean() */
|
||||
fetch_abort(c->fetch);
|
||||
c->fetch = 0;
|
||||
c->status = CONTENT_STATUS_ERROR;
|
||||
|
||||
/* Ensure that referer cloning succeeded
|
||||
* _must_ be after content invalidation */
|
||||
if (ref && !referer) {
|
||||
LOG(("Failed cloning referer"));
|
||||
|
||||
msg_data.error = messages_get("BadRedirect");
|
||||
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/** \todo 300, 305, 307
|
||||
* More specifically:
|
||||
* + 300 needs to serve up the fetch body to the user
|
||||
* + 305 needs to refetch using the proxy specified in ::data
|
||||
* + 307 needs to refetch.
|
||||
*
|
||||
* If the original request method was either GET or HEAD, then follow
|
||||
* redirect unconditionally. If the original request method was neither
|
||||
* GET nor HEAD, then the user MUST be asked what to do.
|
||||
*
|
||||
* Note:
|
||||
* For backwards compatibility, all 301, 302 and 303 redirects are
|
||||
* followed unconditionally with a GET request to the new location.
|
||||
*/
|
||||
if (http_code != 301 && http_code != 302 && http_code != 303) {
|
||||
LOG(("Unsupported redirect type %ld", http_code));
|
||||
|
||||
msg_data.error = messages_get("BadRedirect");
|
||||
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
||||
|
||||
free(referer);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Forcibly stop redirecting if we've followed too many redirects */
|
||||
#define REDIRECT_LIMIT 10
|
||||
if (c->redirect_count > REDIRECT_LIMIT) {
|
||||
LOG(("Too many nested redirects"));
|
||||
|
||||
msg_data.error = messages_get("BadRedirect");
|
||||
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
||||
|
||||
free(referer);
|
||||
return;
|
||||
}
|
||||
#undef REDIRECT_LIMIT
|
||||
|
||||
/* redirect URLs must be absolute by HTTP/1.1, but many
|
||||
* sites send relative ones: treat them as relative to
|
||||
* requested URL */
|
||||
result = url_join(data, c->url, &url);
|
||||
|
||||
if (result != URL_FUNC_OK) {
|
||||
msg_data.error = messages_get("BadRedirect");
|
||||
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
||||
|
||||
free(referer);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Process users of this content */
|
||||
while (c->user_list->next) {
|
||||
intptr_t p1, p2;
|
||||
void (*callback)(content_msg msg,
|
||||
struct content *c, intptr_t p1,
|
||||
intptr_t p2,
|
||||
union content_msg_data data);
|
||||
struct content *replacement;
|
||||
|
||||
p1 = c->user_list->next->p1;
|
||||
p2 = c->user_list->next->p2;
|
||||
callback = c->user_list->next->callback;
|
||||
|
||||
/* Remove user */
|
||||
content_remove_user(c, callback, p1, p2);
|
||||
|
||||
/* Get replacement content -- HTTP GET request */
|
||||
replacement = fetchcache(url, callback, p1, p2,
|
||||
c->width, c->height, c->no_error_pages,
|
||||
NULL, NULL, false, c->download);
|
||||
if (!replacement) {
|
||||
msg_data.error = messages_get("BadRedirect");
|
||||
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
|
||||
|
||||
free(url);
|
||||
free(referer);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Set replacement's redirect count to 1 greater than ours */
|
||||
replacement->redirect_count = c->redirect_count + 1;
|
||||
|
||||
/* Notify user that content has changed */
|
||||
callback(CONTENT_MSG_NEWPTR, replacement, p1, p2, msg_data);
|
||||
|
||||
/* Start fetching the replacement content */
|
||||
fetchcache_go(replacement, referer, callback, p1, p2,
|
||||
c->width, c->height, NULL, NULL,
|
||||
false, referer ? referer : c->url);
|
||||
}
|
||||
|
||||
/* Clean up */
|
||||
free(url);
|
||||
free(referer);
|
||||
}
|
||||
|
||||
#ifdef TEST
|
||||
|
||||
#include <unistd.h>
|
||||
|
|
40
css/css.c
40
css/css.c
|
@ -431,7 +431,6 @@ bool css_convert(struct content *c, int width, int height)
|
|||
for (i = 0; i != HASH_SIZE; i++)
|
||||
c->data.css.css->rule[i] = 0;
|
||||
c->data.css.import_count = 0;
|
||||
c->data.css.import_url = 0;
|
||||
c->data.css.import_content = 0;
|
||||
c->data.css.origin = CSS_ORIGIN_UA;
|
||||
c->active = 0;
|
||||
|
@ -504,11 +503,9 @@ void css_destroy(struct content *c)
|
|||
/* imported stylesheets */
|
||||
for (i = 0; i != c->data.css.import_count; i++)
|
||||
if (c->data.css.import_content[i] != 0) {
|
||||
free(c->data.css.import_url[i]);
|
||||
content_remove_user(c->data.css.import_content[i],
|
||||
css_atimport_callback, (intptr_t) c, i);
|
||||
}
|
||||
free(c->data.css.import_url);
|
||||
free(c->data.css.import_content);
|
||||
}
|
||||
|
||||
|
@ -790,21 +787,11 @@ void css_atimport(struct content *c, struct css_node *node)
|
|||
char *t, *url, *url1;
|
||||
bool string = false, screen = true;
|
||||
unsigned int i;
|
||||
char **import_url;
|
||||
struct content **import_content;
|
||||
url_func_result res;
|
||||
|
||||
LOG(("@import rule"));
|
||||
|
||||
import_url = realloc(c->data.css.import_url,
|
||||
(c->data.css.import_count + 1) *
|
||||
sizeof(*c->data.css.import_url));
|
||||
if (!import_url) {
|
||||
/** \todo report to user */
|
||||
return;
|
||||
}
|
||||
c->data.css.import_url = import_url;
|
||||
|
||||
import_content = realloc(c->data.css.import_content,
|
||||
(c->data.css.import_count + 1) *
|
||||
sizeof(*c->data.css.import_content));
|
||||
|
@ -889,8 +876,7 @@ void css_atimport(struct content *c, struct css_node *node)
|
|||
/* start the fetch */
|
||||
c->data.css.import_count++;
|
||||
i = c->data.css.import_count - 1;
|
||||
c->data.css.import_url[i] = url1;
|
||||
c->data.css.import_content[i] = fetchcache(c->data.css.import_url[i],
|
||||
c->data.css.import_content[i] = fetchcache(url1,
|
||||
css_atimport_callback, (intptr_t) c, i,
|
||||
c->width, c->height, true, 0, 0, false, false);
|
||||
if (c->data.css.import_content[i]) {
|
||||
|
@ -968,30 +954,6 @@ void css_atimport_callback(content_msg msg, struct content *css,
|
|||
case CONTENT_MSG_STATUS:
|
||||
break;
|
||||
|
||||
case CONTENT_MSG_REDIRECT:
|
||||
c->active--;
|
||||
free(c->data.css.import_url[i]);
|
||||
c->data.css.import_url[i] = strdup(data.redirect);
|
||||
if (!c->data.css.import_url[i]) {
|
||||
/** \todo report to user */
|
||||
/* c->error = 1; */
|
||||
return;
|
||||
}
|
||||
c->data.css.import_content[i] = fetchcache(
|
||||
c->data.css.import_url[i],
|
||||
css_atimport_callback, (intptr_t) c, i,
|
||||
css->width, css->height, true, 0, 0,
|
||||
false, false);
|
||||
if (c->data.css.import_content[i]) {
|
||||
c->active++;
|
||||
fetchcache_go(c->data.css.import_content[i],
|
||||
c->url, css_atimport_callback,
|
||||
(intptr_t) c, i,
|
||||
css->width, css->height,
|
||||
0, 0, false, c->url);
|
||||
}
|
||||
break;
|
||||
|
||||
case CONTENT_MSG_NEWPTR:
|
||||
c->data.css.import_content[i] = css;
|
||||
break;
|
||||
|
|
|
@ -493,7 +493,6 @@ typedef enum {
|
|||
/** CSS-specific content data; reached elsewhere in this file as
 * c->data.css on a struct content. */
struct content_css_data {
|
||||
struct css_stylesheet *css; /**< Opaque stylesheet data. */
|
||||
unsigned int import_count; /**< Number of entries in import_url and
				 * import_content; both arrays are indexed
				 * by the same import index. */
|
||||
char **import_url; /**< Imported stylesheet urls, one per import. */
|
||||
struct content **import_content; /**< Imported stylesheet contents,
				 * one per import; an entry may be 0. */
|
||||
css_origin origin; /**< Origin of stylesheet. */
|
||||
};
|
||||
|
|
|
@ -143,10 +143,6 @@ void callback(content_msg msg, struct content *c, void *p1,
|
|||
done = destroyed = 1;
|
||||
} else if (msg == CONTENT_MSG_STATUS)
|
||||
printf("=== STATUS: %s\n", c->status_message);
|
||||
else if (msg == CONTENT_MSG_REDIRECT) {
|
||||
printf("=== REDIRECT to '%s'\n", data.redirect);
|
||||
done = destroyed = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -509,23 +509,6 @@ void browser_window_callback(content_msg msg, struct content *c,
|
|||
browser_window_set_status(bw, c->status_message);
|
||||
break;
|
||||
|
||||
case CONTENT_MSG_REDIRECT:
|
||||
{
|
||||
const char *prev_url = bw->loading_content->url;
|
||||
|
||||
bw->loading_content = 0;
|
||||
browser_window_set_status(bw,
|
||||
messages_get("Redirecting"));
|
||||
/* the spec says nothing about referrers and
|
||||
* redirects => follow Mozilla and preserve the
|
||||
* referer across the redirect */
|
||||
browser_window_go_post(bw, data.redirect, 0, 0,
|
||||
bw->history_add, bw->referer,
|
||||
bw->download, false,
|
||||
bw->referer ? bw->referer : prev_url);
|
||||
}
|
||||
break;
|
||||
|
||||
case CONTENT_MSG_REFORMAT:
|
||||
if (c == bw->current_content &&
|
||||
c->type == CONTENT_HTML) {
|
||||
|
@ -1177,7 +1160,6 @@ void download_window_callback(fetch_msg msg, void *p, const void *data,
|
|||
break;
|
||||
|
||||
case FETCH_TYPE:
|
||||
case FETCH_REDIRECT:
|
||||
case FETCH_NOTMODIFIED:
|
||||
case FETCH_AUTH:
|
||||
#ifdef WITH_SSL
|
||||
|
|
|
@ -1085,24 +1085,6 @@ void html_convert_css_callback(content_msg msg, struct content *css,
|
|||
content_broadcast(c, CONTENT_MSG_STATUS, data);
|
||||
break;
|
||||
|
||||
case CONTENT_MSG_REDIRECT:
|
||||
c->active--;
|
||||
c->data.html.stylesheet_content[i] = fetchcache(
|
||||
data.redirect,
|
||||
html_convert_css_callback,
|
||||
(intptr_t) c, i, css->width, css->height,
|
||||
true, 0, 0, false, false);
|
||||
if (c->data.html.stylesheet_content[i]) {
|
||||
c->active++;
|
||||
fetchcache_go(c->data.html.stylesheet_content[i],
|
||||
c->url,
|
||||
html_convert_css_callback,
|
||||
(intptr_t) c, i, css->width,
|
||||
css->height, 0, 0, false,
|
||||
c->url);
|
||||
}
|
||||
break;
|
||||
|
||||
case CONTENT_MSG_NEWPTR:
|
||||
c->data.html.stylesheet_content[i] = css;
|
||||
break;
|
||||
|
@ -1169,12 +1151,6 @@ bool html_fetch_object(struct content *c, char *url, struct box *box,
|
|||
return false;
|
||||
}
|
||||
c->data.html.object = object;
|
||||
c->data.html.object[i].url = talloc_strdup(c, url);
|
||||
if (!c->data.html.object[i].url) {
|
||||
content_remove_user(c_fetch, html_object_callback,
|
||||
(intptr_t) c, i);
|
||||
return false;
|
||||
}
|
||||
c->data.html.object[i].box = box;
|
||||
c->data.html.object[i].permitted_types = permitted_types;
|
||||
c->data.html.object[i].background = background;
|
||||
|
@ -1221,8 +1197,6 @@ bool html_replace_object(struct content *c, unsigned int i, char *url,
|
|||
html_object_callback, (intptr_t) c, i);
|
||||
c->data.html.object[i].content = 0;
|
||||
c->data.html.object[i].box->object = 0;
|
||||
talloc_free(c->data.html.object[i].url);
|
||||
c->data.html.object[i].url = 0;
|
||||
}
|
||||
|
||||
/* initialise fetch */
|
||||
|
@ -1234,12 +1208,6 @@ bool html_replace_object(struct content *c, unsigned int i, char *url,
|
|||
if (!c_fetch)
|
||||
return false;
|
||||
|
||||
c->data.html.object[i].url = talloc_strdup(c, url);
|
||||
if (!c->data.html.object[i].url) {
|
||||
content_remove_user(c_fetch, html_object_callback,
|
||||
(intptr_t) c, i);
|
||||
return false;
|
||||
}
|
||||
c->data.html.object[i].content = c_fetch;
|
||||
|
||||
for (page = c; page; page = page->data.html.page) {
|
||||
|
@ -1349,35 +1317,6 @@ void html_object_callback(content_msg msg, struct content *object,
|
|||
/* content_broadcast(c, CONTENT_MSG_STATUS, 0); */
|
||||
break;
|
||||
|
||||
case CONTENT_MSG_REDIRECT:
|
||||
c->active--;
|
||||
talloc_free(c->data.html.object[i].url);
|
||||
c->data.html.object[i].url = talloc_strdup(c,
|
||||
data.redirect);
|
||||
if (!c->data.html.object[i].url) {
|
||||
/** \todo report oom */
|
||||
} else {
|
||||
c->data.html.object[i].content = fetchcache(
|
||||
data.redirect,
|
||||
html_object_callback,
|
||||
(intptr_t) c, i, 0, 0, true,
|
||||
0, 0, false, false);
|
||||
if (!c->data.html.object[i].content) {
|
||||
/** \todo report oom */
|
||||
} else {
|
||||
c->active++;
|
||||
fetchcache_go(c->data.html.object[i].
|
||||
content,
|
||||
c->url,
|
||||
html_object_callback,
|
||||
(intptr_t) c, i,
|
||||
0, 0,
|
||||
0, 0,
|
||||
false, c->url);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case CONTENT_MSG_REFORMAT:
|
||||
break;
|
||||
|
||||
|
@ -1439,7 +1378,6 @@ void html_object_callback(content_msg msg, struct content *object,
|
|||
(msg == CONTENT_MSG_LOADING ||
|
||||
msg == CONTENT_MSG_DONE ||
|
||||
msg == CONTENT_MSG_ERROR ||
|
||||
msg == CONTENT_MSG_REDIRECT ||
|
||||
msg == CONTENT_MSG_AUTH)) {
|
||||
/* all objects have arrived */
|
||||
content_reformat(c, c->available_width, c->height);
|
||||
|
|
|
@ -66,7 +66,6 @@ typedef enum {
|
|||
|
||||
/** An object (<img>, <object>, etc.) in a CONTENT_HTML document. */
|
||||
struct content_html_object {
|
||||
char *url; /**< URL of this object. */
|
||||
struct content *content; /**< Content, or 0. */
|
||||
struct box *box; /**< Node in box tree containing it. */
|
||||
/** Pointer to array of permitted content_type, terminated by
|
||||
|
|
|
@ -1704,14 +1704,6 @@ void plugin_stream_callback(content_msg msg, struct content *c,
|
|||
plugin_STREAM_DESTROY_ERROR);
|
||||
break;
|
||||
|
||||
case CONTENT_MSG_REDIRECT:
|
||||
/* and re-start fetch with new URL */
|
||||
p->c = 0;
|
||||
if (!plugin_start_fetch(p, data.redirect))
|
||||
plugin_destroy_stream(p,
|
||||
plugin_STREAM_DESTROY_ERROR);
|
||||
break;
|
||||
|
||||
case CONTENT_MSG_NEWPTR:
|
||||
p->c = c;
|
||||
break;
|
||||
|
@ -1775,7 +1767,6 @@ void plugin_fetch_callback(fetch_msg msg, void *p, const void *data,
|
|||
break;
|
||||
|
||||
case FETCH_TYPE:
|
||||
case FETCH_REDIRECT:
|
||||
case FETCH_NOTMODIFIED:
|
||||
case FETCH_AUTH:
|
||||
#ifdef WITH_SSL
|
||||
|
|
|
@ -125,7 +125,6 @@ void theme_install_callback(content_msg msg, struct content *c,
|
|||
break;
|
||||
|
||||
case CONTENT_MSG_LOADING:
|
||||
case CONTENT_MSG_REDIRECT:
|
||||
case CONTENT_MSG_REFORMAT:
|
||||
case CONTENT_MSG_REDRAW:
|
||||
case CONTENT_MSG_NEWPTR:
|
||||
|
|
Loading…
Reference in New Issue