[project @ 2003-04-17 21:35:02 by bursa]

Allow at most one fetch from each host at a time; fix multiple fetches of the same URL.

svn path=/import/netsurf/; revision=127
This commit is contained in:
James Bursa 2003-04-17 21:35:02 +00:00
parent 1ab6b6c50b
commit 58c28f9c1a
4 changed files with 238 additions and 29 deletions

View File

@ -1,5 +1,5 @@
/** /**
* $Id: content.c,v 1.7 2003/04/15 17:53:00 bursa Exp $ * $Id: content.c,v 1.8 2003/04/17 21:35:02 bursa Exp $
*/ */
#include <assert.h> #include <assert.h>
@ -121,7 +121,8 @@ void content_revive(struct content *c, unsigned long width, unsigned long height
{ {
assert(c != 0); assert(c != 0);
assert(c->type < CONTENT_OTHER); assert(c->type < CONTENT_OTHER);
assert(c->status == CONTENT_DONE); if (c->status != CONTENT_DONE)
return;
c->available_width = width; c->available_width = width;
handler_map[c->type].revive(c, width, height); handler_map[c->type].revive(c, width, height);
} }

View File

@ -1,11 +1,22 @@
/** /**
* $Id: fetch.c,v 1.5 2003/04/15 17:53:00 bursa Exp $ * $Id: fetch.c,v 1.6 2003/04/17 21:35:02 bursa Exp $
*
* This module handles fetching of data from any url.
*
* Implementation:
* This implementation uses libcurl's 'multi' interface.
*
* Active fetches are held in the linked list fetch_list. There may be at most
* one active fetch from each host. Any further fetches for that host are queued
* behind the active one and started when it ends.
*/ */
#include <assert.h> #include <assert.h>
#include <string.h> #include <string.h>
#include <strings.h>
#include <time.h> #include <time.h>
#include "curl/curl.h" #include "curl/curl.h"
#include "libxml/uri.h"
#include "netsurf/content/fetch.h" #include "netsurf/content/fetch.h"
#include "netsurf/utils/utils.h" #include "netsurf/utils/utils.h"
#include "netsurf/utils/log.h" #include "netsurf/utils/log.h"
@ -19,13 +30,19 @@ struct fetch
int in_callback : 1; int in_callback : 1;
int aborting : 1; int aborting : 1;
char *url; char *url;
char *referer;
char error_buffer[CURL_ERROR_SIZE]; char error_buffer[CURL_ERROR_SIZE];
void *p; void *p;
struct curl_slist *headers; struct curl_slist *headers;
char *host;
struct fetch *queue;
struct fetch *prev;
struct fetch *next;
}; };
static const char * const user_agent = "NetSurf"; static const char * const user_agent = "NetSurf";
static CURLM * curl_multi; static CURLM * curl_multi;
static struct fetch *fetch_list = 0;
static size_t fetch_curl_data(void * data, size_t size, size_t nmemb, struct fetch *f); static size_t fetch_curl_data(void * data, size_t size, size_t nmemb, struct fetch *f);
static size_t fetch_curl_header(void *data, size_t size, size_t nmemb, struct fetch *f); static size_t fetch_curl_header(void *data, size_t size, size_t nmemb, struct fetch *f);
@ -75,19 +92,52 @@ void fetch_quit(void)
struct fetch * fetch_start(char *url, char *referer, struct fetch * fetch_start(char *url, char *referer,
void (*callback)(fetch_msg msg, void *p, char *data, unsigned long size), void *p) void (*callback)(fetch_msg msg, void *p, char *data, unsigned long size), void *p)
{ {
struct fetch* fetch = (struct fetch*) xcalloc(1, sizeof(struct fetch)); struct fetch *fetch = xcalloc(1, sizeof(*fetch)), *host_fetch;
CURLcode code; CURLcode code;
CURLMcode codem; CURLMcode codem;
xmlURI *uri;
LOG(("fetch %p, url '%s'", fetch, url)); LOG(("fetch %p, url '%s'", fetch, url));
fetch->start_time = time(&fetch->start_time); uri = xmlParseURI(url);
assert(uri != 0);
/* construct a new fetch structure */
fetch->start_time = time(0);
fetch->callback = callback; fetch->callback = callback;
fetch->had_headers = 0; fetch->had_headers = 0;
fetch->in_callback = 0; fetch->in_callback = 0;
fetch->aborting = 0; fetch->aborting = 0;
fetch->url = xstrdup(url); fetch->url = xstrdup(url);
fetch->referer = 0;
if (referer != 0)
fetch->referer = xstrdup(referer);
fetch->p = p; fetch->p = p;
fetch->headers = 0;
fetch->host = xstrdup(uri->server);
fetch->queue = 0;
fetch->prev = 0;
fetch->next = 0;
xmlFreeURI(uri);
/* look for a fetch from the same host */
for (host_fetch = fetch_list;
host_fetch != 0 && strcasecmp(host_fetch->host, fetch->host) != 0;
host_fetch = host_fetch->next)
;
if (host_fetch != 0) {
/* fetch from this host in progress: queue the new fetch */
LOG(("queueing"));
fetch->queue = host_fetch->queue;
host_fetch->queue = fetch;
return fetch;
}
fetch->next = fetch_list;
if (fetch_list != 0)
fetch_list->prev = fetch;
fetch_list = fetch;
/* create the curl easy handle */ /* create the curl easy handle */
fetch->curl_handle = curl_easy_init(); fetch->curl_handle = curl_easy_init();
@ -148,13 +198,59 @@ void fetch_abort(struct fetch *f)
return; return;
} }
/* remove from curl */ /* remove from list of fetches */
if (f->prev == 0)
fetch_list = f->next;
else
f->prev->next = f->next;
if (f->next != 0)
f->next->prev = f->prev;
/* remove from curl multi handle */
codem = curl_multi_remove_handle(curl_multi, f->curl_handle); codem = curl_multi_remove_handle(curl_multi, f->curl_handle);
assert(codem == CURLM_OK); assert(codem == CURLM_OK);
if (f->queue != 0) {
/* start a queued fetch for this host, reusing the handle for this host */
struct fetch *fetch = f->queue;
CURLcode code;
CURLMcode codem;
LOG(("starting queued %p '%s'", fetch, fetch->url));
fetch->prev = 0;
fetch->next = fetch_list;
if (fetch_list != 0)
fetch_list->prev = fetch;
fetch_list = fetch;
fetch->curl_handle = f->curl_handle;
code = curl_easy_setopt(fetch->curl_handle, CURLOPT_URL, fetch->url);
assert(code == CURLE_OK);
code = curl_easy_setopt(fetch->curl_handle, CURLOPT_PRIVATE, fetch);
assert(code == CURLE_OK);
code = curl_easy_setopt(fetch->curl_handle, CURLOPT_ERRORBUFFER, fetch->error_buffer);
assert(code == CURLE_OK);
code = curl_easy_setopt(fetch->curl_handle, CURLOPT_WRITEDATA, fetch);
assert(code == CURLE_OK);
/* TODO: remove referer header if fetch->referer == 0 */
if (fetch->referer != 0) {
code = curl_easy_setopt(fetch->curl_handle, CURLOPT_REFERER, fetch->referer);
assert(code == CURLE_OK);
}
/* add to the global curl multi handle */
codem = curl_multi_add_handle(curl_multi, fetch->curl_handle);
assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM);
} else {
curl_easy_cleanup(f->curl_handle); curl_easy_cleanup(f->curl_handle);
curl_slist_free_all(f->headers); curl_slist_free_all(f->headers);
}
xfree(f->url); xfree(f->url);
xfree(f->host);
xfree(f->referer);
xfree(f); xfree(f);
} }
@ -169,9 +265,10 @@ void fetch_poll(void)
{ {
CURLcode code; CURLcode code;
CURLMcode codem; CURLMcode codem;
int running, queue; int running, queue, finished;
CURLMsg * curl_msg; CURLMsg * curl_msg;
struct fetch *f; struct fetch *f;
void *p;
/* do any possible work on the current fetches */ /* do any possible work on the current fetches */
do { do {
@ -191,8 +288,10 @@ void fetch_poll(void)
LOG(("CURLMSG_DONE, result %i", curl_msg->data.result)); LOG(("CURLMSG_DONE, result %i", curl_msg->data.result));
/* inform the caller that the fetch is done */ /* inform the caller that the fetch is done */
finished = 0;
p = f->p;
if (curl_msg->data.result == CURLE_OK && f->had_headers) if (curl_msg->data.result == CURLE_OK && f->had_headers)
f->callback(FETCH_FINISHED, f->p, 0, 0); finished = 1;
else if (curl_msg->data.result == CURLE_OK) else if (curl_msg->data.result == CURLE_OK)
f->callback(FETCH_ERROR, f->p, "No data received", 0); f->callback(FETCH_ERROR, f->p, "No data received", 0);
else if (curl_msg->data.result != CURLE_WRITE_ERROR) else if (curl_msg->data.result != CURLE_WRITE_ERROR)
@ -201,6 +300,10 @@ void fetch_poll(void)
/* clean up fetch */ /* clean up fetch */
fetch_abort(f); fetch_abort(f);
/* postponed until after abort so that queued fetches are started */
if (finished)
f->callback(FETCH_FINISHED, p, 0, 0);
break; break;
default: default:

View File

@ -1,5 +1,28 @@
/** /**
* $Id: fetch.h,v 1.2 2003/03/15 15:53:20 bursa Exp $ * $Id: fetch.h,v 1.3 2003/04/17 21:35:02 bursa Exp $
*
* This module handles fetching of data from any url.
*
* Usage:
*
* fetch_init() must be called once before any other function. fetch_quit()
* must be called before exiting.
*
* fetch_start() will begin fetching a url. The function returns immediately.
* A pointer to an opaque struct fetch is returned, which can be passed to
* fetch_abort() to abort the fetch at any time. The caller must supply a
* callback function which is called when anything interesting happens. The
* callback function is first called with msg = FETCH_TYPE, with the
* Content-Type header in data, then one or more times with FETCH_DATA with
* some data for the url, and finally with FETCH_FINISHED. Alternatively,
* FETCH_ERROR indicates an error occurred: data contains an error message.
* Some private data can be passed as the last parameter to fetch_start(); it is
* handed back unchanged as the p argument of every callback.
*
* fetch_poll() must be called regularly to make progress on fetches.
*
* fetch_filetype() is used internally to determine the mime type of local
* files. It is platform specific, and implemented elsewhere.
*/ */
#ifndef _NETSURF_DESKTOP_FETCH_H_ #ifndef _NETSURF_DESKTOP_FETCH_H_

View File

@ -1,5 +1,5 @@
/** /**
* $Id: fetchcache.c,v 1.7 2003/04/09 21:57:09 bursa Exp $ * $Id: fetchcache.c,v 1.8 2003/04/17 21:35:02 bursa Exp $
*/ */
#include <assert.h> #include <assert.h>
@ -20,8 +20,13 @@ struct fetchcache {
unsigned long width, height; unsigned long width, height;
unsigned long size; unsigned long size;
content_type allowed; content_type allowed;
struct fetchcache *next;
struct fetchcache *prev;
struct fetchcache *next_request;
int active;
}; };
static struct fetchcache *fetchcache_list = 0;
static void fetchcache_free(struct fetchcache *fc); static void fetchcache_free(struct fetchcache *fc);
static void fetchcache_callback(fetchcache_msg msg, void *p, char *data, unsigned long size); static void fetchcache_callback(fetchcache_msg msg, void *p, char *data, unsigned long size);
@ -33,7 +38,7 @@ void fetchcache(const char *url, char *referer,
void *p, unsigned long width, unsigned long height, content_type allowed) void *p, unsigned long width, unsigned long height, content_type allowed)
{ {
struct content *c; struct content *c;
struct fetchcache *fc; struct fetchcache *fc, *fc_url;
c = cache_get(url); c = cache_get(url);
if (c != 0) { if (c != 0) {
@ -59,7 +64,30 @@ void fetchcache(const char *url, char *referer,
fc->height = height; fc->height = height;
fc->size = 0; fc->size = 0;
fc->allowed = allowed; fc->allowed = allowed;
fc->next = 0;
fc->prev = 0;
fc->next_request = 0;
fc->active = 1;
/* check if we're already fetching this url */
for (fc_url = fetchcache_list;
fc_url != 0 && strcmp(fc_url->url, url) != 0;
fc_url = fc_url->next)
;
if (fc_url != 0) {
/* already fetching: add ourselves to list of requestors */
LOG(("already fetching"));
fc->next_request = fc_url->next_request;
fc_url->next_request = fc;
} else {
/* not fetching yet */
if (fetchcache_list != 0)
fetchcache_list->prev = fc;
fc->next = fetchcache_list;
fetchcache_list = fc;
fc->f = fetch_start(fc->url, referer, fetchcache_callback, fc); fc->f = fetch_start(fc->url, referer, fetchcache_callback, fc);
}
} }
@ -67,16 +95,24 @@ void fetchcache_free(struct fetchcache *fc)
{ {
free(fc->url); free(fc->url);
free(fc); free(fc);
if (fc->prev == 0)
fetchcache_list = fc->next;
else
fc->prev->next = fc->next;
if (fc->next != 0)
fc->next->prev = fc->prev;
} }
void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size) void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size)
{ {
struct fetchcache *fc = p; struct fetchcache *fc = p, *fc_url;
content_type type; content_type type;
char *mime_type; char *mime_type;
char *semic; char *semic;
char status[40]; char status[40];
int active = 0;
switch (msg) { switch (msg) {
case FETCH_TYPE: case FETCH_TYPE:
mime_type = strdup(data); mime_type = strdup(data);
@ -84,46 +120,90 @@ void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size)
*semic = 0; /* remove "; charset=..." */ *semic = 0; /* remove "; charset=..." */
type = content_lookup(mime_type); type = content_lookup(mime_type);
LOG(("FETCH_TYPE, type %u", type)); LOG(("FETCH_TYPE, type %u", type));
if ((1 << type) & fc->allowed) {
/* check if each request allows this type */
for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request) {
if (!fc_url->active)
continue;
if ((1 << type) & fc_url->allowed) {
active++;
} else {
fc_url->active = 0;
fc_url->callback(FETCHCACHE_BADTYPE, 0,
fc_url->p, mime_type);
}
}
if (active != 0) {
/* someone is still interested */
fc->c = content_create(type, fc->url); fc->c = content_create(type, fc->url);
fc->c->status_callback = status_callback; fc->c->status_callback = status_callback;
fc->c->status_p = fc; fc->c->status_p = fc;
} else { } else {
/* no request allows the type */
fetch_abort(fc->f); fetch_abort(fc->f);
fc->callback(FETCHCACHE_BADTYPE, 0, fc->p, mime_type); for (; fc != 0; fc = fc_url) {
free(fc); fc_url = fc->next_request;
fetchcache_free(fc);
} }
}
free(mime_type); free(mime_type);
break; break;
case FETCH_DATA: case FETCH_DATA:
LOG(("FETCH_DATA")); LOG(("FETCH_DATA"));
assert(fc->c != 0); assert(fc->c != 0);
fc->size += size; fc->size += size;
sprintf(status, "Received %lu bytes", fc->size); sprintf(status, "Received %lu bytes", fc->size);
fc->callback(FETCHCACHE_STATUS, fc->c, fc->p, status); for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request)
if (fc_url->active)
fc_url->callback(FETCHCACHE_STATUS, fc->c,
fc_url->p, status);
content_process_data(fc->c, data, size); content_process_data(fc->c, data, size);
break; break;
case FETCH_FINISHED: case FETCH_FINISHED:
LOG(("FETCH_FINISHED")); LOG(("FETCH_FINISHED"));
assert(fc->c != 0); assert(fc->c != 0);
sprintf(status, "Converting %lu bytes", fc->size); sprintf(status, "Converting %lu bytes", fc->size);
fc->callback(FETCHCACHE_STATUS, fc->c, fc->p, status); for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request)
if (fc_url->active)
fc_url->callback(FETCHCACHE_STATUS, fc->c,
fc_url->p, status);
if (content_convert(fc->c, fc->width, fc->height) == 0) { if (content_convert(fc->c, fc->width, fc->height) == 0) {
cache_put(fc->c); cache_put(fc->c);
fc->callback(FETCHCACHE_OK, fc->c, fc->p, 0); for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request)
if (fc_url->active)
fc_url->callback(FETCHCACHE_OK, cache_get(fc->url),
fc_url->p, 0);
cache_free(fc->c);
} else { } else {
content_destroy(fc->c); content_destroy(fc->c);
fc->callback(FETCHCACHE_ERROR, 0, fc->p, "Conversion failed"); for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request)
if (fc_url->active)
fc_url->callback(FETCHCACHE_ERROR, 0,
fc_url->p, "Conversion failed");
} }
for (; fc != 0; fc = fc_url) {
fc_url = fc->next_request;
fetchcache_free(fc); fetchcache_free(fc);
}
break; break;
case FETCH_ERROR: case FETCH_ERROR:
LOG(("FETCH_ERROR, '%s'", data)); LOG(("FETCH_ERROR, '%s'", data));
if (fc->c != 0) if (fc->c != 0)
content_destroy(fc->c); content_destroy(fc->c);
fc->callback(FETCHCACHE_ERROR, 0, fc->p, data); for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request)
if (fc_url->active)
fc->callback(FETCHCACHE_ERROR, 0, fc_url->p, data);
for (; fc != 0; fc = fc_url) {
fc_url = fc->next_request;
fetchcache_free(fc); fetchcache_free(fc);
}
break; break;
default: default:
assert(0); assert(0);
} }
@ -132,8 +212,10 @@ void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size)
void status_callback(void *p, const char *status) void status_callback(void *p, const char *status)
{ {
struct fetchcache *fc = p; struct fetchcache *fc = p, *fc_url;
fc->callback(FETCHCACHE_STATUS, fc->c, fc->p, status); for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request)
if (fc_url->active)
fc_url->callback(FETCHCACHE_STATUS, fc->c, fc_url->p, status);
} }