[project @ 2006-03-14 14:21:00 by dsilvers]

Multi-parallel fetch. Needs documenting, works in basic GTK tests, makes things way faster on GTK port. Needs testing on RO and in general more testing is good.

svn path=/import/netsurf/; revision=2127
This commit is contained in:
Daniel Silverstone 2006-03-14 14:21:01 +00:00
parent 63a70da244
commit 40260f177a
3 changed files with 281 additions and 118 deletions

View File

@ -2,6 +2,7 @@
* This file is part of NetSurf, http://netsurf.sourceforge.net/ * This file is part of NetSurf, http://netsurf.sourceforge.net/
* Licensed under the GNU General Public License, * Licensed under the GNU General Public License,
* http://www.opensource.org/licenses/gpl-license * http://www.opensource.org/licenses/gpl-license
* Copyright 2006 Daniel Silverstone <dsilvers@digital-scurf.org>
* Copyright 2004 James Bursa <bursa@users.sourceforge.net> * Copyright 2004 James Bursa <bursa@users.sourceforge.net>
* Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net> * Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
*/ */
@ -14,10 +15,6 @@
* Active fetches are held in the linked list fetch_list. There may be at most * Active fetches are held in the linked list fetch_list. There may be at most
* one fetch in progress from each host. Any further fetches are queued until * one fetch in progress from each host. Any further fetches are queued until
* the previous one ends. * the previous one ends.
*
* Invariant: only the fetch at the head of each queue is in progress, ie.
* queue_prev == 0 <=> curl_handle != 0
* and queue_prev != 0 <=> curl_handle == 0.
*/ */
#include <assert.h> #include <assert.h>
@ -88,17 +85,26 @@ struct fetch {
#define MAX_CERTS 10 #define MAX_CERTS 10
struct cert_info cert_data[MAX_CERTS]; /**< HTTPS certificate data */ struct cert_info cert_data[MAX_CERTS]; /**< HTTPS certificate data */
#endif #endif
struct fetch *queue_prev; /**< Previous fetch for this host. */ struct fetch *r_prev; /**< Previous active fetch in ::fetch_ring. */
struct fetch *queue_next; /**< Next fetch for this host. */ struct fetch *r_next; /**< Next active fetch in ::fetch_ring. */
struct fetch *prev; /**< Previous active fetch in ::fetch_list. */ };
struct fetch *next; /**< Next active fetch in ::fetch_list. */
struct cache_handle {
CURL *handle; /**< The cached cURL handle */
char *host; /**< The host for which this handle is cached */
struct cache_handle *r_prev; /**< Previous cached handle in ring. */
struct cache_handle *r_next; /**< Next cached handle in ring. */
}; };
static const char * const user_agent = "NetSurf"; static const char * const user_agent = "NetSurf";
CURLM *fetch_curl_multi; /**< Global cURL multi handle. */ CURLM *fetch_curl_multi; /**< Global cURL multi handle. */
/** Curl handle with default options set; not used for transfers. */ /** Curl handle with default options set; not used for transfers. */
static CURL *fetch_blank_curl; static CURL *fetch_blank_curl;
static struct fetch *fetch_list = 0; /**< List of active fetches. */ static struct fetch *fetch_ring = 0; /**< Ring of active fetches. */
static struct fetch *queue_ring = 0; /**< Ring of queued fetches */
static struct cache_handle *handle_ring = 0; /**< Ring of cached handles */
static char fetch_error_buffer[CURL_ERROR_SIZE]; /**< Error buffer for cURL. */ static char fetch_error_buffer[CURL_ERROR_SIZE]; /**< Error buffer for cURL. */
static char fetch_progress_buffer[256]; /**< Progress buffer for cURL */ static char fetch_progress_buffer[256]; /**< Progress buffer for cURL */
static char fetch_proxy_userpwd[100]; /**< Proxy authentication details. */ static char fetch_proxy_userpwd[100]; /**< Proxy authentication details. */
@ -124,6 +130,77 @@ static int fetch_verify_callback(int preverify_ok, X509_STORE_CTX *x509_ctx);
static int fetch_cert_verify_callback(X509_STORE_CTX *x509_ctx, void *parm); static int fetch_cert_verify_callback(X509_STORE_CTX *x509_ctx, void *parm);
#endif #endif
/** Insert the given element at the tail of the specified ring.
 *
 * A ring is a circular, doubly linked list threaded through the r_prev and
 * r_next members; `ring` is the head pointer variable (0 when empty).  The
 * element is linked in just before the head, i.e. at the end of the ring.
 * If the ring was empty the element becomes the head and links to itself.
 * Both link members of the element are overwritten.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used safely in an unbraced if / else.  Note that
 * `ring` and `element` are evaluated more than once.
 */
#define RING_INSERT(ring, element) \
	do { \
		LOG(("RING_INSERT(%s, %p(%s))", #ring, \
				(void *) (element), (element)->host)); \
		if (ring) { \
			(element)->r_next = (ring); \
			(element)->r_prev = (ring)->r_prev; \
			(ring)->r_prev = (element); \
			(element)->r_prev->r_next = (element); \
		} else { \
			(ring) = (element)->r_prev = \
					(element)->r_next = (element); \
		} \
	} while (0)
/** Remove the given element from the specified ring.
 *
 * If the element is the ring head, the head moves on to the next element;
 * if it is the only element, the ring becomes empty (0).  The element's
 * r_next and r_prev are zeroed afterwards.
 *
 * An element whose r_next is 0 is treated as not being in any ring and is
 * left alone (the ring is not modified), so the macro is safe to use on an
 * element that may already have been removed.
 *
 * The log statement reads element->host, so the host string must still be
 * valid when this macro is invoked.
 *
 * Wrapped in do { } while (0) so it expands to a single statement.  `ring`
 * and `element` are evaluated more than once.
 */
#define RING_REMOVE(ring, element) \
	do { \
		LOG(("RING_REMOVE(%s, %p(%s))", #ring, \
				(void *) (element), (element)->host)); \
		if ((element)->r_next == 0) { \
			/* Not linked into a ring: nothing to unlink */ \
		} else if ((element)->r_next != (element)) { \
			/* Not the only thing in the ring */ \
			(element)->r_next->r_prev = (element)->r_prev; \
			(element)->r_prev->r_next = (element)->r_next; \
			if ((ring) == (element)) \
				(ring) = (element)->r_next; \
		} else { \
			/* Only thing in the ring */ \
			(ring) = 0; \
		} \
		(element)->r_next = (element)->r_prev = 0; \
	} while (0)
/** Find the first element in the given ring whose host matches `hostname`
 * (compared case-insensitively with strcasecmp) and leave it in the provided
 * `element` variable.  `element` is set to 0 if the ring is empty or no
 * element matches.
 *
 * The walk starts at the ring head and stops either on the first match or
 * when it has wrapped round to the head again, at which point `element` is
 * cleared.  (Clearing it only on wrap-around matters: clearing it
 * unconditionally after the loop would also discard a successful match.)
 *
 * Wrapped in do { } while (0) so it expands to a single statement.  The
 * arguments are evaluated more than once.
 */
#define RING_FINDBYHOST(ring, element, hostname) \
	do { \
		LOG(("RING_FINDBYHOST(%s, %s)", #ring, hostname)); \
		(element) = (ring); \
		while ((element) != 0 && \
				strcasecmp((element)->host, (hostname)) != 0) { \
			(element) = (element)->r_next; \
			if ((element) == (ring)) \
				(element) = 0; /* wrapped: no match */ \
		} \
	} while (0)
/** Measure the size of a ring and put it in the supplied variable.
 *
 * `ringtype` is the element type of the ring (e.g. struct fetch), `ring` the
 * head pointer and `sizevar` an integer lvalue that receives the number of
 * elements (0 for an empty ring).
 *
 * Wrapped in do { } while (0) so it expands to a single statement.  The
 * internal iterator has a deliberately unusual name so that it cannot
 * capture an identifier used in the caller's `ring` expression.
 */
#define RING_GETSIZE(ringtype, ring, sizevar) \
	do { \
		LOG(("RING_GETSIZE(%s)", #ring)); \
		(sizevar) = 0; \
		if (ring) { \
			ringtype *ring_getsize_iter_ = (ring); \
			do { \
				(sizevar)++; \
				ring_getsize_iter_ = \
						ring_getsize_iter_->r_next; \
			} while (ring_getsize_iter_ != (ring)); \
		} \
	} while (0)
/** Count the number of elements in the ring which match the provided
 * hostname (compared case-insensitively with strcasecmp).
 *
 * `ringtype` is the element type of the ring, `ring` the head pointer,
 * `sizevar` an integer lvalue that receives the count (0 for an empty ring)
 * and `hostname` the host to compare each element's host member against.
 *
 * Wrapped in do { } while (0) so it expands to a single statement.  The
 * internal iterator has a deliberately unusual name so that it cannot
 * capture an identifier used in the caller's `ring` or `hostname`
 * expressions.
 */
#define RING_COUNTBYHOST(ringtype, ring, sizevar, hostname) \
	do { \
		LOG(("RING_COUNTBYHOST(%s, %s)", #ring, hostname)); \
		(sizevar) = 0; \
		if (ring) { \
			ringtype *ring_count_iter_ = (ring); \
			do { \
				if (strcasecmp(ring_count_iter_->host, \
						(hostname)) == 0) \
					(sizevar)++; \
				ring_count_iter_ = ring_count_iter_->r_next; \
			} while (ring_count_iter_ != (ring)); \
		} \
	} while (0)
static void ns_internal_cache_handle(CURL *handle, char *hostname);
static void ns_internal_dispatch_jobs(void);
/** /**
* Initialise the fetcher. * Initialise the fetcher.
@ -232,9 +309,6 @@ struct fetch * fetch_start(char *url, char *referer,
{ {
char *host; char *host;
struct fetch *fetch; struct fetch *fetch;
struct fetch *host_fetch;
CURLcode code;
CURLMcode codem;
struct curl_slist *slist; struct curl_slist *slist;
url_func_result res; url_func_result res;
char *ref1 = 0, *ref2 = 0; char *ref1 = 0, *ref2 = 0;
@ -308,10 +382,8 @@ struct fetch * fetch_start(char *url, char *referer,
#ifdef WITH_SSL #ifdef WITH_SSL
memset(fetch->cert_data, 0, sizeof(fetch->cert_data)); memset(fetch->cert_data, 0, sizeof(fetch->cert_data));
#endif #endif
fetch->queue_prev = 0; fetch->r_prev = 0;
fetch->queue_next = 0; fetch->r_next = 0;
fetch->prev = 0;
fetch->next = 0;
if (!fetch->url || (referer && if (!fetch->url || (referer &&
(ref1 && ref2 && strcasecmp(ref1, ref2) == 0) && (ref1 && ref2 && strcasecmp(ref1, ref2) == 0) &&
@ -370,44 +442,9 @@ struct fetch * fetch_start(char *url, char *referer,
APPEND(fetch->headers, headers[i]); APPEND(fetch->headers, headers[i]);
} }
/* look for a fetch from the same host */ /* Dump us in the queue and ask the queue to run. */
for (host_fetch = fetch_list; RING_INSERT(queue_ring, fetch);
host_fetch && strcasecmp(host_fetch->host, host) != 0; ns_internal_dispatch_jobs();
host_fetch = host_fetch->next)
;
if (host_fetch) {
/* fetch from this host in progress:
queue the new fetch */
LOG(("queueing"));
fetch->curl_handle = 0;
/* queue at end */
for (; host_fetch->queue_next;
host_fetch = host_fetch->queue_next)
;
fetch->queue_prev = host_fetch;
host_fetch->queue_next = fetch;
return fetch;
}
/* create the curl easy handle */
fetch->curl_handle = curl_easy_duphandle(fetch_blank_curl);
if (!fetch->curl_handle)
goto failed;
code = fetch_set_options(fetch);
if (code != CURLE_OK)
goto failed;
/* add to the global curl multi handle */
codem = curl_multi_add_handle(fetch_curl_multi, fetch->curl_handle);
assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM);
fetch->next = fetch_list;
if (fetch_list != 0)
fetch_list->prev = fetch;
fetch_list = fetch;
fetch_active = true;
return fetch; return fetch;
failed: failed:
@ -426,6 +463,152 @@ failed:
return 0; return 0;
} }
/**
 * Initiate a fetch from the queue.
 *
 * Attaches the given cURL easy handle to the fetch, applies the fetch's
 * options to it with fetch_set_options() and adds it to the global cURL
 * multi handle.  On success fetch_active is set.
 *
 * The handle is owned by this function from the moment it is called: the
 * caller hands it over without keeping a reference, so if the options cannot
 * be set the handle is destroyed here rather than leaked.
 *
 * \param  fetch   fetch structure to start
 * \param  handle  cURL handle to be used to fetch the content
 * \return true if the fetch was successfully initiated; false otherwise, in
 *         which case fetch->curl_handle is 0 and the handle has been
 *         cleaned up
 */
static bool ns_internal_initiate_fetch(struct fetch *fetch, CURL *handle)
{
	CURLcode code;
	CURLMcode codem;

	fetch->curl_handle = handle;

	/* Initialise the handle */
	code = fetch_set_options(fetch);
	if (code != CURLE_OK) {
		/* Nobody else holds this handle, so release it here */
		if (handle)
			curl_easy_cleanup(handle);
		fetch->curl_handle = 0;
		return false;
	}

	/* add to the global curl multi handle */
	codem = curl_multi_add_handle(fetch_curl_multi, fetch->curl_handle);
	assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM);

	fetch_active = true;
	return true;
}
/**
 * Find a CURL handle to use to dispatch a job.
 *
 * If a handle is cached for the given host in handle_ring, that handle is
 * taken out of the cache (its cache entry is unlinked and freed) and
 * returned.  Otherwise a fresh handle is created by duplicating
 * fetch_blank_curl.
 *
 * \param  host  host name the handle is wanted for
 * \return a cURL easy handle, which the caller now owns; this is whatever
 *         curl_easy_duphandle() returned when no cached handle was found
 */
static CURL *ns_internal_get_handle(char *host)
{
	struct cache_handle *h;
	CURL *ret;

	RING_FINDBYHOST(handle_ring, h, host);
	if (h) {
		ret = h->handle;
		/* Unlink first: RING_REMOVE logs h->host, so the host string
		 * must not be freed until after the entry has been removed.
		 */
		RING_REMOVE(handle_ring, h);
		free(h->host);
		free(h);
	} else {
		ret = curl_easy_duphandle(fetch_blank_curl);
	}
	return ret;
}
/**
* Dispatch a single job
*/
static bool ns_internal_dispatch_job(struct fetch *fetch)
{
RING_REMOVE(queue_ring, fetch);
if (!ns_internal_initiate_fetch(fetch, ns_internal_get_handle(fetch->host))) {
RING_INSERT(queue_ring, fetch); /* Put it back on the end of the queue */
return false;
} else {
RING_INSERT(fetch_ring, fetch);
return true;
}
}
/**
 * Choose and dispatch a single job.
 *
 * Walks queue_ring from its head and dispatches the first queued fetch
 * whose host currently has fewer than option_max_fetchers_per_host entries
 * in fetch_ring.
 *
 * The overall number of active fetches is not checked here; the caller only
 * invokes this when there is room.  queue_ring must not be empty.
 *
 * \return the result of dispatching the chosen fetch, or false if no queued
 *         fetch was eligible
 */
static bool ns_internal_choose_and_dispatch(void)
{
	struct fetch *candidate = queue_ring;

	for (;;) {
		int active_for_host;

		/* How many fetches are already running against this host? */
		RING_COUNTBYHOST(struct fetch, fetch_ring, active_for_host,
				candidate->host);
		if (active_for_host < option_max_fetchers_per_host) {
			/* There is room for this host: try to start it */
			return ns_internal_dispatch_job(candidate);
		}

		candidate = candidate->r_next;
		if (candidate == queue_ring)
			break;	/* been all the way round the queue */
	}

	return false;
}
/**
 * Dispatch as many jobs as we have room to dispatch.
 *
 * Counts the queued and active fetches once, then keeps starting queued
 * fetches while any remain and the number active is below
 * option_max_fetchers.  Stops early as soon as a dispatch attempt reports
 * failure.
 */
static void ns_internal_dispatch_jobs(void)
{
	int queued, active;

	/* Nothing to do, the queue is empty */
	if (!queue_ring)
		return;

	RING_GETSIZE(struct fetch, queue_ring, queued);
	RING_GETSIZE(struct fetch, fetch_ring, active);

	while (queued && active < option_max_fetchers) {
		LOG(("%d queued, %d fetching", queued, active));

		/* Either a dispatch failed or we ran out. Just stop */
		if (!ns_internal_choose_and_dispatch())
			break;

		queued--;
		active++;
	}
}
/**
 * Cache a CURL handle for the provided host (if wanted).
 *
 * Ownership of the handle passes to this function.  It is either stored in
 * handle_ring for later reuse or destroyed with curl_easy_cleanup():
 *
 *  - if a handle is already cached for this host, the new one is destroyed;
 *  - if the cache is full, the oldest entry (the ring head) is recycled to
 *    hold the new handle and the handle it held is destroyed;
 *  - otherwise a new cache entry is allocated and appended.
 *
 * If caching is effectively disabled (option_max_cached_fetch_handles is
 * zero or negative, so the "full" test passes while the ring is empty) or
 * memory cannot be allocated, the handle is destroyed and the cache is left
 * unchanged.
 *
 * \param  handle  cURL easy handle that is no longer in use
 * \param  host    host name the handle was used for
 */
static void ns_internal_cache_handle(CURL *handle, char *host)
{
	struct cache_handle *h = 0;
	char *host_copy;
	int c;

	RING_FINDBYHOST(handle_ring, h, host);
	if (h) {
		/* Already have a handle cached for this hostname */
		curl_easy_cleanup(handle);
		return;
	}

	/* We do not have a handle cached, first up determine if the cache
	 * is full
	 */
	RING_GETSIZE(struct cache_handle, handle_ring, c);
	if (c >= option_max_cached_fetch_handles && !handle_ring) {
		/* "Full" but empty: the limit is <= 0, so there is no entry
		 * to recycle and nothing may be cached.
		 */
		curl_easy_cleanup(handle);
		return;
	}

	/* Copy the hostname before touching the cache so that an allocation
	 * failure cannot leave an entry without a host.
	 */
	host_copy = strdup(host);
	if (!host_copy) {
		curl_easy_cleanup(handle);
		return;
	}

	if (c >= option_max_cached_fetch_handles) {
		/* Cache is full, so, we rotate the ring by one and replace
		 * the oldest handle with this one. We do this without
		 * freeing/allocating memory (except the hostname) and without
		 * removing the entry from the ring and then re-inserting it,
		 * in order to be as efficient as we can.
		 */
		h = handle_ring;
		handle_ring = h->r_next;
		curl_easy_cleanup(h->handle);
		h->handle = handle;
		free(h->host);
		h->host = host_copy;
		return;
	}

	/* The table isn't full yet, so make a shiny new handle to add to
	 * the ring
	 */
	h = malloc(sizeof *h);
	if (!h) {
		free(host_copy);
		curl_easy_cleanup(handle);
		return;
	}
	h->handle = handle;
	h->host = host_copy;
	h->r_prev = h->r_next = 0;
	RING_INSERT(handle_ring, h);
}
/** /**
* Set options specific for a fetch. * Set options specific for a fetch.
@ -532,89 +715,48 @@ void fetch_abort(struct fetch *f)
{ {
assert(f); assert(f);
LOG(("fetch %p, url '%s'", f, f->url)); LOG(("fetch %p, url '%s'", f, f->url));
if (f->queue_prev) { if (f->curl_handle) {
f->queue_prev->queue_next = f->queue_next;
if (f->queue_next)
f->queue_next->queue_prev = f->queue_prev;
fetch_free(f);
} else {
f->abort = true; f->abort = true;
} else {
RING_REMOVE(queue_ring, f);
fetch_free(f);
} }
} }
/** /**
* Clean up a fetch and start any queued fetch for the same host. * Clean up the provided fetch object and free it.
*
* Will prod the queue afterwards to allow pending requests to be initiated.
*/ */
void fetch_stop(struct fetch *f) void fetch_stop(struct fetch *f)
{ {
CURLcode code;
CURLMcode codem; CURLMcode codem;
struct fetch *fetch;
struct fetch *next_fetch;
assert(f); assert(f);
LOG(("fetch %p, url '%s'", f, f->url)); LOG(("fetch %p, url '%s'", f, f->url));
/* remove from list of fetches */
if (f->prev == 0)
fetch_list = f->next;
else
f->prev->next = f->next;
if (f->next != 0)
f->next->prev = f->prev;
/* remove from curl multi handle */
if (f->curl_handle) { if (f->curl_handle) {
/* remove from curl multi handle */
codem = curl_multi_remove_handle(fetch_curl_multi, codem = curl_multi_remove_handle(fetch_curl_multi,
f->curl_handle); f->curl_handle);
assert(codem == CURLM_OK); assert(codem == CURLM_OK);
} /* Put this curl handle into the cache if wanted. */
ns_internal_cache_handle(f->curl_handle, f->host);
if (f->curl_handle && f->queue_next) {
/* start a queued fetch for this host, reusing the handle */
fetch = f->queue_next;
LOG(("starting queued %p '%s'", fetch, fetch->url));
fetch->curl_handle = f->curl_handle;
f->curl_handle = 0; f->curl_handle = 0;
fetch->cachedata.req_time = time(0); /* Remove this from the active set of fetches (if it's still there) */
code = fetch_set_options(fetch); RING_REMOVE(fetch_ring, f);
if (code == CURLE_OK)
/* add to the global curl multi handle */
codem = curl_multi_add_handle(fetch_curl_multi,
fetch->curl_handle);
if (code == CURLE_OK && (codem == CURLM_OK ||
codem == CURLM_CALL_MULTI_PERFORM)) {
/* add to list of fetches */
fetch->prev = 0;
fetch->next = fetch_list;
if (fetch_list != 0)
fetch_list->prev = fetch;
fetch_list = fetch;
fetch->queue_prev = 0;
} else { } else {
/* destroy all queued fetches for this host */ /* Remove this from the queued set of fetches (if it's still there) */
do { RING_REMOVE(queue_ring, f);
fetch->callback(FETCH_ERROR, fetch->p,
messages_get("FetchError"), 0);
next_fetch = fetch->queue_next;
fetch_free(fetch);
fetch = next_fetch;
} while (fetch);
}
} else {
if (f->queue_prev)
f->queue_prev->queue_next = f->queue_next;
if (f->queue_next)
f->queue_next->queue_prev = f->queue_prev;
} }
fetch_free(f); fetch_free(f);
if (!fetch_ring && !queue_ring)
fetch_active = false;
else if (queue_ring)
ns_internal_dispatch_jobs();
} }
@ -685,9 +827,6 @@ void fetch_poll(void)
} }
curl_msg = curl_multi_info_read(fetch_curl_multi, &queue); curl_msg = curl_multi_info_read(fetch_curl_multi, &queue);
} }
if (!fetch_list)
fetch_active = false;
} }
@ -1161,8 +1300,7 @@ struct curl_httppost *fetch_post_convert(struct form_successful_control *control
{ {
struct curl_httppost *post = 0, *last = 0; struct curl_httppost *post = 0, *last = 0;
char *mimetype = 0; char *mimetype = 0;
char *leafname = 0, *temp = 0; char *leafname = 0;
int leaflen;
for (; control; control = control->next) { for (; control; control = control->next) {
if (control->file) { if (control->file) {

View File

@ -77,6 +77,19 @@ char *option_cookie_file = 0;
/** Cookie jar location */ /** Cookie jar location */
char *option_cookie_jar = 0; char *option_cookie_jar = 0;
/* Fetcher configuration */
/** Maximum simultaneous active fetchers (default 24). */
int option_max_fetchers = 24;
/** Maximum simultaneous active fetchers per host (default 5).
 * This should be <= option_max_fetchers; a larger value makes no sense.
 */
int option_max_fetchers_per_host = 5;
/** Maximum number of inactive fetchers cached (default 6).
 * The total number of handles netsurf will therefore have open
 * is this plus option_max_fetchers.
 */
int option_max_cached_fetch_handles = 6;
EXTRA_OPTION_DEFINE EXTRA_OPTION_DEFINE
@ -105,6 +118,12 @@ struct {
{ "ca_bundle", OPTION_STRING, &option_ca_bundle }, { "ca_bundle", OPTION_STRING, &option_ca_bundle },
{ "cookie_file", OPTION_STRING, &option_cookie_file }, { "cookie_file", OPTION_STRING, &option_cookie_file },
{ "cookie_jar", OPTION_STRING, &option_cookie_jar }, { "cookie_jar", OPTION_STRING, &option_cookie_jar },
/* Fetcher options */
{ "max_fetchers", OPTION_INTEGER, &option_max_fetchers },
{ "max_fetchers_per_host",
OPTION_INTEGER, &option_max_fetchers_per_host },
{ "max_cached_fetch_handles",
OPTION_INTEGER, &option_max_cached_fetch_handles },
EXTRA_OPTION_TABLE EXTRA_OPTION_TABLE
}; };

View File

@ -50,6 +50,12 @@ extern char *option_ca_bundle;
extern char *option_cookie_file; extern char *option_cookie_file;
extern char *option_cookie_jar; extern char *option_cookie_jar;
/* Fetcher configuration. */
/** Maximum simultaneous active fetchers. */
extern int option_max_fetchers;
/** Maximum simultaneous active fetchers per host. */
extern int option_max_fetchers_per_host;
/** Maximum number of inactive fetchers (cURL handles) kept cached. */
extern int option_max_cached_fetch_handles;
void options_read(const char *path); void options_read(const char *path);
void options_write(const char *path); void options_write(const char *path);
void options_dump(void); void options_dump(void);