2003-06-30 16:44:03 +04:00
|
|
|
/*
|
|
|
|
* This file is part of NetSurf, http://netsurf.sourceforge.net/
|
|
|
|
* Licensed under the GNU General Public License,
|
|
|
|
* http://www.opensource.org/licenses/gpl-license
|
2004-02-13 19:09:12 +03:00
|
|
|
* Copyright 2004 James Bursa <bursa@users.sourceforge.net>
|
2003-06-30 16:44:03 +04:00
|
|
|
* Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
|
|
|
|
*/
|
|
|
|
|
2003-09-18 03:27:33 +04:00
|
|
|
/** \file
 * Fetching of data from a URL (implementation).
 *
 * This implementation uses libcurl's 'multi' interface.
 *
 * Active fetches are held in the linked list fetch_list. There may be at most
 * one fetch in progress from each host. Any further fetches are queued until
 * the previous one ends.
 *
 * Invariant: only the fetch at the head of each queue is in progress, i.e.
 *        queue_prev == 0 <=> curl_handle != 0
 *   and  queue_prev != 0 <=> curl_handle == 0.
 */
|
|
|
|
|
|
|
|
#include <assert.h>
|
2003-08-29 16:57:14 +04:00
|
|
|
#include <stdbool.h>
|
2003-03-15 18:53:20 +03:00
|
|
|
#include <string.h>
|
2003-04-18 01:35:02 +04:00
|
|
|
#include <strings.h>
|
2003-02-09 15:58:15 +03:00
|
|
|
#include <time.h>
|
2006-02-06 03:10:09 +03:00
|
|
|
#include <sys/stat.h>
|
2004-03-22 02:36:05 +03:00
|
|
|
#ifdef riscos
|
|
|
|
#include <unixlib/local.h>
|
|
|
|
#endif
|
2003-02-09 15:58:15 +03:00
|
|
|
#include "curl/curl.h"
|
2004-01-05 05:10:59 +03:00
|
|
|
#include "netsurf/utils/config.h"
|
2003-02-09 15:58:15 +03:00
|
|
|
#include "netsurf/content/fetch.h"
|
2003-08-29 16:57:14 +04:00
|
|
|
#include "netsurf/desktop/options.h"
|
2004-01-05 05:10:59 +03:00
|
|
|
#ifdef WITH_AUTH
|
2003-10-23 04:09:17 +04:00
|
|
|
#include "netsurf/desktop/401login.h"
|
2004-01-05 05:10:59 +03:00
|
|
|
#endif
|
2003-10-25 18:13:49 +04:00
|
|
|
#include "netsurf/render/form.h"
|
2005-12-12 00:54:30 +03:00
|
|
|
#define NDEBUG
|
2003-08-29 16:57:14 +04:00
|
|
|
#include "netsurf/utils/log.h"
|
|
|
|
#include "netsurf/utils/messages.h"
|
2004-03-02 21:02:41 +03:00
|
|
|
#include "netsurf/utils/url.h"
|
2003-08-29 16:57:14 +04:00
|
|
|
#include "netsurf/utils/utils.h"
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2003-09-18 03:27:33 +04:00
|
|
|
|
2003-11-06 22:41:41 +03:00
|
|
|
/** Fetches in progress, please call fetch_poll(). */
bool fetch_active;
|
|
|
|
|
2003-09-18 03:27:33 +04:00
|
|
|
/** Information for a single fetch. */
|
|
|
|
struct fetch {
|
|
|
|
CURL * curl_handle; /**< cURL handle if being fetched, or 0. */
|
2004-06-22 21:37:51 +04:00
|
|
|
void (*callback)(fetch_msg msg, void *p, const char *data,
|
|
|
|
unsigned long size);
|
2003-09-18 03:27:33 +04:00
|
|
|
/**< Callback function. */
|
|
|
|
bool had_headers; /**< Headers have been processed. */
|
2004-06-22 21:37:51 +04:00
|
|
|
bool abort; /**< Abort requested. */
|
2004-03-28 21:18:52 +04:00
|
|
|
bool stopped; /**< Download stopped on purpose. */
|
2003-09-18 03:27:33 +04:00
|
|
|
bool only_2xx; /**< Only HTTP 2xx responses acceptable. */
|
2004-03-27 03:50:58 +03:00
|
|
|
bool cookies; /**< Send & accept cookies. */
|
2003-09-18 03:27:33 +04:00
|
|
|
char *url; /**< URL. */
|
|
|
|
char *referer; /**< URL for Referer header. */
|
|
|
|
void *p; /**< Private data for callback. */
|
|
|
|
struct curl_slist *headers; /**< List of request headers. */
|
|
|
|
char *host; /**< Host part of URL. */
|
|
|
|
char *location; /**< Response Location header, or 0. */
|
|
|
|
unsigned long content_length; /**< Response Content-Length, or 0. */
|
2003-10-23 04:09:17 +04:00
|
|
|
char *realm; /**< HTTP Auth Realm */
|
2003-10-25 18:13:49 +04:00
|
|
|
char *post_urlenc; /**< Url encoded POST string, or 0. */
|
2004-03-27 03:50:58 +03:00
|
|
|
struct curl_httppost *post_multipart; /**< Multipart post data, or 0. */
|
2006-02-06 03:10:09 +03:00
|
|
|
struct cache_data cachedata; /**< Cache control data */
|
|
|
|
time_t last_modified; /**< If-Modified-Since time */
|
2006-02-07 03:44:52 +03:00
|
|
|
time_t file_etag; /**< ETag for local objects */
|
2003-11-08 22:18:37 +03:00
|
|
|
struct fetch *queue_prev; /**< Previous fetch for this host. */
|
|
|
|
struct fetch *queue_next; /**< Next fetch for this host. */
|
2003-09-18 03:27:33 +04:00
|
|
|
struct fetch *prev; /**< Previous active fetch in ::fetch_list. */
|
|
|
|
struct fetch *next; /**< Next active fetch in ::fetch_list. */
|
2003-02-09 15:58:15 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
static const char * const user_agent = "NetSurf";
|
2005-12-20 00:54:51 +03:00
|
|
|
CURLM *fetch_curl_multi; /**< Global cURL multi handle. */
|
2004-03-27 03:50:58 +03:00
|
|
|
/** Curl handle with default options set; not used for transfers. */
|
|
|
|
static CURL *fetch_blank_curl;
|
2003-09-18 03:27:33 +04:00
|
|
|
static struct fetch *fetch_list = 0; /**< List of active fetches. */
|
2004-03-27 03:50:58 +03:00
|
|
|
static char fetch_error_buffer[CURL_ERROR_SIZE]; /**< Error buffer for cURL. */
|
2004-07-10 06:35:31 +04:00
|
|
|
static char fetch_progress_buffer[256]; /**< Progress buffer for cURL */
|
2005-01-23 01:42:48 +03:00
|
|
|
static char fetch_proxy_userpwd[100]; /**< Proxy authentication details. */
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
static CURLcode fetch_set_options(struct fetch *f);
|
|
|
|
static void fetch_free(struct fetch *f);
|
2004-06-22 21:37:51 +04:00
|
|
|
static void fetch_stop(struct fetch *f);
|
2004-03-27 03:50:58 +03:00
|
|
|
static void fetch_done(CURL *curl_handle, CURLcode result);
|
2004-07-10 06:35:31 +04:00
|
|
|
static int fetch_curl_progress(void *clientp, double dltotal, double dlnow,
|
|
|
|
double ultotal, double ulnow);
|
2004-06-22 21:37:51 +04:00
|
|
|
static size_t fetch_curl_data(void *data, size_t size, size_t nmemb,
|
|
|
|
struct fetch *f);
|
|
|
|
static size_t fetch_curl_header(char *data, size_t size, size_t nmemb,
|
|
|
|
struct fetch *f);
|
2003-08-29 16:57:14 +04:00
|
|
|
static bool fetch_process_headers(struct fetch *f);
|
2006-02-06 03:10:09 +03:00
|
|
|
static struct curl_httppost *fetch_post_convert(
|
|
|
|
struct form_successful_control *control);
|
2003-02-09 15:58:15 +03:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
2003-09-18 03:27:33 +04:00
|
|
|
* Initialise the fetcher.
|
|
|
|
*
|
|
|
|
* Must be called once before any other function.
|
2003-02-09 15:58:15 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
void fetch_init(void)
|
|
|
|
{
|
|
|
|
CURLcode code;
|
|
|
|
|
|
|
|
code = curl_global_init(CURL_GLOBAL_ALL);
|
|
|
|
if (code != CURLE_OK)
|
2004-03-27 03:50:58 +03:00
|
|
|
die("Failed to initialise the fetch module "
|
|
|
|
"(curl_global_init failed).");
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2005-12-20 00:54:51 +03:00
|
|
|
fetch_curl_multi = curl_multi_init();
|
|
|
|
if (!fetch_curl_multi)
|
2004-03-27 03:50:58 +03:00
|
|
|
die("Failed to initialise the fetch module "
|
|
|
|
"(curl_multi_init failed).");
|
2003-06-25 03:22:00 +04:00
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
/* Create a curl easy handle with the options that are common to all
|
|
|
|
fetches. */
|
|
|
|
fetch_blank_curl = curl_easy_init();
|
|
|
|
if (!fetch_blank_curl)
|
|
|
|
die("Failed to initialise the fetch module "
|
|
|
|
"(curl_easy_init failed).");
|
|
|
|
|
|
|
|
#define SETOPT(option, value) \
|
|
|
|
code = curl_easy_setopt(fetch_blank_curl, option, value); \
|
|
|
|
if (code != CURLE_OK) \
|
|
|
|
goto curl_easy_setopt_failed;
|
|
|
|
|
|
|
|
SETOPT(CURLOPT_VERBOSE, 1);
|
|
|
|
SETOPT(CURLOPT_ERRORBUFFER, fetch_error_buffer);
|
|
|
|
SETOPT(CURLOPT_WRITEFUNCTION, fetch_curl_data);
|
|
|
|
SETOPT(CURLOPT_HEADERFUNCTION, fetch_curl_header);
|
2004-07-10 06:35:31 +04:00
|
|
|
SETOPT(CURLOPT_PROGRESSFUNCTION, fetch_curl_progress);
|
|
|
|
SETOPT(CURLOPT_NOPROGRESS, 0);
|
2004-03-27 03:50:58 +03:00
|
|
|
SETOPT(CURLOPT_USERAGENT, user_agent);
|
2004-04-03 18:58:46 +04:00
|
|
|
SETOPT(CURLOPT_ENCODING, "gzip");
|
2004-03-27 03:50:58 +03:00
|
|
|
SETOPT(CURLOPT_LOW_SPEED_LIMIT, 1L);
|
|
|
|
SETOPT(CURLOPT_LOW_SPEED_TIME, 60L);
|
|
|
|
SETOPT(CURLOPT_NOSIGNAL, 1L);
|
|
|
|
SETOPT(CURLOPT_CONNECTTIMEOUT, 60L);
|
2006-01-08 04:51:33 +03:00
|
|
|
|
|
|
|
if (option_ca_bundle)
|
|
|
|
SETOPT(CURLOPT_CAINFO, option_ca_bundle);
|
2004-03-27 03:50:58 +03:00
|
|
|
|
2004-06-11 02:40:56 +04:00
|
|
|
if (!option_ssl_verify_certificates) {
|
2006-02-06 03:10:09 +03:00
|
|
|
/* disable verification of SSL certificates.
|
|
|
|
* security? we've heard of it...
|
|
|
|
*/
|
|
|
|
SETOPT(CURLOPT_SSL_VERIFYPEER, 0L);
|
2004-06-11 02:40:56 +04:00
|
|
|
SETOPT(CURLOPT_SSL_VERIFYHOST, 0L);
|
|
|
|
}
|
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
return;
|
|
|
|
|
|
|
|
curl_easy_setopt_failed:
|
|
|
|
die("Failed to initialise the fetch module "
|
|
|
|
"(curl_easy_setopt failed).");
|
2003-02-09 15:58:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
2003-09-18 03:27:33 +04:00
|
|
|
* Clean up for quit.
|
|
|
|
*
|
|
|
|
* Must be called before exiting.
|
2003-02-09 15:58:15 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
void fetch_quit(void)
|
|
|
|
{
|
|
|
|
CURLMcode codem;
|
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
curl_easy_cleanup(fetch_blank_curl);
|
|
|
|
|
2005-12-20 00:54:51 +03:00
|
|
|
codem = curl_multi_cleanup(fetch_curl_multi);
|
2003-02-09 15:58:15 +03:00
|
|
|
if (codem != CURLM_OK)
|
|
|
|
LOG(("curl_multi_cleanup failed: ignoring"));
|
|
|
|
|
|
|
|
curl_global_cleanup();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
2003-09-18 03:27:33 +04:00
|
|
|
* Start fetching data for the given URL.
|
|
|
|
*
|
|
|
|
* The function returns immediately. The fetch may be queued for later
|
|
|
|
* processing.
|
|
|
|
*
|
|
|
|
* A pointer to an opaque struct fetch is returned, which can be passed to
|
2004-03-27 03:50:58 +03:00
|
|
|
* fetch_abort() to abort the fetch at any time. Returns 0 if memory is
|
|
|
|
* exhausted (or some other fatal error occurred).
|
2003-09-18 03:27:33 +04:00
|
|
|
*
|
|
|
|
* The caller must supply a callback function which is called when anything
|
|
|
|
* interesting happens. The callback function is first called with msg
|
|
|
|
* FETCH_TYPE, with the Content-Type header in data, then one or more times
|
|
|
|
* with FETCH_DATA with some data for the url, and finally with
|
|
|
|
* FETCH_FINISHED. Alternatively, FETCH_ERROR indicates an error occurred:
|
|
|
|
* data contains an error message. FETCH_REDIRECT may replace the FETCH_TYPE,
|
|
|
|
* FETCH_DATA, FETCH_FINISHED sequence if the server sends a replacement URL.
|
2003-02-09 15:58:15 +03:00
|
|
|
*
|
2003-09-18 03:27:33 +04:00
|
|
|
* Some private data can be passed as the last parameter to fetch_start, and
|
|
|
|
* callbacks will contain this.
|
2003-02-09 15:58:15 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
struct fetch * fetch_start(char *url, char *referer,
|
2004-06-22 21:37:51 +04:00
|
|
|
void (*callback)(fetch_msg msg, void *p, const char *data,
|
2004-03-27 03:50:58 +03:00
|
|
|
unsigned long size),
|
|
|
|
void *p, bool only_2xx, char *post_urlenc,
|
2006-02-06 03:10:09 +03:00
|
|
|
struct form_successful_control *post_multipart, bool cookies,
|
|
|
|
char *headers[])
|
2003-02-09 15:58:15 +03:00
|
|
|
{
|
2004-03-27 03:50:58 +03:00
|
|
|
char *host;
|
|
|
|
struct fetch *fetch;
|
|
|
|
struct fetch *host_fetch;
|
2003-02-09 15:58:15 +03:00
|
|
|
CURLcode code;
|
|
|
|
CURLMcode codem;
|
2004-03-27 03:50:58 +03:00
|
|
|
struct curl_slist *slist;
|
2004-08-09 20:11:58 +04:00
|
|
|
url_func_result res;
|
2004-10-02 01:31:55 +04:00
|
|
|
char *ref1 = 0, *ref2 = 0;
|
2006-02-06 03:10:09 +03:00
|
|
|
int i;
|
2004-03-27 03:50:58 +03:00
|
|
|
|
|
|
|
fetch = malloc(sizeof (*fetch));
|
|
|
|
if (!fetch)
|
|
|
|
return 0;
|
|
|
|
|
2004-08-09 20:11:58 +04:00
|
|
|
res = url_host(url, &host);
|
|
|
|
/* we only fail memory exhaustion */
|
|
|
|
if (res == URL_FUNC_NOMEM)
|
|
|
|
goto failed;
|
2005-04-09 14:00:00 +04:00
|
|
|
if (!host)
|
|
|
|
host = strdup("");
|
|
|
|
if (!host)
|
|
|
|
goto failed;
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2004-10-02 01:31:55 +04:00
|
|
|
res = url_scheme(url, &ref1);
|
|
|
|
/* we only fail memory exhaustion */
|
|
|
|
if (res == URL_FUNC_NOMEM)
|
|
|
|
goto failed;
|
|
|
|
|
|
|
|
if (referer) {
|
|
|
|
res = url_scheme(referer, &ref2);
|
|
|
|
/* we only fail memory exhaustion */
|
|
|
|
if (res == URL_FUNC_NOMEM)
|
|
|
|
goto failed;
|
|
|
|
}
|
|
|
|
|
2003-02-09 15:58:15 +03:00
|
|
|
LOG(("fetch %p, url '%s'", fetch, url));
|
2003-04-18 01:35:02 +04:00
|
|
|
|
|
|
|
/* construct a new fetch structure */
|
2004-03-27 03:50:58 +03:00
|
|
|
fetch->curl_handle = 0;
|
2003-02-09 15:58:15 +03:00
|
|
|
fetch->callback = callback;
|
2003-08-29 16:57:14 +04:00
|
|
|
fetch->had_headers = false;
|
2004-06-22 21:37:51 +04:00
|
|
|
fetch->abort = false;
|
2004-03-28 21:18:52 +04:00
|
|
|
fetch->stopped = false;
|
2003-08-29 16:57:14 +04:00
|
|
|
fetch->only_2xx = only_2xx;
|
2004-03-27 03:50:58 +03:00
|
|
|
fetch->cookies = cookies;
|
|
|
|
fetch->url = strdup(url);
|
2003-04-18 01:35:02 +04:00
|
|
|
fetch->referer = 0;
|
2004-10-02 01:31:55 +04:00
|
|
|
/* only send the referer if the schemes match */
|
|
|
|
if (referer) {
|
|
|
|
if (ref1 && ref2 && strcasecmp(ref1, ref2) == 0)
|
|
|
|
fetch->referer = strdup(referer);
|
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
fetch->p = p;
|
2003-04-18 01:35:02 +04:00
|
|
|
fetch->headers = 0;
|
2004-03-27 03:50:58 +03:00
|
|
|
fetch->host = host;
|
|
|
|
fetch->location = 0;
|
2003-08-29 00:04:35 +04:00
|
|
|
fetch->content_length = 0;
|
2004-03-27 03:50:58 +03:00
|
|
|
fetch->realm = 0;
|
2003-10-25 18:13:49 +04:00
|
|
|
fetch->post_urlenc = 0;
|
2003-10-25 20:22:11 +04:00
|
|
|
fetch->post_multipart = 0;
|
2003-10-25 18:13:49 +04:00
|
|
|
if (post_urlenc)
|
2004-03-27 03:50:58 +03:00
|
|
|
fetch->post_urlenc = strdup(post_urlenc);
|
2003-10-25 20:22:11 +04:00
|
|
|
else if (post_multipart)
|
|
|
|
fetch->post_multipart = fetch_post_convert(post_multipart);
|
2006-02-06 03:10:09 +03:00
|
|
|
fetch->cachedata.req_time = time(0);
|
|
|
|
fetch->cachedata.res_time = 0;
|
|
|
|
fetch->cachedata.date = 0;
|
|
|
|
fetch->cachedata.expires = 0;
|
|
|
|
fetch->cachedata.age = INVALID_AGE;
|
|
|
|
fetch->cachedata.max_age = INVALID_AGE;
|
|
|
|
fetch->cachedata.no_cache = false;
|
|
|
|
fetch->cachedata.etag = 0;
|
2006-02-08 03:35:05 +03:00
|
|
|
fetch->cachedata.last_modified = 0;
|
2006-02-06 03:10:09 +03:00
|
|
|
fetch->last_modified = 0;
|
2006-02-07 03:44:52 +03:00
|
|
|
fetch->file_etag = 0;
|
2003-11-08 22:18:37 +03:00
|
|
|
fetch->queue_prev = 0;
|
|
|
|
fetch->queue_next = 0;
|
2003-04-18 01:35:02 +04:00
|
|
|
fetch->prev = 0;
|
|
|
|
fetch->next = 0;
|
|
|
|
|
2004-10-02 01:31:55 +04:00
|
|
|
if (!fetch->url || (referer &&
|
|
|
|
(ref1 && ref2 && strcasecmp(ref1, ref2) == 0) &&
|
|
|
|
!fetch->referer) ||
|
2004-03-27 03:50:58 +03:00
|
|
|
(post_urlenc && !fetch->post_urlenc) ||
|
|
|
|
(post_multipart && !fetch->post_multipart))
|
|
|
|
goto failed;
|
|
|
|
|
2004-10-02 01:31:55 +04:00
|
|
|
/* these aren't needed past here */
|
|
|
|
if (ref1) {
|
|
|
|
free(ref1);
|
|
|
|
ref1 = 0;
|
|
|
|
}
|
|
|
|
if (ref2) {
|
|
|
|
free(ref2);
|
|
|
|
ref2 = 0;
|
|
|
|
}
|
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
#define APPEND(list, value) \
|
|
|
|
slist = curl_slist_append(list, value); \
|
|
|
|
if (!slist) \
|
|
|
|
goto failed; \
|
|
|
|
list = slist;
|
|
|
|
|
|
|
|
/* remove curl default headers */
|
|
|
|
APPEND(fetch->headers, "Accept:");
|
|
|
|
APPEND(fetch->headers, "Pragma:");
|
2006-02-14 02:04:32 +03:00
|
|
|
|
|
|
|
/* when doing a POST libcurl sends Expect: 100-continue" by default
|
|
|
|
* which fails with lighttpd, so disable it (see bug 1429054) */
|
|
|
|
APPEND(fetch->headers, "Expect:");
|
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
if (option_accept_language) {
|
|
|
|
char s[80];
|
|
|
|
snprintf(s, sizeof s, "Accept-Language: %s, *;q=0.1",
|
|
|
|
option_accept_language);
|
|
|
|
s[sizeof s - 1] = 0;
|
|
|
|
APPEND(fetch->headers, s);
|
|
|
|
}
|
2006-02-14 02:04:32 +03:00
|
|
|
|
2006-02-06 03:10:09 +03:00
|
|
|
/* And add any headers specified by the caller */
|
|
|
|
for (i = 0; headers[i]; i++) {
|
|
|
|
if (strncasecmp(headers[i], "If-Modified-Since:", 18) == 0) {
|
|
|
|
char *d = headers[i] + 18;
|
|
|
|
for (; *d && (*d == ' ' || *d == '\t'); d++)
|
|
|
|
/* do nothing */;
|
|
|
|
fetch->last_modified = curl_getdate(d, NULL);
|
|
|
|
}
|
2006-02-07 03:44:52 +03:00
|
|
|
else if (strncasecmp(headers[i], "If-None-Match:", 14) == 0) {
|
|
|
|
char *d = headers[i] + 14;
|
|
|
|
for (; *d && (*d == ' ' || *d == '\t' || *d == '"');
|
|
|
|
d++)
|
|
|
|
/* do nothing */;
|
|
|
|
fetch->file_etag = atoi(d);
|
|
|
|
}
|
2006-02-06 03:10:09 +03:00
|
|
|
APPEND(fetch->headers, headers[i]);
|
|
|
|
}
|
2004-03-27 03:50:58 +03:00
|
|
|
|
2003-04-18 01:35:02 +04:00
|
|
|
/* look for a fetch from the same host */
|
2005-04-09 14:00:00 +04:00
|
|
|
for (host_fetch = fetch_list;
|
|
|
|
host_fetch && strcasecmp(host_fetch->host, host) != 0;
|
|
|
|
host_fetch = host_fetch->next)
|
|
|
|
;
|
|
|
|
if (host_fetch) {
|
|
|
|
/* fetch from this host in progress:
|
|
|
|
queue the new fetch */
|
|
|
|
LOG(("queueing"));
|
|
|
|
fetch->curl_handle = 0;
|
|
|
|
/* queue at end */
|
|
|
|
for (; host_fetch->queue_next;
|
|
|
|
host_fetch = host_fetch->queue_next)
|
2003-04-25 12:03:15 +04:00
|
|
|
;
|
2005-04-09 14:00:00 +04:00
|
|
|
fetch->queue_prev = host_fetch;
|
|
|
|
host_fetch->queue_next = fetch;
|
|
|
|
return fetch;
|
2003-04-18 01:35:02 +04:00
|
|
|
}
|
|
|
|
|
2003-02-09 15:58:15 +03:00
|
|
|
/* create the curl easy handle */
|
2004-03-27 03:50:58 +03:00
|
|
|
fetch->curl_handle = curl_easy_duphandle(fetch_blank_curl);
|
|
|
|
if (!fetch->curl_handle)
|
|
|
|
goto failed;
|
2003-04-10 01:57:09 +04:00
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
code = fetch_set_options(fetch);
|
|
|
|
if (code != CURLE_OK)
|
|
|
|
goto failed;
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
/* add to the global curl multi handle */
|
2005-12-20 00:54:51 +03:00
|
|
|
codem = curl_multi_add_handle(fetch_curl_multi, fetch->curl_handle);
|
2004-03-27 03:50:58 +03:00
|
|
|
assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM);
|
2003-06-02 03:02:56 +04:00
|
|
|
|
2004-06-11 03:55:23 +04:00
|
|
|
fetch->next = fetch_list;
|
|
|
|
if (fetch_list != 0)
|
|
|
|
fetch_list->prev = fetch;
|
|
|
|
fetch_list = fetch;
|
|
|
|
fetch_active = true;
|
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
return fetch;
|
2003-10-26 02:51:45 +04:00
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
failed:
|
|
|
|
free(host);
|
2004-10-02 01:31:55 +04:00
|
|
|
if (ref1)
|
|
|
|
free(ref1);
|
|
|
|
if (ref2)
|
|
|
|
free(ref2);
|
2004-03-27 03:50:58 +03:00
|
|
|
free(fetch->url);
|
|
|
|
free(fetch->referer);
|
|
|
|
free(fetch->post_urlenc);
|
|
|
|
if (fetch->post_multipart)
|
|
|
|
curl_formfree(fetch->post_multipart);
|
|
|
|
curl_slist_free_all(fetch->headers);
|
|
|
|
free(fetch);
|
|
|
|
return 0;
|
|
|
|
}
|
2003-10-25 23:20:13 +04:00
|
|
|
|
2003-10-25 18:13:49 +04:00
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
/**
|
|
|
|
* Set options specific for a fetch.
|
|
|
|
*/
|
2003-06-02 03:02:56 +04:00
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
CURLcode fetch_set_options(struct fetch *f)
|
|
|
|
{
|
|
|
|
CURLcode code;
|
|
|
|
struct login *li;
|
2003-12-26 19:20:57 +03:00
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
#undef SETOPT
|
|
|
|
#define SETOPT(option, value) \
|
|
|
|
code = curl_easy_setopt(f->curl_handle, option, value); \
|
|
|
|
if (code != CURLE_OK) \
|
|
|
|
return code;
|
|
|
|
|
|
|
|
SETOPT(CURLOPT_URL, f->url);
|
|
|
|
SETOPT(CURLOPT_PRIVATE, f);
|
|
|
|
SETOPT(CURLOPT_WRITEDATA, f);
|
|
|
|
SETOPT(CURLOPT_WRITEHEADER, f);
|
2004-07-10 06:35:31 +04:00
|
|
|
SETOPT(CURLOPT_PROGRESSDATA, f);
|
2004-03-27 03:50:58 +03:00
|
|
|
SETOPT(CURLOPT_REFERER, f->referer);
|
|
|
|
SETOPT(CURLOPT_HTTPHEADER, f->headers);
|
|
|
|
if (f->post_urlenc) {
|
|
|
|
SETOPT(CURLOPT_POSTFIELDS, f->post_urlenc);
|
|
|
|
} else if (f->post_multipart) {
|
|
|
|
SETOPT(CURLOPT_HTTPPOST, f->post_multipart);
|
|
|
|
} else {
|
|
|
|
SETOPT(CURLOPT_HTTPGET, 1L);
|
|
|
|
}
|
|
|
|
if (f->cookies) {
|
2006-01-08 04:51:33 +03:00
|
|
|
if (option_cookie_file)
|
|
|
|
SETOPT(CURLOPT_COOKIEFILE, option_cookie_file);
|
|
|
|
if (option_cookie_jar)
|
|
|
|
SETOPT(CURLOPT_COOKIEJAR, option_cookie_jar);
|
2004-03-27 03:50:58 +03:00
|
|
|
} else {
|
|
|
|
SETOPT(CURLOPT_COOKIEFILE, 0);
|
|
|
|
SETOPT(CURLOPT_COOKIEJAR, 0);
|
|
|
|
}
|
2004-08-14 18:30:12 +04:00
|
|
|
if ((li = login_list_get(f->url)) != NULL) {
|
2004-03-27 03:50:58 +03:00
|
|
|
SETOPT(CURLOPT_HTTPAUTH, CURLAUTH_ANY);
|
|
|
|
SETOPT(CURLOPT_USERPWD, li->logindetails);
|
|
|
|
} else {
|
|
|
|
SETOPT(CURLOPT_USERPWD, 0);
|
|
|
|
}
|
|
|
|
if (option_http_proxy && option_http_proxy_host) {
|
|
|
|
SETOPT(CURLOPT_PROXY, option_http_proxy_host);
|
|
|
|
SETOPT(CURLOPT_PROXYPORT, (long) option_http_proxy_port);
|
2004-07-27 19:49:28 +04:00
|
|
|
if (option_http_proxy_auth != OPTION_HTTP_PROXY_AUTH_NONE) {
|
|
|
|
SETOPT(CURLOPT_PROXYAUTH,
|
|
|
|
option_http_proxy_auth ==
|
|
|
|
OPTION_HTTP_PROXY_AUTH_BASIC ?
|
|
|
|
(long) CURLAUTH_BASIC :
|
|
|
|
(long) CURLAUTH_NTLM);
|
2005-01-23 01:42:48 +03:00
|
|
|
snprintf(fetch_proxy_userpwd,
|
|
|
|
sizeof fetch_proxy_userpwd,
|
|
|
|
"%s:%s",
|
2004-07-27 19:49:28 +04:00
|
|
|
option_http_proxy_auth_user,
|
|
|
|
option_http_proxy_auth_pass);
|
2005-01-23 01:42:48 +03:00
|
|
|
SETOPT(CURLOPT_PROXYUSERPWD, fetch_proxy_userpwd);
|
2004-07-27 19:49:28 +04:00
|
|
|
}
|
2004-03-27 03:50:58 +03:00
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
return CURLE_OK;
|
2003-02-09 15:58:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
2004-06-22 21:37:51 +04:00
|
|
|
* Abort a fetch.
|
2003-02-09 15:58:15 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
void fetch_abort(struct fetch *f)
|
2004-06-22 21:37:51 +04:00
|
|
|
{
|
|
|
|
assert(f);
|
|
|
|
LOG(("fetch %p, url '%s'", f, f->url));
|
2004-09-04 02:33:05 +04:00
|
|
|
if (f->queue_prev) {
|
|
|
|
f->queue_prev->queue_next = f->queue_next;
|
|
|
|
if (f->queue_next)
|
|
|
|
f->queue_next->queue_prev = f->queue_prev;
|
|
|
|
fetch_free(f);
|
|
|
|
} else {
|
|
|
|
f->abort = true;
|
|
|
|
}
|
2004-06-22 21:37:51 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Clean up a fetch and start any queued fetch for the same host.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void fetch_stop(struct fetch *f)
|
2003-02-09 15:58:15 +03:00
|
|
|
{
|
2004-03-27 03:50:58 +03:00
|
|
|
CURLcode code;
|
2003-02-09 15:58:15 +03:00
|
|
|
CURLMcode codem;
|
2004-03-27 03:50:58 +03:00
|
|
|
struct fetch *fetch;
|
|
|
|
struct fetch *next_fetch;
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2004-06-22 21:37:51 +04:00
|
|
|
assert(f);
|
2003-02-09 15:58:15 +03:00
|
|
|
LOG(("fetch %p, url '%s'", f, f->url));
|
2003-06-02 05:09:50 +04:00
|
|
|
|
2003-04-18 01:35:02 +04:00
|
|
|
/* remove from list of fetches */
|
|
|
|
if (f->prev == 0)
|
|
|
|
fetch_list = f->next;
|
|
|
|
else
|
|
|
|
f->prev->next = f->next;
|
|
|
|
if (f->next != 0)
|
|
|
|
f->next->prev = f->prev;
|
|
|
|
|
|
|
|
/* remove from curl multi handle */
|
2003-09-17 21:54:39 +04:00
|
|
|
if (f->curl_handle) {
|
2005-12-20 00:54:51 +03:00
|
|
|
codem = curl_multi_remove_handle(fetch_curl_multi,
|
|
|
|
f->curl_handle);
|
2003-09-17 21:44:12 +04:00
|
|
|
assert(codem == CURLM_OK);
|
|
|
|
}
|
2003-04-18 01:35:02 +04:00
|
|
|
|
2003-11-08 22:18:37 +03:00
|
|
|
if (f->curl_handle && f->queue_next) {
|
2004-03-27 03:50:58 +03:00
|
|
|
/* start a queued fetch for this host, reusing the handle */
|
|
|
|
fetch = f->queue_next;
|
2003-04-18 01:35:02 +04:00
|
|
|
|
|
|
|
LOG(("starting queued %p '%s'", fetch, fetch->url));
|
|
|
|
|
|
|
|
fetch->curl_handle = f->curl_handle;
|
2004-03-27 03:50:58 +03:00
|
|
|
f->curl_handle = 0;
|
2006-02-06 03:10:09 +03:00
|
|
|
fetch->cachedata.req_time = time(0);
|
2004-03-27 03:50:58 +03:00
|
|
|
code = fetch_set_options(fetch);
|
|
|
|
if (code == CURLE_OK)
|
|
|
|
/* add to the global curl multi handle */
|
2005-12-20 00:54:51 +03:00
|
|
|
codem = curl_multi_add_handle(fetch_curl_multi,
|
2004-03-27 03:50:58 +03:00
|
|
|
fetch->curl_handle);
|
|
|
|
|
|
|
|
if (code == CURLE_OK && (codem == CURLM_OK ||
|
|
|
|
codem == CURLM_CALL_MULTI_PERFORM)) {
|
|
|
|
/* add to list of fetches */
|
|
|
|
fetch->prev = 0;
|
|
|
|
fetch->next = fetch_list;
|
|
|
|
if (fetch_list != 0)
|
|
|
|
fetch_list->prev = fetch;
|
|
|
|
fetch_list = fetch;
|
|
|
|
fetch->queue_prev = 0;
|
2003-10-25 20:22:11 +04:00
|
|
|
} else {
|
2004-03-27 03:50:58 +03:00
|
|
|
/* destroy all queued fetches for this host */
|
|
|
|
do {
|
|
|
|
fetch->callback(FETCH_ERROR, fetch->p,
|
2004-06-22 21:37:51 +04:00
|
|
|
messages_get("FetchError"), 0);
|
2004-03-27 03:50:58 +03:00
|
|
|
next_fetch = fetch->queue_next;
|
|
|
|
fetch_free(fetch);
|
|
|
|
fetch = next_fetch;
|
|
|
|
} while (fetch);
|
2003-10-25 18:13:49 +04:00
|
|
|
}
|
2003-04-18 01:35:02 +04:00
|
|
|
|
|
|
|
} else {
|
2003-11-08 22:18:37 +03:00
|
|
|
if (f->queue_prev)
|
|
|
|
f->queue_prev->queue_next = f->queue_next;
|
|
|
|
if (f->queue_next)
|
|
|
|
f->queue_next->queue_prev = f->queue_prev;
|
2003-04-18 01:35:02 +04:00
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
fetch_free(f);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Free a fetch structure and associated resources.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void fetch_free(struct fetch *f)
|
|
|
|
{
|
|
|
|
if (f->curl_handle)
|
|
|
|
curl_easy_cleanup(f->curl_handle);
|
|
|
|
free(f->url);
|
2003-06-26 15:41:26 +04:00
|
|
|
free(f->host);
|
|
|
|
free(f->referer);
|
|
|
|
free(f->location);
|
2003-10-23 04:09:17 +04:00
|
|
|
free(f->realm);
|
2004-03-27 03:50:58 +03:00
|
|
|
if (f->headers)
|
|
|
|
curl_slist_free_all(f->headers);
|
2003-10-25 18:13:49 +04:00
|
|
|
free(f->post_urlenc);
|
2003-10-25 20:22:11 +04:00
|
|
|
if (f->post_multipart)
|
|
|
|
curl_formfree(f->post_multipart);
|
2006-02-06 03:10:09 +03:00
|
|
|
free(f->cachedata.etag);
|
2004-03-27 03:50:58 +03:00
|
|
|
free(f);
|
2003-02-09 15:58:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
2003-09-18 03:27:33 +04:00
|
|
|
* Do some work on current fetches.
|
2003-02-09 15:58:15 +03:00
|
|
|
*
|
2003-09-18 03:27:33 +04:00
|
|
|
* Must be called regularly to make progress on fetches.
|
2003-02-09 15:58:15 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
void fetch_poll(void)
|
|
|
|
{
|
2003-08-29 16:57:14 +04:00
|
|
|
int running, queue;
|
2004-03-27 03:50:58 +03:00
|
|
|
CURLMcode codem;
|
|
|
|
CURLMsg *curl_msg;
|
2003-02-09 15:58:15 +03:00
|
|
|
|
|
|
|
/* do any possible work on the current fetches */
|
|
|
|
do {
|
2005-12-20 00:54:51 +03:00
|
|
|
codem = curl_multi_perform(fetch_curl_multi, &running);
|
2003-02-09 15:58:15 +03:00
|
|
|
assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM);
|
|
|
|
} while (codem == CURLM_CALL_MULTI_PERFORM);
|
|
|
|
|
|
|
|
/* process curl results */
|
2005-12-20 00:54:51 +03:00
|
|
|
curl_msg = curl_multi_info_read(fetch_curl_multi, &queue);
|
2003-02-09 15:58:15 +03:00
|
|
|
while (curl_msg) {
|
|
|
|
switch (curl_msg->msg) {
|
|
|
|
case CURLMSG_DONE:
|
2004-03-27 03:50:58 +03:00
|
|
|
fetch_done(curl_msg->easy_handle,
|
|
|
|
curl_msg->data.result);
|
2003-02-09 15:58:15 +03:00
|
|
|
break;
|
|
|
|
default:
|
2004-03-27 03:50:58 +03:00
|
|
|
break;
|
2003-02-09 15:58:15 +03:00
|
|
|
}
|
2005-12-20 00:54:51 +03:00
|
|
|
curl_msg = curl_multi_info_read(fetch_curl_multi, &queue);
|
2003-02-09 15:58:15 +03:00
|
|
|
}
|
2003-11-06 22:41:41 +03:00
|
|
|
|
|
|
|
if (!fetch_list)
|
|
|
|
fetch_active = false;
|
2003-02-09 15:58:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
/**
|
|
|
|
* Handle a completed fetch (CURLMSG_DONE from curl_multi_info_read()).
|
|
|
|
*
|
|
|
|
* \param curl_handle curl easy handle of fetch
|
|
|
|
*/
|
|
|
|
|
|
|
|
void fetch_done(CURL *curl_handle, CURLcode result)
|
|
|
|
{
|
|
|
|
bool finished = false;
|
|
|
|
bool error = false;
|
2004-07-10 14:05:16 +04:00
|
|
|
bool abort;
|
2004-03-27 03:50:58 +03:00
|
|
|
struct fetch *f;
|
|
|
|
void *p;
|
2004-06-22 21:37:51 +04:00
|
|
|
void (*callback)(fetch_msg msg, void *p, const char *data,
|
2004-03-27 03:50:58 +03:00
|
|
|
unsigned long size);
|
|
|
|
CURLcode code;
|
2006-02-06 03:10:09 +03:00
|
|
|
struct cache_data cachedata;
|
2004-03-27 03:50:58 +03:00
|
|
|
|
|
|
|
/* find the structure associated with this fetch */
|
|
|
|
code = curl_easy_getinfo(curl_handle, CURLINFO_PRIVATE, &f);
|
|
|
|
assert(code == CURLE_OK);
|
|
|
|
|
2004-07-10 14:05:16 +04:00
|
|
|
abort = f->abort;
|
2004-03-27 03:50:58 +03:00
|
|
|
callback = f->callback;
|
|
|
|
p = f->p;
|
|
|
|
|
2004-09-04 02:33:05 +04:00
|
|
|
if (!abort && result == CURLE_OK) {
|
2004-03-27 03:50:58 +03:00
|
|
|
/* fetch completed normally */
|
2004-11-12 02:55:23 +03:00
|
|
|
if (f->stopped ||
|
|
|
|
(!f->had_headers &&
|
|
|
|
fetch_process_headers(f)))
|
2004-03-27 03:50:58 +03:00
|
|
|
; /* redirect with no body or similar */
|
|
|
|
else
|
|
|
|
finished = true;
|
2004-03-28 21:18:52 +04:00
|
|
|
} else if (result == CURLE_WRITE_ERROR && f->stopped)
|
2004-03-27 03:50:58 +03:00
|
|
|
/* CURLE_WRITE_ERROR occurs when fetch_curl_data
|
|
|
|
* returns 0, which we use to abort intentionally */
|
|
|
|
;
|
|
|
|
else
|
|
|
|
error = true;
|
|
|
|
|
2006-02-06 03:10:09 +03:00
|
|
|
/* If finished, acquire cache info to pass to callback */
|
|
|
|
if (finished) {
|
|
|
|
memcpy(&cachedata, &f->cachedata, sizeof(struct cache_data));
|
|
|
|
f->cachedata.etag = 0;
|
|
|
|
}
|
|
|
|
|
2004-06-22 21:37:51 +04:00
|
|
|
/* clean up fetch and start any queued fetch for this host */
|
|
|
|
fetch_stop(f);
|
2004-03-27 03:50:58 +03:00
|
|
|
|
2004-06-22 21:37:51 +04:00
|
|
|
/* postponed until after stop so that queue fetches are started */
|
2004-07-10 14:05:16 +04:00
|
|
|
if (abort)
|
|
|
|
; /* fetch was aborted: no callback */
|
2006-02-06 03:10:09 +03:00
|
|
|
else if (finished) {
|
|
|
|
callback(FETCH_FINISHED, p, (const char *)&cachedata, 0);
|
|
|
|
free(cachedata.etag);
|
|
|
|
}
|
2004-03-27 03:50:58 +03:00
|
|
|
else if (error)
|
|
|
|
callback(FETCH_ERROR, p, fetch_error_buffer, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-07-10 06:35:31 +04:00
|
|
|
/**
|
2004-07-10 14:05:16 +04:00
|
|
|
* Callback function for fetch progress.
|
2004-07-10 06:35:31 +04:00
|
|
|
*/
|
2004-07-10 14:05:16 +04:00
|
|
|
|
2004-07-10 06:35:31 +04:00
|
|
|
int fetch_curl_progress(void *clientp, double dltotal, double dlnow,
|
|
|
|
double ultotal, double ulnow)
|
|
|
|
{
|
2004-07-10 14:05:16 +04:00
|
|
|
struct fetch *f = (struct fetch *) clientp;
|
2004-07-10 06:35:31 +04:00
|
|
|
double percent;
|
|
|
|
|
2004-07-10 14:05:16 +04:00
|
|
|
if (f->abort)
|
|
|
|
return 0;
|
|
|
|
|
2004-07-10 06:35:31 +04:00
|
|
|
if (dltotal > 0) {
|
|
|
|
percent = dlnow * 100.0f / dltotal;
|
|
|
|
snprintf(fetch_progress_buffer, 255,
|
2004-07-10 14:05:16 +04:00
|
|
|
messages_get("Progress"),
|
2004-07-10 06:35:31 +04:00
|
|
|
human_friendly_bytesize(dlnow),
|
|
|
|
human_friendly_bytesize(dltotal));
|
|
|
|
f->callback(FETCH_PROGRESS, f->p, fetch_progress_buffer,
|
2004-07-10 14:05:16 +04:00
|
|
|
(unsigned long) percent);
|
|
|
|
} else {
|
2004-07-10 06:35:31 +04:00
|
|
|
snprintf(fetch_progress_buffer, 255,
|
2004-07-10 14:05:16 +04:00
|
|
|
messages_get("ProgressU"),
|
2004-07-10 06:35:31 +04:00
|
|
|
human_friendly_bytesize(dlnow));
|
|
|
|
f->callback(FETCH_PROGRESS, f->p, fetch_progress_buffer, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-02-09 15:58:15 +03:00
|
|
|
/**
|
2003-09-18 03:27:33 +04:00
|
|
|
* Callback function for cURL.
|
2003-02-09 15:58:15 +03:00
|
|
|
*/
|
|
|
|
|
2004-06-22 21:37:51 +04:00
|
|
|
size_t fetch_curl_data(void *data, size_t size, size_t nmemb,
|
|
|
|
struct fetch *f)
|
2003-02-09 15:58:15 +03:00
|
|
|
{
|
2003-02-26 00:00:27 +03:00
|
|
|
LOG(("fetch %p, size %u", f, size * nmemb));
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2004-06-22 21:37:51 +04:00
|
|
|
if (f->abort || (!f->had_headers && fetch_process_headers(f))) {
|
2004-03-28 21:18:52 +04:00
|
|
|
f->stopped = true;
|
2003-08-25 20:17:11 +04:00
|
|
|
return 0;
|
2003-08-29 16:57:14 +04:00
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
|
|
|
|
/* send data to the caller */
|
|
|
|
LOG(("FETCH_DATA"));
|
|
|
|
f->callback(FETCH_DATA, f->p, data, size * nmemb);
|
2004-06-22 21:37:51 +04:00
|
|
|
|
|
|
|
if (f->abort) {
|
2004-06-11 00:41:26 +04:00
|
|
|
f->stopped = true;
|
|
|
|
return 0;
|
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
|
|
|
|
return size * nmemb;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-04-10 01:57:09 +04:00
|
|
|
/**
|
2003-09-18 03:27:33 +04:00
|
|
|
* Callback function for headers.
|
2003-04-10 01:57:09 +04:00
|
|
|
*/
|
|
|
|
|
2004-06-22 21:37:51 +04:00
|
|
|
size_t fetch_curl_header(char *data, size_t size, size_t nmemb,
|
|
|
|
struct fetch *f)
|
2003-04-10 01:57:09 +04:00
|
|
|
{
|
2004-06-11 02:40:56 +04:00
|
|
|
int i;
|
2003-06-26 15:41:26 +04:00
|
|
|
size *= nmemb;
|
2006-02-06 03:10:09 +03:00
|
|
|
|
|
|
|
#define SKIP_ST(o) for (i = (o); i < (int) size && (data[i] == ' ' || data[i] == '\t'); i++)
|
|
|
|
|
|
|
|
/* Set fetch response time if not already set */
|
|
|
|
if (f->cachedata.res_time == 0)
|
|
|
|
f->cachedata.res_time = time(0);
|
|
|
|
|
2003-06-26 15:41:26 +04:00
|
|
|
if (12 < size && strncasecmp(data, "Location:", 9) == 0) {
|
|
|
|
/* extract Location header */
|
2004-03-27 03:50:58 +03:00
|
|
|
free(f->location);
|
|
|
|
f->location = malloc(size);
|
|
|
|
if (!f->location) {
|
|
|
|
LOG(("malloc failed"));
|
|
|
|
return size;
|
|
|
|
}
|
2006-02-06 03:10:09 +03:00
|
|
|
SKIP_ST(9);
|
2003-06-26 15:41:26 +04:00
|
|
|
strncpy(f->location, data + i, size - i);
|
2004-05-02 01:37:29 +04:00
|
|
|
f->location[size - i] = '\0';
|
|
|
|
for (i = size - i - 1; i >= 0 &&
|
|
|
|
(f->location[i] == ' ' ||
|
2003-06-26 15:41:26 +04:00
|
|
|
f->location[i] == '\t' ||
|
|
|
|
f->location[i] == '\r' ||
|
2004-05-02 01:37:29 +04:00
|
|
|
f->location[i] == '\n'); i--)
|
2003-06-26 15:41:26 +04:00
|
|
|
f->location[i] = '\0';
|
2003-08-29 00:04:35 +04:00
|
|
|
} else if (15 < size && strncasecmp(data, "Content-Length:", 15) == 0) {
|
|
|
|
/* extract Content-Length header */
|
2006-02-06 03:10:09 +03:00
|
|
|
SKIP_ST(15);
|
2005-07-16 20:23:08 +04:00
|
|
|
if (i < (int)size && '0' <= data[i] && data[i] <= '9')
|
2003-08-29 00:04:35 +04:00
|
|
|
f->content_length = atol(data + i);
|
2004-01-05 05:10:59 +03:00
|
|
|
#ifdef WITH_AUTH
|
2006-02-06 03:10:09 +03:00
|
|
|
} else if (17 < size && strncasecmp(data, "WWW-Authenticate:", 17) == 0) {
|
2004-05-02 01:37:29 +04:00
|
|
|
/* extract the first Realm from WWW-Authenticate header */
|
2004-03-27 03:50:58 +03:00
|
|
|
free(f->realm);
|
|
|
|
f->realm = malloc(size);
|
|
|
|
if (!f->realm) {
|
|
|
|
LOG(("malloc failed"));
|
|
|
|
return size;
|
|
|
|
}
|
2006-02-06 03:10:09 +03:00
|
|
|
SKIP_ST(17);
|
2004-06-11 02:40:56 +04:00
|
|
|
while (i < (int)size && data[++i] == '"')
|
2004-05-02 01:37:29 +04:00
|
|
|
/* */;
|
|
|
|
strncpy(f->realm, data + i, size - i);
|
|
|
|
f->realm[size - i] = '\0';
|
|
|
|
for (i = size - i - 1; i >= 0 &&
|
|
|
|
(f->realm[i] == ' ' ||
|
|
|
|
f->realm[i] == '"' ||
|
|
|
|
f->realm[i] == '\t' ||
|
|
|
|
f->realm[i] == '\r' ||
|
|
|
|
f->realm[i] == '\n'); --i)
|
|
|
|
f->realm[i] = '\0';
|
2004-01-05 05:10:59 +03:00
|
|
|
#endif
|
2006-02-06 03:10:09 +03:00
|
|
|
} else if (5 < size && strncasecmp(data, "Date:", 5) == 0) {
|
|
|
|
/* extract Date header */
|
|
|
|
SKIP_ST(5);
|
|
|
|
if (i < (int) size)
|
|
|
|
f->cachedata.date = curl_getdate(&data[i], NULL);
|
|
|
|
} else if (4 < size && strncasecmp(data, "Age:", 4) == 0) {
|
|
|
|
/* extract Age header */
|
|
|
|
SKIP_ST(4);
|
|
|
|
if (i < (int) size && '0' <= data[i] && data[i] <= '9')
|
|
|
|
f->cachedata.age = atoi(data + i);
|
|
|
|
} else if (8 < size && strncasecmp(data, "Expires:", 8) == 0) {
|
|
|
|
/* extract Expires header */
|
|
|
|
SKIP_ST(8);
|
|
|
|
if (i < (int) size)
|
|
|
|
f->cachedata.expires = curl_getdate(&data[i], NULL);
|
|
|
|
} else if (14 < size && strncasecmp(data, "Cache-Control:", 14) == 0) {
|
|
|
|
/* extract and parse Cache-Control header */
|
|
|
|
int comma;
|
|
|
|
SKIP_ST(14);
|
|
|
|
|
|
|
|
while (i < (int) size) {
|
|
|
|
for (comma = i; comma < (int) size; comma++)
|
|
|
|
if (data[comma] == ',')
|
|
|
|
break;
|
|
|
|
|
|
|
|
SKIP_ST(i);
|
|
|
|
|
|
|
|
if (8 < comma - i && (strncasecmp(data + i, "no-cache", 8) == 0 || strncasecmp(data + i, "no-store", 8) == 0))
|
|
|
|
/* When we get a disk cache we should
|
|
|
|
* distinguish between these two */
|
|
|
|
f->cachedata.no_cache = true;
|
|
|
|
else if (7 < comma - i && strncasecmp(data + i, "max-age", 7) == 0) {
|
|
|
|
for (; i < comma; i++)
|
|
|
|
if (data[i] == '=')
|
|
|
|
break;
|
|
|
|
SKIP_ST(i+1);
|
|
|
|
if (i < comma)
|
|
|
|
f->cachedata.max_age =
|
|
|
|
atoi(data + i);
|
|
|
|
}
|
|
|
|
|
|
|
|
i = comma + 1;
|
|
|
|
}
|
|
|
|
} else if (5 < size && strncasecmp(data, "ETag:", 5) == 0) {
|
|
|
|
/* extract ETag header */
|
|
|
|
free(f->cachedata.etag);
|
|
|
|
f->cachedata.etag = malloc(size);
|
|
|
|
if (!f->cachedata.etag) {
|
|
|
|
LOG(("malloc failed"));
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
SKIP_ST(5);
|
|
|
|
strncpy(f->cachedata.etag, data + i, size - i);
|
|
|
|
f->cachedata.etag[size - i] = '\0';
|
|
|
|
for (i = size - i - 1; i >= 0 &&
|
|
|
|
(f->cachedata.etag[i] == ' ' ||
|
|
|
|
f->cachedata.etag[i] == '\t' ||
|
|
|
|
f->cachedata.etag[i] == '\r' ||
|
|
|
|
f->cachedata.etag[i] == '\n'); --i)
|
|
|
|
f->cachedata.etag[i] = '\0';
|
2006-02-08 03:35:05 +03:00
|
|
|
} else if (14 < size && strncasecmp(data, "Last-Modified:", 14) == 0) {
|
|
|
|
/* extract Last-Modified header */
|
|
|
|
SKIP_ST(14);
|
|
|
|
if (i < (int) size) {
|
|
|
|
f->cachedata.last_modified =
|
|
|
|
curl_getdate(&data[i], NULL);
|
|
|
|
}
|
2003-06-26 15:41:26 +04:00
|
|
|
}
|
2006-02-06 03:10:09 +03:00
|
|
|
|
2003-06-26 15:41:26 +04:00
|
|
|
return size;
|
2006-02-06 03:10:09 +03:00
|
|
|
#undef SKIP_ST
|
2003-04-10 01:57:09 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-08-25 20:17:11 +04:00
|
|
|
/**
|
|
|
|
* Find the status code and content type and inform the caller.
|
2003-08-29 16:57:14 +04:00
|
|
|
*
|
|
|
|
* Return true if the fetch is being aborted.
|
2003-08-25 20:17:11 +04:00
|
|
|
*/
|
|
|
|
|
2003-08-29 16:57:14 +04:00
|
|
|
bool fetch_process_headers(struct fetch *f)
|
2003-08-25 20:17:11 +04:00
|
|
|
{
|
|
|
|
long http_code;
|
|
|
|
const char *type;
|
|
|
|
CURLcode code;
|
2006-02-06 03:10:09 +03:00
|
|
|
struct stat s;
|
2006-02-07 03:44:52 +03:00
|
|
|
char *url_path = 0;
|
2003-08-25 20:17:11 +04:00
|
|
|
|
2003-08-29 16:57:14 +04:00
|
|
|
f->had_headers = true;
|
2003-08-25 20:17:11 +04:00
|
|
|
|
2006-02-06 03:10:09 +03:00
|
|
|
/* Set fetch response time if not already set */
|
|
|
|
if (f->cachedata.res_time == 0)
|
|
|
|
f->cachedata.res_time = time(0);
|
|
|
|
|
2003-08-25 20:17:11 +04:00
|
|
|
code = curl_easy_getinfo(f->curl_handle, CURLINFO_HTTP_CODE, &http_code);
|
2003-10-23 04:09:17 +04:00
|
|
|
assert(code == CURLE_OK);
|
2003-08-25 20:17:11 +04:00
|
|
|
LOG(("HTTP status code %li", http_code));
|
|
|
|
|
2006-02-06 03:10:09 +03:00
|
|
|
if (http_code == 304 && !f->post_urlenc && !f->post_multipart) {
|
|
|
|
/* Not Modified && GET request */
|
|
|
|
f->callback(FETCH_NOTMODIFIED, f->p,
|
|
|
|
(const char *)&f->cachedata, 0);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2003-08-29 16:57:14 +04:00
|
|
|
/* handle HTTP redirects (3xx response codes) */
|
2003-08-25 20:17:11 +04:00
|
|
|
if (300 <= http_code && http_code < 400 && f->location != 0) {
|
|
|
|
LOG(("FETCH_REDIRECT, '%s'", f->location));
|
|
|
|
f->callback(FETCH_REDIRECT, f->p, f->location, 0);
|
2003-08-29 16:57:14 +04:00
|
|
|
return true;
|
2003-08-25 20:17:11 +04:00
|
|
|
}
|
|
|
|
|
2003-10-23 04:09:17 +04:00
|
|
|
/* handle HTTP 401 (Authentication errors) */
|
2004-01-05 05:10:59 +03:00
|
|
|
#ifdef WITH_AUTH
|
2004-03-27 03:50:58 +03:00
|
|
|
if (http_code == 401) {
|
|
|
|
f->callback(FETCH_AUTH, f->p, f->realm,0);
|
|
|
|
return true;
|
|
|
|
}
|
2004-01-05 05:10:59 +03:00
|
|
|
#endif
|
2003-10-23 04:09:17 +04:00
|
|
|
|
2003-08-29 16:57:14 +04:00
|
|
|
/* handle HTTP errors (non 2xx response codes) */
|
|
|
|
if (f->only_2xx && strncmp(f->url, "http", 4) == 0 &&
|
|
|
|
(http_code < 200 || 299 < http_code)) {
|
2004-06-22 21:37:51 +04:00
|
|
|
f->callback(FETCH_ERROR, f->p, messages_get("Not2xx"), 0);
|
2003-08-29 16:57:14 +04:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* find MIME type from headers or filetype for local files */
|
2003-08-25 20:17:11 +04:00
|
|
|
code = curl_easy_getinfo(f->curl_handle, CURLINFO_CONTENT_TYPE, &type);
|
|
|
|
assert(code == CURLE_OK);
|
|
|
|
|
2006-02-07 03:44:52 +03:00
|
|
|
if (strncmp(f->url, "file:///", 8) == 0)
|
|
|
|
url_path = curl_unescape(f->url + 7,
|
|
|
|
(int) strlen(f->url) - 7);
|
|
|
|
else if (strncmp(f->url, "file:/", 6) == 0)
|
|
|
|
url_path = curl_unescape(f->url + 5,
|
|
|
|
(int) strlen(f->url) - 5);
|
|
|
|
|
|
|
|
if (url_path && stat(url_path, &s) == 0) {
|
|
|
|
/* file: URL and file exists */
|
|
|
|
/* create etag */
|
|
|
|
free(f->cachedata.etag);
|
|
|
|
f->cachedata.etag = malloc(13);
|
|
|
|
if (f->cachedata.etag)
|
|
|
|
sprintf(f->cachedata.etag,
|
|
|
|
"\"%10d\"", (int)s.st_mtime);
|
|
|
|
|
2006-02-08 03:35:05 +03:00
|
|
|
/* don't set last modified time so as to ensure that local
|
|
|
|
* files are revalidated at all times. */
|
|
|
|
|
2006-02-07 03:44:52 +03:00
|
|
|
/* If performed a conditional request and unmodified ... */
|
|
|
|
if (f->last_modified && f->file_etag &&
|
|
|
|
f->last_modified > s.st_mtime &&
|
|
|
|
f->file_etag == s.st_mtime) {
|
|
|
|
f->callback(FETCH_NOTMODIFIED, f->p,
|
|
|
|
(const char *)&f->cachedata, 0);
|
|
|
|
curl_free(url_path);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2003-08-25 20:17:11 +04:00
|
|
|
if (type == 0) {
|
|
|
|
type = "text/html";
|
2006-02-07 03:44:52 +03:00
|
|
|
if (url_path) {
|
2003-12-26 03:17:55 +03:00
|
|
|
type = fetch_filetype(url_path);
|
2003-08-25 20:17:11 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-02-07 03:44:52 +03:00
|
|
|
curl_free(url_path);
|
|
|
|
|
2003-08-25 20:17:11 +04:00
|
|
|
LOG(("FETCH_TYPE, '%s'", type));
|
2004-06-22 21:37:51 +04:00
|
|
|
f->callback(FETCH_TYPE, f->p, type, f->content_length);
|
|
|
|
if (f->abort)
|
2003-08-29 16:57:14 +04:00
|
|
|
return true;
|
2003-08-25 20:17:11 +04:00
|
|
|
|
2003-08-29 16:57:14 +04:00
|
|
|
return false;
|
2003-08-25 20:17:11 +04:00
|
|
|
}
|
|
|
|
|
2003-04-10 01:57:09 +04:00
|
|
|
|
2003-10-25 20:22:11 +04:00
|
|
|
/**
|
|
|
|
* Convert a list of struct ::form_successful_control to a list of
|
2004-03-27 03:50:58 +03:00
|
|
|
* struct curl_httppost for libcurl.
|
2003-10-25 20:22:11 +04:00
|
|
|
*/
|
2004-03-27 03:50:58 +03:00
|
|
|
struct curl_httppost *fetch_post_convert(struct form_successful_control *control)
|
2003-10-25 20:22:11 +04:00
|
|
|
{
|
2004-03-27 03:50:58 +03:00
|
|
|
struct curl_httppost *post = 0, *last = 0;
|
2004-03-22 00:32:15 +03:00
|
|
|
char *mimetype = 0;
|
2004-03-22 02:36:05 +03:00
|
|
|
char *leafname = 0, *temp = 0;
|
2006-01-07 05:33:36 +03:00
|
|
|
int leaflen;
|
2003-10-25 20:22:11 +04:00
|
|
|
|
|
|
|
for (; control; control = control->next) {
|
2004-03-27 03:50:58 +03:00
|
|
|
if (control->file) {
|
|
|
|
mimetype = fetch_mimetype(control->value);
|
2004-03-22 02:36:05 +03:00
|
|
|
#ifdef riscos
|
2004-03-27 03:50:58 +03:00
|
|
|
temp = strrchr(control->value, '.');
|
|
|
|
if (!temp)
|
|
|
|
temp = control->value; /* already leafname */
|
|
|
|
else
|
|
|
|
temp += 1;
|
2006-01-07 05:33:36 +03:00
|
|
|
|
|
|
|
leaflen = strlen(temp);
|
|
|
|
|
|
|
|
leafname = malloc(leaflen + 1);
|
2004-03-27 03:50:58 +03:00
|
|
|
if (!leafname) {
|
2006-01-07 05:33:36 +03:00
|
|
|
LOG(("malloc failed"));
|
2004-03-27 03:50:58 +03:00
|
|
|
free(mimetype);
|
|
|
|
continue;
|
|
|
|
}
|
2006-01-07 05:33:36 +03:00
|
|
|
memcpy(leafname, temp, leaflen + 1);
|
|
|
|
|
|
|
|
/* and s/\//\./g */
|
|
|
|
for (temp = leafname; *temp; temp++)
|
|
|
|
if (*temp == '/')
|
|
|
|
*temp = '.';
|
2004-03-22 02:36:05 +03:00
|
|
|
#else
|
2004-03-27 03:50:58 +03:00
|
|
|
leafname = strrchr(control->value, '/') ;
|
|
|
|
if (!leafname)
|
|
|
|
leafname = control->value;
|
|
|
|
else
|
|
|
|
leafname += 1;
|
2004-03-22 02:36:05 +03:00
|
|
|
#endif
|
2004-03-27 03:50:58 +03:00
|
|
|
curl_formadd(&post, &last,
|
2004-03-21 23:07:14 +03:00
|
|
|
CURLFORM_COPYNAME, control->name,
|
2004-04-07 23:19:31 +04:00
|
|
|
CURLFORM_FILE, control->value,
|
|
|
|
CURLFORM_FILENAME, leafname,
|
2004-03-22 00:32:15 +03:00
|
|
|
CURLFORM_CONTENTTYPE,
|
|
|
|
(mimetype != 0 ? mimetype : "text/plain"),
|
2004-03-21 23:07:14 +03:00
|
|
|
CURLFORM_END);
|
2004-03-22 02:36:05 +03:00
|
|
|
#ifdef riscos
|
2004-03-27 03:50:58 +03:00
|
|
|
free(leafname);
|
2004-03-22 02:36:05 +03:00
|
|
|
#endif
|
2004-03-27 03:50:58 +03:00
|
|
|
free(mimetype);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
curl_formadd(&post, &last,
|
|
|
|
CURLFORM_COPYNAME, control->name,
|
|
|
|
CURLFORM_COPYCONTENTS, control->value,
|
|
|
|
CURLFORM_END);
|
2004-03-21 23:07:14 +03:00
|
|
|
}
|
2003-10-25 20:22:11 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
return post;
|
|
|
|
}
|
|
|
|
|
2004-04-02 17:51:13 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Check if a URL's scheme can be fetched.
|
|
|
|
*
|
|
|
|
* \param url URL to check
|
|
|
|
* \return true if the scheme is supported
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool fetch_can_fetch(const char *url)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
const char *semi;
|
2004-08-14 19:07:21 +04:00
|
|
|
size_t len;
|
2004-04-02 17:51:13 +04:00
|
|
|
curl_version_info_data *data;
|
|
|
|
|
2004-08-14 19:07:21 +04:00
|
|
|
if ((semi = strchr(url, ':')) == NULL)
|
2004-04-02 17:51:13 +04:00
|
|
|
return false;
|
|
|
|
len = semi - url;
|
|
|
|
|
|
|
|
data = curl_version_info(CURLVERSION_NOW);
|
|
|
|
|
|
|
|
for (i = 0; data->protocols[i]; i++)
|
|
|
|
if (strlen(data->protocols[i]) == len &&
|
|
|
|
strncasecmp(url, data->protocols[i], len) == 0)
|
|
|
|
return true;
|
2004-04-07 23:19:31 +04:00
|
|
|
|
2004-04-02 17:51:13 +04:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-06-28 03:24:11 +04:00
|
|
|
/**
|
|
|
|
* Change the callback function for a fetch.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void fetch_change_callback(struct fetch *fetch,
|
|
|
|
void (*callback)(fetch_msg msg, void *p, const char *data,
|
|
|
|
unsigned long size),
|
|
|
|
void *p)
|
|
|
|
{
|
|
|
|
assert(fetch);
|
|
|
|
fetch->callback = callback;
|
|
|
|
fetch->p = p;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-02-09 15:58:15 +03:00
|
|
|
/**
|
|
|
|
* testing framework
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifdef TEST
|
2003-02-26 00:00:27 +03:00
|
|
|
#include <unistd.h>
|
|
|
|
|
2003-02-09 15:58:15 +03:00
|
|
|
/** One entry of the test table: a URL and the fetch started for it. */
struct test {
	char *url;		/* URL to fetch */
	struct fetch *f;	/* fetch returned by fetch_start(), 0 before */
};
|
|
|
|
|
|
|
|
/**
 * Test callback: print one line per message, prefixed with the URL of the
 * test entry the message belongs to.
 *
 * Only FETCH_TYPE, FETCH_DATA, FETCH_FINISHED and FETCH_ERROR are expected;
 * any other message trips an assertion.
 */
void callback(fetch_msg msg, struct test *t, char *data, unsigned long size)
{
	printf("%s: ", t->url);

	if (msg == FETCH_TYPE)
		printf("FETCH_TYPE '%s'", data);
	else if (msg == FETCH_DATA)
		printf("FETCH_DATA %lu", size);
	else if (msg == FETCH_FINISHED)
		printf("FETCH_FINISHED");
	else if (msg == FETCH_ERROR)
		printf("FETCH_ERROR '%s'", data);
	else
		assert(0);

	printf("\n");
}
|
|
|
|
|
|
|
|
/* URLs fetched by the test program; the fetch pointers are filled in by
 * main() */
struct test test[] = {
	{ "http://127.0.0.1/", 0 },
	{ "http://netsurf.strcprstskrzkrk.co.uk/", 0 },
	{ "http://www.oxfordstudent.com/", 0 },
	{ "http://www.google.co.uk/", 0 },
	{ "http://news.bbc.co.uk/", 0 },
	{ "http://doesnt.exist/", 0 },
	{ "blah://blah", 0 },
};
|
|
|
|
|
|
|
|
int main(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
fetch_init();
|
|
|
|
for (i = 0; i != sizeof(test) / sizeof(test[0]); i++)
|
|
|
|
test[i].f = fetch_start(test[i].url, 0, callback, &test[i]);
|
|
|
|
while (1) {
|
|
|
|
fetch_poll();
|
|
|
|
sleep(1);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|