2003-06-30 16:44:03 +04:00
|
|
|
/*
|
2007-06-10 21:46:44 +04:00
|
|
|
* Copyright 2006,2007 Daniel Silverstone <dsilvers@digital-scurf.org>
|
2007-05-02 06:38:38 +04:00
|
|
|
* Copyright 2007 James Bursa <bursa@users.sourceforge.net>
|
2003-06-30 16:44:03 +04:00
|
|
|
* Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
|
2007-08-08 20:16:03 +04:00
|
|
|
*
|
|
|
|
* This file is part of NetSurf, http://www.netsurf-browser.org/
|
|
|
|
*
|
|
|
|
* NetSurf is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; version 2 of the License.
|
|
|
|
*
|
|
|
|
* NetSurf is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2003-06-30 16:44:03 +04:00
|
|
|
*/
|
|
|
|
|
2003-09-18 03:27:33 +04:00
|
|
|
/** \file
|
2014-06-19 21:27:24 +04:00
|
|
|
* Implementation of fetching of data from a URL.
|
|
|
|
*
|
|
|
|
* The implementation is the fetch factory and the generic operations
|
|
|
|
* around the fetcher specific methods.
|
2003-04-18 01:35:02 +04:00
|
|
|
*
|
2007-01-02 16:07:28 +03:00
|
|
|
* Active fetches are held in the circular linked list ::fetch_ring. There may
|
|
|
|
* be at most ::option_max_fetchers_per_host active requests per Host: header.
|
|
|
|
* There may be at most ::option_max_fetchers active requests overall. Inactive
|
2014-06-19 21:27:24 +04:00
|
|
|
* fetches are stored in the ::queue_ring waiting for use.
|
2003-02-09 15:58:15 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <assert.h>
|
2007-05-02 06:38:38 +04:00
|
|
|
#include <errno.h>
|
2003-08-29 16:57:14 +04:00
|
|
|
#include <stdbool.h>
|
2003-03-15 18:53:20 +03:00
|
|
|
#include <string.h>
|
2003-04-18 01:35:02 +04:00
|
|
|
#include <strings.h>
|
2003-02-09 15:58:15 +03:00
|
|
|
#include <time.h>
|
2010-12-20 13:32:15 +03:00
|
|
|
#include <libwapcaplet/libwapcaplet.h>
|
2014-06-26 22:04:14 +04:00
|
|
|
#include <curl/curl.h>
|
2010-12-20 13:32:15 +03:00
|
|
|
|
2007-05-31 02:39:54 +04:00
|
|
|
#include "utils/config.h"
|
2014-06-26 22:04:14 +04:00
|
|
|
#include "desktop/gui_factory.h"
|
2014-01-24 23:25:07 +04:00
|
|
|
#include "utils/corestrings.h"
|
2013-05-26 01:46:27 +04:00
|
|
|
#include "utils/nsoption.h"
|
2007-05-31 02:39:54 +04:00
|
|
|
#include "utils/log.h"
|
|
|
|
#include "utils/messages.h"
|
2011-09-27 15:07:32 +04:00
|
|
|
#include "utils/nsurl.h"
|
2007-05-31 02:39:54 +04:00
|
|
|
#include "utils/utils.h"
|
2007-06-10 21:46:44 +04:00
|
|
|
#include "utils/ring.h"
|
2003-09-18 03:27:33 +04:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
#include "content/fetch.h"
|
|
|
|
#include "content/fetchers.h"
|
|
|
|
#include "content/fetchers/resource.h"
|
|
|
|
#include "content/fetchers/about.h"
|
|
|
|
#include "content/fetchers/curl.h"
|
|
|
|
#include "content/fetchers/data.h"
|
|
|
|
#include "content/fetchers/file.h"
|
2014-08-26 02:58:56 +04:00
|
|
|
#include "javascript/fetcher.h"
|
2014-06-19 21:27:24 +04:00
|
|
|
#include "content/urldb.h"
|
|
|
|
|
2008-05-29 23:32:31 +04:00
|
|
|
/* Define this to turn on verbose fetch logging */
|
2011-01-06 03:31:54 +03:00
|
|
|
#undef DEBUG_FETCH_VERBOSE
|
2014-06-19 21:27:24 +04:00
|
|
|
|
2014-07-02 20:07:05 +04:00
|
|
|
/**
 * Verbose fetcher logging.
 *
 * Expands to nothing unless DEBUG_FETCH_VERBOSE is defined, in which
 * case the argument is handed on to LOG().
 */
#ifndef DEBUG_FETCH_VERBOSE
#define FETCH_LOG(x)
#else
#define FETCH_LOG(x) LOG(x)
#endif
|
2003-11-06 22:41:41 +03:00
|
|
|
|
2014-07-02 20:07:05 +04:00
|
|
|
/** The maximum number of scheme fetchers that can be registered at once */
|
2014-09-05 03:40:52 +04:00
|
|
|
#define MAX_FETCHERS 10
|
2014-07-02 20:07:05 +04:00
|
|
|
|
|
|
|
/** The time in ms between polling the fetchers.
|
|
|
|
*
|
|
|
|
* \todo The schedule timeout should be profiled to see if there is a
|
|
|
|
* better value or even if it needs to be dynamic.
|
|
|
|
*/
|
|
|
|
#define SCHEDULE_TIME 10
|
|
|
|
|
|
|
|
/** The delay in ms before the fallback poll scheduled by fetcher_fdset() */
|
|
|
|
#define FDSET_TIMEOUT 1000
|
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
/**
|
|
|
|
* Information about a fetcher for a given scheme.
|
|
|
|
*/
|
2007-06-10 21:46:44 +04:00
|
|
|
typedef struct scheme_fetcher_s {
|
2014-06-19 21:27:24 +04:00
|
|
|
lwc_string *scheme; /**< The scheme. */
|
|
|
|
|
|
|
|
struct fetcher_operation_table ops; /**< The fetchers operations. */
|
|
|
|
int refcount; /**< When zero the fetcher is no longer in use. */
|
2007-06-10 21:46:44 +04:00
|
|
|
} scheme_fetcher;
|
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
static scheme_fetcher fetchers[MAX_FETCHERS];
|
2006-02-23 18:06:54 +03:00
|
|
|
|
2003-09-18 03:27:33 +04:00
|
|
|
/** Information for a single fetch. */
|
|
|
|
struct fetch {
|
2007-06-10 21:46:44 +04:00
|
|
|
fetch_callback callback;/**< Callback function. */
|
2011-09-27 15:07:32 +04:00
|
|
|
nsurl *url; /**< URL. */
|
|
|
|
nsurl *referer; /**< Referer URL. */
|
2007-01-27 23:58:20 +03:00
|
|
|
bool send_referer; /**< Valid to send the referer */
|
2008-01-30 22:56:41 +03:00
|
|
|
bool verifiable; /**< Transaction is verifiable */
|
2003-09-18 03:27:33 +04:00
|
|
|
void *p; /**< Private data for callback. */
|
2011-09-27 15:07:32 +04:00
|
|
|
lwc_string *host; /**< Host part of URL, interned */
|
2007-01-13 03:19:02 +03:00
|
|
|
long http_code; /**< HTTP response code, or 0. */
|
2014-06-19 21:27:24 +04:00
|
|
|
int fetcherd; /**< Fetcher descriptor for this fetch */
|
2007-07-04 22:05:16 +04:00
|
|
|
void *fetcher_handle; /**< The handle for the fetcher. */
|
|
|
|
bool fetch_is_active; /**< This fetch is active. */
|
2006-03-14 17:21:01 +03:00
|
|
|
struct fetch *r_prev; /**< Previous active fetch in ::fetch_ring. */
|
|
|
|
struct fetch *r_next; /**< Next active fetch in ::fetch_ring. */
|
|
|
|
};
|
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
static struct fetch *fetch_ring = NULL; /**< Ring of active fetches. */
|
|
|
|
static struct fetch *queue_ring = NULL; /**< Ring of queued fetches */
|
2006-03-14 17:21:01 +03:00
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/******************************************************************************
|
|
|
|
* fetch internals *
|
|
|
|
******************************************************************************/
|
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
static inline void fetch_ref_fetcher(int fetcherd)
|
2014-01-19 22:17:32 +04:00
|
|
|
{
|
2014-06-19 21:27:24 +04:00
|
|
|
fetchers[fetcherd].refcount++;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void fetch_unref_fetcher(int fetcherd)
|
|
|
|
{
|
|
|
|
fetchers[fetcherd].refcount--;
|
|
|
|
if (fetchers[fetcherd].refcount == 0) {
|
|
|
|
fetchers[fetcherd].ops.finalise(fetchers[fetcherd].scheme);
|
|
|
|
lwc_string_unref(fetchers[fetcherd].scheme);
|
2014-01-19 22:17:32 +04:00
|
|
|
}
|
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
/**
|
|
|
|
* Find a suitable fetcher for a scheme.
|
|
|
|
*/
|
|
|
|
static int get_fetcher_for_scheme(lwc_string *scheme)
|
|
|
|
{
|
|
|
|
int fetcherd;
|
|
|
|
bool match;
|
|
|
|
|
|
|
|
for (fetcherd = 0; fetcherd < MAX_FETCHERS; fetcherd++) {
|
|
|
|
if ((fetchers[fetcherd].refcount > 0) &&
|
|
|
|
(lwc_string_isequal(fetchers[fetcherd].scheme,
|
|
|
|
scheme, &match) == lwc_error_ok) &&
|
|
|
|
(match == true)) {
|
|
|
|
return fetcherd;
|
2014-06-26 22:04:14 +04:00
|
|
|
}
|
2014-06-19 21:27:24 +04:00
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2003-02-09 15:58:15 +03:00
|
|
|
/**
|
2014-01-19 22:17:32 +04:00
|
|
|
* Dispatch a single job
|
2003-02-09 15:58:15 +03:00
|
|
|
*/
|
2014-01-19 22:17:32 +04:00
|
|
|
static bool fetch_dispatch_job(struct fetch *fetch)
|
|
|
|
{
|
|
|
|
RING_REMOVE(queue_ring, fetch);
|
2014-06-26 22:04:14 +04:00
|
|
|
FETCH_LOG(("Attempting to start fetch %p, fetcher %p, url %s", fetch,
|
2014-01-19 22:17:32 +04:00
|
|
|
fetch->fetcher_handle, nsurl_access(fetch->url)));
|
2014-06-26 22:04:14 +04:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
if (!fetchers[fetch->fetcherd].ops.start(fetch->fetcher_handle)) {
|
2014-01-19 22:17:32 +04:00
|
|
|
RING_INSERT(queue_ring, fetch); /* Put it back on the end of the queue */
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
RING_INSERT(fetch_ring, fetch);
|
|
|
|
fetch->fetch_is_active = true;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/**
|
|
|
|
* Choose and dispatch a single job. Return false if we failed to dispatch
|
|
|
|
* anything.
|
|
|
|
*
|
|
|
|
* We don't check the overall dispatch size here because we're not called unless
|
|
|
|
* there is room in the fetch queue for us.
|
|
|
|
*/
|
|
|
|
static bool fetch_choose_and_dispatch(void)
|
2003-02-09 15:58:15 +03:00
|
|
|
{
|
2014-01-19 22:17:32 +04:00
|
|
|
bool same_host;
|
|
|
|
struct fetch *queueitem;
|
|
|
|
queueitem = queue_ring;
|
|
|
|
do {
|
|
|
|
/* We can dispatch the selected item if there is room in the
|
|
|
|
* fetch ring
|
|
|
|
*/
|
|
|
|
int countbyhost;
|
|
|
|
RING_COUNTBYLWCHOST(struct fetch, fetch_ring, countbyhost,
|
|
|
|
queueitem->host);
|
|
|
|
if (countbyhost < nsoption_int(max_fetchers_per_host)) {
|
|
|
|
/* We can dispatch this item in theory */
|
|
|
|
return fetch_dispatch_job(queueitem);
|
|
|
|
}
|
|
|
|
/* skip over other items with the same host */
|
|
|
|
same_host = true;
|
|
|
|
while (same_host == true && queueitem->r_next != queue_ring) {
|
|
|
|
if (lwc_string_isequal(queueitem->host,
|
|
|
|
queueitem->r_next->host, &same_host) ==
|
|
|
|
lwc_error_ok && same_host == true) {
|
|
|
|
queueitem = queueitem->r_next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
queueitem = queueitem->r_next;
|
|
|
|
} while (queueitem != queue_ring);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-06-26 22:04:14 +04:00
|
|
|
/**
 * Log the URL of every fetch in the queue and fetch rings.
 *
 * Does nothing unless DEBUG_FETCH_VERBOSE is defined.
 */
static void dump_rings(void)
{
#ifdef DEBUG_FETCH_VERBOSE
	struct fetch *cur;

	if (queue_ring != NULL) {
		cur = queue_ring;
		do {
			LOG(("queue_ring: %s", nsurl_access(cur->url)));
			cur = cur->r_next;
		} while (cur != queue_ring);
	}

	if (fetch_ring != NULL) {
		cur = fetch_ring;
		do {
			LOG(("fetch_ring: %s", nsurl_access(cur->url)));
			cur = cur->r_next;
		} while (cur != fetch_ring);
	}
#endif
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Dispatch as many jobs as we have room to dispatch.
|
|
|
|
*
|
|
|
|
* @return true if there are active fetchers that require polling else false.
|
|
|
|
*/
|
|
|
|
static bool fetch_dispatch_jobs(void)
|
|
|
|
{
|
|
|
|
int all_active;
|
|
|
|
int all_queued;
|
|
|
|
|
|
|
|
RING_GETSIZE(struct fetch, queue_ring, all_queued);
|
|
|
|
RING_GETSIZE(struct fetch, fetch_ring, all_active);
|
|
|
|
|
|
|
|
FETCH_LOG(("queue_ring %i, fetch_ring %i", all_queued, all_active));
|
|
|
|
dump_rings();
|
2014-01-19 22:17:32 +04:00
|
|
|
|
2014-06-26 22:04:14 +04:00
|
|
|
while ((all_queued != 0) &&
|
|
|
|
(all_active < nsoption_int(max_fetchers)) &&
|
|
|
|
fetch_choose_and_dispatch()) {
|
2014-01-19 22:17:32 +04:00
|
|
|
all_queued--;
|
|
|
|
all_active++;
|
2014-06-26 22:04:14 +04:00
|
|
|
FETCH_LOG(("%d queued, %d fetching",
|
|
|
|
all_queued, all_active));
|
|
|
|
}
|
|
|
|
|
|
|
|
FETCH_LOG(("Fetch ring is now %d elements.", all_active));
|
|
|
|
FETCH_LOG(("Queue ring is now %d elements.", all_queued));
|
|
|
|
|
|
|
|
return (all_active > 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void fetcher_poll(void *unused)
|
|
|
|
{
|
|
|
|
int fetcherd;
|
|
|
|
|
|
|
|
if (fetch_dispatch_jobs()) {
|
|
|
|
FETCH_LOG(("Polling fetchers"));
|
|
|
|
for (fetcherd = 0; fetcherd < MAX_FETCHERS; fetcherd++) {
|
|
|
|
if (fetchers[fetcherd].refcount > 0) {
|
|
|
|
/* fetcher present */
|
|
|
|
fetchers[fetcherd].ops.poll(fetchers[fetcherd].scheme);
|
|
|
|
}
|
2014-01-19 22:17:32 +04:00
|
|
|
}
|
2014-06-26 22:04:14 +04:00
|
|
|
|
|
|
|
/* schedule active fetchers to run again in 10ms */
|
2014-07-02 20:07:05 +04:00
|
|
|
guit->browser->schedule(SCHEDULE_TIME, fetcher_poll, NULL);
|
2014-01-19 22:17:32 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************************
|
|
|
|
* Public API *
|
|
|
|
******************************************************************************/
|
|
|
|
|
|
|
|
/* exported interface documented in content/fetch.h */
|
2014-06-19 21:27:24 +04:00
|
|
|
nserror fetcher_init(void)
|
2014-01-19 22:17:32 +04:00
|
|
|
{
|
|
|
|
fetch_curl_register();
|
|
|
|
fetch_data_register();
|
|
|
|
fetch_file_register();
|
|
|
|
fetch_resource_register();
|
|
|
|
fetch_about_register();
|
2014-08-26 02:58:56 +04:00
|
|
|
fetch_javascript_register();
|
2014-01-19 22:17:32 +04:00
|
|
|
|
|
|
|
return NSERROR_OK;
|
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
/* exported interface documented in content/fetchers.h */
|
|
|
|
void fetcher_quit(void)
|
2003-02-09 15:58:15 +03:00
|
|
|
{
|
2014-06-19 21:27:24 +04:00
|
|
|
int fetcherd; /* fetcher index */
|
|
|
|
for (fetcherd = 0; fetcherd < MAX_FETCHERS; fetcherd++) {
|
2014-07-02 17:50:23 +04:00
|
|
|
if (fetchers[fetcherd].refcount > 1) {
|
|
|
|
/* fetcher still has reference at quit. This
|
|
|
|
* should not happen as the fetch should have
|
|
|
|
* been aborted in llcache shutdown.
|
|
|
|
*
|
|
|
|
* This appears to be normal behaviour if a
|
|
|
|
* curl operation is still in progress at exit
|
|
|
|
* as the abort waits for curl to complete.
|
|
|
|
*
|
|
|
|
* We could make the user wait for curl to
|
|
|
|
* complete but we are exiting anyway so thats
|
|
|
|
* unhelpful. Instead we just log it and force
|
|
|
|
* the reference count to allow the fetcher to
|
|
|
|
* be stopped.
|
|
|
|
*/
|
|
|
|
LOG(("Fetcher for scheme %s still has %d active users at quit.",
|
2014-07-02 18:01:24 +04:00
|
|
|
lwc_string_data(fetchers[fetcherd].scheme),
|
|
|
|
fetchers[fetcherd].refcount));
|
2014-07-02 17:50:23 +04:00
|
|
|
|
2014-07-02 18:01:24 +04:00
|
|
|
fetchers[fetcherd].refcount = 1;
|
2014-07-02 17:50:23 +04:00
|
|
|
}
|
|
|
|
if (fetchers[fetcherd].refcount == 1) {
|
2014-06-19 21:27:24 +04:00
|
|
|
|
|
|
|
fetch_unref_fetcher(fetcherd);
|
2007-07-04 22:05:16 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
/* exported interface documented in content/fetchers.h */
|
|
|
|
nserror
|
|
|
|
fetcher_add(lwc_string *scheme, const struct fetcher_operation_table *ops)
|
2007-07-04 22:05:16 +04:00
|
|
|
{
|
2014-06-19 21:27:24 +04:00
|
|
|
int fetcherd;
|
|
|
|
|
|
|
|
/* find unused fetcher descriptor */
|
|
|
|
for (fetcherd = 0; fetcherd < MAX_FETCHERS; fetcherd++) {
|
|
|
|
if (fetchers[fetcherd].refcount == 0) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (fetcherd == MAX_FETCHERS) {
|
|
|
|
return NSERROR_INIT_FAILED;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!ops->initialise(scheme)) {
|
|
|
|
return NSERROR_INIT_FAILED;
|
2007-07-04 22:05:16 +04:00
|
|
|
}
|
2014-06-19 21:27:24 +04:00
|
|
|
|
|
|
|
fetchers[fetcherd].scheme = scheme;
|
|
|
|
fetchers[fetcherd].ops = *ops;
|
|
|
|
|
|
|
|
fetch_ref_fetcher(fetcherd);
|
|
|
|
|
|
|
|
return NSERROR_OK;
|
2003-02-09 15:58:15 +03:00
|
|
|
}
|
|
|
|
|
2014-06-26 22:04:14 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
|
|
|
nserror fetcher_fdset(fd_set *read_fd_set,
|
|
|
|
fd_set *write_fd_set,
|
|
|
|
fd_set *except_fd_set,
|
|
|
|
int *maxfd_out)
|
|
|
|
{
|
|
|
|
CURLMcode code;
|
|
|
|
int maxfd;
|
|
|
|
int fetcherd; /* fetcher index */
|
|
|
|
|
|
|
|
if (!fetch_dispatch_jobs()) {
|
|
|
|
FETCH_LOG(("No jobs"));
|
|
|
|
*maxfd_out = -1;
|
|
|
|
return NSERROR_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
FETCH_LOG(("Polling fetchers"));
|
|
|
|
|
|
|
|
for (fetcherd = 0; fetcherd < MAX_FETCHERS; fetcherd++) {
|
|
|
|
if (fetchers[fetcherd].refcount > 0) {
|
|
|
|
/* fetcher present */
|
|
|
|
fetchers[fetcherd].ops.poll(fetchers[fetcherd].scheme);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
FD_ZERO(read_fd_set);
|
|
|
|
FD_ZERO(write_fd_set);
|
|
|
|
FD_ZERO(except_fd_set);
|
|
|
|
code = curl_multi_fdset(fetch_curl_multi,
|
|
|
|
read_fd_set,
|
|
|
|
write_fd_set,
|
|
|
|
except_fd_set,
|
|
|
|
&maxfd);
|
|
|
|
assert(code == CURLM_OK);
|
|
|
|
|
|
|
|
if (maxfd >= 0) {
|
|
|
|
/* change the scheduled poll to happen is a 1000ms as
|
|
|
|
* we assume fetching an fdset means the fetchers will
|
|
|
|
* be run by the client waking up on data available on
|
|
|
|
* the fd and re-calling fetcher_fdset() if this does
|
|
|
|
* not happen the fetch polling will continue as
|
|
|
|
* usual.
|
|
|
|
*/
|
|
|
|
/** @note adjusting the schedule time is only done for
|
|
|
|
* curl currently. This is because as it is assumed to
|
|
|
|
* be the only fetcher that can possibly have fd to
|
|
|
|
* select on. All the other fetchers continue to need
|
|
|
|
* polling frequently.
|
|
|
|
*/
|
2014-07-02 20:07:05 +04:00
|
|
|
guit->browser->schedule(FDSET_TIMEOUT, fetcher_poll, NULL);
|
2014-06-26 22:04:14 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
*maxfd_out = maxfd;
|
|
|
|
|
|
|
|
return NSERROR_OK;
|
|
|
|
}
|
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
2014-06-19 21:27:24 +04:00
|
|
|
struct fetch *
|
|
|
|
fetch_start(nsurl *url,
|
|
|
|
nsurl *referer,
|
|
|
|
fetch_callback callback,
|
|
|
|
void *p,
|
|
|
|
bool only_2xx,
|
|
|
|
const char *post_urlenc,
|
|
|
|
const struct fetch_multipart_data *post_multipart,
|
|
|
|
bool verifiable,
|
|
|
|
bool downgrade_tls,
|
|
|
|
const char *headers[])
|
2003-02-09 15:58:15 +03:00
|
|
|
{
|
2004-03-27 03:50:58 +03:00
|
|
|
struct fetch *fetch;
|
2011-09-27 15:07:32 +04:00
|
|
|
lwc_string *scheme;
|
|
|
|
bool match;
|
2004-03-27 03:50:58 +03:00
|
|
|
|
|
|
|
fetch = malloc(sizeof (*fetch));
|
2014-06-19 21:27:24 +04:00
|
|
|
if (fetch == NULL) {
|
2009-05-28 15:56:56 +04:00
|
|
|
return NULL;
|
2014-06-19 21:27:24 +04:00
|
|
|
}
|
2004-03-27 03:50:58 +03:00
|
|
|
|
2009-05-28 15:56:56 +04:00
|
|
|
/* The URL we're fetching must have a scheme */
|
2011-09-27 15:07:32 +04:00
|
|
|
scheme = nsurl_get_component(url, NSURL_SCHEME);
|
2012-10-11 17:38:10 +04:00
|
|
|
assert(scheme != NULL);
|
2006-06-30 02:04:56 +04:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
/* try and obtain a fetcher for this scheme */
|
|
|
|
fetch->fetcherd = get_fetcher_for_scheme(scheme);
|
|
|
|
if (fetch->fetcherd == -1) {
|
|
|
|
lwc_string_unref(scheme);
|
2014-07-08 18:57:48 +04:00
|
|
|
free(fetch);
|
2014-06-19 21:27:24 +04:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2014-06-26 22:04:14 +04:00
|
|
|
FETCH_LOG(("fetch %p, url '%s'", fetch, nsurl_access(url)));
|
2003-04-18 01:35:02 +04:00
|
|
|
|
|
|
|
/* construct a new fetch structure */
|
2003-02-09 15:58:15 +03:00
|
|
|
fetch->callback = callback;
|
2011-09-27 15:07:32 +04:00
|
|
|
fetch->url = nsurl_ref(url);
|
2008-01-30 22:56:41 +03:00
|
|
|
fetch->verifiable = verifiable;
|
2003-02-09 15:58:15 +03:00
|
|
|
fetch->p = p;
|
2006-10-01 17:17:52 +04:00
|
|
|
fetch->http_code = 0;
|
2009-05-28 15:56:56 +04:00
|
|
|
fetch->r_prev = NULL;
|
|
|
|
fetch->r_next = NULL;
|
|
|
|
fetch->referer = NULL;
|
2007-07-08 15:35:53 +04:00
|
|
|
fetch->send_referer = false;
|
|
|
|
fetch->fetcher_handle = NULL;
|
2007-07-04 22:05:16 +04:00
|
|
|
fetch->fetch_is_active = false;
|
2011-09-27 15:07:32 +04:00
|
|
|
fetch->host = nsurl_get_component(url, NSURL_HOST);
|
2014-01-19 22:17:32 +04:00
|
|
|
|
2007-07-04 22:05:16 +04:00
|
|
|
if (referer != NULL) {
|
2011-09-27 15:07:32 +04:00
|
|
|
lwc_string *ref_scheme;
|
|
|
|
fetch->referer = nsurl_ref(referer);
|
|
|
|
|
|
|
|
ref_scheme = nsurl_get_component(referer, NSURL_SCHEME);
|
|
|
|
/* Not a problem if referer has no scheme */
|
2009-05-28 15:56:56 +04:00
|
|
|
|
2009-06-19 15:15:06 +04:00
|
|
|
/* Determine whether to send the Referer header */
|
2012-03-22 13:34:34 +04:00
|
|
|
if (nsoption_bool(send_referer) && ref_scheme != NULL) {
|
2014-01-19 22:17:32 +04:00
|
|
|
/* User permits us to send the header
|
2009-06-19 15:15:06 +04:00
|
|
|
* Only send it if:
|
|
|
|
* 1) The fetch and referer schemes match
|
|
|
|
* or 2) The fetch is https and the referer is http
|
|
|
|
*
|
|
|
|
* This ensures that referer information is only sent
|
|
|
|
* across schemes in the special case of an https
|
|
|
|
* request from a page served over http. The inverse
|
|
|
|
* (https -> http) should not send the referer (15.1.3)
|
|
|
|
*/
|
2011-09-27 15:07:32 +04:00
|
|
|
bool match1;
|
|
|
|
bool match2;
|
2013-10-23 18:15:08 +04:00
|
|
|
if (lwc_string_isequal(scheme, ref_scheme,
|
2014-01-19 22:17:32 +04:00
|
|
|
&match) != lwc_error_ok) {
|
2012-08-09 20:30:02 +04:00
|
|
|
match = false;
|
|
|
|
}
|
2014-01-24 23:25:07 +04:00
|
|
|
if (lwc_string_isequal(scheme, corestring_lwc_https,
|
2014-01-19 22:17:32 +04:00
|
|
|
&match1) != lwc_error_ok) {
|
2012-08-09 20:30:02 +04:00
|
|
|
match1 = false;
|
|
|
|
}
|
2014-01-24 23:25:07 +04:00
|
|
|
if (lwc_string_isequal(ref_scheme, corestring_lwc_http,
|
2014-01-19 22:17:32 +04:00
|
|
|
&match2) != lwc_error_ok) {
|
2012-08-09 20:30:02 +04:00
|
|
|
match2= false;
|
|
|
|
}
|
2011-09-27 15:07:32 +04:00
|
|
|
if (match == true || (match1 == true && match2 == true))
|
2009-06-19 15:15:06 +04:00
|
|
|
fetch->send_referer = true;
|
|
|
|
}
|
2011-09-27 15:07:32 +04:00
|
|
|
if (ref_scheme != NULL)
|
|
|
|
lwc_string_unref(ref_scheme);
|
2007-07-04 22:05:16 +04:00
|
|
|
}
|
2003-04-18 01:35:02 +04:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
/* these aren't needed past here */
|
|
|
|
lwc_string_unref(scheme);
|
2007-06-10 22:08:22 +04:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
/* try and set up the fetch */
|
|
|
|
fetch->fetcher_handle = fetchers[fetch->fetcherd].ops.setup(fetch, url,
|
|
|
|
only_2xx, downgrade_tls,
|
|
|
|
post_urlenc, post_multipart,
|
|
|
|
headers);
|
|
|
|
if (fetch->fetcher_handle == NULL) {
|
2007-06-10 22:08:22 +04:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
if (fetch->host != NULL)
|
|
|
|
lwc_string_unref(fetch->host);
|
2007-06-10 22:08:22 +04:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
if (fetch->url != NULL)
|
|
|
|
nsurl_unref(fetch->url);
|
2007-06-10 22:08:22 +04:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
if (fetch->referer != NULL)
|
|
|
|
nsurl_unref(fetch->referer);
|
2007-06-10 22:08:22 +04:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
free(fetch);
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Rah, got it, so ref the fetcher. */
|
|
|
|
fetch_ref_fetcher(fetch->fetcherd);
|
2007-06-10 22:08:22 +04:00
|
|
|
|
2014-06-26 22:04:14 +04:00
|
|
|
/* Dump new fetch in the queue. */
|
2006-03-14 17:21:01 +03:00
|
|
|
RING_INSERT(queue_ring, fetch);
|
2014-06-26 22:04:14 +04:00
|
|
|
|
|
|
|
/* Ask the queue to run. */
|
|
|
|
if (fetch_dispatch_jobs()) {
|
|
|
|
FETCH_LOG(("scheduling poll"));
|
|
|
|
/* schedule active fetchers to run again in 10ms */
|
|
|
|
guit->browser->schedule(10, fetcher_poll, NULL);
|
|
|
|
}
|
2009-05-28 15:56:56 +04:00
|
|
|
|
2004-03-27 03:50:58 +03:00
|
|
|
return fetch;
|
|
|
|
}
|
2003-10-25 23:20:13 +04:00
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
2003-02-09 15:58:15 +03:00
|
|
|
void fetch_abort(struct fetch *f)
|
2004-06-22 21:37:51 +04:00
|
|
|
{
|
|
|
|
assert(f);
|
2014-06-26 22:04:14 +04:00
|
|
|
FETCH_LOG(("fetch %p, fetcher %p, url '%s'", f, f->fetcher_handle,
|
2014-01-19 22:17:32 +04:00
|
|
|
nsurl_access(f->url)));
|
2014-06-19 21:27:24 +04:00
|
|
|
fetchers[f->fetcherd].ops.abort(f->fetcher_handle);
|
2004-03-27 03:50:58 +03:00
|
|
|
}
|
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
2004-03-27 03:50:58 +03:00
|
|
|
void fetch_free(struct fetch *f)
|
|
|
|
{
|
2014-06-26 22:04:14 +04:00
|
|
|
FETCH_LOG(("Freeing fetch %p, fetcher %p", f, f->fetcher_handle));
|
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
fetchers[f->fetcherd].ops.free(f->fetcher_handle);
|
|
|
|
|
|
|
|
fetch_unref_fetcher(f->fetcherd);
|
|
|
|
|
2011-09-27 15:07:32 +04:00
|
|
|
nsurl_unref(f->url);
|
2014-06-26 22:04:14 +04:00
|
|
|
if (f->referer != NULL) {
|
2011-09-27 15:07:32 +04:00
|
|
|
nsurl_unref(f->referer);
|
2014-06-26 22:04:14 +04:00
|
|
|
}
|
|
|
|
if (f->host != NULL) {
|
2011-09-27 15:07:32 +04:00
|
|
|
lwc_string_unref(f->host);
|
2014-06-26 22:04:14 +04:00
|
|
|
}
|
2014-01-19 22:17:32 +04:00
|
|
|
free(f);
|
2003-02-09 15:58:15 +03:00
|
|
|
}
|
|
|
|
|
2007-07-04 22:05:16 +04:00
|
|
|
|
2003-10-25 20:22:11 +04:00
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
2011-09-27 18:42:45 +04:00
|
|
|
bool fetch_can_fetch(const nsurl *url)
|
2004-04-02 17:51:13 +04:00
|
|
|
{
|
2011-09-27 15:07:32 +04:00
|
|
|
lwc_string *scheme = nsurl_get_component(url, NSURL_SCHEME);
|
2014-06-19 21:27:24 +04:00
|
|
|
int fetcherd;
|
2011-09-27 01:07:19 +04:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
fetcherd = get_fetcher_for_scheme(scheme);
|
|
|
|
lwc_string_unref(scheme);
|
2011-11-27 18:14:36 +04:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
if (fetcherd == -1) {
|
|
|
|
return false;
|
2007-07-04 22:05:16 +04:00
|
|
|
}
|
2007-06-10 22:08:22 +04:00
|
|
|
|
2014-06-19 21:27:24 +04:00
|
|
|
return fetchers[fetcherd].ops.acceptable(url);
|
2004-04-02 17:51:13 +04:00
|
|
|
}
|
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
2004-06-28 03:24:11 +04:00
|
|
|
void fetch_change_callback(struct fetch *fetch,
|
2007-07-04 22:05:16 +04:00
|
|
|
fetch_callback callback,
|
|
|
|
void *p)
|
2004-06-28 03:24:11 +04:00
|
|
|
{
|
|
|
|
assert(fetch);
|
|
|
|
fetch->callback = callback;
|
|
|
|
fetch->p = p;
|
|
|
|
}
|
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
2007-01-13 03:19:02 +03:00
|
|
|
long fetch_http_code(struct fetch *fetch)
|
|
|
|
{
|
|
|
|
return fetch->http_code;
|
|
|
|
}
|
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
2009-02-16 23:24:54 +03:00
|
|
|
bool fetch_get_verifiable(struct fetch *fetch)
|
|
|
|
{
|
|
|
|
assert(fetch);
|
|
|
|
|
|
|
|
return fetch->verifiable;
|
|
|
|
}
|
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
|
|
|
struct fetch_multipart_data *
|
|
|
|
fetch_multipart_data_clone(const struct fetch_multipart_data *list)
|
2010-03-28 16:56:39 +04:00
|
|
|
{
|
|
|
|
struct fetch_multipart_data *clone, *last = NULL;
|
|
|
|
struct fetch_multipart_data *result = NULL;
|
|
|
|
|
|
|
|
for (; list != NULL; list = list->next) {
|
|
|
|
clone = malloc(sizeof(struct fetch_multipart_data));
|
|
|
|
if (clone == NULL) {
|
|
|
|
if (result != NULL)
|
|
|
|
fetch_multipart_data_destroy(result);
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
clone->file = list->file;
|
|
|
|
|
|
|
|
clone->name = strdup(list->name);
|
|
|
|
if (clone->name == NULL) {
|
|
|
|
free(clone);
|
|
|
|
if (result != NULL)
|
|
|
|
fetch_multipart_data_destroy(result);
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
clone->value = strdup(list->value);
|
|
|
|
if (clone->value == NULL) {
|
|
|
|
free(clone->name);
|
|
|
|
free(clone);
|
|
|
|
if (result != NULL)
|
|
|
|
fetch_multipart_data_destroy(result);
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2014-01-05 01:49:03 +04:00
|
|
|
if (clone->file) {
|
|
|
|
clone->rawfile = strdup(list->rawfile);
|
|
|
|
if (clone->rawfile == NULL) {
|
|
|
|
free(clone->value);
|
|
|
|
free(clone->name);
|
|
|
|
free(clone);
|
|
|
|
if (result != NULL)
|
|
|
|
fetch_multipart_data_destroy(result);
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
clone->rawfile = NULL;
|
|
|
|
}
|
|
|
|
|
2010-03-28 16:56:39 +04:00
|
|
|
clone->next = NULL;
|
|
|
|
|
|
|
|
if (result == NULL)
|
|
|
|
result = clone;
|
|
|
|
else
|
|
|
|
last->next = clone;
|
|
|
|
|
|
|
|
last = clone;
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
2010-03-28 16:56:39 +04:00
|
|
|
void fetch_multipart_data_destroy(struct fetch_multipart_data *list)
|
|
|
|
{
|
|
|
|
struct fetch_multipart_data *next;
|
|
|
|
|
|
|
|
for (; list != NULL; list = next) {
|
|
|
|
next = list->next;
|
|
|
|
free(list->name);
|
|
|
|
free(list->value);
|
2014-01-05 01:15:52 +04:00
|
|
|
if (list->file) {
|
2014-06-26 22:04:14 +04:00
|
|
|
FETCH_LOG(("Freeing rawfile: %s", list->rawfile));
|
2014-01-04 23:34:04 +04:00
|
|
|
free(list->rawfile);
|
2014-01-05 01:15:52 +04:00
|
|
|
}
|
2010-03-28 16:56:39 +04:00
|
|
|
free(list);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
2007-06-10 21:46:44 +04:00
|
|
|
void
|
2011-11-09 01:51:42 +04:00
|
|
|
fetch_send_callback(const fetch_msg *msg, struct fetch *fetch)
|
2006-02-23 18:06:54 +03:00
|
|
|
{
|
2011-11-09 01:51:42 +04:00
|
|
|
fetch->callback(msg, fetch->p);
|
2006-02-23 18:06:54 +03:00
|
|
|
}
|
|
|
|
|
2007-07-04 22:44:13 +04:00
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
2007-07-04 22:44:13 +04:00
|
|
|
void fetch_remove_from_queues(struct fetch *fetch)
|
2006-02-23 18:06:54 +03:00
|
|
|
{
|
2014-06-26 22:04:14 +04:00
|
|
|
FETCH_LOG(("Fetch %p, fetcher %p can be freed",
|
|
|
|
fetch, fetch->fetcher_handle));
|
2007-07-04 22:05:16 +04:00
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* Go ahead and free the fetch properly now */
|
2007-07-04 22:05:16 +04:00
|
|
|
if (fetch->fetch_is_active) {
|
|
|
|
RING_REMOVE(fetch_ring, fetch);
|
|
|
|
} else {
|
|
|
|
RING_REMOVE(queue_ring, fetch);
|
|
|
|
}
|
|
|
|
|
2008-05-29 23:32:31 +04:00
|
|
|
#ifdef DEBUG_FETCH_VERBOSE
|
2014-06-26 22:04:14 +04:00
|
|
|
int all_active;
|
|
|
|
int all_queued;
|
2014-01-19 22:17:32 +04:00
|
|
|
|
2014-06-26 22:04:14 +04:00
|
|
|
RING_GETSIZE(struct fetch, fetch_ring, all_active);
|
2014-01-19 22:17:32 +04:00
|
|
|
RING_GETSIZE(struct fetch, queue_ring, all_queued);
|
|
|
|
|
2014-06-26 22:04:14 +04:00
|
|
|
LOG(("Fetch ring is now %d elements.", all_active));
|
|
|
|
|
2007-07-04 22:05:16 +04:00
|
|
|
LOG(("Queue ring is now %d elements.", all_queued));
|
2008-05-29 23:32:31 +04:00
|
|
|
#endif
|
2006-02-23 18:06:54 +03:00
|
|
|
}
|
2003-02-09 15:58:15 +03:00
|
|
|
|
2007-07-04 22:44:13 +04:00
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
|
|
|
void fetch_set_http_code(struct fetch *fetch, long http_code)
|
2003-02-09 15:58:15 +03:00
|
|
|
{
|
2014-06-26 22:04:14 +04:00
|
|
|
FETCH_LOG(("Setting HTTP code to %ld", http_code));
|
|
|
|
|
2007-07-04 22:05:16 +04:00
|
|
|
fetch->http_code = http_code;
|
2003-02-09 15:58:15 +03:00
|
|
|
}
|
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
2011-09-27 15:07:32 +04:00
|
|
|
const char *fetch_get_referer_to_send(struct fetch *fetch)
|
2003-02-09 15:58:15 +03:00
|
|
|
{
|
2007-07-04 22:05:16 +04:00
|
|
|
if (fetch->send_referer)
|
2011-09-27 15:07:32 +04:00
|
|
|
return nsurl_access(fetch->referer);
|
2007-07-04 22:05:16 +04:00
|
|
|
return NULL;
|
2003-02-09 15:58:15 +03:00
|
|
|
}
|
2008-01-30 22:56:41 +03:00
|
|
|
|
2014-01-19 22:17:32 +04:00
|
|
|
/* exported interface documented in content/fetch.h */
|
|
|
|
void fetch_set_cookie(struct fetch *fetch, const char *data)
|
2008-01-30 22:56:41 +03:00
|
|
|
{
|
|
|
|
assert(fetch && data);
|
|
|
|
|
2010-04-08 15:47:05 +04:00
|
|
|
/* If the fetch is unverifiable err on the side of caution and
|
|
|
|
* do not set the cookie */
|
2008-01-30 22:56:41 +03:00
|
|
|
|
2010-04-08 15:47:05 +04:00
|
|
|
if (fetch->verifiable) {
|
2008-01-30 22:56:41 +03:00
|
|
|
/* If the transaction's verifiable, we don't require
|
|
|
|
* that the request uri and the parent domain match,
|
2010-04-08 15:47:05 +04:00
|
|
|
* so don't pass in any referer/parent in this case. */
|
2012-10-08 23:32:57 +04:00
|
|
|
urldb_set_cookie(data, fetch->url, NULL);
|
2010-04-10 16:24:25 +04:00
|
|
|
} else if (fetch->referer != NULL) {
|
|
|
|
/* Permit the cookie to be set if the fetch is unverifiable
|
|
|
|
* and the fetch URI domain matches the referer. */
|
|
|
|
/** \todo Long-term, this needs to be replaced with a
|
|
|
|
* comparison against the origin fetch URI. In the case
|
|
|
|
* where a nested object requests a fetch, the origin URI
|
|
|
|
* is the nested object's parent URI, whereas the referer
|
|
|
|
* for the fetch will be the nested object's URI. */
|
2012-10-08 23:32:57 +04:00
|
|
|
urldb_set_cookie(data, fetch->url, fetch->referer);
|
2008-01-30 22:56:41 +03:00
|
|
|
}
|
|
|
|
}
|