2008-03-05 17:21:29 +03:00
|
|
|
/*
|
|
|
|
* Copyright 2008 Rob Kendrick <rjek@netsurf-browser.org>
|
|
|
|
*
|
|
|
|
* This file is part of NetSurf.
|
|
|
|
*
|
|
|
|
* NetSurf is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; version 2 of the License.
|
|
|
|
*
|
|
|
|
* NetSurf is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* data: URL handling. See http://tools.ietf.org/html/rfc2397 */
|
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <stdbool.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <strings.h>
|
|
|
|
#include <time.h>
|
2010-01-29 14:32:53 +03:00
|
|
|
|
2008-03-05 17:21:29 +03:00
|
|
|
#include <curl/curl.h> /* for URL unescaping functions */
|
2010-01-29 14:32:53 +03:00
|
|
|
|
2011-09-27 01:07:19 +04:00
|
|
|
#include <libwapcaplet/libwapcaplet.h>
|
|
|
|
|
2008-03-05 17:21:29 +03:00
|
|
|
#include "utils/config.h"
|
|
|
|
#include "content/fetch.h"
|
2011-02-16 02:18:10 +03:00
|
|
|
#include "content/fetchers/data.h"
|
2008-03-05 17:21:29 +03:00
|
|
|
#include "content/urldb.h"
|
|
|
|
#include "desktop/netsurf.h"
|
2014-01-25 00:08:41 +04:00
|
|
|
#include "utils/corestrings.h"
|
2013-05-26 01:46:27 +04:00
|
|
|
#include "utils/nsoption.h"
|
2008-03-05 17:21:29 +03:00
|
|
|
#include "utils/log.h"
|
|
|
|
#include "utils/messages.h"
|
|
|
|
#include "utils/utils.h"
|
|
|
|
#include "utils/ring.h"
|
|
|
|
#include "utils/base64.h"
|
|
|
|
|
|
|
|
/** Per-fetch state for one data: URL fetch. */
struct fetch_data_context {
	struct fetch *parent_fetch;	/**< Fetch that callbacks are sent to */
	char *url;			/**< Heap copy of the data: URL text */
	char *mimetype;			/**< MIME type parsed from the URL */
	char *data;			/**< Decoded payload bytes */
	size_t datalen;			/**< Length of data, in bytes */
	bool base64;			/**< Payload was flagged ";base64" */

	bool aborted;			/**< Set once the fetch is aborted */
	bool locked;			/**< Set while a callback is running */

	/* Links used by the RING_INSERT / RING_REMOVE macros */
	struct fetch_data_context *r_next;
	struct fetch_data_context *r_prev;
};
|
|
|
|
|
|
|
|
/* Ring of fetch contexts awaiting processing; NULL when there are none. */
static struct fetch_data_context *ring = NULL;
|
|
|
|
|
2008-03-06 01:57:46 +03:00
|
|
|
/* libcurl easy handle, passed to curl_easy_unescape() when decoding URLs. */
static CURL *curl;
|
|
|
|
|
2011-09-27 01:07:19 +04:00
|
|
|
/**
 * Initialise the data: fetcher by creating the shared libcurl easy handle.
 *
 * \param scheme  scheme the fetcher is being initialised for (logged only)
 * \return true if the curl handle was created, false otherwise
 */
static bool fetch_data_initialise(lwc_string *scheme)
{
	LOG(("fetch_data_initialise called for %s", lwc_string_data(scheme)));

	curl = curl_easy_init();

	return curl != NULL;
}
|
|
|
|
|
2011-09-27 01:07:19 +04:00
|
|
|
/**
 * Finalise the data: fetcher, releasing the shared libcurl easy handle.
 *
 * \param scheme  scheme the fetcher is being finalised for (logged only)
 */
static void fetch_data_finalise(lwc_string *scheme)
{
	LOG(("fetch_data_finalise called for %s", lwc_string_data(scheme)));

	curl_easy_cleanup(curl);
	/* Do not leave a dangling handle behind in the file-scope variable. */
	curl = NULL;
}
|
|
|
|
|
2011-11-27 18:14:36 +04:00
|
|
|
/**
 * Report whether this fetcher can handle the given URL.
 *
 * \param url  URL being queried (not inspected)
 * \return always true
 */
static bool fetch_data_can_fetch(const nsurl *url)
{
	(void) url;

	return true;
}
|
|
|
|
|
2011-09-29 19:31:54 +04:00
|
|
|
/**
 * Create the fetch context for a data: URL and add it to the ring.
 *
 * Only parent_fetch and url are used; the remaining parameters are ignored.
 *
 * \param parent_fetch    fetch that callbacks will be sent to
 * \param url             URL to fetch; its text is copied into the context
 * \param only_2xx        ignored
 * \param downgrade_tls   ignored
 * \param post_urlenc     ignored
 * \param post_multipart  ignored
 * \param headers         ignored
 * \return the new context, or NULL if memory could not be allocated
 */
static void *fetch_data_setup(struct fetch *parent_fetch, nsurl *url,
		bool only_2xx, bool downgrade_tls, const char *post_urlenc,
		const struct fetch_multipart_data *post_multipart,
		const char **headers)
{
	struct fetch_data_context *ctx;
	char *url_copy;
	size_t url_size;

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL)
		return NULL;

	/* TODO: keep as nsurl to avoid copy */
	url_size = nsurl_length(url) + 1; /* include the terminator */
	url_copy = malloc(url_size);
	if (url_copy == NULL) {
		free(ctx);
		return NULL;
	}
	memcpy(url_copy, nsurl_access(url), url_size);

	ctx->parent_fetch = parent_fetch;
	ctx->url = url_copy;

	RING_INSERT(ring, ctx);

	return ctx;
}
|
|
|
|
|
|
|
|
/**
 * Start a fetch. Nothing is done here; the work happens in the poll handler.
 *
 * \param ctx  fetch context (unused)
 * \return always true
 */
static bool fetch_data_start(void *ctx)
{
	(void) ctx;

	return true;
}
|
|
|
|
|
|
|
|
static void fetch_data_free(void *ctx)
|
|
|
|
{
|
|
|
|
struct fetch_data_context *c = ctx;
|
|
|
|
|
|
|
|
free(c->url);
|
|
|
|
free(c->data);
|
|
|
|
free(c->mimetype);
|
|
|
|
RING_REMOVE(ring, c);
|
|
|
|
free(ctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void fetch_data_abort(void *ctx)
|
2008-03-05 17:21:29 +03:00
|
|
|
{
|
|
|
|
struct fetch_data_context *c = ctx;
|
2008-03-06 14:28:00 +03:00
|
|
|
|
|
|
|
/* To avoid the poll loop having to deal with the fetch context
|
|
|
|
* disappearing from under it, we simply flag the abort here.
|
|
|
|
* The poll loop itself will perform the appropriate cleanup.
|
|
|
|
*/
|
|
|
|
c->aborted = true;
|
|
|
|
}
|
|
|
|
|
2011-11-09 01:51:42 +04:00
|
|
|
static void fetch_data_send_callback(const fetch_msg *msg,
|
|
|
|
struct fetch_data_context *c)
|
2008-03-06 14:28:00 +03:00
|
|
|
{
|
|
|
|
c->locked = true;
|
2011-11-09 01:51:42 +04:00
|
|
|
fetch_send_callback(msg, c->parent_fetch);
|
2008-03-06 14:28:00 +03:00
|
|
|
c->locked = false;
|
2008-03-06 01:57:46 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static bool fetch_data_process(struct fetch_data_context *c)
|
|
|
|
{
|
2011-11-09 01:51:42 +04:00
|
|
|
fetch_msg msg;
|
2008-03-05 17:21:29 +03:00
|
|
|
char *params;
|
|
|
|
char *comma;
|
2008-03-06 01:57:46 +03:00
|
|
|
char *unescaped;
|
2008-10-13 20:39:54 +04:00
|
|
|
int templen;
|
2008-03-05 17:21:29 +03:00
|
|
|
|
|
|
|
/* format of a data: URL is:
|
|
|
|
* data:[<mimetype>][;base64],<data>
|
|
|
|
* The mimetype is optional. If it is missing, the , before the
|
|
|
|
* data must still be there.
|
|
|
|
*/
|
|
|
|
|
2012-07-03 17:32:13 +04:00
|
|
|
LOG(("url: %.140s", c->url));
|
2008-03-06 02:09:36 +03:00
|
|
|
|
|
|
|
if (strlen(c->url) < 6) {
|
|
|
|
/* 6 is the minimum possible length (data:,) */
|
2011-11-09 01:51:42 +04:00
|
|
|
msg.type = FETCH_ERROR;
|
|
|
|
msg.data.error = "Malformed data: URL";
|
|
|
|
fetch_data_send_callback(&msg, c);
|
2008-03-06 02:09:36 +03:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2008-03-05 17:21:29 +03:00
|
|
|
/* skip the data: part */
|
2008-05-25 19:51:30 +04:00
|
|
|
params = c->url + SLEN("data:");
|
2008-03-05 17:21:29 +03:00
|
|
|
|
|
|
|
/* find the comma */
|
|
|
|
if ( (comma = strchr(params, ',')) == NULL) {
|
2011-11-09 01:51:42 +04:00
|
|
|
msg.type = FETCH_ERROR;
|
|
|
|
msg.data.error = "Malformed data: URL";
|
|
|
|
fetch_data_send_callback(&msg, c);
|
2008-03-05 17:21:29 +03:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (params[0] == ',') {
|
|
|
|
/* there is no mimetype here, assume text/plain */
|
|
|
|
c->mimetype = strdup("text/plain;charset=US-ASCII");
|
|
|
|
} else {
|
|
|
|
/* make a copy of everything between data: and the comma */
|
|
|
|
c->mimetype = strndup(params, comma - params);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (c->mimetype == NULL) {
|
2011-11-09 01:51:42 +04:00
|
|
|
msg.type = FETCH_ERROR;
|
|
|
|
msg.data.error =
|
|
|
|
"Unable to allocate memory for mimetype in data: URL";
|
|
|
|
fetch_data_send_callback(&msg, c);
|
2008-03-05 17:21:29 +03:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strcmp(c->mimetype + strlen(c->mimetype) - 7, ";base64") == 0) {
|
|
|
|
c->base64 = true;
|
|
|
|
c->mimetype[strlen(c->mimetype) - 7] = '\0';
|
|
|
|
} else {
|
|
|
|
c->base64 = false;
|
|
|
|
}
|
|
|
|
|
2008-03-06 01:57:46 +03:00
|
|
|
/* we URL unescape the data first, just incase some insane page
|
|
|
|
* decides to nest URL and base64 encoding. Like, say, Acid2.
|
|
|
|
*/
|
2008-10-13 20:39:54 +04:00
|
|
|
templen = c->datalen;
|
|
|
|
unescaped = curl_easy_unescape(curl, comma + 1, 0, &templen);
|
|
|
|
c->datalen = templen;
|
|
|
|
if (unescaped == NULL) {
|
2011-11-09 01:51:42 +04:00
|
|
|
msg.type = FETCH_ERROR;
|
|
|
|
msg.data.error = "Unable to URL decode data: URL";
|
|
|
|
fetch_data_send_callback(&msg, c);
|
2008-03-06 01:57:46 +03:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2008-03-05 17:21:29 +03:00
|
|
|
if (c->base64) {
|
2008-03-06 01:57:46 +03:00
|
|
|
c->data = malloc(c->datalen); /* safe: always gets smaller */
|
|
|
|
if (base64_decode(unescaped, c->datalen, c->data,
|
2008-03-06 14:28:00 +03:00
|
|
|
&(c->datalen)) == false) {
|
2011-11-09 01:51:42 +04:00
|
|
|
msg.type = FETCH_ERROR;
|
|
|
|
msg.data.error = "Unable to Base64 decode data: URL";
|
|
|
|
fetch_data_send_callback(&msg, c);
|
2008-03-06 01:57:46 +03:00
|
|
|
curl_free(unescaped);
|
2008-03-05 17:21:29 +03:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
} else {
|
2008-03-06 01:57:46 +03:00
|
|
|
c->data = malloc(c->datalen);
|
2008-03-05 17:21:29 +03:00
|
|
|
if (c->data == NULL) {
|
2011-11-09 01:51:42 +04:00
|
|
|
msg.type = FETCH_ERROR;
|
|
|
|
msg.data.error =
|
|
|
|
"Unable to allocate memory for data: URL";
|
|
|
|
fetch_data_send_callback(&msg, c);
|
2008-03-06 01:57:46 +03:00
|
|
|
curl_free(unescaped);
|
2008-03-05 17:21:29 +03:00
|
|
|
return false;
|
|
|
|
}
|
2008-03-06 01:57:46 +03:00
|
|
|
memcpy(c->data, unescaped, c->datalen);
|
2008-03-05 17:21:29 +03:00
|
|
|
}
|
|
|
|
|
2008-03-06 01:57:46 +03:00
|
|
|
curl_free(unescaped);
|
|
|
|
|
2008-03-05 17:21:29 +03:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2011-09-27 01:07:19 +04:00
|
|
|
/**
 * Poll handler for the data: fetcher: service every context in the ring.
 *
 * Each unlocked context that has not been aborted is decoded and its
 * Content-Type header, Content-Length header, data and completion are
 * reported to the parent fetch, stopping early if a callback aborts the
 * fetch. Every unlocked context, whether it succeeded or not, then has
 * its parent fetch removed from the queues and freed.
 *
 * \param scheme  scheme being polled (not used here)
 */
static void fetch_data_poll(lwc_string *scheme)
{
	fetch_msg msg;
	struct fetch_data_context *ctx, *following;

	if (ring == NULL)
		return;

	/* Iterate over ring, processing each pending fetch */
	ctx = ring;
	do {
		/* Skip locked fetches: a callback for them is in progress,
		 * which means this function has been re-entered (the
		 * callback's recipient caused fetch_poll() to run again).
		 */
		if (ctx->locked) {
			following = ctx->r_next;
			continue;
		}

		/* Only process non-aborted fetches */
		if (!ctx->aborted && fetch_data_process(ctx)) {
			char header[64];

			fetch_set_http_code(ctx->parent_fetch, 200);
			LOG(("setting data: MIME type to %s, length to %zd",
					ctx->mimetype, ctx->datalen));

			/* Any callback can result in the fetch being aborted.
			 * Therefore, we _must_ check for this after _every_
			 * call to fetch_data_send_callback().
			 */
			snprintf(header, sizeof header, "Content-Type: %s",
					ctx->mimetype);
			msg.type = FETCH_HEADER;
			msg.data.header_or_data.buf = (const uint8_t *) header;
			msg.data.header_or_data.len = strlen(header);
			fetch_data_send_callback(&msg, ctx);

			if (!ctx->aborted) {
				snprintf(header, sizeof header,
						"Content-Length: %"SSIZET_FMT,
						ctx->datalen);
				msg.type = FETCH_HEADER;
				msg.data.header_or_data.buf =
						(const uint8_t *) header;
				msg.data.header_or_data.len = strlen(header);
				fetch_data_send_callback(&msg, ctx);
			}

			if (!ctx->aborted) {
				msg.type = FETCH_DATA;
				msg.data.header_or_data.buf =
						(const uint8_t *) ctx->data;
				msg.data.header_or_data.len = ctx->datalen;
				fetch_data_send_callback(&msg, ctx);
			}

			if (!ctx->aborted) {
				msg.type = FETCH_FINISHED;
				fetch_data_send_callback(&msg, ctx);
			}
		} else {
			LOG(("Processing of %s failed!", ctx->url));

			/* Ensure that we're unlocked here. If we aren't,
			 * then fetch_data_process() is broken.
			 */
			assert(ctx->locked == false);
		}

		/* Work out the next entry as late as possible, because
		 * processing this one may have added entries to the ring.
		 */
		following = ctx->r_next;

		fetch_remove_from_queues(ctx->parent_fetch);
		fetch_free(ctx->parent_fetch);

		/* Advance to next ring entry, exiting if we've reached
		 * the start of the ring or the ring has become empty
		 */
	} while ( (ctx = following) != ring && ring != NULL);
}
|
|
|
|
|
|
|
|
void fetch_data_register(void)
|
|
|
|
{
|
2014-01-25 00:08:41 +04:00
|
|
|
lwc_string *scheme = lwc_string_ref(corestring_lwc_data);
|
2011-09-27 01:07:19 +04:00
|
|
|
|
|
|
|
fetch_add_fetcher(scheme,
|
2008-03-05 17:21:29 +03:00
|
|
|
fetch_data_initialise,
|
2011-11-27 18:14:36 +04:00
|
|
|
fetch_data_can_fetch,
|
2008-03-05 17:21:29 +03:00
|
|
|
fetch_data_setup,
|
|
|
|
fetch_data_start,
|
|
|
|
fetch_data_abort,
|
|
|
|
fetch_data_free,
|
|
|
|
fetch_data_poll,
|
|
|
|
fetch_data_finalise);
|
|
|
|
}
|