Add new fetch callback FETCH_HEADER for headers and move as much header parsing as possible from fetch_curl.c to fetchcache.c. This simplifies fetch_curl.c and will make it possible to store response headers in future.
svn path=/trunk/netsurf/; revision=4226
This commit is contained in:
parent
a76164ab51
commit
ae992eab4d
|
@ -30,6 +30,7 @@
|
||||||
typedef enum {
|
typedef enum {
|
||||||
FETCH_TYPE,
|
FETCH_TYPE,
|
||||||
FETCH_PROGRESS,
|
FETCH_PROGRESS,
|
||||||
|
FETCH_HEADER,
|
||||||
FETCH_DATA,
|
FETCH_DATA,
|
||||||
FETCH_FINISHED,
|
FETCH_FINISHED,
|
||||||
FETCH_ERROR,
|
FETCH_ERROR,
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <regex.h>
|
#include <regex.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
#include <curl/curl.h> /* for curl_getdate() */
|
||||||
#include "utils/config.h"
|
#include "utils/config.h"
|
||||||
#include "content/content.h"
|
#include "content/content.h"
|
||||||
#include "content/fetchcache.h"
|
#include "content/fetchcache.h"
|
||||||
|
@ -47,6 +48,8 @@ static regex_t re_content_type;
|
||||||
static void fetchcache_callback(fetch_msg msg, void *p, const void *data,
|
static void fetchcache_callback(fetch_msg msg, void *p, const void *data,
|
||||||
unsigned long size);
|
unsigned long size);
|
||||||
static char *fetchcache_parse_type(const char *s, char **params[]);
|
static char *fetchcache_parse_type(const char *s, char **params[]);
|
||||||
|
static void fetchcache_parse_header(struct content *c, const char *data,
|
||||||
|
size_t size);
|
||||||
static void fetchcache_error_page(struct content *c, const char *error);
|
static void fetchcache_error_page(struct content *c, const char *error);
|
||||||
static void fetchcache_cache_update(struct content *c,
|
static void fetchcache_cache_update(struct content *c,
|
||||||
const struct cache_data *data);
|
const struct cache_data *data);
|
||||||
|
@ -445,6 +448,12 @@ void fetchcache_callback(fetch_msg msg, void *p, const void *data,
|
||||||
content_broadcast(c, CONTENT_MSG_STATUS, msg_data);
|
content_broadcast(c, CONTENT_MSG_STATUS, msg_data);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case FETCH_HEADER:
|
||||||
|
LOG(("FETCH_HEADER \"%.*s\"",
|
||||||
|
(int) size, (char *) data));
|
||||||
|
fetchcache_parse_header(c, data, size);
|
||||||
|
break;
|
||||||
|
|
||||||
case FETCH_DATA:
|
case FETCH_DATA:
|
||||||
if (!content_process_data(c, data, size)) {
|
if (!content_process_data(c, data, size)) {
|
||||||
fetch_abort(c->fetch);
|
fetch_abort(c->fetch);
|
||||||
|
@ -453,8 +462,6 @@ void fetchcache_callback(fetch_msg msg, void *p, const void *data,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case FETCH_FINISHED:
|
case FETCH_FINISHED:
|
||||||
fetchcache_cache_update(c,
|
|
||||||
(const struct cache_data *)data);
|
|
||||||
c->fetch = 0;
|
c->fetch = 0;
|
||||||
content_set_status(c, messages_get("Converting"),
|
content_set_status(c, messages_get("Converting"),
|
||||||
c->source_size);
|
c->source_size);
|
||||||
|
@ -604,6 +611,96 @@ no_memory:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse an HTTP response header.
|
||||||
|
*
|
||||||
|
* See RFC 2616 4.2.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void fetchcache_parse_header(struct content *c, const char *data,
|
||||||
|
size_t size)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
#define SKIP_ST(o) for (i = (o); i < size && (data[i] == ' ' || data[i] == '\t'); i++)
|
||||||
|
|
||||||
|
/* Set fetch response time if not already set */
|
||||||
|
if (c->cache_data->res_time == 0)
|
||||||
|
c->cache_data->res_time = time(0);
|
||||||
|
|
||||||
|
if (5 < size && strncasecmp(data, "Date:", 5) == 0) {
|
||||||
|
/* extract Date header */
|
||||||
|
SKIP_ST(5);
|
||||||
|
if (i < size)
|
||||||
|
c->cache_data->date = curl_getdate(&data[i], NULL);
|
||||||
|
} else if (4 < size && strncasecmp(data, "Age:", 4) == 0) {
|
||||||
|
/* extract Age header */
|
||||||
|
SKIP_ST(4);
|
||||||
|
if (i < size && '0' <= data[i] && data[i] <= '9')
|
||||||
|
c->cache_data->age = atoi(data + i);
|
||||||
|
} else if (8 < size && strncasecmp(data, "Expires:", 8) == 0) {
|
||||||
|
/* extract Expires header */
|
||||||
|
SKIP_ST(8);
|
||||||
|
if (i < size)
|
||||||
|
c->cache_data->expires = curl_getdate(&data[i], NULL);
|
||||||
|
} else if (14 < size && strncasecmp(data, "Cache-Control:", 14) == 0) {
|
||||||
|
/* extract and parse Cache-Control header */
|
||||||
|
size_t comma;
|
||||||
|
SKIP_ST(14);
|
||||||
|
|
||||||
|
while (i < size) {
|
||||||
|
for (comma = i; comma < size; comma++)
|
||||||
|
if (data[comma] == ',')
|
||||||
|
break;
|
||||||
|
|
||||||
|
SKIP_ST(i);
|
||||||
|
|
||||||
|
if (8 < comma - i && (strncasecmp(data + i, "no-cache", 8) == 0 || strncasecmp(data + i, "no-store", 8) == 0))
|
||||||
|
/* When we get a disk cache we should
|
||||||
|
* distinguish between these two */
|
||||||
|
c->cache_data->no_cache = true;
|
||||||
|
else if (7 < comma - i && strncasecmp(data + i, "max-age", 7) == 0) {
|
||||||
|
for (; i < comma; i++)
|
||||||
|
if (data[i] == '=')
|
||||||
|
break;
|
||||||
|
SKIP_ST(i+1);
|
||||||
|
if (i < comma)
|
||||||
|
c->cache_data->max_age =
|
||||||
|
atoi(data + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
i = comma + 1;
|
||||||
|
}
|
||||||
|
} else if (5 < size && strncasecmp(data, "ETag:", 5) == 0) {
|
||||||
|
/* extract ETag header */
|
||||||
|
free(c->cache_data->etag);
|
||||||
|
c->cache_data->etag = talloc_array(c, char, size);
|
||||||
|
if (!c->cache_data->etag) {
|
||||||
|
LOG(("malloc failed"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
SKIP_ST(5);
|
||||||
|
strncpy(c->cache_data->etag, data + i, size - i);
|
||||||
|
c->cache_data->etag[size - i] = '\0';
|
||||||
|
for (i = size - i - 1; i >= 0 &&
|
||||||
|
(c->cache_data->etag[i] == ' ' ||
|
||||||
|
c->cache_data->etag[i] == '\t' ||
|
||||||
|
c->cache_data->etag[i] == '\r' ||
|
||||||
|
c->cache_data->etag[i] == '\n'); --i)
|
||||||
|
c->cache_data->etag[i] = '\0';
|
||||||
|
} else if (14 < size && strncasecmp(data, "Last-Modified:", 14) == 0) {
|
||||||
|
/* extract Last-Modified header */
|
||||||
|
SKIP_ST(14);
|
||||||
|
if (i < size) {
|
||||||
|
c->cache_data->last_modified =
|
||||||
|
curl_getdate(&data[i], NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate an error page.
|
* Generate an error page.
|
||||||
*
|
*
|
||||||
|
@ -682,7 +779,7 @@ void fetchcache_notmodified(struct content *c, const void *data)
|
||||||
struct content *fb;
|
struct content *fb;
|
||||||
union content_msg_data msg_data;
|
union content_msg_data msg_data;
|
||||||
|
|
||||||
assert(c && data);
|
assert(c);
|
||||||
assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);
|
assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);
|
||||||
|
|
||||||
/* Look for cached content */
|
/* Look for cached content */
|
||||||
|
@ -748,8 +845,7 @@ void fetchcache_notmodified(struct content *c, const void *data)
|
||||||
c->status = CONTENT_STATUS_ERROR;
|
c->status = CONTENT_STATUS_ERROR;
|
||||||
|
|
||||||
/* and update fallback's cache control data */
|
/* and update fallback's cache control data */
|
||||||
fetchcache_cache_update(fb,
|
fetchcache_cache_update(fb, c->cache_data);
|
||||||
(const struct cache_data *)data);
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* No cached content, so unconditionally refetch */
|
/* No cached content, so unconditionally refetch */
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
* This implementation uses libcurl's 'multi' interface.
|
* This implementation uses libcurl's 'multi' interface.
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* The CURL handles are cached in the cache_ring. There are at most
|
* The CURL handles are cached in the curl_handle_ring. There are at most
|
||||||
* ::option_max_cached_fetch_handles in this ring.
|
* ::option_max_cached_fetch_handles in this ring.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -81,7 +81,6 @@ struct curl_fetch_info {
|
||||||
char *post_urlenc; /**< Url encoded POST string, or 0. */
|
char *post_urlenc; /**< Url encoded POST string, or 0. */
|
||||||
unsigned long http_code; /**< HTTP result code from cURL. */
|
unsigned long http_code; /**< HTTP result code from cURL. */
|
||||||
struct curl_httppost *post_multipart; /**< Multipart post data, or 0. */
|
struct curl_httppost *post_multipart; /**< Multipart post data, or 0. */
|
||||||
struct cache_data cachedata; /**< Cache control data */
|
|
||||||
time_t last_modified; /**< If-Modified-Since time */
|
time_t last_modified; /**< If-Modified-Since time */
|
||||||
time_t file_etag; /**< ETag for local objects */
|
time_t file_etag; /**< ETag for local objects */
|
||||||
#ifdef WITH_SSL
|
#ifdef WITH_SSL
|
||||||
|
@ -340,15 +339,6 @@ void * fetch_curl_setup(struct fetch *parent_fetch, const char *url,
|
||||||
fetch->post_urlenc = strdup(post_urlenc);
|
fetch->post_urlenc = strdup(post_urlenc);
|
||||||
else if (post_multipart)
|
else if (post_multipart)
|
||||||
fetch->post_multipart = fetch_curl_post_convert(post_multipart);
|
fetch->post_multipart = fetch_curl_post_convert(post_multipart);
|
||||||
fetch->cachedata.req_time = time(0);
|
|
||||||
fetch->cachedata.res_time = 0;
|
|
||||||
fetch->cachedata.date = 0;
|
|
||||||
fetch->cachedata.expires = 0;
|
|
||||||
fetch->cachedata.age = INVALID_AGE;
|
|
||||||
fetch->cachedata.max_age = INVALID_AGE;
|
|
||||||
fetch->cachedata.no_cache = false;
|
|
||||||
fetch->cachedata.etag = 0;
|
|
||||||
fetch->cachedata.last_modified = 0;
|
|
||||||
fetch->last_modified = 0;
|
fetch->last_modified = 0;
|
||||||
fetch->file_etag = 0;
|
fetch->file_etag = 0;
|
||||||
fetch->http_code = 0;
|
fetch->http_code = 0;
|
||||||
|
@ -700,7 +690,6 @@ void fetch_curl_free(void *vf)
|
||||||
free(f->post_urlenc);
|
free(f->post_urlenc);
|
||||||
if (f->post_multipart)
|
if (f->post_multipart)
|
||||||
curl_formfree(f->post_multipart);
|
curl_formfree(f->post_multipart);
|
||||||
free(f->cachedata.etag);
|
|
||||||
|
|
||||||
#ifdef WITH_SSL
|
#ifdef WITH_SSL
|
||||||
for (i = 0; i < MAX_CERTS && f->cert_data[i].cert; i++) {
|
for (i = 0; i < MAX_CERTS && f->cert_data[i].cert; i++) {
|
||||||
|
@ -764,7 +753,6 @@ void fetch_curl_done(CURL *curl_handle, CURLcode result)
|
||||||
bool abort;
|
bool abort;
|
||||||
struct curl_fetch_info *f;
|
struct curl_fetch_info *f;
|
||||||
CURLcode code;
|
CURLcode code;
|
||||||
struct cache_data cachedata;
|
|
||||||
#ifdef WITH_SSL
|
#ifdef WITH_SSL
|
||||||
struct cert_info certs[MAX_CERTS];
|
struct cert_info certs[MAX_CERTS];
|
||||||
memset(certs, 0, sizeof(certs));
|
memset(certs, 0, sizeof(certs));
|
||||||
|
@ -810,19 +798,10 @@ void fetch_curl_done(CURL *curl_handle, CURLcode result)
|
||||||
|
|
||||||
fetch_curl_stop(f);
|
fetch_curl_stop(f);
|
||||||
|
|
||||||
/* If finished, acquire cache info to pass to callback */
|
|
||||||
if (finished) {
|
|
||||||
memcpy(&cachedata, &f->cachedata, sizeof(struct cache_data));
|
|
||||||
f->cachedata.etag = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (abort)
|
if (abort)
|
||||||
; /* fetch was aborted: no callback */
|
; /* fetch was aborted: no callback */
|
||||||
else if (finished) {
|
else if (finished)
|
||||||
fetch_send_callback(FETCH_FINISHED, f->fetch_handle,
|
fetch_send_callback(FETCH_FINISHED, f->fetch_handle, 0, 0);
|
||||||
&cachedata, 0);
|
|
||||||
free(cachedata.etag);
|
|
||||||
}
|
|
||||||
#ifdef WITH_SSL
|
#ifdef WITH_SSL
|
||||||
else if (cert) {
|
else if (cert) {
|
||||||
int i;
|
int i;
|
||||||
|
@ -1009,6 +988,8 @@ size_t fetch_curl_data(void *data, size_t size, size_t nmemb,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Callback function for headers.
|
* Callback function for headers.
|
||||||
|
*
|
||||||
|
* See RFC 2616 4.2.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
size_t fetch_curl_header(char *data, size_t size, size_t nmemb,
|
size_t fetch_curl_header(char *data, size_t size, size_t nmemb,
|
||||||
|
@ -1017,11 +998,9 @@ size_t fetch_curl_header(char *data, size_t size, size_t nmemb,
|
||||||
int i;
|
int i;
|
||||||
size *= nmemb;
|
size *= nmemb;
|
||||||
|
|
||||||
#define SKIP_ST(o) for (i = (o); i < (int) size && (data[i] == ' ' || data[i] == '\t'); i++)
|
fetch_send_callback(FETCH_HEADER, f->fetch_handle, data, size);
|
||||||
|
|
||||||
/* Set fetch response time if not already set */
|
#define SKIP_ST(o) for (i = (o); i < (int) size && (data[i] == ' ' || data[i] == '\t'); i++)
|
||||||
if (f->cachedata.res_time == 0)
|
|
||||||
f->cachedata.res_time = time(0);
|
|
||||||
|
|
||||||
if (12 < size && strncasecmp(data, "Location:", 9) == 0) {
|
if (12 < size && strncasecmp(data, "Location:", 9) == 0) {
|
||||||
/* extract Location header */
|
/* extract Location header */
|
||||||
|
@ -1075,73 +1054,6 @@ size_t fetch_curl_header(char *data, size_t size, size_t nmemb,
|
||||||
f->realm[i] = '\0';
|
f->realm[i] = '\0';
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
} else if (5 < size && strncasecmp(data, "Date:", 5) == 0) {
|
|
||||||
/* extract Date header */
|
|
||||||
SKIP_ST(5);
|
|
||||||
if (i < (int) size)
|
|
||||||
f->cachedata.date = curl_getdate(&data[i], NULL);
|
|
||||||
} else if (4 < size && strncasecmp(data, "Age:", 4) == 0) {
|
|
||||||
/* extract Age header */
|
|
||||||
SKIP_ST(4);
|
|
||||||
if (i < (int) size && '0' <= data[i] && data[i] <= '9')
|
|
||||||
f->cachedata.age = atoi(data + i);
|
|
||||||
} else if (8 < size && strncasecmp(data, "Expires:", 8) == 0) {
|
|
||||||
/* extract Expires header */
|
|
||||||
SKIP_ST(8);
|
|
||||||
if (i < (int) size)
|
|
||||||
f->cachedata.expires = curl_getdate(&data[i], NULL);
|
|
||||||
} else if (14 < size && strncasecmp(data, "Cache-Control:", 14) == 0) {
|
|
||||||
/* extract and parse Cache-Control header */
|
|
||||||
int comma;
|
|
||||||
SKIP_ST(14);
|
|
||||||
|
|
||||||
while (i < (int) size) {
|
|
||||||
for (comma = i; comma < (int) size; comma++)
|
|
||||||
if (data[comma] == ',')
|
|
||||||
break;
|
|
||||||
|
|
||||||
SKIP_ST(i);
|
|
||||||
|
|
||||||
if (8 < comma - i && (strncasecmp(data + i, "no-cache", 8) == 0 || strncasecmp(data + i, "no-store", 8) == 0))
|
|
||||||
/* When we get a disk cache we should
|
|
||||||
* distinguish between these two */
|
|
||||||
f->cachedata.no_cache = true;
|
|
||||||
else if (7 < comma - i && strncasecmp(data + i, "max-age", 7) == 0) {
|
|
||||||
for (; i < comma; i++)
|
|
||||||
if (data[i] == '=')
|
|
||||||
break;
|
|
||||||
SKIP_ST(i+1);
|
|
||||||
if (i < comma)
|
|
||||||
f->cachedata.max_age =
|
|
||||||
atoi(data + i);
|
|
||||||
}
|
|
||||||
|
|
||||||
i = comma + 1;
|
|
||||||
}
|
|
||||||
} else if (5 < size && strncasecmp(data, "ETag:", 5) == 0) {
|
|
||||||
/* extract ETag header */
|
|
||||||
free(f->cachedata.etag);
|
|
||||||
f->cachedata.etag = malloc(size);
|
|
||||||
if (!f->cachedata.etag) {
|
|
||||||
LOG(("malloc failed"));
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
SKIP_ST(5);
|
|
||||||
strncpy(f->cachedata.etag, data + i, size - i);
|
|
||||||
f->cachedata.etag[size - i] = '\0';
|
|
||||||
for (i = size - i - 1; i >= 0 &&
|
|
||||||
(f->cachedata.etag[i] == ' ' ||
|
|
||||||
f->cachedata.etag[i] == '\t' ||
|
|
||||||
f->cachedata.etag[i] == '\r' ||
|
|
||||||
f->cachedata.etag[i] == '\n'); --i)
|
|
||||||
f->cachedata.etag[i] = '\0';
|
|
||||||
} else if (14 < size && strncasecmp(data, "Last-Modified:", 14) == 0) {
|
|
||||||
/* extract Last-Modified header */
|
|
||||||
SKIP_ST(14);
|
|
||||||
if (i < (int) size) {
|
|
||||||
f->cachedata.last_modified =
|
|
||||||
curl_getdate(&data[i], NULL);
|
|
||||||
}
|
|
||||||
} else if (11 < size && strncasecmp(data, "Set-Cookie:", 11) == 0) {
|
} else if (11 < size && strncasecmp(data, "Set-Cookie:", 11) == 0) {
|
||||||
/* extract Set-Cookie header */
|
/* extract Set-Cookie header */
|
||||||
SKIP_ST(11);
|
SKIP_ST(11);
|
||||||
|
@ -1170,10 +1082,6 @@ bool fetch_curl_process_headers(struct curl_fetch_info *f)
|
||||||
|
|
||||||
f->had_headers = true;
|
f->had_headers = true;
|
||||||
|
|
||||||
/* Set fetch response time if not already set */
|
|
||||||
if (f->cachedata.res_time == 0)
|
|
||||||
f->cachedata.res_time = time(0);
|
|
||||||
|
|
||||||
if (!f->http_code)
|
if (!f->http_code)
|
||||||
{
|
{
|
||||||
code = curl_easy_getinfo(f->curl_handle, CURLINFO_HTTP_CODE,
|
code = curl_easy_getinfo(f->curl_handle, CURLINFO_HTTP_CODE,
|
||||||
|
@ -1186,8 +1094,7 @@ bool fetch_curl_process_headers(struct curl_fetch_info *f)
|
||||||
|
|
||||||
if (http_code == 304 && !f->post_urlenc && !f->post_multipart) {
|
if (http_code == 304 && !f->post_urlenc && !f->post_multipart) {
|
||||||
/* Not Modified && GET request */
|
/* Not Modified && GET request */
|
||||||
fetch_send_callback(FETCH_NOTMODIFIED, f->fetch_handle,
|
fetch_send_callback(FETCH_NOTMODIFIED, f->fetch_handle, 0, 0);
|
||||||
(const char *)&f->cachedata, 0);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1225,11 +1132,11 @@ bool fetch_curl_process_headers(struct curl_fetch_info *f)
|
||||||
if (url_path && stat(url_path, &s) == 0) {
|
if (url_path && stat(url_path, &s) == 0) {
|
||||||
/* file: URL and file exists */
|
/* file: URL and file exists */
|
||||||
/* create etag */
|
/* create etag */
|
||||||
free(f->cachedata.etag);
|
/*free(f->cachedata.etag);
|
||||||
f->cachedata.etag = malloc(13);
|
f->cachedata.etag = malloc(13);
|
||||||
if (f->cachedata.etag)
|
if (f->cachedata.etag)
|
||||||
sprintf(f->cachedata.etag,
|
sprintf(f->cachedata.etag,
|
||||||
"\"%10d\"", (int)s.st_mtime);
|
"\"%10d\"", (int)s.st_mtime);*/
|
||||||
|
|
||||||
/* don't set last modified time so as to ensure that local
|
/* don't set last modified time so as to ensure that local
|
||||||
* files are revalidated at all times. */
|
* files are revalidated at all times. */
|
||||||
|
@ -1239,7 +1146,7 @@ bool fetch_curl_process_headers(struct curl_fetch_info *f)
|
||||||
f->last_modified > s.st_mtime &&
|
f->last_modified > s.st_mtime &&
|
||||||
f->file_etag == s.st_mtime) {
|
f->file_etag == s.st_mtime) {
|
||||||
fetch_send_callback(FETCH_NOTMODIFIED, f->fetch_handle,
|
fetch_send_callback(FETCH_NOTMODIFIED, f->fetch_handle,
|
||||||
(const char *)&f->cachedata, 0);
|
0, 0);
|
||||||
curl_free(url_path);
|
curl_free(url_path);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue