netsurf/content/fetchers/file.c
2016-05-30 11:29:39 +01:00

832 lines
19 KiB
C

/*
* Copyright 2010 Vincent Sanders <vince@netsurf-browser.org>
*
* This file is part of NetSurf.
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file
*
* file scheme URL handling. Based on the data fetcher by Rob Kendrick
*/
#include "utils/config.h"
#include <stdlib.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <stdbool.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <stdio.h>
#include <stdarg.h>
#ifdef HAVE_MMAP
#include <sys/mman.h>
#endif
#include <libwapcaplet/libwapcaplet.h>
#include "utils/nsurl.h"
#include "utils/dirent.h"
#include "utils/corestrings.h"
#include "utils/messages.h"
#include "utils/utils.h"
#include "utils/ring.h"
#include "utils/file.h"
#include "netsurf/fetch.h"
#include "desktop/gui_internal.h"
#include "content/dirlist.h"
#include "content/fetch.h"
#include "content/fetchers.h"
#include "content/fetchers/file.h"
/**
 * Maximum size, in bytes, of the read buffer.
 *
 * Used to cap the heap buffer allocated by the fread() based code path of
 * fetch_file_process_plain() (builds without HAVE_MMAP).
 */
#define FETCH_FILE_MAX_BUF_SIZE (1024 * 1024)
/**
 * Context for a single file fetch.
 *
 * Contexts are allocated zeroed by fetch_file_setup(), linked into the
 * module-wide ring and released by fetch_file_free().
 */
struct fetch_file_context {
struct fetch_file_context *r_next, *r_prev; /**< Ring linkage used by the RING_* macros */
struct fetch *fetchh; /**< Handle for this fetch */
bool aborted; /**< Flag indicating fetch has been aborted */
bool locked; /**< Flag set while a callback for this entry is in progress */
nsurl *url; /**< The full url the fetch refers to */
char *path; /**< The actual path to be used with open() */
time_t file_etag; /**< Request etag for file (previous st_mtime); 0 if none was supplied */
};
/** Ring of file fetch contexts; filled by fetch_file_setup() and walked by fetch_file_poll() */
static struct fetch_file_context *ring = NULL;
/**
 * Deliver a fetch message to the client with the context marked as busy.
 *
 * The locked flag is raised for the duration of the callback so that a
 * re-entrant call to fetch_file_poll() skips this context.
 *
 * \param msg  message to deliver
 * \param ctx  file fetch context the message belongs to
 * \return true if the fetch is flagged as aborted once the callback
 *         has returned, false otherwise
 */
static inline bool fetch_file_send_callback(const fetch_msg *msg,
		struct fetch_file_context *ctx)
{
	bool was_aborted;

	ctx->locked = true;
	fetch_send_callback(msg, ctx->fetchh);
	ctx->locked = false;

	was_aborted = ctx->aborted;

	return was_aborted;
}
/**
 * Format a header line and deliver it to the client as a FETCH_HEADER message.
 *
 * Short headers are formatted into a small stack buffer.  Headers that do
 * not fit (for example a Content-Type carrying a long MIME type) are
 * formatted into a temporary heap buffer instead of being silently
 * truncated.  If that allocation fails the truncated stack copy is sent.
 *
 * \param ctx  file fetch context
 * \param fmt  printf style format for the header line
 * \param ...  arguments for \a fmt
 * \return true if the fetch is flagged as aborted, false otherwise
 */
static bool fetch_file_send_header(struct fetch_file_context *ctx,
		const char *fmt, ...)
{
	fetch_msg msg;
	char stack_buf[64];
	char *heap_buf = NULL;
	const char *header = stack_buf;
	va_list ap;
	int len;

	va_start(ap, fmt);
	len = vsnprintf(stack_buf, sizeof stack_buf, fmt, ap);
	va_end(ap);

	if (len < 0) {
		/* Formatting failed: the buffer contents are unspecified so
		 * there is nothing sensible to send.
		 */
		return ctx->aborted;
	}

	if ((size_t) len >= sizeof stack_buf) {
		/* Output was truncated; retry with a buffer of the exact
		 * size reported by vsnprintf().
		 */
		heap_buf = malloc((size_t) len + 1);
		if (heap_buf != NULL) {
			va_start(ap, fmt);
			vsnprintf(heap_buf, (size_t) len + 1, fmt, ap);
			va_end(ap);
			header = heap_buf;
		}
		/* else: fall back to the truncated copy in stack_buf */
	}

	msg.type = FETCH_HEADER;
	msg.data.header_or_data.buf = (const uint8_t *) header;
	msg.data.header_or_data.len = strlen(header);
	fetch_file_send_callback(&msg, ctx);

	free(heap_buf);

	return ctx->aborted;
}
/**
 * Fetcher initialise callback for the file scheme.
 *
 * No set-up work is performed.
 *
 * \param scheme  scheme being initialised (unused)
 * \return always true
 */
static bool fetch_file_initialise(lwc_string *scheme)
{
	(void) scheme;

	return true;
}
/**
 * Fetcher finalise callback for the file scheme.
 *
 * No tear-down work is performed.
 *
 * \param scheme  scheme being finalised (unused)
 */
static void fetch_file_finalise(lwc_string *scheme)
{
	(void) scheme;
}
/**
 * Fetcher acceptable callback: report whether a URL can be fetched.
 *
 * \param url  URL being considered (unused)
 * \return always true; every URL offered to this fetcher is accepted
 */
static bool fetch_file_can_fetch(const nsurl *url)
{
	(void) url;

	return true;
}
/**
 * Callback to set up a file fetch context.
 *
 * Allocates a zeroed context, converts the URL to a filesystem path,
 * records any numeric If-None-Match value from the request headers as
 * the requested etag and inserts the context into the fetch ring.
 *
 * \param fetchh          fetch handle to associate with the context
 * \param url             URL to fetch; a reference is taken
 * \param only_2xx        unused by the file fetcher
 * \param downgrade_tls   unused by the file fetcher
 * \param post_urlenc     unused by the file fetcher
 * \param post_multipart  unused by the file fetcher
 * \param headers         NULL terminated array of request headers; a NULL
 *                        array is treated as empty
 * \return new context, or NULL on allocation or path conversion failure
 */
static void *
fetch_file_setup(struct fetch *fetchh,
		 nsurl *url,
		 bool only_2xx,
		 bool downgrade_tls,
		 const char *post_urlenc,
		 const struct fetch_multipart_data *post_multipart,
		 const char **headers)
{
	struct fetch_file_context *ctx;
	int i;
	nserror ret;

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL)
		return NULL;

	ret = guit->file->nsurl_to_path(url, &ctx->path);
	if (ret != NSERROR_OK) {
		free(ctx);
		return NULL;
	}

	ctx->url = nsurl_ref(url);

	/* Scan request headers looking for If-None-Match */
	for (i = 0; headers != NULL && headers[i] != NULL; i++) {
		const char *d;
		char *end;
		long long etag;

		if (strncasecmp(headers[i], "If-None-Match:",
				SLEN("If-None-Match:")) != 0)
			continue;

		/* If-None-Match: "12345678" */
		d = headers[i] + SLEN("If-None-Match:");

		/* Scan to first digit, if any */
		while (*d != '\0' && (*d < '0' || '9' < *d))
			d++;

		if (*d == '\0')
			continue;

		/* Convert to time_t.  strtoll() is used rather than atoi()
		 * so that modification times which do not fit in an int are
		 * neither truncated nor undefined; out of range values are
		 * ignored, leaving the etag unset.
		 */
		errno = 0;
		etag = strtoll(d, &end, 10);
		if (errno == 0 && end != d)
			ctx->file_etag = (time_t) etag;
	}

	ctx->fetchh = fetchh;

	RING_INSERT(ring, ctx);

	return ctx;
}
/**
 * Callback to free a file fetch.
 *
 * Drops the URL reference, releases the path string, unlinks the context
 * from the fetch ring and frees the context itself.
 *
 * \param ctx  file fetch context to destroy
 */
static void fetch_file_free(void *ctx)
{
	struct fetch_file_context *file_ctx = ctx;

	nsurl_unref(file_ctx->url);
	free(file_ctx->path);

	RING_REMOVE(ring, file_ctx);

	free(file_ctx);
}
/**
 * Callback to start a file fetch.
 *
 * Nothing is done here; the fetch itself is carried out from
 * fetch_file_poll().
 *
 * \param ctx  file fetch context (unused)
 * \return always true
 */
static bool fetch_file_start(void *ctx)
{
	(void) ctx;

	return true;
}
/**
 * Callback to abort a file fetch.
 *
 * The context is only flagged here; it is not freed.  This keeps the
 * poll loop from having the context vanish underneath it, and leaves
 * the actual cleanup to the poll loop.
 *
 * \param ctx  file fetch context to abort
 */
static void fetch_file_abort(void *ctx)
{
	struct fetch_file_context *file_ctx = ctx;

	file_ctx->aborted = true;
}
/**
 * Map an errno value onto the HTTP status code reported to the client.
 *
 * \param error_no  errno value from a failed filesystem call
 * \return 400 for ENAMETOOLONG, 403 for EACCES, 404 for ENOENT and
 *         500 for anything else
 */
static int fetch_file_errno_to_http_code(int error_no)
{
	if (error_no == ENAMETOOLONG)
		return 400;

	if (error_no == EACCES)
		return 403;

	if (error_no == ENOENT)
		return 404;

	return 500;
}
/**
 * Report a failed fetch to the client as a small HTML error page.
 *
 * Sets the HTTP status, sends a text/html Content-Type header, then the
 * page body and finally FETCH_FINISHED.  Processing stops as soon as the
 * fetch is found to have been aborted.
 *
 * \param ctx   file fetch context
 * \param code  HTTP status code to report
 */
static void fetch_file_process_error(struct fetch_file_context *ctx, int code)
{
	fetch_msg msg;
	char page[1024];
	char msg_key[8];
	const char *heading;

	/* content is going to return error code */
	fetch_set_http_code(ctx->fetchh, code);

	/* content type */
	if (fetch_file_send_header(ctx, "Content-Type: text/html"))
		return;

	/* look up the localised text for this status code */
	snprintf(msg_key, sizeof msg_key, "HTTP%03d", code);
	heading = messages_get(msg_key);

	snprintf(page, sizeof page, "<html><head><title>%s</title></head>"
			"<body><h1>%s</h1>"
			"<p>Error %d while fetching file %s</p></body></html>",
			heading, heading, code, nsurl_access(ctx->url));

	msg.type = FETCH_DATA;
	msg.data.header_or_data.buf = (const uint8_t *) page;
	msg.data.header_or_data.len = strlen(page);
	if (fetch_file_send_callback(&msg, ctx))
		return;

	msg.type = FETCH_FINISHED;
	fetch_file_send_callback(&msg, ctx);
}
/**
 * Process object as a regular file.
 *
 * If the request carried an etag equal to the file's modification time a
 * 304 / FETCH_NOTMODIFIED is reported.  Otherwise the file is opened and
 * its Content-Type, Content-Length and ETag headers are sent, followed by
 * the file data and FETCH_FINISHED.  With HAVE_MMAP the file is mapped
 * and sent as a single FETCH_DATA message; without it the file is read
 * with fread() in chunks of at most FETCH_FILE_MAX_BUF_SIZE bytes.
 *
 * \param ctx     file fetch context
 * \param fdstat  stat() result for ctx->path
 */
static void fetch_file_process_plain(struct fetch_file_context *ctx,
		struct stat *fdstat)
{
#ifdef HAVE_MMAP
	fetch_msg msg;
	char *buf = NULL;
	size_t buf_size;

	int fd; /**< The file descriptor of the object */

	/* Check if we can just return not modified */
	if (ctx->file_etag != 0 && ctx->file_etag == fdstat->st_mtime) {
		fetch_set_http_code(ctx->fetchh, 304);
		msg.type = FETCH_NOTMODIFIED;
		fetch_file_send_callback(&msg, ctx);
		return;
	}

	fd = open(ctx->path, O_RDONLY);
	if (fd < 0) {
		/* process errors as appropriate */
		fetch_file_process_error(ctx,
				fetch_file_errno_to_http_code(errno));
		return;
	}

	/* set buffer size */
	buf_size = fdstat->st_size;

	/* allocate the buffer storage; an empty file is never mapped */
	if (buf_size > 0) {
		buf = mmap(NULL, buf_size, PROT_READ, MAP_SHARED, fd, 0);
		if (buf == MAP_FAILED) {
			msg.type = FETCH_ERROR;
			msg.data.error = "Unable to map memory for file data buffer";
			fetch_file_send_callback(&msg, ctx);
			close(fd);
			return;
		}
	}

	/* fetch is going to be successful */
	fetch_set_http_code(ctx->fetchh, 200);

	/* Any callback can result in the fetch being aborted.
	 * Therefore, we _must_ check for this after _every_ call to
	 * fetch_file_send_callback().
	 */

	/* content type */
	if (fetch_file_send_header(ctx, "Content-Type: %s",
			guit->fetch->filetype(ctx->path))) {
		goto fetch_file_process_aborted;
	}

	/* content length; st_size is an off_t so it is converted to the
	 * size_t that the format specifier expects
	 */
	if (fetch_file_send_header(ctx, "Content-Length: %" PRIsizet,
			(size_t) fdstat->st_size)) {
		goto fetch_file_process_aborted;
	}

	/* create etag */
	if (fetch_file_send_header(ctx, "ETag: \"%10" PRId64 "\"",
			(int64_t) fdstat->st_mtime)) {
		goto fetch_file_process_aborted;
	}

	msg.type = FETCH_DATA;
	msg.data.header_or_data.buf = (const uint8_t *) buf;
	msg.data.header_or_data.len = buf_size;
	fetch_file_send_callback(&msg, ctx);

	if (ctx->aborted == false) {
		msg.type = FETCH_FINISHED;
		fetch_file_send_callback(&msg, ctx);
	}

fetch_file_process_aborted:

	if (buf != NULL)
		munmap(buf, buf_size);
	close(fd);
#else
	fetch_msg msg;
	char *buf;
	size_t buf_size;

	ssize_t tot_read = 0;
	ssize_t res;

	FILE *infile;

	/* Check if we can just return not modified */
	if (ctx->file_etag != 0 && ctx->file_etag == fdstat->st_mtime) {
		fetch_set_http_code(ctx->fetchh, 304);
		msg.type = FETCH_NOTMODIFIED;
		fetch_file_send_callback(&msg, ctx);
		return;
	}

	infile = fopen(ctx->path, "rb");
	if (infile == NULL) {
		/* process errors as appropriate */
		fetch_file_process_error(ctx,
				fetch_file_errno_to_http_code(errno));
		return;
	}

	/* set buffer size */
	buf_size = fdstat->st_size;
	if (buf_size > FETCH_FILE_MAX_BUF_SIZE)
		buf_size = FETCH_FILE_MAX_BUF_SIZE;

	/* allocate the buffer storage.  malloc(0) is permitted to return
	 * NULL, so always request at least one byte; otherwise an empty
	 * file could be misreported as an allocation failure.
	 */
	buf = malloc(buf_size > 0 ? buf_size : 1);
	if (buf == NULL) {
		msg.type = FETCH_ERROR;
		msg.data.error =
			"Unable to allocate memory for file data buffer";
		fetch_file_send_callback(&msg, ctx);
		fclose(infile);
		return;
	}

	/* fetch is going to be successful */
	fetch_set_http_code(ctx->fetchh, 200);

	/* Any callback can result in the fetch being aborted.
	 * Therefore, we _must_ check for this after _every_ call to
	 * fetch_file_send_callback().
	 */

	/* content type */
	if (fetch_file_send_header(ctx, "Content-Type: %s",
			guit->fetch->filetype(ctx->path))) {
		goto fetch_file_process_aborted;
	}

	/* content length; st_size is an off_t so it is converted to the
	 * size_t that the format specifier expects
	 */
	if (fetch_file_send_header(ctx, "Content-Length: %" PRIsizet,
			(size_t) fdstat->st_size)) {
		goto fetch_file_process_aborted;
	}

	/* create etag */
	if (fetch_file_send_header(ctx, "ETag: \"%10" PRId64 "\"",
			(int64_t) fdstat->st_mtime)) {
		goto fetch_file_process_aborted;
	}

	/* main data loop */
	while (tot_read < fdstat->st_size) {
		res = fread(buf, 1, buf_size, infile);
		if (res == 0) {
			if (feof(infile)) {
				msg.type = FETCH_ERROR;
				msg.data.error = "Unexpected EOF reading file";
				fetch_file_send_callback(&msg, ctx);
				goto fetch_file_process_aborted;
			} else {
				msg.type = FETCH_ERROR;
				msg.data.error = "Error reading file";
				fetch_file_send_callback(&msg, ctx);
				goto fetch_file_process_aborted;
			}
		}
		tot_read += res;

		msg.type = FETCH_DATA;
		msg.data.header_or_data.buf = (const uint8_t *) buf;
		msg.data.header_or_data.len = res;
		if (fetch_file_send_callback(&msg, ctx))
			break;
	}

	if (ctx->aborted == false) {
		msg.type = FETCH_FINISHED;
		fetch_file_send_callback(&msg, ctx);
	}

fetch_file_process_aborted:

	fclose(infile);
	free(buf);
#endif
	return;
}
/**
 * Build the localised, HTML-safe title for a directory listing.
 *
 * The characters '<', '>' and '&' in \a path are replaced by their HTML
 * entities and the result is substituted into the "FileIndex" message.
 *
 * \param path  directory path to present
 * \return newly allocated title string which the caller must free,
 *         or NULL if memory could not be allocated
 */
static char *gen_nice_title(char *path)
{
	const char *in;
	char *escaped;
	char *out;
	char *title;
	int title_length;

	/* Worst case every input character expands to "&amp;" */
	escaped = malloc(strlen(path) * SLEN("&amp;") + 1);
	if (escaped == NULL) {
		return NULL;
	}

	/* Escape special HTML characters */
	out = escaped;
	for (in = path; *in != '\0'; in++) {
		switch (*in) {
		case '<':
			memcpy(out, "&lt;", SLEN("&lt;"));
			out += SLEN("&lt;");
			break;

		case '>':
			memcpy(out, "&gt;", SLEN("&gt;"));
			out += SLEN("&gt;");
			break;

		case '&':
			memcpy(out, "&amp;", SLEN("&amp;"));
			out += SLEN("&amp;");
			break;

		default:
			*out++ = *in;
			break;
		}
	}
	*out = '\0';

	/* Construct a localised title string */
	title_length = (out - escaped) + strlen(messages_get("FileIndex"));
	title = malloc(title_length + 1);
	if (title != NULL) {
		/* Set title to localised "Index of <escaped path>" */
		snprintf(title, title_length, messages_get("FileIndex"),
				escaped);
	}

	free(escaped);

	return title;
}
/**
 * Generate an output row of the directory listing.
 *
 * Entries whose name starts with '.' are treated as hidden and skipped.
 * If the entry cannot be stat()ed it is still listed, with empty date
 * and time fields.
 *
 * \param ctx The file fetching context.
 * \param ent current directory entry.
 * \param even is the row an even row.
 * \param buffer The output buffer.
 * \param buffer_len The space available in the output buffer.
 * \return NSERROR_OK if a row was written to \a buffer,
 *         NSERROR_BAD_PARAMETER if the entry is hidden,
 *         or the error code from building the entry's path or URL.
 */
static nserror
process_dir_ent(struct fetch_file_context *ctx,
		struct dirent *ent,
		bool even,
		char *buffer,
		size_t buffer_len)
{
	nserror ret;
	char *urlpath = NULL; /* buffer for leaf entry path */
	struct stat ent_stat; /* stat result of leaf entry */
	char datebuf[64]; /* buffer for date text */
	char timebuf[64]; /* buffer for time text */
	nsurl *url;

	/* skip hidden files */
	if (ent->d_name[0] == '.') {
		return NSERROR_BAD_PARAMETER;
	}

	ret = netsurf_mkpath(&urlpath, NULL, 2, ctx->path, ent->d_name);
	if (ret != NSERROR_OK) {
		return ret;
	}

	if (stat(urlpath, &ent_stat) != 0) {
		/* mode 0 selects the "something else" row below */
		ent_stat.st_mode = 0;
		datebuf[0] = 0;
		timebuf[0] = 0;
	} else {
		/* localtime() may fail and return NULL, which must not be
		 * handed to strftime(); treat that like a formatting failure.
		 */
		const struct tm *mtime = localtime(&ent_stat.st_mtime);

		/* Get date in output format */
		if (mtime == NULL ||
		    strftime(datebuf, sizeof datebuf, "%a %d %b %Y",
				mtime) == 0) {
			datebuf[0] = '-';
			datebuf[1] = 0;
		}

		/* Get time in output format */
		if (mtime == NULL ||
		    strftime(timebuf, sizeof timebuf, "%H:%M",
				mtime) == 0) {
			timebuf[0] = '-';
			timebuf[1] = 0;
		}
	}

	ret = guit->file->path_to_nsurl(urlpath, &url);
	if (ret != NSERROR_OK) {
		free(urlpath);
		return ret;
	}

	if (S_ISREG(ent_stat.st_mode)) {
		/* regular file */
		dirlist_generate_row(even,
				     false,
				     url,
				     ent->d_name,
				     guit->fetch->filetype(urlpath),
				     ent_stat.st_size,
				     datebuf, timebuf,
				     buffer, buffer_len);
	} else if (S_ISDIR(ent_stat.st_mode)) {
		/* directory */
		dirlist_generate_row(even,
				     true,
				     url,
				     ent->d_name,
				     messages_get("FileDirectory"),
				     -1,
				     datebuf, timebuf,
				     buffer, buffer_len);
	} else {
		/* something else */
		dirlist_generate_row(even,
				     false,
				     url,
				     ent->d_name,
				     "",
				     -1,
				     datebuf, timebuf,
				     buffer, buffer_len);
	}

	nsurl_unref(url);
	free(urlpath);

	return NSERROR_OK;
}
/**
 * Comparison function for sorting directories.
 *
 * Compares names case-insensitively and correctly orders non zero-padded
 * numerical parts.
 * ie. produces "file1, file2, file10" rather than "file1, file10, file2".
 *
 * Runs of digits are compared by magnitude without converting them to an
 * integer, so arbitrarily long runs cannot overflow.  Leading zeros do
 * not affect the value of a run ("007" equals "7").
 *
 * \param d1 first directory entry
 * \param d2 second directory entry
 * \return negative, zero or positive if \a d1 sorts before, equal to or
 *         after \a d2
 */
static int dir_sort_alpha(const struct dirent **d1, const struct dirent **d2)
{
	/* Work on unsigned char so tolower() never sees a negative value */
	const unsigned char *s1 = (const unsigned char *) (*d1)->d_name;
	const unsigned char *s2 = (const unsigned char *) (*d2)->d_name;

	while (*s1 != '\0' && *s2 != '\0') {
		if ((*s1 >= '0' && *s1 <= '9') &&
		    (*s2 >= '0' && *s2 <= '9')) {
			const unsigned char *run1, *run2;
			size_t len1, len2;
			int diff;

			/* Leading zeros carry no value */
			while (*s1 == '0')
				s1++;
			while (*s2 == '0')
				s2++;

			/* Find the extent of the significant digits */
			run1 = s1;
			while (*s1 >= '0' && *s1 <= '9')
				s1++;
			run2 = s2;
			while (*s2 >= '0' && *s2 <= '9')
				s2++;

			len1 = (size_t) (s1 - run1);
			len2 = (size_t) (s2 - run2);

			/* More significant digits means a larger number */
			if (len1 != len2)
				return (len1 < len2) ? -1 : 1;

			/* Same length: digit-wise order is numeric order */
			diff = memcmp(run1, run2, len1);
			if (diff != 0)
				return (diff < 0) ? -1 : 1;

			if (*s1 == '\0' || *s2 == '\0')
				break;
		}

		if (tolower(*s1) != tolower(*s2))
			break;

		s1++;
		s2++;
	}

	return tolower(*s1) - tolower(*s2);
}
/**
 * Process object as a directory, sending an HTML listing to the client.
 *
 * The directory is read with scandir() and ordered by dir_sort_alpha().
 * The listing is emitted as a sequence of FETCH_DATA messages (top,
 * title, optional parent link, headings, one row per visible entry,
 * bottom) followed by FETCH_FINISHED.  Generation stops as soon as the
 * fetch is found to have been aborted; the scandir() results are freed
 * on every path that reaches the cleanup label.
 *
 * \param ctx     file fetch context
 * \param fdstat  stat() result for ctx->path (not referenced in this function)
 */
static void fetch_file_process_dir(struct fetch_file_context *ctx,
struct stat *fdstat)
{
fetch_msg msg;
char buffer[1024]; /* Output buffer */
bool even = false; /* formatting flag */
char *title; /* pretty printed title */
nserror err; /* result from url routines */
nsurl *up; /* url of parent */
struct dirent **listing = NULL; /* directory entry listing */
int i; /* directory entry index */
int n; /* number of directory entries */
n = scandir(ctx->path, &listing, 0, dir_sort_alpha);
if (n < 0) {
/* could not read the directory; report as an HTTP style error */
fetch_file_process_error(ctx,
fetch_file_errno_to_http_code(errno));
return;
}
/* fetch is going to be successful */
fetch_set_http_code(ctx->fetchh, 200);
/* force no-cache */
if (fetch_file_send_header(ctx, "Cache-Control: no-cache"))
goto fetch_file_process_dir_aborted;
/* content type */
if (fetch_file_send_header(ctx, "Content-Type: text/html"))
goto fetch_file_process_dir_aborted;
/* every data message below reuses the same buffer; only the length
 * is updated before each send
 */
msg.type = FETCH_DATA;
msg.data.header_or_data.buf = (const uint8_t *) buffer;
/* directory listing top */
dirlist_generate_top(buffer, sizeof buffer);
msg.data.header_or_data.len = strlen(buffer);
if (fetch_file_send_callback(&msg, ctx))
goto fetch_file_process_dir_aborted;
/* directory listing title */
/* NOTE(review): gen_nice_title() returns NULL on allocation failure and
 * that value is passed on unchecked; confirm dirlist_generate_title()
 * accepts a NULL title.
 */
title = gen_nice_title(ctx->path);
dirlist_generate_title(title, buffer, sizeof buffer);
free(title);
msg.data.header_or_data.len = strlen(buffer);
if (fetch_file_send_callback(&msg, ctx))
goto fetch_file_process_dir_aborted;
/* Print parent directory link */
err = nsurl_parent(ctx->url, &up);
if (err == NSERROR_OK) {
if (nsurl_compare(ctx->url, up, NSURL_COMPLETE) == false) {
/* different URL; have parent */
dirlist_generate_parent_link(nsurl_access(up),
buffer, sizeof buffer);
msg.data.header_or_data.len = strlen(buffer);
fetch_file_send_callback(&msg, ctx);
}
/* release the parent URL before acting on a possible abort */
nsurl_unref(up);
if (ctx->aborted)
goto fetch_file_process_dir_aborted;
}
/* directory list headings */
dirlist_generate_headings(buffer, sizeof buffer);
msg.data.header_or_data.len = strlen(buffer);
if (fetch_file_send_callback(&msg, ctx))
goto fetch_file_process_dir_aborted;
/* one row per entry; entries for which process_dir_ent() does not
 * return NSERROR_OK (e.g. hidden files) produce no output and do not
 * flip the row parity
 */
for (i = 0; i < n; i++) {
err = process_dir_ent(ctx, listing[i], even, buffer,
sizeof(buffer));
if (err == NSERROR_OK) {
msg.data.header_or_data.len = strlen(buffer);
if (fetch_file_send_callback(&msg, ctx))
goto fetch_file_process_dir_aborted;
even = !even;
}
}
/* directory listing bottom */
dirlist_generate_bottom(buffer, sizeof buffer);
msg.data.header_or_data.len = strlen(buffer);
if (fetch_file_send_callback(&msg, ctx))
goto fetch_file_process_dir_aborted;
msg.type = FETCH_FINISHED;
fetch_file_send_callback(&msg, ctx);
fetch_file_process_dir_aborted:
/* free the entries allocated by scandir(), then the array itself */
if (listing != NULL) {
for (i = 0; i < n; i++) {
free(listing[i]);
}
free(listing);
}
}
/**
 * Process a file fetch.
 *
 * Stats the target path and dispatches on its type: directories produce
 * a listing, regular files are sent as-is and anything else is reported
 * as a 501 error.  A failing stat() is reported using the HTTP code
 * derived from errno.
 *
 * \param ctx  file fetch context
 */
static void fetch_file_process(struct fetch_file_context *ctx)
{
	struct stat info; /**< The objects stat */

	if (stat(ctx->path, &info) != 0) {
		/* process errors as appropriate */
		fetch_file_process_error(ctx,
				fetch_file_errno_to_http_code(errno));
		return;
	}

	if (S_ISDIR(info.st_mode)) {
		/* directory listing */
		fetch_file_process_dir(ctx, &info);
		return;
	}

	if (S_ISREG(info.st_mode)) {
		/* regular file */
		fetch_file_process_plain(ctx, &info);
		return;
	}

	/* unhandled type of file */
	fetch_file_process_error(ctx, 501);
}
/**
 * Callback to poll for additional file fetch contents.
 *
 * Walks the fetch ring once.  Each entry that is not locked is processed
 * (unless it has been aborted), removed from the fetch queues and freed.
 *
 * \param scheme  scheme being polled (unused)
 */
static void fetch_file_poll(lwc_string *scheme)
{
	struct fetch_file_context *cur, *following;

	if (ring == NULL)
		return;

	/* Iterate over ring, processing each pending fetch */
	cur = ring;
	do {
		if (cur->locked) {
			/* A callback for this entry is in progress, so this
			 * is a re-entrant call (the interested party caused
			 * fetch_poll() to run again from within a callback).
			 * Leave the entry alone.
			 */
			following = cur->r_next;
		} else {
			/* File fetches are processed in one go; aborted
			 * ones are simply cleaned up.
			 */
			if (!cur->aborted)
				fetch_file_process(cur);

			/* Read the successor as late as possible, since
			 * processing may have added entries to the ring.
			 */
			following = cur->r_next;

			fetch_remove_from_queues(cur->fetchh);
			fetch_free(cur->fetchh);
		}

		/* Advance; stop once back at the ring head or when the
		 * ring has become empty.
		 */
		cur = following;
	} while (cur != ring && ring != NULL);
}
nserror fetch_file_register(void)
{
lwc_string *scheme = lwc_string_ref(corestring_lwc_file);
const struct fetcher_operation_table fetcher_ops = {
.initialise = fetch_file_initialise,
.acceptable = fetch_file_can_fetch,
.setup = fetch_file_setup,
.start = fetch_file_start,
.abort = fetch_file_abort,
.free = fetch_file_free,
.poll = fetch_file_poll,
.finalise = fetch_file_finalise
};
return fetcher_add(scheme, &fetcher_ops);
}