netsurf/content/fetchers/file.c
Vincent Sanders 7d9c9dba36 Split utils header into string functions and everything else
split out the string handling API from the rest of the utils header
and fix up all the fallout.
2016-04-21 22:36:21 +01:00

831 lines
19 KiB
C

/*
* Copyright 2010 Vincent Sanders <vince@netsurf-browser.org>
*
* This file is part of NetSurf.
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file
*
* file scheme URL handling. Based on the data fetcher by Rob Kendrick
*/
#include "utils/config.h"
#include <stdlib.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <stdio.h>
#include <stdarg.h>
#ifdef HAVE_MMAP
#include <sys/mman.h>
#endif
#include <libwapcaplet/libwapcaplet.h>
#include "utils/nsurl.h"
#include "utils/dirent.h"
#include "utils/corestrings.h"
#include "utils/messages.h"
#include "utils/utils.h"
#include "utils/ring.h"
#include "utils/file.h"
#include "desktop/gui_fetch.h"
#include "desktop/gui_internal.h"
#include "content/dirlist.h"
#include "content/fetch.h"
#include "content/fetchers.h"
#include "content/fetchers/file.h"
/**
 * Maximum size, in bytes, of the read buffer.
 *
 * Used to cap the buffer allocated when a file is read with fread(), i.e.
 * in builds where HAVE_MMAP is not defined.
 */
#define FETCH_FILE_MAX_BUF_SIZE (1024 * 1024)
/**
 * Context for a fetch.
 *
 * Contexts are allocated zeroed by fetch_file_setup() and linked into the
 * file-scope ring through r_next / r_prev.
 */
struct fetch_file_context {
struct fetch_file_context *r_next, *r_prev; /**< Ring linkage */
struct fetch *fetchh; /**< Handle for this fetch */
bool aborted; /**< Flag indicating fetch has been aborted */
bool locked; /**< Flag indicating entry is already entered (set while a callback is being issued) */
nsurl *url; /**< The full url the fetch refers to */
char *path; /**< The actual path to be used with open() */
time_t file_etag; /**< Request etag for file (previous st_mtime); 0 if the request carried none */
};
/**
 * Ring of file fetch contexts.
 *
 * Contexts are inserted by fetch_file_setup() and removed by
 * fetch_file_free(); fetch_file_poll() returns immediately while this is
 * NULL.
 */
static struct fetch_file_context *ring = NULL;
/**
 * Issue a fetch callback with the context locked.
 *
 * The context is flagged as locked for the duration of the callback so
 * that fetch_file_poll() skips it if it is re-entered from within the
 * callback.
 *
 * \param msg  The message to deliver.
 * \param ctx  The file fetch context the message belongs to.
 * \return true if the fetch has been flagged as aborted, else false.
 */
static inline bool fetch_file_send_callback(const fetch_msg *msg,
		struct fetch_file_context *ctx)
{
	bool is_aborted;

	ctx->locked = true;
	fetch_send_callback(msg, ctx->fetchh);
	ctx->locked = false;

	is_aborted = ctx->aborted;

	return is_aborted;
}
/**
 * Format a header line and deliver it as a FETCH_HEADER message.
 *
 * The line is formatted into a 64 byte buffer, so anything beyond 63
 * characters is truncated.
 *
 * \param ctx  The file fetch context.
 * \param fmt  printf style format of the header line.
 * \param ...  Arguments for the format.
 * \return true if the fetch has been flagged as aborted, else false.
 */
static bool fetch_file_send_header(struct fetch_file_context *ctx,
		const char *fmt, ...)
{
	char line[64];
	va_list args;
	fetch_msg header_msg;

	va_start(args, fmt);
	vsnprintf(line, sizeof(line), fmt, args);
	va_end(args);

	header_msg.type = FETCH_HEADER;
	header_msg.data.header_or_data.buf = (const uint8_t *) line;
	header_msg.data.header_or_data.len = strlen(line);

	/* the callback wrapper reports the context's aborted flag */
	return fetch_file_send_callback(&header_msg, ctx);
}
/**
 * Callback to initialise the file fetcher.
 *
 * There is nothing to set up, so this always reports success.
 *
 * \param scheme  The scheme being initialised (unused).
 * \return true always.
 */
static bool fetch_file_initialise(lwc_string *scheme)
{
	(void) scheme;

	return true;
}
/**
 * Callback to finalise the file fetcher.
 *
 * There is nothing to tear down, so this does nothing.
 *
 * \param scheme  The scheme being finalised (unused).
 */
static void fetch_file_finalise(lwc_string *scheme)
{
	(void) scheme;
}
/**
 * Callback to test whether a URL can be fetched by the file fetcher.
 *
 * \param url  The URL to test (unused; every URL is accepted).
 * \return true always.
 */
static bool fetch_file_can_fetch(const nsurl *url)
{
	(void) url;

	return true;
}
/**
 * Callback to set up a file fetch context.
 *
 * Converts the URL to a local path, records the etag from any
 * If-None-Match request header and inserts the new context into the ring
 * of file fetches.
 *
 * \param fetchh          Fetch handle the context is created for.
 * \param url             The URL to fetch.
 * \param only_2xx        Not used by the file fetcher.
 * \param downgrade_tls   Not used by the file fetcher.
 * \param post_urlenc     Not used by the file fetcher.
 * \param post_multipart  Not used by the file fetcher.
 * \param headers         NULL terminated array of request headers; a NULL
 *                        array is treated as empty.
 * \return The new fetch context, or NULL if allocation or the URL to path
 *         conversion failed.
 */
static void *
fetch_file_setup(struct fetch *fetchh,
		 nsurl *url,
		 bool only_2xx,
		 bool downgrade_tls,
		 const char *post_urlenc,
		 const struct fetch_multipart_data *post_multipart,
		 const char **headers)
{
	struct fetch_file_context *ctx;
	size_t i;
	nserror ret;

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL)
		return NULL;

	ret = guit->file->nsurl_to_path(url, &ctx->path);
	if (ret != NSERROR_OK) {
		free(ctx);
		return NULL;
	}

	ctx->url = nsurl_ref(url);

	/* Scan request headers looking for If-None-Match */
	for (i = 0; headers != NULL && headers[i] != NULL; i++) {
		const char *d;
		long long etag;

		if (strncasecmp(headers[i], "If-None-Match:",
				SLEN("If-None-Match:")) != 0)
			continue;

		/* If-None-Match: "12345678" */
		d = headers[i] + SLEN("If-None-Match:");

		/* Scan to first digit, if any */
		while (*d != '\0' && (*d < '0' || '9' < *d))
			d++;

		if (*d == '\0')
			continue;

		/* Convert to time_t.  strtoll() is used rather than atoi()
		 * because the etag is a full width st_mtime value, and
		 * atoi() has undefined behaviour when the value does not
		 * fit in an int.  Values that cannot be represented are
		 * ignored, leaving the fetch unconditional.
		 */
		errno = 0;
		etag = strtoll(d, NULL, 10);
		if (errno == 0 && (long long)(time_t) etag == etag)
			ctx->file_etag = (time_t) etag;
	}

	ctx->fetchh = fetchh;

	RING_INSERT(ring, ctx);

	return ctx;
}
/**
 * Callback to free a file fetch.
 *
 * Drops the URL reference, releases the path, removes the context from
 * the ring and frees it.
 *
 * \param ctx  The file fetch context to free.
 */
static void fetch_file_free(void *ctx)
{
	struct fetch_file_context *fctx = ctx;

	nsurl_unref(fctx->url);
	free(fctx->path);

	RING_REMOVE(ring, fctx);

	free(fctx);
}
/**
 * Callback to start a file fetch.
 *
 * No work is done here; the fetch is carried out from fetch_file_poll().
 *
 * \param ctx  The file fetch context (unused).
 * \return true always.
 */
static bool fetch_file_start(void *ctx)
{
	(void) ctx;

	return true;
}
/**
 * Callback to abort a file fetch.
 *
 * The context is only flagged here; it is left in place so the poll loop
 * does not have to cope with it vanishing part way through, and the poll
 * loop performs the actual cleanup.
 *
 * \param ctx  The file fetch context to abort.
 */
static void fetch_file_abort(void *ctx)
{
	struct fetch_file_context *fctx = ctx;

	fctx->aborted = true;
}
/**
 * Map an errno value onto an HTTP status code.
 *
 * \param error_no  The errno value to convert.
 * \return 400 for ENAMETOOLONG, 403 for EACCES, 404 for ENOENT and 500
 *         for anything else.
 */
static int fetch_file_errno_to_http_code(int error_no)
{
	if (error_no == ENAMETOOLONG)
		return 400;

	if (error_no == EACCES)
		return 403;

	if (error_no == ENOENT)
		return 404;

	return 500;
}
/**
 * Report a failed fetch as a small generated HTML error page.
 *
 * Sets the HTTP code on the fetch, sends a text/html content type header,
 * then the page itself followed by FETCH_FINISHED.  Processing stops as
 * soon as a callback reports that the fetch was aborted.
 *
 * \param ctx   The file fetch context.
 * \param code  The HTTP status code to report.
 */
static void fetch_file_process_error(struct fetch_file_context *ctx, int code)
{
	char msg_key[8];
	char page[1024];
	const char *heading;
	fetch_msg msg;

	/* content is going to return error code */
	fetch_set_http_code(ctx->fetchh, code);

	/* content type */
	if (fetch_file_send_header(ctx, "Content-Type: text/html"))
		return;

	/* the page title is looked up under the message key HTTPnnn */
	snprintf(msg_key, sizeof(msg_key), "HTTP%03d", code);
	heading = messages_get(msg_key);

	snprintf(page, sizeof(page),
		 "<html><head><title>%s</title></head>"
		 "<body><h1>%s</h1>"
		 "<p>Error %d while fetching file %s</p></body></html>",
		 heading, heading, code, nsurl_access(ctx->url));

	msg.type = FETCH_DATA;
	msg.data.header_or_data.buf = (const uint8_t *) page;
	msg.data.header_or_data.len = strlen(page);
	if (fetch_file_send_callback(&msg, ctx))
		return;

	msg.type = FETCH_FINISHED;
	fetch_file_send_callback(&msg, ctx);
}
/**
 * Process object as a regular file.
 *
 * If the request etag matches the file's modification time a 304
 * FETCH_NOTMODIFIED is issued and nothing else is sent.  Otherwise the
 * file is opened and its Content-Type, Content-Length and ETag headers are
 * sent, followed by the data and FETCH_FINISHED.  Failure to open the file
 * is reported through fetch_file_process_error(); failure to obtain a
 * buffer or to read the file is reported with FETCH_ERROR.
 *
 * With HAVE_MMAP the file is mapped and sent as a single FETCH_DATA
 * message; without it the file is read with fread() in chunks of at most
 * FETCH_FILE_MAX_BUF_SIZE bytes.
 *
 * \param ctx     The file fetch context.
 * \param fdstat  stat() result for ctx->path.
 */
static void fetch_file_process_plain(struct fetch_file_context *ctx,
				     struct stat *fdstat)
{
#ifdef HAVE_MMAP
	fetch_msg msg;
	char *buf = NULL;
	size_t buf_size;
	int fd; /**< The file descriptor of the object */

	/* Check if we can just return not modified */
	if (ctx->file_etag != 0 && ctx->file_etag == fdstat->st_mtime) {
		fetch_set_http_code(ctx->fetchh, 304);
		msg.type = FETCH_NOTMODIFIED;
		fetch_file_send_callback(&msg, ctx);
		return;
	}

	fd = open(ctx->path, O_RDONLY);
	if (fd < 0) {
		/* process errors as appropriate */
		fetch_file_process_error(ctx,
				fetch_file_errno_to_http_code(errno));
		return;
	}

	/* set buffer size */
	buf_size = fdstat->st_size;

	/* map the file; empty files are not mapped and buf stays NULL */
	if (buf_size > 0) {
		buf = mmap(NULL, buf_size, PROT_READ, MAP_SHARED, fd, 0);
		if (buf == MAP_FAILED) {
			msg.type = FETCH_ERROR;
			msg.data.error = "Unable to map memory for file data buffer";
			fetch_file_send_callback(&msg, ctx);
			close(fd);
			return;
		}
	}

	/* fetch is going to be successful */
	fetch_set_http_code(ctx->fetchh, 200);

	/* Any callback can result in the fetch being aborted.
	 * Therefore, we _must_ check for this after _every_ call to
	 * fetch_file_send_callback().
	 */

	/* content type */
	if (fetch_file_send_header(ctx, "Content-Type: %s",
				   guit->fetch->filetype(ctx->path))) {
		goto fetch_file_process_aborted;
	}

	/* content length; st_size is an off_t so it is converted to the
	 * type the PRIsizet conversion expects
	 */
	if (fetch_file_send_header(ctx, "Content-Length: %" PRIsizet,
				   (size_t) fdstat->st_size)) {
		goto fetch_file_process_aborted;
	}

	/* create etag */
	if (fetch_file_send_header(ctx, "ETag: \"%10" PRId64 "\"",
				   (int64_t) fdstat->st_mtime)) {
		goto fetch_file_process_aborted;
	}

	msg.type = FETCH_DATA;
	msg.data.header_or_data.buf = (const uint8_t *) buf;
	msg.data.header_or_data.len = buf_size;
	fetch_file_send_callback(&msg, ctx);

	if (ctx->aborted == false) {
		msg.type = FETCH_FINISHED;
		fetch_file_send_callback(&msg, ctx);
	}

fetch_file_process_aborted:

	if (buf != NULL)
		munmap(buf, buf_size);
	close(fd);
#else
	fetch_msg msg;
	char *buf;
	size_t buf_size;

	ssize_t tot_read = 0;
	ssize_t res;

	FILE *infile;

	/* Check if we can just return not modified */
	if (ctx->file_etag != 0 && ctx->file_etag == fdstat->st_mtime) {
		fetch_set_http_code(ctx->fetchh, 304);
		msg.type = FETCH_NOTMODIFIED;
		fetch_file_send_callback(&msg, ctx);
		return;
	}

	infile = fopen(ctx->path, "rb");
	if (infile == NULL) {
		/* process errors as appropriate */
		fetch_file_process_error(ctx,
				fetch_file_errno_to_http_code(errno));
		return;
	}

	/* set buffer size */
	buf_size = fdstat->st_size;
	if (buf_size > FETCH_FILE_MAX_BUF_SIZE)
		buf_size = FETCH_FILE_MAX_BUF_SIZE;

	/* allocate the buffer storage.  At least one byte is requested
	 * because malloc(0) may legitimately return NULL, which would make
	 * an empty file look like an allocation failure.
	 */
	buf = malloc(buf_size > 0 ? buf_size : 1);
	if (buf == NULL) {
		msg.type = FETCH_ERROR;
		msg.data.error =
			"Unable to allocate memory for file data buffer";
		fetch_file_send_callback(&msg, ctx);
		fclose(infile);
		return;
	}

	/* fetch is going to be successful */
	fetch_set_http_code(ctx->fetchh, 200);

	/* Any callback can result in the fetch being aborted.
	 * Therefore, we _must_ check for this after _every_ call to
	 * fetch_file_send_callback().
	 */

	/* content type */
	if (fetch_file_send_header(ctx, "Content-Type: %s",
				   guit->fetch->filetype(ctx->path))) {
		goto fetch_file_process_aborted;
	}

	/* content length; st_size is an off_t so it is converted to the
	 * type the PRIsizet conversion expects
	 */
	if (fetch_file_send_header(ctx, "Content-Length: %" PRIsizet,
				   (size_t) fdstat->st_size)) {
		goto fetch_file_process_aborted;
	}

	/* create etag */
	if (fetch_file_send_header(ctx, "ETag: \"%10" PRId64 "\"",
				   (int64_t) fdstat->st_mtime)) {
		goto fetch_file_process_aborted;
	}

	/* main data loop */
	while (tot_read < fdstat->st_size) {
		res = fread(buf, 1, buf_size, infile);
		if (res == 0) {
			if (feof(infile)) {
				msg.type = FETCH_ERROR;
				msg.data.error = "Unexpected EOF reading file";
				fetch_file_send_callback(&msg, ctx);
				goto fetch_file_process_aborted;
			} else {
				msg.type = FETCH_ERROR;
				msg.data.error = "Error reading file";
				fetch_file_send_callback(&msg, ctx);
				goto fetch_file_process_aborted;
			}
		}
		tot_read += res;

		msg.type = FETCH_DATA;
		msg.data.header_or_data.buf = (const uint8_t *) buf;
		msg.data.header_or_data.len = res;
		if (fetch_file_send_callback(&msg, ctx))
			break;
	}

	if (ctx->aborted == false) {
		msg.type = FETCH_FINISHED;
		fetch_file_send_callback(&msg, ctx);
	}

fetch_file_process_aborted:

	fclose(infile);
	free(buf);
#endif
	return;
}
/**
 * Generate the localised title for a directory listing.
 *
 * The path has the HTML special characters '<', '>' and '&' replaced by
 * their entities and is then substituted into the "FileIndex" message,
 * which is used as the format string.
 *
 * \param path  The directory path to generate a title for.
 * \return A newly allocated title string which the caller must free, or
 *         NULL if memory could not be allocated.
 */
static char *gen_nice_title(char *path)
{
	const char *fmt;
	const char *tmp;
	char *nice_path, *cnv;
	char *title;
	size_t title_size;

	/* Convert path for display; worst case is every character
	 * expanding to the longest entity
	 */
	nice_path = malloc(strlen(path) * SLEN("&amp;") + 1);
	if (nice_path == NULL) {
		return NULL;
	}

	/* Escape special HTML characters */
	for (cnv = nice_path, tmp = path; *tmp != '\0'; tmp++) {
		switch (*tmp) {
		case '<':
			memcpy(cnv, "&lt;", SLEN("&lt;"));
			cnv += SLEN("&lt;");
			break;
		case '>':
			memcpy(cnv, "&gt;", SLEN("&gt;"));
			cnv += SLEN("&gt;");
			break;
		case '&':
			memcpy(cnv, "&amp;", SLEN("&amp;"));
			cnv += SLEN("&amp;");
			break;
		default:
			*cnv++ = *tmp;
			break;
		}
	}
	*cnv = '\0';

	/* Construct a localised title string.  The format is looked up
	 * once so the size calculation and the formatting cannot disagree.
	 */
	fmt = messages_get("FileIndex");
	title_size = (size_t)(cnv - nice_path) + strlen(fmt) + 1;
	title = malloc(title_size);
	if (title == NULL) {
		free(nice_path);
		return NULL;
	}

	/* Set title to localised "Index of <nice_path>"; snprintf is told
	 * the full size of the allocation
	 */
	snprintf(title, title_size, fmt, nice_path);

	free(nice_path);

	return title;
}
/**
 * Generate an output row of the directory listing.
 *
 * Entries whose name starts with '.' are skipped.  If the entry cannot be
 * examined with stat() it is still listed, with empty date and time and
 * without a type or size.
 *
 * \param ctx The file fetching context.
 * \param ent current directory entry.
 * \param even is the row an even row.
 * \param buffer The output buffer.
 * \param buffer_len The space available in the output buffer.
 * \return NSERROR_OK if a row was generated, NSERROR_BAD_PARAMETER if the
 *         entry is hidden and was skipped, or another error code on
 *         failure.
 */
static nserror
process_dir_ent(struct fetch_file_context *ctx,
		struct dirent *ent,
		bool even,
		char *buffer,
		size_t buffer_len)
{
	nserror ret;
	char *urlpath = NULL; /* buffer for leaf entry path */
	struct stat ent_stat; /* stat result of leaf entry */
	char datebuf[64]; /* buffer for date text */
	char timebuf[64]; /* buffer for time text */
	nsurl *url;

	/* skip hidden files */
	if (ent->d_name[0] == '.') {
		return NSERROR_BAD_PARAMETER;
	}

	ret = netsurf_mkpath(&urlpath, NULL, 2, ctx->path, ent->d_name);
	if (ret != NSERROR_OK) {
		return ret;
	}

	if (stat(urlpath, &ent_stat) != 0) {
		ent_stat.st_mode = 0;
		datebuf[0] = 0;
		timebuf[0] = 0;
	} else {
		/* localtime() can fail and return NULL, which must not be
		 * handed to strftime(); treat that like a formatting
		 * failure.  The broken down time is computed once and used
		 * for both strings.
		 */
		const struct tm *mtime = localtime(&ent_stat.st_mtime);

		/* Get date in output format */
		if (mtime == NULL ||
		    strftime(datebuf, sizeof datebuf, "%a %d %b %Y",
			     mtime) == 0) {
			datebuf[0] = '-';
			datebuf[1] = 0;
		}

		/* Get time in output format */
		if (mtime == NULL ||
		    strftime(timebuf, sizeof timebuf, "%H:%M",
			     mtime) == 0) {
			timebuf[0] = '-';
			timebuf[1] = 0;
		}
	}

	ret = guit->file->path_to_nsurl(urlpath, &url);
	if (ret != NSERROR_OK) {
		free(urlpath);
		return ret;
	}

	if (S_ISREG(ent_stat.st_mode)) {
		/* regular file */
		dirlist_generate_row(even,
				     false,
				     url,
				     ent->d_name,
				     guit->fetch->filetype(urlpath),
				     ent_stat.st_size,
				     datebuf, timebuf,
				     buffer, buffer_len);
	} else if (S_ISDIR(ent_stat.st_mode)) {
		/* directory */
		dirlist_generate_row(even,
				     true,
				     url,
				     ent->d_name,
				     messages_get("FileDirectory"),
				     -1,
				     datebuf, timebuf,
				     buffer, buffer_len);
	} else {
		/* something else */
		dirlist_generate_row(even,
				     false,
				     url,
				     ent->d_name,
				     "",
				     -1,
				     datebuf, timebuf,
				     buffer, buffer_len);
	}

	nsurl_unref(url);
	free(urlpath);

	return NSERROR_OK;
}
/**
 * Comparison function for sorting directories.
 *
 * Compares names case insensitively and correctly orders non zero-padded
 * numerical parts.
 * ie. produces "file1, file2, file10" rather than "file1, file10, file2".
 *
 * Names are examined as unsigned bytes, so bytes with the top bit set are
 * safe to pass to tolower().  Runs of digits are compared by significant
 * length and then digit by digit instead of being converted to an integer,
 * so arbitrarily long runs cannot overflow.
 *
 * \param d1 first directory entry
 * \param d2 second directory entry
 * \return negative, zero or positive as d1 sorts before, equal to or
 *         after d2
 */
static int dir_sort_alpha(const struct dirent **d1, const struct dirent **d2)
{
	const unsigned char *s1 = (const unsigned char *) (*d1)->d_name;
	const unsigned char *s2 = (const unsigned char *) (*d2)->d_name;

	while (*s1 != '\0' && *s2 != '\0') {
		if ((*s1 >= '0' && *s1 <= '9') &&
		    (*s2 >= '0' && *s2 <= '9')) {
			const unsigned char *run1, *run2;
			size_t len1, len2;
			int cmp;

			/* leading zeros do not affect the value */
			while (*s1 == '0')
				s1++;
			while (*s2 == '0')
				s2++;

			run1 = s1;
			while (*s1 >= '0' && *s1 <= '9')
				s1++;
			run2 = s2;
			while (*s2 >= '0' && *s2 <= '9')
				s2++;

			len1 = (size_t) (s1 - run1);
			len2 = (size_t) (s2 - run2);

			/* more significant digits means a larger number */
			if (len1 != len2)
				return (len1 < len2) ? -1 : 1;

			/* same length: first differing digit decides */
			cmp = memcmp(run1, run2, len1);
			if (cmp != 0)
				return cmp;

			if (*s1 == '\0' || *s2 == '\0')
				break;
		}

		if (tolower(*s1) != tolower(*s2))
			break;

		s1++;
		s2++;
	}

	return tolower(*s1) - tolower(*s2);
}
/**
 * Process object as a directory, generating an HTML listing.
 *
 * The directory is scanned and sorted with dir_sort_alpha(), then the
 * listing is sent as a series of FETCH_DATA messages: top, title, parent
 * link (when the URL has a distinct parent), headings, one row per entry
 * and finally the bottom, followed by FETCH_FINISHED.  Generation stops as
 * soon as a callback reports that the fetch was aborted.
 *
 * \param ctx     The file fetch context.
 * \param fdstat  stat() result for ctx->path (not used here).
 */
static void fetch_file_process_dir(struct fetch_file_context *ctx,
				   struct stat *fdstat)
{
	struct dirent **entries = NULL; /* sorted directory entries */
	int count; /* number of directory entries */
	int idx; /* directory entry index */
	char out[1024]; /* output buffer shared by every chunk */
	bool even_row = false; /* row formatting flag */
	char *heading; /* pretty printed title */
	nsurl *parent; /* url of parent */
	nserror res; /* result from url and row routines */
	fetch_msg msg;

	count = scandir(ctx->path, &entries, NULL, dir_sort_alpha);
	if (count < 0) {
		fetch_file_process_error(ctx,
				fetch_file_errno_to_http_code(errno));
		return;
	}

	/* fetch is going to be successful */
	fetch_set_http_code(ctx->fetchh, 200);

	/* force no-cache */
	if (fetch_file_send_header(ctx, "Cache-Control: no-cache"))
		goto fetch_file_process_dir_aborted;

	/* content type */
	if (fetch_file_send_header(ctx, "Content-Type: text/html"))
		goto fetch_file_process_dir_aborted;

	/* every data message points at the same buffer; only the length
	 * changes between chunks
	 */
	msg.type = FETCH_DATA;
	msg.data.header_or_data.buf = (const uint8_t *) out;

	/* directory listing top */
	dirlist_generate_top(out, sizeof(out));
	msg.data.header_or_data.len = strlen(out);
	if (fetch_file_send_callback(&msg, ctx))
		goto fetch_file_process_dir_aborted;

	/* directory listing title */
	heading = gen_nice_title(ctx->path);
	dirlist_generate_title(heading, out, sizeof(out));
	free(heading);
	msg.data.header_or_data.len = strlen(out);
	if (fetch_file_send_callback(&msg, ctx))
		goto fetch_file_process_dir_aborted;

	/* Print parent directory link */
	res = nsurl_parent(ctx->url, &parent);
	if (res == NSERROR_OK) {
		if (nsurl_compare(ctx->url, parent, NSURL_COMPLETE) == false) {
			/* different URL; have parent */
			dirlist_generate_parent_link(nsurl_access(parent),
						     out, sizeof(out));
			msg.data.header_or_data.len = strlen(out);
			fetch_file_send_callback(&msg, ctx);
		}

		/* drop the reference before acting on any abort */
		nsurl_unref(parent);

		if (ctx->aborted)
			goto fetch_file_process_dir_aborted;
	}

	/* directory list headings */
	dirlist_generate_headings(out, sizeof(out));
	msg.data.header_or_data.len = strlen(out);
	if (fetch_file_send_callback(&msg, ctx))
		goto fetch_file_process_dir_aborted;

	/* one row per entry; entries that produce no row are passed over
	 * without toggling the row formatting
	 */
	for (idx = 0; idx < count; idx++) {
		res = process_dir_ent(ctx, entries[idx], even_row,
				      out, sizeof(out));
		if (res != NSERROR_OK)
			continue;

		msg.data.header_or_data.len = strlen(out);
		if (fetch_file_send_callback(&msg, ctx))
			goto fetch_file_process_dir_aborted;

		even_row = !even_row;
	}

	/* directory listing bottom */
	dirlist_generate_bottom(out, sizeof(out));
	msg.data.header_or_data.len = strlen(out);
	if (fetch_file_send_callback(&msg, ctx))
		goto fetch_file_process_dir_aborted;

	msg.type = FETCH_FINISHED;
	fetch_file_send_callback(&msg, ctx);

fetch_file_process_dir_aborted:

	if (entries != NULL) {
		for (idx = 0; idx < count; idx++) {
			free(entries[idx]);
		}
		free(entries);
	}
}
/**
 * Process a file fetch.
 *
 * The path is examined with stat() and the fetch handed to the directory
 * or regular file handler as appropriate.  A stat() failure is reported
 * with the HTTP code matching errno, and any other type of object with
 * code 501.
 *
 * \param ctx  The file fetch context.
 */
static void fetch_file_process(struct fetch_file_context *ctx)
{
	struct stat st; /* stat result for the object */

	if (stat(ctx->path, &st) != 0) {
		/* process errors as appropriate */
		fetch_file_process_error(ctx,
				fetch_file_errno_to_http_code(errno));
		return;
	}

	if (S_ISDIR(st.st_mode)) {
		/* directory listing */
		fetch_file_process_dir(ctx, &st);
		return;
	}

	if (S_ISREG(st.st_mode)) {
		/* regular file */
		fetch_file_process_plain(ctx, &st);
		return;
	}

	/* unhandled type of file */
	fetch_file_process_error(ctx, 501);
}
/**
 * Callback to poll for additional file fetch contents.
 *
 * Walks the ring of file fetch contexts.  Each context that is not locked
 * is processed in full by fetch_file_process() (unless it has been flagged
 * as aborted) and is then passed to fetch_remove_from_queues() and
 * fetch_free().  Locked contexts are passed over untouched.
 *
 * \param scheme  The scheme being polled (not used in this function).
 */
static void fetch_file_poll(lwc_string *scheme)
{
struct fetch_file_context *c, *next;
/* Nothing to do when there are no contexts in the ring */
if (ring == NULL) return;
/* Iterate over ring, processing each pending fetch */
c = ring;
do {
/* Ignore fetches that have been flagged as locked.
* This allows safe re-entrant calls to this function.
* Re-entrancy can occur if, as a result of a callback,
* the interested party causes fetch_poll() to be called
* again.
*/
if (c->locked == true) {
/* continue jumps to the loop condition, which reads next,
* so it has to be set before skipping this entry
*/
next = c->r_next;
continue;
}
/* Only process non-aborted fetches */
if (c->aborted == false) {
/* file fetches can be processed in one go */
fetch_file_process(c);
}
/* Compute next fetch item at the last possible moment as
* processing this item may have added to the ring.
*/
next = c->r_next;
/* NOTE(review): fetch_free() presumably ends up calling
* fetch_file_free(), which removes c from the ring and frees it;
* c is not dereferenced again after these calls. Confirm against
* the fetch core.
*/
fetch_remove_from_queues(c->fetchh);
fetch_free(c->fetchh);
/* Advance to next ring entry, exiting if we've reached
* the start of the ring or the ring has become empty
*/
} while ( (c = next) != ring && ring != NULL);
}
nserror fetch_file_register(void)
{
lwc_string *scheme = lwc_string_ref(corestring_lwc_file);
const struct fetcher_operation_table fetcher_ops = {
.initialise = fetch_file_initialise,
.acceptable = fetch_file_can_fetch,
.setup = fetch_file_setup,
.start = fetch_file_start,
.abort = fetch_file_abort,
.free = fetch_file_free,
.poll = fetch_file_poll,
.finalise = fetch_file_finalise
};
return fetcher_add(scheme, &fetcher_ops);
}