netsurf/render/html.c

/*
 * Copyright 2007 James Bursa <bursa@users.sourceforge.net>
 *
 * This file is part of NetSurf, http://www.netsurf-browser.org/
 *
 * NetSurf is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * NetSurf is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/** \file
 * Content for text/html (implementation).
 */

#define _GNU_SOURCE /* for strndup() */

#include <assert.h>
#include <ctype.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#ifdef WITH_HUBBUB
#include <hubbub/hubbub.h>
#include <hubbub/parser.h>
#include <hubbub/tree.h>
#endif
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include "utils/config.h"
#include "content/content.h"
#include "content/fetch.h"
#include "content/fetchcache.h"
#include "desktop/browser.h"
#include "desktop/gui.h"
#include "desktop/options.h"
#include "render/box.h"
#include "render/font.h"
#include "render/html.h"
#include "render/imagemap.h"
#include "render/layout.h"
#include "utils/log.h"
#include "utils/messages.h"
#include "utils/talloc.h"
#include "utils/url.h"
#include "utils/utils.h"

#define CHUNK 4096


#ifndef WITH_HUBBUB
static bool html_set_parser_encoding(struct content *c, const char *encoding);
static const char *html_detect_encoding(const char **data, unsigned int *size);
#endif
static void html_convert_css_callback(content_msg msg, struct content *css,
		intptr_t p1, intptr_t p2, union content_msg_data data);
static bool html_meta_refresh(struct content *c, xmlNode *head);
static bool html_head(struct content *c, xmlNode *head);
static bool html_find_stylesheets(struct content *c, xmlNode *html,
		xmlNode *head);
static bool html_find_inline_stylesheets(struct content *c, xmlNode *html);
static bool html_process_style_element(struct content *c, xmlNode *style);
static void html_object_callback(content_msg msg, struct content *object,
		intptr_t p1, intptr_t p2, union content_msg_data data);
static void html_object_done(struct box *box, struct content *object,
			     bool background);
static void html_object_failed(struct box *box, struct content *content,
		bool background);
static bool html_object_type_permitted(const content_type type,
		const content_type *permitted_types);
static void html_object_refresh(void *p);
static void html_destroy_frameset(struct content_html_frames *frameset);
static void html_destroy_iframe(struct content_html_iframe *iframe);
static void html_set_status(struct content *c, const char *extra);
static void html_dump_frameset(struct content_html_frames *frame,
		unsigned int depth);

static const char empty_document[] =
	"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\""
	"	\"http://www.w3.org/TR/html4/strict.dtd\">"
	"<html>"
	"<head>"
	"<title>Empty document</title>"
	"</head>"
	"<body>"
	"<h1>Empty document</h1>"
	"<p>The document sent by the server is empty.</p>"
	"</body>"
	"</html>";


#ifdef WITH_HUBBUB

const char const *ns_prefixes[NUM_NAMESPACES] =
		{ NULL, NULL, "math", "svg", "xlink", "xml", "xmlns" };

const char const *ns_urls[NUM_NAMESPACES] = {
	NULL,
	"http://www.w3.org/1999/xhtml",
	"http://www.w3.org/1998/Math/MathML",
	"http://www.w3.org/2000/svg",
	"http://www.w3.org/1999/xlink",
	"http://www.w3.org/XML/1998/namespace",
	"http://www.w3.org/2000/xmlns/"
};


static int create_comment(void *ctx, const hubbub_string *data, void **result);
static int create_doctype(void *ctx, const hubbub_doctype *doctype,
		void **result);
static int create_element(void *ctx, const hubbub_tag *tag, void **result);
static int create_text(void *ctx, const hubbub_string *data, void **result);
static int ref_node(void *ctx, void *node);
static int unref_node(void *ctx, void *node);
static int append_child(void *ctx, void *parent, void *child, void **result);
static int insert_before(void *ctx, void *parent, void *child, void *ref_child,
		void **result);
static int remove_child(void *ctx, void *parent, void *child, void **result);
static int clone_node(void *ctx, void *node, bool deep, void **result);
static int reparent_children(void *ctx, void *node, void *new_parent);
static int get_parent(void *ctx, void *node, bool element_only, void **result);
static int has_children(void *ctx, void *node, bool *result);
static int form_associate(void *ctx, void *form, void *node);
static int add_attributes(void *ctx, void *node,
		const hubbub_attribute *attributes, uint32_t n_attributes);
static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode);
static int change_encoding(void *ctx, const char *mibenum);

static hubbub_tree_handler tree_handler = {
	create_comment,
	create_doctype,
	create_element,
	create_text,
	ref_node,
	unref_node,
	append_child,
	insert_before,
	remove_child,
	clone_node,
	reparent_children,
	get_parent,
	has_children,
	form_associate,
	add_attributes,
	set_quirks_mode,
	change_encoding,
	NULL
};


/*** Tree construction functions ***/

int create_comment(void *ctx, const hubbub_string *data, void **result)
{
	xmlNode *node = xmlNewComment(NULL);

	node->content = xmlStrndup(data->ptr, data->len);
	node->_private = (void *)1;
	*result = node;

	return 0;
}

int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result)
{
	/* Make a node that doesn't really exist, then don't append it
	 * later. */
	xmlNode *node = xmlNewComment(NULL);

	node->_private = (void *)1;
	*result = node;

	return 0;
}

int create_element(void *ctx, const hubbub_tag *tag, void **result)
{
	struct content *c = ctx;
	struct content_html_data *html = &c->data.html;

	char *name = strndup((const char *) tag->name.ptr,
			tag->name.len);

	xmlNode *node = xmlNewNode(NULL, BAD_CAST name);
	node->_private = (void *)1;
	*result = node;

	if (html->has_ns == false) {
		for (size_t i = 1; i < NUM_NAMESPACES; i++) {
			html->ns[i] = xmlNewNs(node,
					BAD_CAST ns_urls[i],
					BAD_CAST ns_prefixes[i]);
		}
		html->has_ns = true;
	}

	xmlSetNs(node, html->ns[tag->ns]);

	free(name);

	for (size_t i = 0; i < tag->n_attributes; i++) {
		hubbub_attribute *attr = &tag->attributes[i];

		char *name = strndup((const char *) attr->name.ptr,
				attr->name.len);
		char *value = strndup((const char *) attr->value.ptr,
				attr->value.len);

		if (attr->ns == HUBBUB_NS_NULL) {
			xmlNewProp(node, BAD_CAST name, BAD_CAST value);
		} else {
			xmlNewNsProp(node, html->ns[attr->ns], BAD_CAST name,
					BAD_CAST value);
		}

		free(name);
		free(value);
	}

	return 0;
}

int create_text(void *ctx, const hubbub_string *data, void **result)
{
	xmlNode *node = xmlNewTextLen(BAD_CAST data->ptr, data->len);
	node->_private = (void *)1;
	*result = node;

	return 0;
}

int ref_node(void *ctx, void *node)
{
	xmlNode *n = node;
	n->_private = (void *)((uintptr_t)n->_private + 1);

	return 0;
}

int unref_node(void *ctx, void *node)
{
	xmlNode *n = node;
	n->_private = (void *)((uintptr_t)n->_private - 1);

	if (n->_private == (void *)0 && n->parent == NULL) {
		xmlFreeNode(n);
	}

	return 0;
}

int append_child(void *ctx, void *parent, void *child, void **result)
{
	xmlNode *nparent = parent;
	xmlNode *nchild = child;

	if (nchild->type == XML_TEXT_NODE &&
			nparent->last != NULL &&
			nparent->last->type == XML_TEXT_NODE) {
		xmlNode *clone;
		clone_node(ctx, nchild, false, (void **) &clone);
		*result = xmlAddChild(parent, clone);
		/* node referenced by clone_node */
	} else {
		*result = xmlAddChild(parent, child);
		ref_node(ctx, *result);
	}

	return 0;
}

/* insert 'child' before 'ref_child', under 'parent' */
int insert_before(void *ctx, void *parent, void *child, void *ref_child,
		void **result)
{
	*result = xmlAddPrevSibling(ref_child, child);
	ref_node(ctx, *result);

	return 0;
}

int remove_child(void *ctx, void *parent, void *child, void **result)
{
	xmlUnlinkNode(child);
	*result = child;

	ref_node(ctx, *result);

	return 0;
}

int clone_node(void *ctx, void *node, bool deep, void **result)
{
	xmlNode *n = xmlCopyNode(node, deep ? 1 : 2);
	n->_private = (void *)1;
	*result = n;

	return 0;
}

/* Take all of the child nodes of "node" and append them to "new_parent" */
int reparent_children(void *ctx, void *node, void *new_parent)
{
	xmlNode *n = (xmlNode *) node;
	xmlNode *p = (xmlNode *) new_parent;

	for (xmlNode *child = n->children; child != NULL; ) {
		xmlNode *next = child->next;

		xmlUnlinkNode(child);

		if (xmlAddChild(p, child) == NULL)
			return 1;

		child = next;
	}

	return 0;
}

int get_parent(void *ctx, void *node, bool element_only, void **result)
{
	*result = ((xmlNode *)node)->parent;

	if (*result != NULL && element_only &&
			((xmlNode *) *result)->type != XML_ELEMENT_NODE)
		*result = NULL;

	if (*result != NULL)
		ref_node(ctx, *result);

	return 0;
}

int has_children(void *ctx, void *node, bool *result)
{
	*result = ((xmlNode *)node)->children ? true : false;

	return 0;
}

int form_associate(void *ctx, void *form, void *node)
{
	return 0;
}

int add_attributes(void *ctx, void *node,
		const hubbub_attribute *attributes, uint32_t n_attributes)
{
	struct content *c = ctx;
	struct content_html_data *html = &c->data.html;

	for (size_t i = 0; i < n_attributes; i++) {
		const hubbub_attribute *attr = &attributes[i];

		char *name = strndup((const char *) attr->name.ptr,
				attr->name.len);
		char *value = strndup((const char *) attr->value.ptr,
				attr->value.len);

		if (attr->ns == HUBBUB_NS_NULL) {
			xmlNewProp(node, BAD_CAST name, BAD_CAST value);
		} else {
			xmlNewNsProp(node, html->ns[attr->ns], BAD_CAST name,
					BAD_CAST value);
		}

		free(name);
		free(value);
	}

	return 0;
}

int set_quirks_mode(void *ctx, hubbub_quirks_mode mode)
{
	return 0;
}

int change_encoding(void *ctx, const char *name)
{
	struct content *c = ctx;
	struct content_html_data *html = &c->data.html;

	/* If we have an encoding here, it means we are *certain* */
	if (html->encoding) {
		return 0;
	}

	/* Find the confidence otherwise (can only be from a BOM) */
	uint32_t source;
	const char *charset = hubbub_parser_read_charset(html->parser, &source);

	if (source == HUBBUB_CHARSET_CONFIDENT) {
		html->encoding_source = ENCODING_SOURCE_DETECTED;
		html->encoding = (char *) charset;
		return 0;
	}

	/* So here we have something of confidence tentative... */
	/* http://www.whatwg.org/specs/web-apps/current-work/#change */

	/* 2. "If the new encoding is identical or equivalent to the encoding
	 * that is already being used to interpret the input stream, then set
	 * the confidence to confident and abort these steps." */

	/* Whatever happens, the encoding should be set here; either for
	 * reprocessing with a different charset, or for confirming that the
	 * charset is in fact correct */
	html->encoding = (char *) name;
	html->encoding_source = ENCODING_SOURCE_META;

	/* Equal encodings will have the same string pointers */
	return (charset == name) ? 0 : 1;
}


/**
 * Talloc'd-up allocation hook for Hubbub.
 */
static void *html_hubbub_realloc(void *ptr, size_t len, void *pw)
{
	return talloc_realloc_size(pw, ptr, len);
}


/**
 * Create, set up, and whatnot, a Hubbub parser instance, along with the
 * relevant libxml2 bits.
 */
static int html_create_parser(struct content *c)
{
	struct content_html_data *html = &c->data.html;
	hubbub_parser_optparams param;

	html->parser = hubbub_parser_create(html->encoding,
			html_hubbub_realloc,
			c);
	if (!html->parser)
		return 1;

	html->document = xmlNewDoc(BAD_CAST "1.0");
	if (!html->document)
		return 1;

	html->tree_handler = tree_handler;
	html->tree_handler.ctx = c;
	param.tree_handler = &html->tree_handler;
	hubbub_parser_setopt(html->parser, HUBBUB_PARSER_TREE_HANDLER, &param);

	param.document_node = html->document;
	hubbub_parser_setopt(html->parser, HUBBUB_PARSER_DOCUMENT_NODE, &param);

	return 0;
}


#endif


/**
 * Create a CONTENT_HTML.
 *
 * The content_html_data structure is initialized and the HTML parser is
 * created.
 */

bool html_create(struct content *c, const char *params[])
{
	unsigned int i;
	struct content_html_data *html = &c->data.html;
	union content_msg_data msg_data;

	html->parser = 0;
#ifdef WITH_HUBBUB
	html->document = 0;
	html->has_ns = false;
	memset(html->ns, 0, sizeof(html->ns));
#endif
	html->encoding_handler = 0;
	html->encoding = 0;
	html->getenc = true;
	html->base_url = c->url;
	html->base_target = NULL;
	html->layout = 0;
	html->background_colour = TRANSPARENT;
	html->stylesheet_count = 0;
	html->stylesheet_content = 0;
	html->style = 0;
	html->working_stylesheet = 0;
	html->object_count = 0;
	html->object = 0;
	html->forms = 0;
	html->imagemaps = 0;
	html->bw = 0;
	html->frameset = 0;
	html->iframe = 0;
	html->page = 0;
	html->index = 0;
	html->box = 0;
	html->font_func = &nsfont;

	for (i = 0; params[i]; i += 2) {
		if (strcasecmp(params[i], "charset") == 0) {
			html->encoding = talloc_strdup(c, params[i + 1]);
			if (!html->encoding)
				goto no_memory;
			html->encoding_source = ENCODING_SOURCE_HEADER;
			html->getenc = false;
			break;
		}
	}

#ifndef WITH_HUBBUB
	html->parser = htmlCreatePushParserCtxt(0, 0, "", 0, 0,
			XML_CHAR_ENCODING_NONE);
	if (!html->parser)
		goto no_memory;
#else

	/* Set up the parser, libxml2 document, and that */
	if (html_create_parser(c) != 0)
		goto no_memory;

#endif

#ifndef WITH_HUBBUB
	if (html->encoding) {
		/* an encoding was specified in the Content-Type header */
		if (!html_set_parser_encoding(c, html->encoding))
			return false;
	}
#endif

	return true;

no_memory:
	msg_data.error = messages_get("NoMemory");
	content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
	return false;
}


/**
 * Process data for CONTENT_HTML.
 *
 * The data is parsed in chunks of size CHUNK, multitasking in between.
 */

bool html_process_data(struct content *c, char *data, unsigned int size)
{
	unsigned long x;

#ifndef WITH_HUBBUB
	if (c->data.html.getenc) {
		/* No encoding was specified in the Content-Type header.
		 * Attempt to detect if the encoding is not 8-bit. If the
		 * encoding is 8-bit, leave the parser unchanged, so that it
		 * searches for a <meta http-equiv="content-type"
		 * content="text/html; charset=...">. */
		const char *encoding;
		encoding = html_detect_encoding((const char **) &data, &size);
		if (encoding) {
			if (!html_set_parser_encoding(c, encoding))
				return false;
			c->data.html.encoding = talloc_strdup(c, encoding);
			if (!c->data.html.encoding)
				return false;
			c->data.html.encoding_source =
					ENCODING_SOURCE_DETECTED;
		}
		c->data.html.getenc = false;

		/* The data we received may have solely consisted of a BOM.
		 * If so, it will have been stripped by html_detect_encoding.
		 * Therefore, we'll have nothing to do in that case. */
		if (size == 0)
			return true;
	}
#endif

#ifdef WITH_HUBBUB
	hubbub_error err;
#endif

	for (x = 0; x + CHUNK <= size; x += CHUNK) {
#ifdef WITH_HUBBUB
		err = hubbub_parser_parse_chunk(
				c->data.html.parser,
				(uint8_t *) data + x, CHUNK);
		if (err == HUBBUB_ENCODINGCHANGE) {
			goto encoding_change;
		}
#else
		htmlParseChunk(c->data.html.parser, data + x, CHUNK, 0);
#endif
		gui_multitask();
	}

#ifdef WITH_HUBBUB
	err = hubbub_parser_parse_chunk(
			c->data.html.parser,
			(uint8_t *) data + x, (size - x));
	if (err == HUBBUB_ENCODINGCHANGE) {
		goto encoding_change;
	}
#else
	htmlParseChunk(c->data.html.parser, data + x, (int) (size - x), 0);
#endif

#ifndef WITH_HUBBUB
	if (!c->data.html.encoding && c->data.html.parser->input->encoding) {
		/* The encoding was not in headers or detected,
		 * and the parser found a <meta http-equiv="content-type"
		 * content="text/html; charset=...">. */

		/* However, if that encoding is non-ASCII-compatible,
		 * ignore it, as it can't possibly be correct */
		if (strncasecmp((const char *) c->data.html.parser->
					input->encoding,
				"UTF-16", 6) == 0 || /* UTF-16(LE|BE)? */
			strncasecmp((const char *) c->data.html.parser->
					input->encoding,
				"UTF-32", 6) == 0) { /* UTF-32(LE|BE)? */
			c->data.html.encoding = talloc_strdup(c, "ISO-8859-1");
			c->data.html.encoding_source =
					ENCODING_SOURCE_DETECTED;
		} else {
			c->data.html.encoding = talloc_strdup(c,
				(const char *) c->data.html.parser->
						input->encoding);
			c->data.html.encoding_source = ENCODING_SOURCE_META;
		}

		if (!c->data.html.encoding) {
			union content_msg_data msg_data;

			msg_data.error = messages_get("NoMemory");
			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
			return false;
		}

		/* have the encoding; don't attempt to detect it */
		c->data.html.getenc = false;

		/* now, we must reset the parser such that it reparses
		 * using the correct charset, and then reparse any document
		 * source we've got. we achieve this by recreating the
		 * parser in its entirety as this is simpler than resetting
		 * the existing one and ensuring it's still set up correctly.
		 */
		if (c->data.html.parser->myDoc)
			xmlFreeDoc(c->data.html.parser->myDoc);
		htmlFreeParserCtxt(c->data.html.parser);

		c->data.html.parser = htmlCreatePushParserCtxt(0, 0, "", 0,
				0, XML_CHAR_ENCODING_NONE);
		if (!c->data.html.parser) {
			union content_msg_data msg_data;

			msg_data.error = messages_get("NoMemory");
			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
			return false;
		}
		if (!html_set_parser_encoding(c, c->data.html.encoding))
			return false;

		/* and reparse received document source - the recursion
		 * is safe as we've just set c->data.html.encoding so
		 * we'll never get back in here. */
		if (!html_process_data(c, c->source_data, c->source_size))
			return false;
	}
#endif

	return true;

#ifdef WITH_HUBBUB

encoding_change:

	/* Free up hubbub, libxml2 etc */
	hubbub_parser_destroy(c->data.html.parser);
	if (c->data.html.document) {
		xmlFreeDoc(c->data.html.document);
	}
	c->data.html.has_ns = false;
	memset(c->data.html.ns, 0, sizeof(c->data.html.ns));

	/* Set up the parser, libxml2 document, and that */
	if (html_create_parser(c) != 0) {
		union content_msg_data msg_data;

		msg_data.error = messages_get("NoMemory");
		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
		return false;
	}

	/* Recurse to reprocess all that data.  This is safe because
	 * the encoding is now specified at parser-start which means
	 * it cannot be changed again. */
	return html_process_data(c, c->source_data, c->source_size);

#endif

}


#ifndef WITH_HUBBUB

/**
 * Set the HTML parser character encoding.
 *
 * \param  c         content of type CONTENT_HTML
 * \param  encoding  name of encoding
 * \return  true on success, false on error and error reported
 */
bool html_set_parser_encoding(struct content *c, const char *encoding)
{
	struct content_html_data *html = &c->data.html;
	xmlError *error;
	char error_message[500];
	union content_msg_data msg_data;

	html->encoding_handler = xmlFindCharEncodingHandler(encoding);
	if (!html->encoding_handler) {
		/* either out of memory, or no handler available */
		/* assume no handler available, which is not a fatal error */
		LOG(("no encoding handler for \"%s\"", encoding));
		/* \todo  warn user and ask them to install iconv? */
		return true;
	}

	xmlCtxtResetLastError(html->parser);
	if (xmlSwitchToEncoding(html->parser, html->encoding_handler)) {
		error = xmlCtxtGetLastError(html->parser);
		snprintf(error_message, sizeof error_message,
				"%s xmlSwitchToEncoding(): %s",
				messages_get("MiscError"),
				error ? error->message : "failed");
		msg_data.error = error_message;
		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
		return false;
	}

	/* Dirty hack to get around libxml oddness:
	 * 1) When creating a push parser context, the input flow's encoding
	 *    string is not set (whether an encoding is specified or not)
	 * 2) When switching encoding (as above), the input flow's encoding
	 *    string is never changed
	 * 3) When handling a meta charset, the input flow's encoding string
	 *    is checked to determine if an encoding has already been set.
	 *    If it has been set, then the meta charset is ignored.
	 *
	 * The upshot of this is that, if we don't explicitly set the input
	 * flow's encoding string here, any meta charset in the document
	 * will override our setting, which is incorrect behaviour.
	 *
	 * Ideally, this would be fixed in libxml, but that requires rather
	 * more knowledge than I currently have of what libxml is doing.
	 */
	if (!html->parser->input->encoding)
		html->parser->input->encoding =
				xmlStrdup((const xmlChar *) encoding);

	/* Ensure noone else attempts to reset the encoding */
	html->getenc = false;

	return true;
}


/**
 * Attempt to detect the encoding of some HTML data.
 *
 * \param  data  Pointer to HTML source data
 * \param  size  Pointer to length of data
 * \return  a constant string giving the encoding, or 0 if the encoding
 *          appears to be some 8-bit encoding
 *
 * If a BOM is encountered, *data and *size will be modified to skip over it
 */

const char *html_detect_encoding(const char **data, unsigned int *size)
{
	const unsigned char *d = (const unsigned char *) *data;

	/* this detection assumes that the first two characters are <= 0xff */
	if (*size < 4)
		return 0;

	if (d[0] == 0x00 && d[1] == 0x00 &&
			d[2] == 0xfe && d[3] == 0xff) { /* BOM 00 00 fe ff */
		*data += 4;
		*size -= 4;
		return "UTF-32BE";
	} else if (d[0] == 0xff && d[1] == 0xfe &&
			d[2] == 0x00 && d[3] == 0x00) { /* BOM ff fe 00 00 */
		*data += 4;
		*size -= 4;
		return "UTF-32LE";
	}
	else if (d[0] == 0x00 && d[1] != 0x00 &&
			d[2] == 0x00 && d[3] != 0x00)   /* 00 xx 00 xx */
		return "UTF-16BE";
	else if (d[0] != 0x00 && d[1] == 0x00 &&
			d[2] != 0x00 && d[3] == 0x00)   /* xx 00 xx 00 */
		return "UTF-16LE";
	else if (d[0] == 0x00 && d[1] == 0x00 &&
			d[2] == 0x00 && d[3] != 0x00)   /* 00 00 00 xx */
		return "ISO-10646-UCS-4";
	else if (d[0] != 0x00 && d[1] == 0x00 &&
			d[2] == 0x00 && d[3] == 0x00)   /* xx 00 00 00 */
		return "ISO-10646-UCS-4";
	else if (d[0] == 0xfe && d[1] == 0xff) {        /* BOM fe ff */
		*data += 2;
		*size -= 2;
		return "UTF-16BE";
	} else if (d[0] == 0xff && d[1] == 0xfe) {      /* BOM ff fe */
		*data += 2;
		*size -= 2;
		return "UTF-16LE";
	} else if (d[0] == 0xef && d[1] == 0xbb &&
			d[2] == 0xbf) {                 /* BOM ef bb bf */
		*data += 3;
		*size -= 3;
		return "UTF-8";
	}

	return 0;
}


#endif


/**
 * Convert a CONTENT_HTML for display.
 *
 * The following steps are carried out in order:
 *
 *  - parsing to an XML tree is completed
 *  - stylesheets are fetched
 *  - the XML tree is converted to a box tree and object fetches are started
 *  - the box tree is laid out
 *
 * On exit, the content status will be either CONTENT_STATUS_DONE if the
 * document is completely loaded or CONTENT_STATUS_READY if objects are still
 * being fetched.
 */

bool html_convert(struct content *c, int width, int height)
{
	xmlDoc *document;
	xmlNode *html, *head;
	union content_msg_data msg_data;
	unsigned int time_before, time_taken;

	/* finish parsing */
	if (c->source_size == 0)
#ifndef WITH_HUBBUB
		htmlParseChunk(c->data.html.parser, empty_document,
				sizeof empty_document, 0);
#else
		hubbub_parser_parse_chunk(c->data.html.parser,
				(uint8_t *) empty_document,
				sizeof empty_document);
#endif

#ifndef WITH_HUBBUB
	htmlParseChunk(c->data.html.parser, "", 0, 1);
	document = c->data.html.parser->myDoc;
	/*xmlDebugDumpDocument(stderr, c->data.html.parser->myDoc);*/
	htmlFreeParserCtxt(c->data.html.parser);
	c->data.html.parser = 0;
#else
	hubbub_parser_completed(c->data.html.parser);
	hubbub_parser_destroy(c->data.html.parser);
	c->data.html.parser = 0;
	document = c->data.html.document;
	/*xmlDebugDumpDocument(stderr, document);*/
#endif
	if (!document) {
		LOG(("Parsing failed"));
		msg_data.error = messages_get("ParsingFail");
		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
		return false;
	}

	/* locate html and head elements */
	html = xmlDocGetRootElement(document);
	if (html == 0 || strcmp((const char *) html->name, "html") != 0) {
		LOG(("html element not found"));
		xmlFreeDoc(document);
		msg_data.error = messages_get("ParsingFail");
		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
		return false;
	}
	for (head = html->children;
			head != 0 && head->type != XML_ELEMENT_NODE;
			head = head->next)
		;
	if (head && strcmp((const char *) head->name, "head") != 0) {
		head = 0;
		LOG(("head element not found"));
	}

	if (head) {
		if (!html_head(c, head)) {
			msg_data.error = messages_get("NoMemory");
			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
			return false;
		}

		/* handle meta refresh */
		if (!html_meta_refresh(c, head))
			return false;
	}

	/* get stylesheets */
	if (!html_find_stylesheets(c, html, head))
		return false;

	/* convert xml tree to box tree */
	LOG(("XML to box"));
	content_set_status(c, messages_get("Processing"));
	content_broadcast(c, CONTENT_MSG_STATUS, msg_data);
	if (!xml_to_box(html, c)) {
		msg_data.error = messages_get("NoMemory");
		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
		return false;
	}
	/*box_dump(c->data.html.layout->children, 0);*/
	/*if (c->data.html.frameset)
		html_dump_frameset(c->data.html.frameset, 0);*/

	/* extract image maps - can't do this sensibly in xml_to_box */
	if (!imagemap_extract(html, c)) {
		LOG(("imagemap extraction failed"));
		msg_data.error = messages_get("NoMemory");
		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
		return false;
	}
	/*imagemap_dump(c);*/

	/* XML tree not required past this point */
	xmlFreeDoc(document);

	/* layout the box tree */
	html_set_status(c, messages_get("Formatting"));
	content_broadcast(c, CONTENT_MSG_STATUS, msg_data);
	LOG(("Layout document"));
	time_before = wallclock();
	html_reformat(c, width, height);
	time_taken = wallclock() - time_before;
	LOG(("Layout took %dcs", time_taken));
	c->reformat_time = wallclock() +
		((time_taken < option_min_reflow_period ?
		option_min_reflow_period : time_taken * 1.25));
	LOG(("Scheduling relayout no sooner than %dcs",
		c->reformat_time - wallclock()));
	/*box_dump(c->data.html.layout->children, 0);*/

	if (c->active == 0)
		c->status = CONTENT_STATUS_DONE;
	else
		c->status = CONTENT_STATUS_READY;
	html_set_status(c, "");

	return true;
}


/**
 * Process elements in <head>.
 *
 * \param  c     content structure
 * \param  head  xml node of head element
 * \return  true on success, false on memory exhaustion
 *
 * The title and base href are extracted if present.
 */

bool html_head(struct content *c, xmlNode *head)
{
	xmlNode *node;
	xmlChar *s;

	c->title = 0;

	for (node = head->children; node != 0; node = node->next) {
		if (node->type != XML_ELEMENT_NODE)
			continue;

		LOG(("Node: %s", node->name));
		if (!c->title && strcmp((const char *) node->name,
				"title") == 0) {
			xmlChar *title = xmlNodeGetContent(node);
			char *title2;
			if (!title)
				return false;
			title2 = squash_whitespace((const char *) title);
			xmlFree(title);
			if (!title2)
				return false;
			c->title = talloc_strdup(c, title2);
			free(title2);
			if (!c->title)
				return false;

		} else if (strcmp((const char *) node->name, "base") == 0) {
			char *href = (char *) xmlGetProp(node,
					(const xmlChar *) "href");
			if (href) {
				char *url;
				url_func_result res;
				res = url_normalize(href, &url);
				if (res == URL_FUNC_OK) {
					c->data.html.base_url =
							talloc_strdup(c, url);
					free(url);
				}
				xmlFree(href);
			}
			/* don't use the central values to ease freeing later on */
			if ((s = xmlGetProp(node, (const xmlChar *) "target"))) {
				if ((!strcasecmp((const char *) s, "_blank")) ||
						(!strcasecmp((const char *) s,
								"_top")) ||
						(!strcasecmp((const char *) s,
								"_parent")) ||
						(!strcasecmp((const char *) s,
								"_self")) ||
						('a' <= s[0] && s[0] <= 'z') ||
						('A' <= s[0] && s[0] <= 'Z')) {  /* [6.16] */
					c->data.html.base_target =
						talloc_strdup(c,
							(const char *) s);
					if (!c->data.html.base_target) {
						xmlFree(s);
						return false;
					}
				}
				xmlFree(s);
			}
		}
	}
	return true;
}


/**
 * Search for meta refresh
 *
 * http://wp.netscape.com/assist/net_sites/pushpull.html
 *
 * \param c content structure
 * \param head xml node of head element
 * \return true on success, false otherwise (error reported)
 */

bool html_meta_refresh(struct content *c, xmlNode *head)
{
	xmlNode *n;
	xmlChar *equiv, *content;
	union content_msg_data msg_data;
	char *url, *end, *refresh = NULL, quote = 0;
	url_func_result res;

	for (n = head == 0 ? 0 : head->children; n; n = n->next) {
		if (n->type != XML_ELEMENT_NODE)
			continue;

		/* Recurse into noscript elements */
		if (strcmp((const char *) n->name, "noscript") == 0) {
			if (!html_meta_refresh(c, n)) {
				/* Some error occurred */
				return false;
			} else if (c->refresh) {
				/* Meta refresh found - stop */
				return true;
			}
		}

		if (strcmp((const char *) n->name, "meta")) {
			continue;
		}

		equiv = xmlGetProp(n, (const xmlChar *) "http-equiv");
		if (!equiv)
			continue;

		if (strcasecmp((const char *) equiv, "refresh")) {
			xmlFree(equiv);
			continue;
		}

		xmlFree(equiv);

		content = xmlGetProp(n, (const xmlChar *) "content");
		if (!content)
			continue;

		end = (char *) content + strlen((const char *) content);

		/* content  := *LWS 1*DIGIT *LWS [';' *LWS *1url *LWS]
		 * url      := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq)
		 * url-nq   := *urlchar
		 * url-sq   := "'" *(urlchar | '"') "'"
		 * url-dq   := '"' *(urlchar | "'") '"'
		 * urlchar  := [#x9#x21#x23-#x26#x28-#x7E] | nonascii
		 * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
		 */

		/* *LWS 1*DIGIT */
		msg_data.delay = (int)strtol((char *) content, &url, 10);
		/* a very small delay and self-referencing URL can cause a loop
		 * that grinds machines to a halt. To prevent this we set a
		 * minimum refresh delay of 1s. */
		if (msg_data.delay < 1)
			msg_data.delay = 1;

		/* *LWS */
		while (url < end && isspace(*url)) {
			url++;
		}

		/* ';' */
		if (url < end && *url == ';')
			url++;

		/* *LWS */
		while (url < end && isspace(*url)) {
			url++;
		}

		if (url == end) {
			/* Just delay specified, so refresh current page */
			xmlFree(content);

			c->refresh = talloc_strdup(c, c->url);
			if (!c->refresh) {
				msg_data.error = messages_get("NoMemory");
				content_broadcast(c,
					CONTENT_MSG_ERROR, msg_data);
				return false;
			}

			content_broadcast(c, CONTENT_MSG_REFRESH, msg_data);
			break;
		}

		/* "url" */
		if (url <= end - 3) {
			if (strncasecmp(url, "url", 3) == 0) {
				url += 3;
			} else {
				/* Unexpected input, ignore this header */
				continue;
			}
		} else {
			/* Insufficient input, ignore this header */
			continue;
		}

		/* *LWS */
		while (url < end && isspace(*url)) {
			url++;
		}

		/* '=' */
		if (url < end) {
			if (*url == '=') {
				url++;
			} else {
				/* Unexpected input, ignore this header */
				continue;
			}
		} else {
			/* Insufficient input, ignore this header */
			continue;
		}

		/* *LWS */
		while (url < end && isspace(*url)) {
			url++;
		}

		/* '"' or "'" */
		if (url < end && (*url == '"' || *url == '\'')) {
			quote = *url;
			url++;
		}

		/* Start of URL */
		refresh = url;

		if (quote != 0) {
			/* url-sq | url-dq */
			while (url < end && *url != quote)
				url++;
		} else {
			/* url-nq */
			while (url < end && !isspace(*url))
				url++;
		}

		/* '"' or "'" or *LWS (we don't care) */
		if (url < end)
			*url = '\0';

		res = url_join(refresh, c->data.html.base_url, &refresh);

		xmlFree(content);

		if (res == URL_FUNC_NOMEM) {
			msg_data.error = messages_get("NoMemory");
			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
			return false;
		} else if (res == URL_FUNC_FAILED) {
			/* This isn't fatal so carry on looking */
			continue;
		}

		c->refresh = talloc_strdup(c, refresh);

		free(refresh);

		if (!c->refresh) {
			msg_data.error = messages_get("NoMemory");
			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
			return false;
		}

		content_broadcast(c, CONTENT_MSG_REFRESH, msg_data);
	}

	return true;
}


/**
 * Process inline stylesheets and fetch linked stylesheets.
 *
 * \param  c     content structure
 * \param  head  xml node of html element
 * \param  head  xml node of head element, or 0 if none
 * \return  true on success, false if an error occurred
 */

bool html_find_stylesheets(struct content *c, xmlNode *html,
		xmlNode *head)
{
	xmlNode *node;
	char *rel, *type, *media, *href, *url;
	unsigned int i = STYLESHEET_START;
	unsigned int last_active = 0;
	union content_msg_data msg_data;
	url_func_result res;
	struct content **stylesheet_content;

	/* stylesheet 0 is the base style sheet,
	 * stylesheet 1 is the adblocking stylesheet,
	 * stylesheet 2 is any <style> elements */
	c->data.html.stylesheet_content = talloc_array(c, struct content *,
			STYLESHEET_START);
	if (!c->data.html.stylesheet_content)
		goto no_memory;
	c->data.html.stylesheet_content[STYLESHEET_ADBLOCK] = 0;
	c->data.html.stylesheet_content[STYLESHEET_STYLE] = 0;
	c->data.html.stylesheet_count = STYLESHEET_START;

	c->active = 0;

	c->data.html.stylesheet_content[STYLESHEET_BASE] = fetchcache(
			default_stylesheet_url,
			html_convert_css_callback, (intptr_t) c,
			STYLESHEET_BASE, c->width, c->height,
			true, 0, 0, false, false);
	if (!c->data.html.stylesheet_content[STYLESHEET_BASE])
		goto no_memory;
	c->active++;
	fetchcache_go(c->data.html.stylesheet_content[STYLESHEET_BASE],
			c->url, html_convert_css_callback, (intptr_t) c,
			STYLESHEET_BASE, c->width, c->height,
			0, 0, false, 0);

	if (option_block_ads) {
		c->data.html.stylesheet_content[STYLESHEET_ADBLOCK] =
				fetchcache(adblock_stylesheet_url,
				html_convert_css_callback, (intptr_t) c,
				STYLESHEET_ADBLOCK, c->width,
				c->height, true, 0, 0, false, false);
		if (!c->data.html.stylesheet_content[STYLESHEET_ADBLOCK])
			goto no_memory;
		c->active++;
		fetchcache_go(c->data.html.
				stylesheet_content[STYLESHEET_ADBLOCK],
				c->url, html_convert_css_callback,
				(intptr_t) c, STYLESHEET_ADBLOCK, c->width,
				c->height, 0, 0, false, 0);
	}

	for (node = head == 0 ? 0 : head->children; node; node = node->next) {
		if (node->type != XML_ELEMENT_NODE)
			continue;

		if (strcmp((const char *) node->name, "link") != 0)
			continue;

		/* rel=<space separated list, including 'stylesheet'> */
		if ((rel = (char *) xmlGetProp(node, (const xmlChar *) "rel")) == NULL)
			continue;
		if (strcasestr(rel, "stylesheet") == 0) {
			xmlFree(rel);
			continue;
		} else if (strcasestr(rel, "alternate")) {
			/* Ignore alternate stylesheets */
			xmlFree(rel);
			continue;
		}
		xmlFree(rel);

		/* type='text/css' or not present */
		if ((type = (char *) xmlGetProp(node, (const xmlChar *) "type")) != NULL) {
			if (strcmp(type, "text/css") != 0) {
				xmlFree(type);
				continue;
			}
			xmlFree(type);
		}

		/* media contains 'screen' or 'all' or not present */
		if ((media = (char *) xmlGetProp(node, (const xmlChar *) "media")) != NULL) {
			if (strcasestr(media, "screen") == 0 &&
					strcasestr(media, "all") == 0) {
				xmlFree(media);
				continue;
			}
			xmlFree(media);
		}

		/* href='...' */
		if ((href = (char *) xmlGetProp(node, (const xmlChar *) "href")) == NULL)
			continue;

		/* TODO: only the first preferred stylesheets (ie. those with a
		 * title attribute) should be loaded (see HTML4 14.3) */

		res = url_join(href, c->data.html.base_url, &url);
		xmlFree(href);
		if (res != URL_FUNC_OK)
			continue;

		LOG(("linked stylesheet %i '%s'", i, url));

		/* start fetch */
		stylesheet_content = talloc_realloc(c,
				c->data.html.stylesheet_content,
				struct content *, i + 1);
		if (!stylesheet_content)
			goto no_memory;
		c->data.html.stylesheet_content = stylesheet_content;
		c->data.html.stylesheet_content[i] = fetchcache(url,
				html_convert_css_callback,
				(intptr_t) c, i, c->width, c->height,
				true, 0, 0, false, false);
		if (!c->data.html.stylesheet_content[i])
			goto no_memory;
		c->active++;
		fetchcache_go(c->data.html.stylesheet_content[i],
				c->url,
				html_convert_css_callback,
				(intptr_t) c, i, c->width, c->height,
				0, 0, false, c->url);
		free(url);
		i++;
	}

	c->data.html.stylesheet_count = i;

	if (!html_find_inline_stylesheets(c, html))
		return false;

	if (c->data.html.stylesheet_content[STYLESHEET_STYLE] != 0) {
		if (css_convert(c->data.html.stylesheet_content[STYLESHEET_STYLE], c->width,
				c->height)) {
			if (!content_add_user(c->data.html.stylesheet_content[STYLESHEET_STYLE],
					html_convert_css_callback,
					(intptr_t) c, STYLESHEET_STYLE)) {
				/* no memory */
				c->data.html.stylesheet_content[STYLESHEET_STYLE] = 0;
				goto no_memory;
			}
		} else {
			/* conversion failed */
			c->data.html.stylesheet_content[STYLESHEET_STYLE] = 0;
		}
	}

	/* complete the fetches */
	while (c->active != 0) {
		if (c->active != last_active) {
			html_set_status(c, "");
			content_broadcast(c, CONTENT_MSG_STATUS, msg_data);
			last_active = c->active;
		}
		fetch_poll();
		gui_multitask();
	}

	/* check that the base stylesheet loaded; layout fails without it */
	if (!c->data.html.stylesheet_content[STYLESHEET_BASE]) {
		msg_data.error = "Base stylesheet failed to load";
		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
		return false;
	}

	assert(c->data.html.stylesheet_content[STYLESHEET_BASE]);
	css_set_origin(c->data.html.stylesheet_content[STYLESHEET_BASE],
			CSS_ORIGIN_UA);

	/* any of our other stylesheet pointers could be NULL at this point if
	 * the CSS file(s) failed to load/fetch */
	if (c->data.html.stylesheet_content[STYLESHEET_ADBLOCK])
		css_set_origin(c->data.html.stylesheet_content[
				STYLESHEET_ADBLOCK], CSS_ORIGIN_UA);
	if (c->data.html.stylesheet_content[STYLESHEET_STYLE])
		css_set_origin(c->data.html.stylesheet_content[
				STYLESHEET_STYLE], CSS_ORIGIN_AUTHOR);
	for (i = STYLESHEET_START; i != c->data.html.stylesheet_count; i++)
		if (c->data.html.stylesheet_content[i])
			css_set_origin(c->data.html.stylesheet_content[i],
					CSS_ORIGIN_AUTHOR);

	c->data.html.working_stylesheet = css_make_working_stylesheet(
			c->data.html.stylesheet_content,
			c->data.html.stylesheet_count);
	if (!c->data.html.working_stylesheet)
		goto no_memory;

	return true;

no_memory:
	msg_data.error = messages_get("NoMemory");
	content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
	return false;
}


/**
 * Process inline stylesheets in the document.
 *
 * \param  c     content structure
 * \param  head  xml node of html element
 * \return  true on success, false if an error occurred
 */

bool html_find_inline_stylesheets(struct content *c, xmlNode *html)
{
	xmlNode *node = html;

	/* depth-first search the tree for style elements */
	while (node) {
		if (node->children) {  /* 1. children */
			node = node->children;
		} else if (node->next) {  /* 2. siblings */
			node = node->next;
		} else {  /* 3. ancestor siblings */
			while (node && !node->next)
				node = node->parent;
			if (!node)
				break;
			node = node->next;
		}

		assert(node);

		if (node->type != XML_ELEMENT_NODE)
			continue;
		if (strcmp((const char *) node->name, "style") != 0)
			continue;

		if (!html_process_style_element(c, node))
			return false;
	}

	return true;
}


/**
 * Process an inline stylesheet in the document.
 *
 * \param  c      content structure
 * \param  style  xml node of style element
 * \return  true on success, false if an error occurred
 */

bool html_process_style_element(struct content *c, xmlNode *style)
{
	xmlNode *child;
	char *type, *media, *data;
	union content_msg_data msg_data;

	/* type='text/css', or not present (invalid but common) */
	if ((type = (char *) xmlGetProp(style, (const xmlChar *) "type"))) {
		if (strcmp(type, "text/css") != 0) {
			xmlFree(type);
			return true;
		}
		xmlFree(type);
	}

	/* media contains 'screen' or 'all' or not present */
	if ((media = (char *) xmlGetProp(style, (const xmlChar *) "media"))) {
		if (strcasestr(media, "screen") == 0 &&
				strcasestr(media, "all") == 0) {
			xmlFree(media);
			return true;
		}
		xmlFree(media);
	}

	/* create stylesheet */
	if (c->data.html.stylesheet_content[STYLESHEET_STYLE] == 0) {
		const char *params[] = { 0 };
		c->data.html.stylesheet_content[STYLESHEET_STYLE] =
				content_create(c->data.html.base_url);
		if (!c->data.html.stylesheet_content[STYLESHEET_STYLE])
			goto no_memory;
		if (!content_set_type(c->data.html.
				stylesheet_content[STYLESHEET_STYLE],
				CONTENT_CSS, "text/css", params))
			/** \todo  not necessarily caused by
			 *  memory exhaustion */
			goto no_memory;
	}

	/* can't just use xmlNodeGetContent(style), because that won't
	 * give the content of comments which may be used to 'hide'
	 * the content */
	for (child = style->children; child != 0; child = child->next) {
		data = (char *) xmlNodeGetContent(child);
		if (!content_process_data(c->data.html.
				stylesheet_content[STYLESHEET_STYLE],
				data, strlen(data))) {
			xmlFree(data);
			/** \todo  not necessarily caused by
			 *  memory exhaustion */
			goto no_memory;
		}
		xmlFree(data);
	}

	return true;

no_memory:
	msg_data.error = messages_get("NoMemory");
	content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
	return false;
}


/**
 * Callback for fetchcache() for linked stylesheets.
 */

void html_convert_css_callback(content_msg msg, struct content *css,
		intptr_t p1, intptr_t p2, union content_msg_data data)
{
	struct content *c = (struct content *) p1;
	unsigned int i = p2;

	switch (msg) {
		case CONTENT_MSG_LOADING:
			/* check that the stylesheet is really CSS */
			if (css->type != CONTENT_CSS) {
				c->data.html.stylesheet_content[i] = 0;
				c->active--;
				LOG(("%s is not CSS", css->url));
				content_add_error(c, "NotCSS", 0);
				html_set_status(c, messages_get("NotCSS"));
				content_broadcast(c, CONTENT_MSG_STATUS, data);
				content_remove_user(css,
						html_convert_css_callback,
						(intptr_t) c, i);
				if (!css->user_list->next) {
					/* we were the only user and we
					 * don't want this content, so
					 * stop it fetching and mark it
					 * as having an error so it gets
					 * removed from the cache next time
					 * content_clean() gets called */
					fetch_abort(css->fetch);
					css->fetch = 0;
					css->status = CONTENT_STATUS_ERROR;
				}
			}
			break;

		case CONTENT_MSG_READY:
			break;

		case CONTENT_MSG_DONE:
			LOG(("got stylesheet '%s'", css->url));
			c->active--;
			break;

		case CONTENT_MSG_LAUNCH:
			/* Fall through */
		case CONTENT_MSG_ERROR:
			LOG(("stylesheet %s failed: %s", css->url, data.error));
			/* The stylesheet we were fetching may have been
			 * redirected, in that case, the object pointers
			 * will differ, so ensure that the object that's
			 * in error is still in use by us before invalidating
			 * the pointer */
			if (c->data.html.stylesheet_content[i] == css) {
				c->data.html.stylesheet_content[i] = 0;
				c->active--;
				content_add_error(c, "?", 0);
			}
			break;

		case CONTENT_MSG_STATUS:
			html_set_status(c, css->status_message);
			content_broadcast(c, CONTENT_MSG_STATUS, data);
			break;

		case CONTENT_MSG_NEWPTR:
			c->data.html.stylesheet_content[i] = css;
			break;

#ifdef WITH_AUTH
		case CONTENT_MSG_AUTH:
			c->data.html.stylesheet_content[i] = 0;
			c->active--;
			content_add_error(c, "?", 0);
			break;
#endif

#ifdef WITH_SSL
		case CONTENT_MSG_SSL:
			c->data.html.stylesheet_content[i] = 0;
			c->active--;
			content_add_error(c, "?", 0);
			break;
#endif

		default:
			assert(0);
	}
}


/**
 * Start a fetch for an object required by a page.
 *
 * \param  c                 content of type CONTENT_HTML
 * \param  url               URL of object to fetch (copied)
 * \param  box               box that will contain the object
 * \param  permitted_types   array of types, terminated by CONTENT_UNKNOWN,
 *	      or 0 if all types except OTHER and UNKNOWN acceptable
 * \param  available_width   estimate of width of object
 * \param  available_height  estimate of height of object
 * \param  background        this is a background image
 * \param  frame             name of frame, or 0 if not a frame (copied)
 * \return  true on success, false on memory exhaustion
 */

bool html_fetch_object(struct content *c, char *url, struct box *box,
		const content_type *permitted_types,
		int available_width, int available_height,
		bool background)
{
	unsigned int i = c->data.html.object_count;
	struct content_html_object *object;
	struct content *c_fetch;

	/* initialise fetch */
	c_fetch = fetchcache(url, html_object_callback,
			(intptr_t) c, i, available_width, available_height,
			true, 0, 0, false, false);
	if (!c_fetch)
		return false;

	/* add to object list */
	object = talloc_realloc(c, c->data.html.object,
			struct content_html_object, i + 1);
	if (!object) {
		content_remove_user(c_fetch, html_object_callback,
				(intptr_t) c, i);
		return false;
	}
	c->data.html.object = object;
	c->data.html.object[i].box = box;
	c->data.html.object[i].permitted_types = permitted_types;
       	c->data.html.object[i].background = background;
	c->data.html.object[i].content = c_fetch;
	c->data.html.object_count++;
	c->active++;

	/* start fetch */
	fetchcache_go(c_fetch, c->url,
			html_object_callback, (intptr_t) c, i,
			available_width, available_height,
			0, 0, false, c->url);

	return true;
}


/**
 * Start a fetch for an object required by a page, replacing an existing object.
 *
 * \param  c               content of type CONTENT_HTML
 * \param  i               index of object to replace in c->data.html.object
 * \param  url             URL of object to fetch (copied)
 * \param  post_urlenc     url encoded post data, or 0 if none
 * \param  post_multipart  multipart post data, or 0 if none
 * \return  true on success, false on memory exhaustion
 */

bool html_replace_object(struct content *c, unsigned int i, char *url,
		char *post_urlenc,
		struct form_successful_control *post_multipart)
{
	struct content *c_fetch;
	struct content *page;

	assert(c->type == CONTENT_HTML);

	if (c->data.html.object[i].content) {
		/* remove existing object */
		if (c->data.html.object[i].content->status !=
				CONTENT_STATUS_DONE)
			c->active--;
		content_remove_user(c->data.html.object[i].content,
				html_object_callback, (intptr_t) c, i);
		c->data.html.object[i].content = 0;
		c->data.html.object[i].box->object = 0;
	}

	/* initialise fetch */
	c_fetch = fetchcache(url, html_object_callback,
			(intptr_t) c, i,
			c->data.html.object[i].box->width,
			c->data.html.object[i].box->height,
			false, post_urlenc, post_multipart, false, false);
	if (!c_fetch)
		return false;

	c->data.html.object[i].content = c_fetch;

	for (page = c; page; page = page->data.html.page) {
		assert(page->type == CONTENT_HTML);
		page->active++;
		page->status = CONTENT_STATUS_READY;
	}

	/* start fetch */
	fetchcache_go(c_fetch, c->url,
			html_object_callback, (intptr_t) c, i,
			c->data.html.object[i].box->width,
			c->data.html.object[i].box->height,
			post_urlenc, post_multipart, false, c->url);

	return true;
}


/**
 * Callback for fetchcache() for objects.
 */

void html_object_callback(content_msg msg, struct content *object,
		intptr_t p1, intptr_t p2, union content_msg_data data)
{
	struct content *c = (struct content *) p1;
	unsigned int i = p2;
	int x, y;
	struct box *box = c->data.html.object[i].box;

	switch (msg) {
		case CONTENT_MSG_LOADING:
			/* check if the type is acceptable for this object */
			if (html_object_type_permitted(object->type,
				c->data.html.object[i].permitted_types)) {
				if (c->data.html.bw)
					content_open(object,
							c->data.html.bw, c,
							i, box,
							box->object_params);
				break;
			}

			/* not acceptable */
			c->data.html.object[i].content = 0;
			c->active--;
			content_add_error(c, "?", 0);
			html_set_status(c, messages_get("BadObject"));
			content_broadcast(c, CONTENT_MSG_STATUS, data);
			content_remove_user(object, html_object_callback,
					(intptr_t) c, i);
			if (!object->user_list->next) {
				/* we were the only user and we
				 * don't want this content, so
				 * stop it fetching and mark it
				 * as having an error so it gets
				 * removed from the cache next time
				 * content_clean() gets called */
				fetch_abort(object->fetch);
				object->fetch = 0;
				object->status = CONTENT_STATUS_ERROR;
			}
			html_object_failed(box, c,
					c->data.html.object[i].background);
			break;

		case CONTENT_MSG_READY:
			if (object->type == CONTENT_HTML) {
				html_object_done(box, object,
					c->data.html.object[i].background);
				if (c->status == CONTENT_STATUS_READY ||
						c->status ==
						CONTENT_STATUS_DONE)
					content_reformat(c,
							c->available_width,
							c->height);
			}
			break;

		case CONTENT_MSG_DONE:
			html_object_done(box, object,
					c->data.html.object[i].background);
			c->active--;
			break;

		case CONTENT_MSG_LAUNCH:
			/* Fall through */
		case CONTENT_MSG_ERROR:
			/* The object we were fetching may have been
			 * redirected, in that case, the object pointers
			 * will differ, so ensure that the object that's
			 * in error is still in use by us before invalidating
			 * the pointer */
			if (c->data.html.object[i].content == object) {
				c->data.html.object[i].content = 0;
				c->active--;
				content_add_error(c, "?", 0);
				html_set_status(c, data.error);
				content_broadcast(c, CONTENT_MSG_STATUS,
						data);
				html_object_failed(box, c,
					c->data.html.object[i].background);
			}
			break;

		case CONTENT_MSG_STATUS:
			html_set_status(c, object->status_message);
			/* content_broadcast(c, CONTENT_MSG_STATUS, 0); */
			break;

		case CONTENT_MSG_REFORMAT:
			break;

		case CONTENT_MSG_REDRAW:
			if (!box_visible(box))
				break;
			box_coords(box, &x, &y);
			if (object == data.redraw.object) {
				data.redraw.x = data.redraw.x *
						box->width / object->width;
				data.redraw.y = data.redraw.y *
						box->height / object->height;
				data.redraw.width = data.redraw.width *
						box->width / object->width;
				data.redraw.height = data.redraw.height *
						box->height / object->height;
				data.redraw.object_width = box->width;
				data.redraw.object_height = box->height;
			}
			data.redraw.x += x + box->padding[LEFT];
			data.redraw.y += y + box->padding[TOP];
			data.redraw.object_x += x + box->padding[LEFT];
			data.redraw.object_y += y + box->padding[TOP];
			content_broadcast(c, CONTENT_MSG_REDRAW, data);
			break;

		case CONTENT_MSG_NEWPTR:
			c->data.html.object[i].content = object;
			break;

#ifdef WITH_AUTH
		case CONTENT_MSG_AUTH:
			c->data.html.object[i].content = 0;
			c->active--;
			content_add_error(c, "?", 0);
			break;
#endif

#ifdef WITH_SSL
		case CONTENT_MSG_SSL:
			c->data.html.object[i].content = 0;
			c->active--;
			content_add_error(c, "?", 0);
			break;
#endif

		case CONTENT_MSG_REFRESH:
			if (object->type == CONTENT_HTML)
				/* only for HTML objects */
				schedule(data.delay * 100,
						html_object_refresh, object);
			break;

		default:
			assert(0);
	}

	if (c->status == CONTENT_STATUS_READY && c->active == 0 &&
			(msg == CONTENT_MSG_LOADING ||
			msg == CONTENT_MSG_DONE ||
			msg == CONTENT_MSG_ERROR ||
			msg == CONTENT_MSG_AUTH)) {
		/* all objects have arrived */
		content_reformat(c, c->available_width, c->height);
		html_set_status(c, "");
		content_set_done(c);
	}
	/* If  1) the configuration option to reflow pages while objects are
	 *        fetched is set
	 *     2) an object is newly fetched & converted,
	 *     3) the object's parent HTML is ready for reformat,
	 *     4) the time since the previous reformat is more than the
	 *        configured minimum time between reformats
	 * then reformat the page to display newly fetched objects */
	else if (option_incremental_reflow && msg == CONTENT_MSG_DONE &&
			(c->status == CONTENT_STATUS_READY ||
			 c->status == CONTENT_STATUS_DONE) &&
			(wallclock() > c->reformat_time)) {
		unsigned int time_before = wallclock(), time_taken;
		content_reformat(c, c->available_width, c->height);
		time_taken = wallclock() - time_before;
		c->reformat_time = wallclock() +
			((time_taken < option_min_reflow_period ?
			option_min_reflow_period : time_taken * 1.25));
	}
	if (c->status == CONTENT_STATUS_READY)
		html_set_status(c, "");
}


/**
 * Update a box whose content has completed rendering.
 */

void html_object_done(struct box *box, struct content *object,
		      bool background)
{
	struct box *b;

	if (background) {
		box->background = object;
		return;
	}

	box->object = object;

	/* invalidate parent min, max widths */
	for (b = box; b; b = b->parent)
		b->max_width = UNKNOWN_MAX_WIDTH;

	/* delete any clones of this box */
	while (box->next && box->next->clone) {
		/* box_free_box(box->next); */
		box->next = box->next->next;
	}
}


/**
 * Handle object fetching or loading failure.
 *
 * \param  box         box containing object which failed to load
 * \param  content     document of type CONTENT_HTML
 * \param  background  the object was the background image for the box
 *
 * Any fallback content for the object is made visible.
 */

void html_object_failed(struct box *box, struct content *content,
		bool background)
{
	struct box *b, *ic;

	if (background)
		return;
	if (!box->fallback)
		return;

	/* make fallback boxes into children or siblings, as appropriate */
	if (box->type != BOX_INLINE) {
		/* easy case: fallbacks become children */
		assert(box->type == BOX_BLOCK ||
				box->type == BOX_TABLE_CELL ||
				box->type == BOX_INLINE_BLOCK);
		box->children = box->fallback;
		box->last = box->children;
		while (box->last->next)
			box->last = box->last->next;
		box->fallback = 0;
		box_normalise_block(box, content);
	} else {
		assert(box->parent->type == BOX_INLINE_CONTAINER);
		if (box->fallback->type == BOX_INLINE_CONTAINER &&
				!box->fallback->next) {
			/* the fallback is a single inline container: splice
			 * it into this inline container */
			for (b = box->fallback->children; b; b = b->next)
				b->parent = box->parent;
			box->fallback->last->next = box->next;
			if (!box->next)
				box->parent->last = box->fallback->last;
			box->next = box->fallback->children;
			box->next->prev = box;
			box->fallback = 0;
		} else {
			if (box->next) {
				/* split this inline container into two inline
				 * containers */
				ic = box_create(0, 0, 0, 0, 0, content);
				if (!ic) {
					union content_msg_data msg_data;

					msg_data.error =
						messages_get("NoMemory");
					content_broadcast(content,
							CONTENT_MSG_ERROR,
							msg_data);
					return;
				}
				ic->type = BOX_INLINE_CONTAINER;
				box_insert_sibling(box->parent, ic);
				ic->children = box->next;
				ic->last = box->parent->last;
				ic->children->prev = 0;
				box->next = 0;
				box->parent->last = box;
				for (b = ic->children; b; b = b->next)
					b->parent = ic;
			}
			/* insert the fallback after the parent */
			for (b = box->fallback; b->next; b = b->next)
				b->parent = box->parent->parent;
			b->parent = box->parent->parent;
			/* [b is the last fallback box] */
			b->next = box->parent->next;
			if (b->next)
				b->next->prev = b;
			box->parent->next = box->fallback;
			box->fallback->prev = box->parent;
			box->fallback = 0;
			box_normalise_block(box->parent->parent, content);
		}
	}

	/* invalidate parent min, max widths */
	for (b = box->parent; b; b = b->parent)
		b->max_width = UNKNOWN_MAX_WIDTH;
	box->width = UNKNOWN_WIDTH;
}


/**
 * Check if a type is in a list.
 *
 * \param type the content_type to search for
 * \param permitted_types array of types, terminated by CONTENT_UNKNOWN,
 *	      or 0 if all types except OTHER and UNKNOWN acceptable
 * \return the type is in the list or acceptable
 */

bool html_object_type_permitted(const content_type type,
		const content_type *permitted_types)
{
	if (permitted_types) {
		for (; *permitted_types != CONTENT_UNKNOWN; permitted_types++)
			if (*permitted_types == type)
				return true;
	} else if (type < CONTENT_OTHER) {
		return true;
	}
	return false;
}


/**
 * schedule() callback for object refresh
 */

void html_object_refresh(void *p)
{
	struct content *c = (struct content *)p;

	assert(c->type == CONTENT_HTML);

	/* Ignore if refresh URL has gone
	 * (may happen if fetch errored) */
	if (!c->refresh)
		return;

	c->fresh = false;

	if (!html_replace_object(c->data.html.page, c->data.html.index,
			c->refresh, 0, 0)) {
		/** \todo handle memory exhaustion */
	}
}

/**
 * Stop loading a CONTENT_HTML in state READY.
 */

void html_stop(struct content *c)
{
	unsigned int i;
	struct content *object;

	assert(c->status == CONTENT_STATUS_READY);

	for (i = 0; i != c->data.html.object_count; i++) {
		object = c->data.html.object[i].content;
		if (!object)
			continue;

		if (object->status == CONTENT_STATUS_DONE)
			; /* already loaded: do nothing */
		else if (object->status == CONTENT_STATUS_READY)
			content_stop(object, html_object_callback,
					(intptr_t) c, i);
		else {
			content_remove_user(c->data.html.object[i].content,
					 html_object_callback, (intptr_t) c, i);
			c->data.html.object[i].content = 0;
		}
	}
	c->status = CONTENT_STATUS_DONE;
}


/**
 * Reformat a CONTENT_HTML to a new width.
 */

void html_reformat(struct content *c, int width, int height)
{
	struct box *layout;

	layout_document(c, width, height);
	layout = c->data.html.layout;

	/* width and height are at least margin box of document */
	c->width = layout->x + layout->padding[LEFT] + layout->width +
			layout->padding[RIGHT] + layout->border[RIGHT] +
			layout->margin[RIGHT];
	c->height = layout->y + layout->padding[TOP] + layout->height +
			layout->padding[BOTTOM] + layout->border[BOTTOM] +
			layout->margin[BOTTOM];

	/* if boxes overflow right or bottom edge, expand to contain it */
	if (c->width < layout->x + layout->descendant_x1)
		c->width = layout->x + layout->descendant_x1;
	if (c->height < layout->y + layout->descendant_y1)
		c->height = layout->y + layout->descendant_y1;
}


/**
 * Destroy a CONTENT_HTML and free all resources it owns.
 */

void html_destroy(struct content *c)
{
	unsigned int i;
	LOG(("content %p", c));

	imagemap_destroy(c);

	if (c->bitmap) {
	  	bitmap_destroy(c->bitmap);
	  	c->bitmap = NULL;
	}

	if (c->data.html.parser)
#ifndef WITH_HUBBUB
		htmlFreeParserCtxt(c->data.html.parser);
#else
		hubbub_parser_destroy(c->data.html.parser);
#endif

	/* Free base target */
	if (c->data.html.base_target) {
	 	talloc_free(c->data.html.base_target);
	 	c->data.html.base_target = NULL;
	}

	/* Free frameset */
	if (c->data.html.frameset) {
		html_destroy_frameset(c->data.html.frameset);
		talloc_free(c->data.html.frameset);
		c->data.html.frameset = NULL;
	}

	/* Free iframes */
	if (c->data.html.iframe) {
		html_destroy_iframe(c->data.html.iframe);
		c->data.html.iframe = NULL;
	}

	/* Free stylesheets */
	if (c->data.html.stylesheet_count) {
		for (i = 0; i != c->data.html.stylesheet_count; i++) {
			if (c->data.html.stylesheet_content[i])
				content_remove_user(c->data.html.
						stylesheet_content[i],
						html_convert_css_callback,
						(intptr_t) c, i);
		}
	}

	talloc_free(c->data.html.working_stylesheet);

	/*if (c->data.html.style)
		css_free_style(c->data.html.style);*/

	/* Free objects */
	for (i = 0; i != c->data.html.object_count; i++) {
		LOG(("object %i %p", i, c->data.html.object[i].content));
		if (c->data.html.object[i].content) {
			content_remove_user(c->data.html.object[i].content,
					 html_object_callback, (intptr_t) c, i);
			if (c->data.html.object[i].content->type == CONTENT_HTML)
				schedule_remove(html_object_refresh,
					c->data.html.object[i].content);
		}
	}
}

void html_destroy_frameset(struct content_html_frames *frameset) {
	int i;

	if (frameset->name) {
		talloc_free(frameset->name);
		frameset->name = NULL;
	}
	if (frameset->url) {
		talloc_free(frameset->url);
		frameset->url = NULL;
	}
	if (frameset->children) {
		for (i = 0; i < (frameset->rows * frameset->cols); i++) {
			if (frameset->children[i].name) {
				talloc_free(frameset->children[i].name);
				frameset->children[i].name = NULL;
			}
			if (frameset->children[i].url) {
				talloc_free(frameset->children[i].url);
				frameset->children[i].url = NULL;
			}
		  	if (frameset->children[i].children)
		  		html_destroy_frameset(&frameset->children[i]);
		}
		talloc_free(frameset->children);
		frameset->children = NULL;
	}
}

void html_destroy_iframe(struct content_html_iframe *iframe) {
	struct content_html_iframe *next;
	next = iframe;
	while ((iframe = next) != NULL) {
		next = iframe->next;
		if (iframe->name)
			talloc_free(iframe->name);
		if (iframe->url)
			talloc_free(iframe->url);
		talloc_free(iframe);
	}
}


/**
 * Set the content status.
 */

void html_set_status(struct content *c, const char *extra)
{
	unsigned int stylesheets = 0, objects = 0;
	if (c->data.html.object_count == 0)
		stylesheets = c->data.html.stylesheet_count - c->active;
	else {
		stylesheets = c->data.html.stylesheet_count;
		objects = c->data.html.object_count - c->active;
	}
	content_set_status(c, "%u/%u %s %u/%u %s  %s",
			stylesheets, c->data.html.stylesheet_count,
			messages_get((c->data.html.stylesheet_count == 1) ?
					"styl" : "styls"),
			objects, c->data.html.object_count,
			messages_get((c->data.html.object_count == 1) ?
					"obj" : "objs"),
			extra);
}


/**
 * Handle a window containing a CONTENT_HTML being opened.
 */

void html_open(struct content *c, struct browser_window *bw,
		struct content *page, unsigned int index, struct box *box,
		struct object_params *params)
{
	unsigned int i;
	c->data.html.bw = bw;
	c->data.html.page = page;
	c->data.html.index = index;
	c->data.html.box = box;
	for (i = 0; i != c->data.html.object_count; i++) {
		if (c->data.html.object[i].content == 0)
			continue;
		if (c->data.html.object[i].content->type == CONTENT_UNKNOWN)
			continue;
               	content_open(c->data.html.object[i].content,
				bw, c, i,
				c->data.html.object[i].box,
				c->data.html.object[i].box->object_params);
	}
}


/**
 * Handle a window containing a CONTENT_HTML being closed.
 */

void html_close(struct content *c)
{
	unsigned int i;
	c->data.html.bw = 0;
	schedule_remove(html_object_refresh, c);
	for (i = 0; i != c->data.html.object_count; i++) {
		if (c->data.html.object[i].content == 0)
			continue;
		if (c->data.html.object[i].content->type == CONTENT_UNKNOWN)
			continue;
               	content_close(c->data.html.object[i].content);
	}
}


/**
 * Print a frameset tree to stderr.
 */

void html_dump_frameset(struct content_html_frames *frame,
		unsigned int depth)
{
	unsigned int i;
	int row, col, index;
	const char *unit[] = {"px", "%", "*"};
	const char *scrolling[] = {"auto", "yes", "no"};

	assert(frame);

	fprintf(stderr, "%p ", frame);

	fprintf(stderr, "(%i %i) ", frame->rows, frame->cols);

	fprintf(stderr, "w%g%s ", frame->width.value, unit[frame->width.unit]);
	fprintf(stderr, "h%g%s ", frame->height.value,unit[frame->height.unit]);
	fprintf(stderr, "(margin w%i h%i) ",
			frame->margin_width, frame->margin_height);

	if (frame->name)
		fprintf(stderr, "'%s' ", frame->name);
	if (frame->url)
		fprintf(stderr, "<%s> ", frame->url);

	if (frame->no_resize)
		fprintf(stderr, "noresize ");
	fprintf(stderr, "(scrolling %s) ", scrolling[frame->scrolling]);
	if (frame->border)
		fprintf(stderr, "border %x ",
				(unsigned int) frame->border_colour);

	fprintf(stderr, "\n");

	if (frame->children) {
		for (row = 0; row != frame->rows; row++) {
			for (col = 0; col != frame->cols; col++) {
				for (i = 0; i != depth; i++)
					fprintf(stderr, "  ");
				fprintf(stderr, "(%i %i): ", row, col);
				index = (row * frame->cols) + col;
				html_dump_frameset(&frame->children[index],
						depth + 1);
			}
		}
	}
}