/*
 * Copyright 2012 John-Mark Bell <jmb@netsurf-browser.org>
 * Copyright 2004-2007 James Bursa <bursa@users.sourceforge.net>
 *
 * This file is part of NetSurf, http://www.netsurf-browser.org/
 *
 * NetSurf is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * NetSurf is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * \file
 * Save HTML document with dependencies implementation.
 */

#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <sys/types.h>
#include <dom/dom.h>

#include "utils/config.h"
#include "utils/regex.h"
#include "utils/corestrings.h"
#include "utils/log.h"
#include "utils/nsurl.h"
#include "utils/utf8.h"
#include "utils/utils.h"
#include "utils/file.h"
#include "utils/messages.h"
#include "utils/ascii.h"
#include "netsurf/content.h"
#include "content/hlcache.h"
#include "css/css.h"
#include "html/box.h"
#include "html/html_save.h"
#include "html/html.h"

#include "netsurf/misc.h"
#include "desktop/gui_internal.h"
#include "desktop/save_complete.h"

/**
 * Compiled regular expression used to locate CSS \@import rules.
 *
 * It is compiled by save_complete_init() and consulted by
 * save_complete_rewrite_stylesheet_urls().
 */
regex_t save_complete_import_re;

/**
 * An entry in the singly linked list of contents held by a save context.
 */
typedef struct save_complete_entry {
	struct hlcache_handle *content; /**< Content handle recorded by this entry */
	struct save_complete_entry *next; /**< Next entry in list */
} save_complete_entry;

/**
 * State carried through a single save complete operation.
 */
typedef struct save_complete_ctx {
    const char *path; /**< Path that every saved leafname is joined onto */
    save_complete_entry *list; /**< Contents recorded so far (newest first) */
    save_complete_set_type_cb set_type; /**< Callback given each saved file's name and MIME type; may be NULL */

    nsurl *base; /**< Base URL of the HTML document currently being written */
    FILE *fp; /**< Output stream of the HTML document currently being written */
    /** STATE_IN_STYLE is set after a STYLE start tag is written; text and
     *  comment nodes are only emitted when the state is not STATE_IN_STYLE */
    enum { STATE_NORMAL, STATE_IN_STYLE } iter_state;
} save_complete_ctx;

/** Events delivered to the node callback by the DOM tree walk. */
typedef enum {
	EVENT_ENTER, /**< The walk has arrived at a node */
	EVENT_LEAVE /**< The walk is about to move on from a node */
} save_complete_event_type;


/*
 * Forward declarations: both functions are called before the point in
 * this file at which they are defined.
 */
static bool save_complete_save_html(save_complete_ctx *ctx, struct hlcache_handle *c,
		bool index);
static bool save_complete_save_imported_sheets(save_complete_ctx *ctx,
		struct nscss_import *imports, uint32_t import_count);


/**
 * Prepare a save context for use.
 *
 * Every member is given a defined value so that no field of a context
 * declared on the stack is left indeterminate.
 *
 * \param ctx       Context to initialise.
 * \param path      Path that saved files are created under.
 * \param set_type  Callback used to set the type of saved files, or NULL.
 */
static void save_complete_ctx_initialise(save_complete_ctx *ctx,
		const char *path, save_complete_set_type_cb set_type)
{
	ctx->path = path;
	ctx->list = NULL;
	ctx->set_type = set_type;

	/* per-document state; set up properly when a document is written */
	ctx->base = NULL;
	ctx->fp = NULL;
	ctx->iter_state = STATE_NORMAL;
}

/**
 * Release every list entry owned by a save context.
 *
 * Only the entries themselves are freed; the content handles they refer
 * to are left alone.
 *
 * \param ctx  Context whose list is to be freed.
 */
static void save_complete_ctx_finalise(save_complete_ctx *ctx)
{
	save_complete_entry *cur;
	save_complete_entry *following;

	for (cur = ctx->list; cur != NULL; cur = following) {
		/* fetch the successor before the entry is released */
		following = cur->next;
		free(cur);
	}
}

/**
 * Record a content handle in the save context.
 *
 * The new entry is placed at the head of the context's list.
 *
 * \param ctx      Save complete context.
 * \param content  Content handle to record.
 * \return true on success, false if the entry could not be allocated.
 */
static bool save_complete_ctx_add_content(save_complete_ctx *ctx,
		struct hlcache_handle *content)
{
	save_complete_entry *fresh = malloc(sizeof (*fresh));

	if (fresh != NULL) {
		fresh->content = content;
		fresh->next = ctx->list;
		ctx->list = fresh;
	}

	return (fresh != NULL);
}

/**
 * Look up the recorded content whose URL matches the one given.
 *
 * \param ctx The save context
 * \param url The url to search the recorded contents for
 * \return The first matching content handle, or NULL if none matches.
 */
static struct hlcache_handle *
save_complete_ctx_find_content(save_complete_ctx *ctx, const nsurl *url)
{
	save_complete_entry *cursor = ctx->list;

	while (cursor != NULL) {
		bool same = nsurl_compare(url,
				hlcache_handle_get_url(cursor->content),
				NSURL_COMPLETE);

		if (same) {
			return cursor->content;
		}

		cursor = cursor->next;
	}

	return NULL;
}


/**
 * Determine whether a content handle has already been recorded.
 *
 * Handles are compared by pointer identity.
 *
 * \param ctx      Save complete context.
 * \param content  Content handle to look for.
 * \return true if the handle is in the context's list, false otherwise.
 */
static bool save_complete_ctx_has_content(save_complete_ctx *ctx,
		struct hlcache_handle *content)
{
	save_complete_entry *cursor = ctx->list;

	while (cursor != NULL && cursor->content != content) {
		cursor = cursor->next;
	}

	return (cursor != NULL);
}

/**
 * Write a buffer to a file beneath the save path.
 *
 * \param ctx        Save complete context; supplies the path and the
 *                   optional set_type callback.
 * \param leafname   Name of the file to create, joined onto ctx->path.
 * \param data       Bytes to write.
 * \param data_len   Number of bytes in \a data.
 * \param mime_type  MIME type handed to the set_type callback.
 * \return true on success, false on error (a warning has been raised).
 */
static bool
save_complete_save_buffer(save_complete_ctx *ctx,
			  const char *leafname,
			  const uint8_t *data,
			  size_t data_len,
			  lwc_string *mime_type)
{
	nserror ret;
	FILE *fp;
	char *fname = NULL;
	int saved_errno = 0;
	bool ok = true;

	ret = netsurf_mkpath(&fname, NULL, 2, ctx->path, leafname);
	if (ret != NSERROR_OK) {
		guit->misc->warning(messages_get_errorcode(ret), 0);
		return false;
	}

	fp = fopen(fname, "wb");
	if (fp == NULL) {
		/* capture errno before free() or logging can disturb it */
		saved_errno = errno;
		free(fname);
		NSLOG(netsurf, INFO, "fopen(): errno = %i", saved_errno);
		guit->misc->warning("SaveError", strerror(saved_errno));
		return false;
	}

	if (fwrite(data, sizeof(*data), data_len, fp) != data_len) {
		saved_errno = errno;
		ok = false;
	}

	/* fclose() flushes buffered data, so it may be the call that fails;
	 * the stream is always closed, even after a short write */
	if (fclose(fp) != 0 && ok) {
		saved_errno = errno;
		ok = false;
	}

	if (!ok) {
		free(fname);
		NSLOG(netsurf, INFO, "write failed: errno = %i", saved_errno);
		guit->misc->warning("SaveError", strerror(saved_errno));
		return false;
	}

	if (ctx->set_type != NULL) {
		ctx->set_type(fname, mime_type);
	}
	free(fname);

	return true;
}


/**
 * Run a POSIX regexec() over a counted string that has no terminator.
 *
 * The input is copied into a temporary zero-terminated buffer for the
 * duration of the match.
 *
 * \param preg       Compiled regular expression.
 * \param string     Start of the data to match against.
 * \param stringlen  Number of bytes at \a string.
 * \param nmatch     Number of elements in \a pmatch.
 * \param pmatch     Receives match offsets, relative to \a string.
 * \param eflags     Flags passed through to regexec().
 * \return the regexec() result, or -1 if the temporary buffer could not
 *         be allocated.
 */
static int
snregexec(const regex_t *preg,
	 const char *string,
	 size_t stringlen,
	 size_t nmatch,
	 regmatch_t pmatch[],
	 int eflags)
{
	int result = -1;
	char *terminated = calloc(1, stringlen + 1);

	if (terminated != NULL) {
		/* the extra zeroed byte from calloc() acts as the terminator */
		memcpy(terminated, string, stringlen);
		result = regexec(preg, terminated, nmatch, pmatch, eflags);
		free(terminated);
	}

	return result;
}


/**
 * Rewrite stylesheet \@import rules for save complete.
 *
 * Each \@import whose URL, resolved against \a base, matches a content
 * already recorded in \a ctx is replaced by an \@import naming that
 * content's handle; every other byte of the source is copied unchanged.
 *
 * \param ctx Save complete context.
 * \param source stylesheet source (need not be 0-terminated).
 * \param size size of source.
 * \param base url of stylesheet.
 * \param osize updated with the size of the result.
 * \return converted source (to be freed by the caller), or NULL on out of
 *         memory.
 */
static uint8_t *
save_complete_rewrite_stylesheet_urls(save_complete_ctx *ctx,
				      const uint8_t *source,
				      size_t size,
				      const nsurl *base,
				      size_t *osize)
{
	/* size of the buffer a replacement rule is formatted into; also the
	 * upper bound on how much any one rewritten rule can grow */
	enum { IMPORT_REWRITE_MAX = 64 };
	/* capture groups of save_complete_import_re that can hold the URL */
	static const unsigned int url_groups[] = { 2, 4, 6, 8, 10 };
	uint8_t *rewritten;
	size_t offset;
	size_t imports = 0;
	nserror error;

	/* count number occurrences of @import to (over)estimate result size */
	/* can't use strstr because source is not 0-terminated string */
	if (SLEN("@import") < size) {
		for (offset = 0; offset <= (size - SLEN("@import")); offset++) {
			if (source[offset] == '@' &&
			    ascii_to_lower(source[offset + 1]) == 'i' &&
			    ascii_to_lower(source[offset + 2]) == 'm' &&
			    ascii_to_lower(source[offset + 3]) == 'p' &&
			    ascii_to_lower(source[offset + 4]) == 'o' &&
			    ascii_to_lower(source[offset + 5]) == 'r' &&
			    ascii_to_lower(source[offset + 6]) == 't') {
				imports++;
			}
		}
	}

	/* A replacement is never longer than IMPORT_REWRITE_MAX - 1 bytes, so
	 * this bound holds however the platform formats %p.  The extra byte
	 * keeps the request non-zero: malloc(0) may return NULL, which would
	 * wrongly be reported as out of memory for empty input. */
	rewritten = malloc(size + imports * IMPORT_REWRITE_MAX + 1);
	if (rewritten == NULL)
		return NULL;
	*osize = 0;

	offset = 0;
	while (offset < size) {
		const uint8_t *import_url = NULL;
		char *import_url_copy;
		size_t import_url_len = 0;
		size_t match_start;
		size_t match_len;
		size_t g;
		nsurl *url = NULL;
		hlcache_handle *content = NULL;
		regmatch_t match[11];

		if (snregexec(&save_complete_import_re,
			      (const char *)source + offset,
			      size - offset,
			      11,
			      match,
			      0) != 0) {
			/* no further @import rules (or no memory to search) */
			break;
		}

		/* the URL is in whichever alternative of the regex matched */
		for (g = 0; g < sizeof(url_groups) / sizeof(url_groups[0]); g++) {
			const regmatch_t *m = &match[url_groups[g]];

			if (m->rm_so != -1) {
				import_url = source + offset + m->rm_so;
				import_url_len = (size_t)(m->rm_eo - m->rm_so);
				break;
			}
		}
		assert(import_url != NULL);

		import_url_copy = strndup((const char *)import_url,
					  import_url_len);
		if (import_url_copy == NULL) {
			free(rewritten);
			return NULL;
		}

		error = nsurl_join(base, import_url_copy, &url);
		free(import_url_copy);
		if (error == NSERROR_NOMEM) {
			free(rewritten);
			return NULL;
		}

		match_start = (size_t)match[0].rm_so;
		match_len = (size_t)(match[0].rm_eo - match[0].rm_so);

		/* copy data before match */
		memcpy(rewritten + *osize, source + offset, match_start);
		*osize += match_start;

		if (url != NULL) {
			content = save_complete_ctx_find_content(ctx, url);
			nsurl_unref(url);
		}

		if (content != NULL) {
			/* replace import */
			char buf[IMPORT_REWRITE_MAX];
			size_t buf_len;

			snprintf(buf, sizeof buf, "@import '%p'",
					(void *)content);
			buf_len = strlen(buf);
			memcpy(rewritten + *osize, buf, buf_len);
			*osize += buf_len;
		} else {
			/* copy import */
			memcpy(rewritten + *osize,
				source + offset + match_start,
				match_len);
			*osize += match_len;
		}

		/* a match is never empty, so the scan always advances */
		assert(0 < match[0].rm_eo);
		offset += (size_t)match[0].rm_eo;
	}

	/* copy rest of source */
	if (offset < size) {
		memcpy(rewritten + *osize, source + offset, size - offset);
		*osize += size - offset;
	}

	return rewritten;
}

/**
 * Save a stylesheet and, first, every stylesheet it imports.
 *
 * The sheet is recorded in the context before its imports are visited,
 * and a sheet that is already recorded is skipped.  The source is passed
 * through the \@import rewriter and saved under a leafname formed from
 * the content handle's address.
 *
 * \param ctx  Save complete context.
 * \param css  Stylesheet content to save.
 * \return true on success (or if already saved), false on error.
 */
static bool
save_complete_save_stylesheet(save_complete_ctx *ctx, hlcache_handle *css)
{
	char leaf[32];
	struct nscss_import *children;
	uint32_t n_children;
	const uint8_t *raw;
	size_t raw_len;
	uint8_t *converted;
	size_t converted_len;
	lwc_string *mime;
	bool ok = false;

	if (save_complete_ctx_has_content(ctx, css)) {
		return true;
	}

	if (!save_complete_ctx_add_content(ctx, css)) {
		guit->misc->warning("NoMemory", 0);
		return false;
	}

	children = nscss_get_imports(css, &n_children);
	if (!save_complete_save_imported_sheets(ctx, children, n_children)) {
		return false;
	}

	raw = content_get_source_data(css, &raw_len);
	converted = save_complete_rewrite_stylesheet_urls(ctx, raw, raw_len,
			hlcache_handle_get_url(css), &converted_len);
	if (converted == NULL) {
		guit->misc->warning("NoMemory", 0);
		return false;
	}

	mime = content_get_mime_type(css);
	if (mime != NULL) {
		snprintf(leaf, sizeof leaf, "%p", css);

		ok = save_complete_save_buffer(ctx, leaf,
				converted, converted_len, mime);

		lwc_string_unref(mime);
	}

	free(converted);

	return ok;
}

/**
 * Save each stylesheet in an array of imports.
 *
 * Imports with no content are skipped.
 *
 * \param ctx           Save complete context.
 * \param imports       Array of imports.
 * \param import_count  Number of entries in \a imports.
 * \return true if every import with content was saved, false as soon as
 *         one fails.
 */
static bool save_complete_save_imported_sheets(save_complete_ctx *ctx,
		struct nscss_import *imports, uint32_t import_count)
{
	uint32_t idx;

	for (idx = 0; idx < import_count; idx++) {
		if (imports[idx].c == NULL) {
			continue;
		}

		/* treat a valid content as a stylesheet to save */
		if (!save_complete_save_stylesheet(ctx, imports[idx].c)) {
			return false;
		}
	}

	return true;
}

/**
 * Save the stylesheet content attached to an HTML stylesheet entry.
 *
 * \param ctx    Save complete context.
 * \param sheet  HTML stylesheet entry; an entry with no sheet is a no-op.
 * \return true on success or when there is nothing to save, else false.
 */
static bool save_complete_save_html_stylesheet(save_complete_ctx *ctx,
		struct html_stylesheet *sheet)
{
	if (sheet->sheet != NULL) {
		return save_complete_save_stylesheet(ctx, sheet->sheet);
	}

	return true;
}

/**
 * Save the stylesheets of an HTML content.
 *
 * Only the entries from index STYLESHEET_START onwards are saved.
 *
 * \param ctx  Save complete context.
 * \param c    HTML content whose stylesheets are to be saved.
 * \return true on success, false as soon as one stylesheet fails.
 */
static bool save_complete_save_html_stylesheets(save_complete_ctx *ctx,
		hlcache_handle *c)
{
	struct html_stylesheet *sheets;
	unsigned int i, count;

	sheets = html_get_stylesheets(c, &count);

	/* "<" rather than "!=" so that a count below STYLESHEET_START ends
	 * the loop at once instead of walking off the end of the array */
	for (i = STYLESHEET_START; i < count; i++) {
		if (save_complete_save_html_stylesheet(ctx,
				&sheets[i]) == false)
			return false;
	}

	return true;
}

/**
 * Save one object referenced by an HTML content.
 *
 * Objects of type CONTENT_NONE, objects with no source data and objects
 * that are already recorded are skipped.  Otherwise the object is
 * recorded; HTML objects are then handed to save_complete_save_html(),
 * and anything else has its source data saved under a leafname formed
 * from the content handle's address.
 *
 * \param ctx  Save complete context.
 * \param obj  Object content to save.
 * \return true on success or when skipped, false on error.
 */
static bool
save_complete_save_html_object(save_complete_ctx *ctx, hlcache_handle *obj)
{
	char leaf[32];
	const uint8_t *bytes;
	size_t n_bytes;
	lwc_string *mime;
	bool ok;

	if (content_get_type(obj) == CONTENT_NONE) {
		return true;
	}

	bytes = content_get_source_data(obj, &n_bytes);
	if (bytes == NULL || save_complete_ctx_has_content(ctx, obj)) {
		return true;
	}

	if (!save_complete_ctx_add_content(ctx, obj)) {
		guit->misc->warning("NoMemory", 0);
		return false;
	}

	if (content_get_type(obj) == CONTENT_HTML) {
		return save_complete_save_html(ctx, obj, false);
	}

	snprintf(leaf, sizeof leaf, "%p", obj);

	mime = content_get_mime_type(obj);
	if (mime == NULL) {
		return false;
	}

	ok = save_complete_save_buffer(ctx, leaf, bytes, n_bytes, mime);

	lwc_string_unref(mime);

	return ok;
}

/**
 * Save the objects referenced by an HTML content.
 *
 * Only objects that have both a content and a box are saved.
 *
 * \param ctx  Save complete context.
 * \param c    HTML content whose objects are to be saved.
 * \return true on success, false as soon as one object fails.
 */
static bool save_complete_save_html_objects(save_complete_ctx *ctx,
		hlcache_handle *c)
{
	struct content_html_object *obj;
	unsigned int n_objects; /* required by html_get_objects(); unused */

	obj = html_get_objects(c, &n_objects);

	while (obj != NULL) {
		bool wanted = (obj->content != NULL) && (obj->box != NULL);

		if (wanted &&
		    !save_complete_save_html_object(ctx, obj->content)) {
			return false;
		}

		obj = obj->next;
	}

	return true;
}

/**
 * Walk a DOM tree depth first, delivering enter and leave events.
 *
 * EVENT_ENTER is delivered when the walk arrives at a node and
 * EVENT_LEAVE when it moves on from it.  EVENT_ENTER is not delivered
 * for \a root itself, because the walk steps to the root's first child
 * before the first enter callback.
 *
 * \param root      Node to start walking from.
 * \param callback  Function called for every event; returning false stops
 *                  the walk.
 * \param ctx       Opaque pointer passed through to \a callback.
 * \return false if the callback stopped the walk, true otherwise
 *         (including when a DOM call failed and the walk ended early).
 */
static bool save_complete_libdom_treewalk(dom_node *root,
		bool (*callback)(dom_node *node,
				save_complete_event_type event_type, void *ctx),
		void *ctx)
{
	dom_node *node;
	/* reference obtained from the DOM but not yet adopted as 'node' */
	dom_node *next = NULL;

	node = dom_node_ref(root); /* tree root */

	while (node != NULL) {
		dom_exception exc;

		next = NULL;

		exc = dom_node_get_first_child(node, &next);
		if (exc != DOM_NO_ERR) {
			dom_node_unref(node);
			break;
		}

		if (next != NULL) {  /* 1. children */
			dom_node_unref(node);
			node = next;
			next = NULL;
		} else {
			exc = dom_node_get_next_sibling(node, &next);
			if (exc != DOM_NO_ERR) {
				dom_node_unref(node);
				break;
			}

			if (next != NULL) {  /* 2. siblings */
				if (callback(node, EVENT_LEAVE, ctx) == false) {
					goto aborted;
				}
				dom_node_unref(node);
				node = next;
				next = NULL;
			} else {  /* 3. ancestor siblings */
				while (node != NULL) {
					exc = dom_node_get_next_sibling(node,
							&next);
					if (exc != DOM_NO_ERR) {
						dom_node_unref(node);
						node = NULL;
						break;
					}

					if (next != NULL) {
						/* only probing: the sibling is
						 * fetched again below */
						dom_node_unref(next);
						next = NULL;
						break;
					}

					exc = dom_node_get_parent_node(node,
							&next);
					if (exc != DOM_NO_ERR) {
						dom_node_unref(node);
						node = NULL;
						break;
					}

					if (callback(node, EVENT_LEAVE,
							ctx) == false) {
						goto aborted;
					}
					dom_node_unref(node);
					node = next;
					next = NULL;
				}

				if (node == NULL)
					break;

				exc = dom_node_get_next_sibling(node, &next);
				if (exc != DOM_NO_ERR) {
					dom_node_unref(node);
					break;
				}

				if (callback(node, EVENT_LEAVE, ctx) == false) {
					goto aborted;
				}
				dom_node_unref(node);
				node = next;
				next = NULL;
			}
		}

		assert(node != NULL);

		if (callback(node, EVENT_ENTER, ctx) == false) {
			goto aborted;
		}

	}

	return true;

aborted:
	/* callback caused early termination: drop the references still
	 * held so that the nodes are not leaked */
	if (next != NULL) {
		dom_node_unref(next);
	}
	dom_node_unref(node);

	return false;
}

/**
 * Write a quoted attribute value that holds a URL.
 *
 * The value is resolved against the current document's base URL.  If a
 * recorded content has the resulting URL, the address of its handle is
 * written in place of the URL; otherwise the resolved URL is written.
 * If the value cannot be resolved it is written as it stands.
 *
 * \param ctx        Save complete context; output goes to ctx->fp.
 * \param value      Attribute value.
 * \param value_len  Length of \a value in bytes.
 * \return true on success, false on error.
 */
static bool save_complete_rewrite_url_value(save_complete_ctx *ctx,
		const char *value, size_t value_len)
{
	/* must start out NULL: it is tested below even when the join fails
	 * for a reason other than lack of memory */
	nsurl *url = NULL;
	hlcache_handle *content;
	char *escaped;
	nserror error;

	error = nsurl_join(ctx->base, value, &url);
	if (error == NSERROR_NOMEM)
		return false;

	if (url != NULL) {
		content = save_complete_ctx_find_content(ctx, url);
		if (content != NULL) {
			/* found a match */
			nsurl_unref(url);

			fprintf(ctx->fp, "\"%p\"", (void *)content);
		} else {
			/* no match found */
			error = utf8_to_html(nsurl_access(url), "UTF-8",
					nsurl_length(url), &escaped);
			nsurl_unref(url);

			if (error != NSERROR_OK)
				return false;

			fprintf(ctx->fp, "\"%s\"", escaped);

			free(escaped);
		}
	} else {
		/* unresolvable: emit the original value */
		error = utf8_to_html(value, "UTF-8", value_len, &escaped);
		if (error != NSERROR_OK)
			return false;

		fprintf(ctx->fp, "\"%s\"", escaped);

		free(escaped);
	}

	return true;
}

/**
 * Write a quoted attribute value that needs no URL handling.
 *
 * The value is converted with utf8_to_html() and written to ctx->fp
 * between double quotes.
 *
 * \param ctx        Save complete context.
 * \param value      Attribute value.
 * \param value_len  Length of \a value in bytes.
 * \return true on success, false if the conversion failed.
 */
static bool save_complete_write_value(save_complete_ctx *ctx,
		const char *value, size_t value_len)
{
	char *converted;

	if (utf8_to_html(value, "UTF-8", value_len, &converted) !=
			NSERROR_OK) {
		return false;
	}

	fprintf(ctx->fp, "\"%s\"", converted);
	free(converted);

	return true;
}

/**
 * Case-insensitively compare a counted name against a literal.
 *
 * \param data    Name to test.
 * \param len     Length of \a data in bytes.
 * \param wanted  0-terminated name to compare with.
 * \return true if the lengths are equal and the names match.
 */
static bool save_complete_name_matches(const char *data, size_t len,
		const char *wanted)
{
	return len == strlen(wanted) && strncasecmp(data, wanted, len) == 0;
}

/**
 * Write an attribute value, rewriting it if it is a URL attribute.
 *
 * The value is treated as a URL in these cases only:
 *
 * Attribute:      Elements:
 *
 * 1)   data         object
 * 2)   href         a, area, link
 * 3)   src          script, input, frame, iframe, img
 * 4)   background   any
 *
 * \param ctx         Save complete context.
 * \param node_name   Name of the element that owns the attribute.
 * \param attr_name   Name of the attribute.
 * \param attr_value  Value of the attribute.
 * \return true on success, false on error.
 */
static bool save_complete_handle_attr_value(save_complete_ctx *ctx,
		dom_string *node_name, dom_string *attr_name,
		dom_string *attr_value)
{
	static const char *const href_elements[] = { "a", "area", "link" };
	static const char *const src_elements[] = {
		"frame", "iframe", "input", "img", "script"
	};
	const char *elem = dom_string_data(node_name);
	size_t elem_len = dom_string_byte_length(node_name);
	const char *attr = dom_string_data(attr_name);
	size_t attr_len = dom_string_byte_length(attr_name);
	const char *value = dom_string_data(attr_value);
	size_t value_len = dom_string_byte_length(attr_value);
	bool is_url = false;
	size_t i;

	if (save_complete_name_matches(attr, attr_len, "data")) {
		/* 1 */
		is_url = save_complete_name_matches(elem, elem_len, "object");
	} else if (save_complete_name_matches(attr, attr_len, "href")) {
		/* 2 */
		for (i = 0; !is_url && i < sizeof(href_elements) /
				sizeof(href_elements[0]); i++) {
			is_url = save_complete_name_matches(elem, elem_len,
					href_elements[i]);
		}
	} else if (save_complete_name_matches(attr, attr_len, "src")) {
		/* 3 */
		for (i = 0; !is_url && i < sizeof(src_elements) /
				sizeof(src_elements[0]); i++) {
			is_url = save_complete_name_matches(elem, elem_len,
					src_elements[i]);
		}
	} else if (save_complete_name_matches(attr, attr_len, "background")) {
		/* 4 */
		is_url = true;
	}

	if (is_url) {
		return save_complete_rewrite_url_value(ctx, value, value_len);
	}

	return save_complete_write_value(ctx, value, value_len);
}

/**
 * Write one attribute of an element.
 *
 * Emits a space and the attribute name, followed by "=" and the value
 * when the attribute has one.  An attribute with no name writes nothing.
 *
 * \param ctx        Save complete context; output goes to ctx->fp.
 * \param node_name  Name of the element that owns the attribute.
 * \param attr       Attribute to write.
 * \return true on success, false on error.
 */
static bool save_complete_handle_attr(save_complete_ctx *ctx,
		dom_string *node_name, dom_attr *attr)
{
	dom_string *name;
	dom_string *value;
	bool ok = true;

	if (dom_attr_get_name(attr, &name) != DOM_NO_ERR) {
		return false;
	}

	if (name == NULL) {
		return true;
	}

	if (dom_attr_get_value(attr, &value) != DOM_NO_ERR) {
		dom_string_unref(name);
		return false;
	}

	fputc(' ', ctx->fp);
	fwrite(dom_string_data(name), sizeof(char),
			dom_string_byte_length(name), ctx->fp);

	if (value != NULL) {
		fputc('=', ctx->fp);
		ok = save_complete_handle_attr_value(ctx, node_name,
				name, value);
		dom_string_unref(value);
	}

	dom_string_unref(name);

	return ok;
}

/**
 * Write every attribute in an element's attribute map.
 *
 * Map slots that yield no attribute are skipped.
 *
 * \param ctx        Save complete context.
 * \param node_name  Name of the element that owns the attributes.
 * \param attrs      Attribute map to iterate over.
 * \return true on success, false on the first failure.
 */
static bool save_complete_handle_attrs(save_complete_ctx *ctx,
		dom_string *node_name, dom_namednodemap *attrs)
{
	uint32_t count;
	uint32_t idx;

	if (dom_namednodemap_get_length(attrs, &count) != DOM_NO_ERR) {
		return false;
	}

	for (idx = 0; idx < count; idx++) {
		dom_attr *attr;
		bool ok;

		if (dom_namednodemap_item(attrs, idx, (void *) &attr) !=
				DOM_NO_ERR) {
			return false;
		}

		if (attr != NULL) {
			ok = save_complete_handle_attr(ctx, node_name, attr);

			/* the reference is dropped whatever the outcome */
			dom_node_unref(attr);

			if (!ok) {
				return false;
			}
		}
	}

	return true;
}

/**
 * Case-insensitively compare a counted element name against a literal.
 *
 * \param name      Element name.
 * \param name_len  Length of \a name in bytes.
 * \param wanted    0-terminated name to compare with.
 * \return true if the lengths are equal and the names match.
 */
static bool save_complete_tag_is(const char *name, size_t name_len,
		const char *wanted)
{
	return name_len == strlen(wanted) &&
			strncasecmp(name, wanted, name_len) == 0;
}

/**
 * Determine whether an element is serialised without an end tag.
 *
 * The list is the set of elements for which HTML serialisation emits no
 * end tag.
 *
 * \param name      Element name.
 * \param name_len  Length of \a name in bytes.
 * \return true if no end tag should be written for the element.
 */
static bool save_complete_tag_is_void(const char *name, size_t name_len)
{
	static const char *const void_tags[] = {
		"area", "base", "basefont", "bgsound", "br", "col", "embed",
		"frame", "hr", "img", "input", "keygen", "link", "meta",
		"param", "source", "track", "wbr"
	};
	size_t i;

	for (i = 0; i < sizeof(void_tags) / sizeof(void_tags[0]); i++) {
		if (save_complete_tag_is(name, name_len, void_tags[i]))
			return true;
	}

	return false;
}

/**
 * Write the start or end tag of an element.
 *
 * BASE elements and META elements that declare a charset are left out of
 * the output, and no end tag is written for void elements.  On entering
 * a STYLE element its text content is written with \@import rules
 * rewritten, and on entering HEAD a UTF-8 META charset is inserted.
 *
 * \param ctx         Save complete context; output goes to ctx->fp.
 * \param node        Element node.
 * \param event_type  EVENT_ENTER for the start tag, EVENT_LEAVE for the
 *                    end tag.
 * \return true on success, false on error.
 */
static bool save_complete_handle_element(save_complete_ctx *ctx,
		dom_node *node, save_complete_event_type event_type)
{
	dom_string *name;
	dom_namednodemap *attrs;
	const char *name_data;
	size_t name_len;
	bool process = true;
	dom_exception error;

	ctx->iter_state = STATE_NORMAL;

	error = dom_node_get_node_name(node, &name);
	if (error != DOM_NO_ERR)
		return false;

	if (name == NULL)
		return true;

	name_data = dom_string_data(name);
	name_len = dom_string_byte_length(name);

	if (save_complete_tag_is(name_data, name_len, "base")) {
		/* Elide BASE elements from the output */
		process = false;
	} else if (save_complete_tag_is(name_data, name_len, "meta")) {
		/* Don't emit close tags for META elements */
		if (event_type == EVENT_LEAVE) {
			process = false;
		} else {
			/* Elide meta charsets */
			dom_string *value;
			error = dom_element_get_attribute(node,
					corestring_dom_http_equiv, &value);
			if (error != DOM_NO_ERR) {
				dom_string_unref(name);
				return false;
			}

			if (value != NULL) {
				if (dom_string_length(value) ==
					SLEN("Content-Type") &&
					strncasecmp(dom_string_data(value),
						"Content-Type",
						SLEN("Content-Type")) == 0)
					process = false;

				dom_string_unref(value);
			} else {
				bool yes;

				error = dom_element_has_attribute(node,
						corestring_dom_charset, &yes);
				if (error != DOM_NO_ERR) {
					dom_string_unref(name);
					return false;
				}

				if (yes)
					process = false;
			}
		}
	} else if (event_type == EVENT_LEAVE &&
			save_complete_tag_is_void(name_data, name_len)) {
		/* Don't emit close tags for void elements: an end tag such
		 * as </br> is not valid and is re-parsed as another <br> */
		process = false;
	}

	if (process == false) {
		dom_string_unref(name);
		return true;
	}

	fputc('<', ctx->fp);
	if (event_type == EVENT_LEAVE)
		fputc('/', ctx->fp);
	fwrite(name_data, sizeof(*name_data), name_len, ctx->fp);

	if (event_type == EVENT_ENTER) {
		error = dom_node_get_attributes(node, &attrs);
		if (error != DOM_NO_ERR) {
			dom_string_unref(name);
			return false;
		}

		if (save_complete_handle_attrs(ctx, name, attrs) == false) {
			dom_namednodemap_unref(attrs);
			dom_string_unref(name);
			return false;
		}

		dom_namednodemap_unref(attrs);
	}

	fputc('>', ctx->fp);

	/* Rewrite contents of style elements */
	if (event_type == EVENT_ENTER &&
			save_complete_tag_is(name_data, name_len, "style")) {
		dom_string *content;

		error = dom_node_get_text_content(node, &content);
		if (error != DOM_NO_ERR) {
			dom_string_unref(name);
			return false;
		}

		if (content != NULL) {
			uint8_t *rewritten;
			size_t len;

			/* Rewrite @import rules */
			rewritten = save_complete_rewrite_stylesheet_urls(
					ctx,
					(const uint8_t *)dom_string_data(content),
					dom_string_byte_length(content),
					ctx->base,
					&len);
			if (rewritten == NULL) {
				dom_string_unref(content);
				dom_string_unref(name);
				return false;
			}

			dom_string_unref(content);

			fwrite(rewritten, sizeof(*rewritten), len, ctx->fp);

			free(rewritten);
		}

		/* the text has been written here, so the text nodes that
		 * follow must not be emitted a second time */
		ctx->iter_state = STATE_IN_STYLE;
	} else if (event_type == EVENT_ENTER &&
			save_complete_tag_is(name_data, name_len, "head")) {
		/* If this is a HEAD element, insert a meta charset */
		fputs("<META http-equiv=\"Content-Type\" "
				"content=\"text/html; charset=utf-8\">",
				ctx->fp);
	}

	dom_string_unref(name);

	return true;
}

/**
 * Tree walk callback: serialise one DOM node to the output file.
 *
 * Elements are passed to save_complete_handle_element().  Text and
 * comment nodes are written on EVENT_ENTER unless the walk is inside a
 * STYLE element.  Document type nodes are written as a DOCTYPE
 * declaration.  Document nodes produce no output and any other node type
 * is only logged.
 *
 * \param node        Node the event refers to.
 * \param event_type  Whether the node is being entered or left.
 * \param ctxin       The save_complete_ctx for this save.
 * \return true to continue the walk, false on error.
 */
static bool save_complete_node_handler(dom_node *node,
		save_complete_event_type event_type, void *ctxin)
{
	save_complete_ctx *ctx = ctxin;
	dom_node_type type;
	dom_exception error;
	nserror ret;

	error = dom_node_get_node_type(node, &type);
	if (error != DOM_NO_ERR)
		return false;

	if (type == DOM_ELEMENT_NODE) {
		return save_complete_handle_element(ctx, node, event_type);
	} else if (type == DOM_TEXT_NODE || type == DOM_COMMENT_NODE) {
		if (event_type != EVENT_ENTER)
			return true;

		if (ctx->iter_state != STATE_IN_STYLE) {
			/* Emit text content */
			dom_string *text;
			const char *text_data;
			size_t text_len;

			error = dom_characterdata_get_data(node, &text);
			if (error != DOM_NO_ERR) {
				return false;
			}

			if (type == DOM_COMMENT_NODE)
				fwrite("<!--", 1, sizeof("<!--") - 1, ctx->fp);

			if (text != NULL) {
				char *escaped;

				text_data = dom_string_data(text);
				text_len = dom_string_byte_length(text);

				ret = utf8_to_html(text_data, "UTF-8",
						text_len, &escaped);
				if (ret != NSERROR_OK) {
					/* don't leak the string on failure */
					dom_string_unref(text);
					return false;
				}

				fwrite(escaped, sizeof(*escaped),
						strlen(escaped), ctx->fp);

				free(escaped);

				dom_string_unref(text);
			}

			if (type == DOM_COMMENT_NODE) {
				fwrite("-->", 1, sizeof("-->") - 1, ctx->fp);
			}
		}

	} else if (type == DOM_DOCUMENT_TYPE_NODE) {
		dom_string *name;
		const char *name_data;
		size_t name_len;
		bool have_public = false;

		if (event_type != EVENT_ENTER)
			return true;

		error = dom_document_type_get_name(node, &name);
		if (error != DOM_NO_ERR)
			return false;

		if (name == NULL)
			return true;

		name_data = dom_string_data(name);
		name_len = dom_string_byte_length(name);

		fputs("<!DOCTYPE ", ctx->fp);
		fwrite(name_data, sizeof(*name_data), name_len, ctx->fp);

		dom_string_unref(name);

		error = dom_document_type_get_public_id(node, &name);
		if (error != DOM_NO_ERR)
			return false;

		if (name != NULL) {
			name_data = dom_string_data(name);
			name_len = dom_string_byte_length(name);

			if (name_len > 0) {
				fprintf(ctx->fp, " PUBLIC \"%.*s\"",
						(int) name_len, name_data);
				have_public = true;
			}

			dom_string_unref(name);
		}

		error = dom_document_type_get_system_id(node, &name);
		if (error != DOM_NO_ERR)
			return false;

		if (name != NULL) {
			name_data = dom_string_data(name);
			name_len = dom_string_byte_length(name);

			if (name_len > 0) {
				/* a system identifier that does not follow a
				 * public identifier needs the SYSTEM keyword */
				fprintf(ctx->fp, have_public ?
						" \"%.*s\"" :
						" SYSTEM \"%.*s\"",
						(int) name_len, name_data);
			}

			dom_string_unref(name);
		}

		fputc('>', ctx->fp);
	} else if (type == DOM_DOCUMENT_NODE) {
		/* Do nothing */
	} else {
		NSLOG(netsurf, INFO, "Unhandled node type: %d", type);
	}

	return true;
}

/**
 * Serialise the DOM of an HTML content to a file beneath the save path.
 *
 * \param ctx    Save complete context; its base, fp and iter_state
 *               members are set up for this document.
 * \param c      HTML content to write.
 * \param index  true to name the file "index", false to name it after
 *               the address of the content handle.
 * \return true on success, false on error (a warning has been raised).
 */
static bool save_complete_save_html_document(save_complete_ctx *ctx,
		hlcache_handle *c, bool index)
{
	nserror ret;
	FILE *fp;
	char *fname = NULL;
	dom_document *doc;
	lwc_string *mime_type;
	char filename[32];
	bool write_failed;
	const char *detail = NULL;

	if (index) {
		snprintf(filename, sizeof filename, "index");
	} else {
		snprintf(filename, sizeof filename, "%p", c);
	}

	ret = netsurf_mkpath(&fname, NULL, 2, ctx->path, filename);
	if (ret != NSERROR_OK) {
		guit->misc->warning(messages_get_errorcode(ret), NULL);
		return false;
	}

	fp = fopen(fname, "wb");
	if (fp == NULL) {
		/* capture errno before free() or logging can disturb it */
		int saved_errno = errno;

		free(fname);
		NSLOG(netsurf, INFO, "fopen(): errno = %i", saved_errno);
		guit->misc->warning("SaveError", strerror(saved_errno));
		return false;
	}

	ctx->base = html_get_base_url(c);
	ctx->fp = fp;
	ctx->iter_state = STATE_NORMAL;

	doc = html_get_document(c);

	if (save_complete_libdom_treewalk((dom_node *) doc,
			save_complete_node_handler, ctx) == false) {
		free(fname);
		guit->misc->warning("NoMemory", 0);
		fclose(fp);
		return false;
	}

	/* The handlers do not check their writes, so pick up any failure
	 * from the stream's error flag and from the final flush in fclose() */
	write_failed = (ferror(fp) != 0);
	if (fclose(fp) != 0) {
		detail = strerror(errno);
		write_failed = true;
	}

	if (write_failed) {
		free(fname);
		guit->misc->warning("SaveError", detail);
		return false;
	}

	mime_type = content_get_mime_type(c);
	if (mime_type != NULL) {
		if (ctx->set_type != NULL)
			ctx->set_type(fname, mime_type);

		lwc_string_unref(mime_type);
	}
	free(fname);

	return true;
}

/**
 * Save an HTML page with all dependencies, recursing through imported pages.
 *
 * This function does not consult the context's list of recorded
 * contents.  save_complete_save_html_object() records a nested HTML
 * content immediately before calling here, so a check for "already
 * recorded" at this point would always succeed and the nested document
 * would never be written.  De-duplication is left to that caller.
 *
 * \param  ctx    Save complete context
 * \param  c      Content to save
 * \param  index  true to save as "index"
 * \return  true on success, false on error and error reported
 */
static bool save_complete_save_html(save_complete_ctx *ctx, hlcache_handle *c,
		bool index)
{
	if (content_get_type(c) != CONTENT_HTML)
		return false;

	if (save_complete_save_html_stylesheets(ctx, c) == false)
		return false;

	/* objects (including nested documents) are written before this
	 * document, so the per-document state in ctx is never shared */
	if (save_complete_save_html_objects(ctx, c) == false)
		return false;

	return save_complete_save_html_document(ctx, c, index);
}


/**
 * Create the inventory file listing original URLs.
 *
 * One line is written per recorded content, giving the address of the
 * content handle followed by the URL of the content.
 *
 * \param ctx  Save complete context.
 * \return true on success, false on error (a warning has been raised).
 */
static bool save_complete_inventory(save_complete_ctx *ctx)
{
	nserror ret;
	FILE *fp;
	char *fname = NULL;
	save_complete_entry *entry;

	ret = netsurf_mkpath(&fname, NULL, 2, ctx->path, "Inventory");
	if (ret != NSERROR_OK) {
		/* report the failure in the same way as the other savers */
		guit->misc->warning(messages_get_errorcode(ret), NULL);
		return false;
	}

	fp = fopen(fname, "w");
	if (fp == NULL) {
		/* capture errno before free() or logging can disturb it */
		int saved_errno = errno;

		free(fname);
		NSLOG(netsurf, INFO, "fopen(): errno = %i", saved_errno);
		guit->misc->warning("SaveError", strerror(saved_errno));
		return false;
	}
	free(fname);

	for (entry = ctx->list; entry != NULL; entry = entry->next) {
		fprintf(fp, "%p %s\n", (void *)entry->content,
				nsurl_access(hlcache_handle_get_url(
						entry->content)));
	}

	fclose(fp);

	return true;
}

/**
 * Compile a regular expression, logging a description of any failure.
 *
 * Parameters as for regcomp(), see man regex.
 *
 * \return NSERROR_OK on success, NSERROR_INIT_FAILED if the expression
 *         could not be compiled.
 */
static nserror regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char errbuf[200];
	int rc = regcomp(preg, regex, cflags);

	if (rc == 0) {
		return NSERROR_OK;
	}

	regerror(rc, preg, errbuf, sizeof errbuf);
	NSLOG(netsurf, INFO, "Failed to compile regexp '%s': %s\n",
	      regex, errbuf);

	return NSERROR_INIT_FAILED;
}


/* Documented in save_complete.h */
/*
 * Compiles the @import matching expression into save_complete_import_re.
 * The result of regcomp_wrapper() is not checked here.
 *
 * The numbered comments inside the expression give the index of the
 * capture group opened by the following "(".  Groups 2, 4, 6, 8 and 10
 * hold the URL text for the five alternative forms; these are the groups
 * save_complete_rewrite_stylesheet_urls() inspects.
 */
void save_complete_init(void)
{
	/* Match an @import rule - see CSS 2.1 G.1. */
	regcomp_wrapper(&save_complete_import_re,
			"@import"		/* IMPORT_SYM */
			"[ \t\r\n\f]*"		/* S* */
			/* 1 */
			"("			/* [ */
			/* 2 3 */
			"\"(([^\"]|[\\]\")*)\""	/* STRING (approximated) */
			"|"
			/* 4 5 */
			"'(([^']|[\\]')*)'"
			"|"			/* | */
			"url\\([ \t\r\n\f]*"	/* URI (approximated) */
			     /* 6 7 */
			     "\"(([^\"]|[\\]\")*)\""
			     "[ \t\r\n\f]*\\)"
			"|"
			"url\\([ \t\r\n\f]*"
			    /* 8 9 */
			     "'(([^']|[\\]')*)'"
			     "[ \t\r\n\f]*\\)"
			"|"
			"url\\([ \t\r\n\f]*"
			   /* 10 */
			     "([^) \t\r\n\f]*)"
			     "[ \t\r\n\f]*\\)"
			")",			/* ] */
			REG_EXTENDED | REG_ICASE);
}

/* Documented in save_complete.h */
/*
 * Saves the page as "index" together with its dependencies, then writes
 * the inventory.  The inventory is only attempted when the page itself
 * was saved, and the context is always torn down before returning.
 */
bool
save_complete(hlcache_handle *c,
	      const char *path,
	      save_complete_set_type_cb set_type)
{
	save_complete_ctx ctx;
	bool ok;

	save_complete_ctx_initialise(&ctx, path, set_type);

	ok = save_complete_save_html(&ctx, c, true) &&
	     save_complete_inventory(&ctx);

	save_complete_ctx_finalise(&ctx);

	return ok;
}