netsurf/render/html.c

3615 lines
86 KiB
C

/*
* Copyright 2007 James Bursa <bursa@users.sourceforge.net>
* Copyright 2010 Michael Drake <tlsa@netsurf-browser.org>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/** \file
* Content for text/html (implementation).
*/
#include <assert.h>
#include <ctype.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include <dom/dom.h>
#include "utils/config.h"
#include "content/content_protected.h"
#include "content/fetch.h"
#include "content/hlcache.h"
#include "desktop/browser.h"
#include "desktop/options.h"
#include "desktop/selection.h"
#include "desktop/scrollbar.h"
#include "javascript/js.h"
#include "image/bitmap.h"
#include "render/box.h"
#include "render/font.h"
#include "render/form.h"
#include "render/html_internal.h"
#include "render/imagemap.h"
#include "render/layout.h"
#include "render/search.h"
#include "utils/http.h"
#include "utils/log.h"
#include "utils/messages.h"
#include "utils/schedule.h"
#include "utils/talloc.h"
#include "utils/url.h"
#include "utils/utf8.h"
#include "utils/utils.h"
/* Size constant. It is not referenced in the part of the file visible here
 * (presumably a buffer granularity in bytes -- confirm at the use site). */
#define CHUNK 4096
/* Change these to 1 to cause a dump to stderr of the frameset or box
* when the trees have been built.
*/
#define ALWAYS_DUMP_FRAMESET 0
#define ALWAYS_DUMP_BOX 0
/* MIME types for HTML content.
 * NOTE(review): presumably the list registered with the content factory
 * for this handler; the registration code is outside this view. */
static const char *html_types[] = {
"application/xhtml+xml",
"text/html"
};
/* forward declared functions */
/* Needed by html_destroy_objects(), which passes it to schedule_remove()
 * before the definition appears. */
static void html_object_refresh(void *p);
/* pre-interned character set */
/* Parameter name looked up in the HTTP parameter list by
 * html_create_html_data() to find a declared charset. */
static lwc_string *html_charset;
/* URLs of the built-in stylesheets.
 * NOTE(review): not referenced in the visible part of the file; presumably
 * initialised once at start-up and used when fetching stylesheets. */
static nsurl *html_default_stylesheet_url;
static nsurl *html_adblock_stylesheet_url;
static nsurl *html_quirks_stylesheet_url;
static nsurl *html_user_stylesheet_url;
/* pre-interned dom strings */
/* Element and attribute names (and a few attribute values) compared against
 * DOM nodes. Those declared without `static` have external linkage and so
 * are visible to other translation units. */
static dom_string *html_dom_string_html;
static dom_string *html_dom_string_head;
static dom_string *html_dom_string_rel;
dom_string *html_dom_string_href;
static dom_string *html_dom_string_hreflang;
static dom_string *html_dom_string_type;
static dom_string *html_dom_string_media;
static dom_string *html_dom_string_sizes;
static dom_string *html_dom_string_title;
static dom_string *html_dom_string_base;
static dom_string *html_dom_string_link;
static dom_string *html_dom_string_script;
/* Default script MIME type used when a <script> has no type attribute. */
static dom_string *html_dom_string_text_javascript;
static dom_string *html_dom_string_src;
dom_string *html_dom_string_target;
/* Reserved target names accepted by html_process_base(). */
static dom_string *html_dom_string__parent;
static dom_string *html_dom_string__self;
static dom_string *html_dom_string__blank;
static dom_string *html_dom_string__top;
/* Attribute names read when looking for <meta http-equiv="refresh">. */
static dom_string *html_dom_string_http_equiv;
static dom_string *html_dom_string_content;
/* NOTE(review): the remaining strings are not used in the visible part of
 * this file; by their names they look like image map vocabulary -- confirm
 * against the image map code. */
dom_string *html_dom_string_map;
dom_string *html_dom_string_id;
dom_string *html_dom_string_name;
dom_string *html_dom_string_area;
dom_string *html_dom_string_a;
dom_string *html_dom_string_nohref;
dom_string *html_dom_string_shape;
dom_string *html_dom_string_default;
dom_string *html_dom_string_rect;
dom_string *html_dom_string_rectangle;
dom_string *html_dom_string_coords;
dom_string *html_dom_string_circle;
dom_string *html_dom_string_poly;
dom_string *html_dom_string_polygon;
/** Signature of a function that runs a buffer of script source in a
 * javascript context. */
typedef bool (script_handler_t)(struct jscontext *jscontext, const char *data, size_t size);

/**
 * Pick the function used to execute a script of a given content type.
 *
 * \param ctype  content type of the script
 * \return js_exec for CONTENT_JS, NULL for every other type
 */
static script_handler_t *select_script_handler(content_type ctype)
{
	switch (ctype) {
	case CONTENT_JS:
		return js_exec;
	default:
		return NULL;
	}
}
/**
 * Attempt to make progress executing the scripts of a document.
 *
 * Follows the ordering described in:
 * http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#the-script-element
 *
 * Scripts are visited in the order they were recorded. Entries which have
 * already been started, are not external, have no content handle, failed to
 * fetch, or have no handler for their content type are passed over. An
 * external script whose content is not yet complete stops the walk unless
 * it is marked defer or async.
 *
 * \param c  HTML content whose scripts should be run
 * \return false if there is no javascript context, true otherwise
 */
static bool html_scripts_exec(html_content *c)
{
	unsigned int idx;

	if (c->jscontext == NULL) {
		return false;
	}

	for (idx = 0; idx != c->scripts_count; idx++) {
		struct html_script *entry = &c->scripts[idx];
		script_handler_t *run;
		const char *source;
		unsigned long length;

		if (entry->already_started) {
			continue;
		}

		assert((entry->type == HTML_SCRIPT_EXTERNAL) ||
				(entry->type == HTML_SCRIPT_INTERNAL));

		if (entry->type != HTML_SCRIPT_EXTERNAL) {
			continue;
		}

		/* the content handle must exist ... */
		if (entry->data.external == NULL) {
			continue;
		}

		/* ... and its fetch must not have failed */
		if (content_get_status(entry->data.external) ==
				CONTENT_STATUS_ERROR) {
			continue;
		}

		/* a handler must exist for the content type */
		run = select_script_handler(
				content_get_type(entry->data.external));
		if (run == NULL) {
			continue; /* unsupported type */
		}

		if (content_get_status(entry->data.external) !=
				CONTENT_STATUS_DONE) {
			/* not yet available: only deferred or asynchronous
			 * scripts may be overtaken by later ones */
			if (!entry->defer && !entry->async) {
				break;
			}
			continue;
		}

		/* external script is now available */
		source = content_get_source_data(entry->data.external, &length );
		run(c->jscontext, source, length);
		entry->already_started = true;
	}

	return true;
}
/**
 * Append a fresh script entry to a document's script table.
 *
 * The table is grown by one element and the new element's flags are reset.
 * The entry's data and mimetype members are left for the caller to fill in.
 *
 * \param c     HTML content owning the script table
 * \param type  kind of script the entry describes
 * \return the new entry, or NULL if the table could not be grown (the
 *         existing table is left untouched in that case)
 */
static struct html_script *
html_process_new_script(html_content *c, enum html_script_type type)
{
	struct html_script *grown;
	struct html_script *entry;

	/* make room for one more entry */
	grown = realloc(c->scripts,
			sizeof(struct html_script) * (c->scripts_count + 1));
	if (grown == NULL) {
		return NULL;
	}
	c->scripts = grown;

	/* the new entry is the last one; account for it */
	entry = grown + c->scripts_count;
	c->scripts_count++;

	entry->type = type;
	entry->already_started = false;
	entry->parser_inserted = false;
	entry->force_async = true;
	entry->ready_exec = false;
	entry->async = false;
	entry->defer = false;

	return entry;
}
/**
 * Release every object attached to an HTML content.
 *
 * Each list entry is unlinked and freed. Where an entry still holds a
 * content handle, that handle is released; for HTML objects any pending
 * html_object_refresh callback scheduled against the entry is removed first.
 *
 * \param html  HTML content whose object list is to be emptied
 */
static void html_destroy_objects(html_content *html)
{
	struct content_html_object *obj;

	for (obj = html->object_list; obj != NULL; obj = html->object_list) {
		if (obj->content != NULL) {
			LOG(("object %p", obj->content));

			if (content_get_type(obj->content) == CONTENT_HTML) {
				schedule_remove(html_object_refresh, obj);
			}

			hlcache_handle_release(obj->content);
		}

		/* unlink before freeing so the list head stays valid */
		html->object_list = obj->next;
		talloc_free(obj);
	}
}
/**
 * Perform post-box-creation conversion of a document
 *
 * This is the completion callback handed to xml_to_box() by
 * html_finish_conversion(). On failure or abort an error is broadcast and
 * the content is put in the error state. Otherwise image maps are
 * extracted, the parser binding is destroyed and the content is marked
 * ready (and done, if no fetches remain active).
 *
 * \param c        HTML content to complete conversion of
 * \param success  Whether box tree construction was successful
 */
static void html_box_convert_done(html_content *c, bool success)
{
	union content_msg_data msg_data;
	dom_exception exc; /* returned by libdom functions */
	dom_node *html;

	LOG(("Done XML to box (%p)", c));

	/* Clean up and report error if unsuccessful or aborted */
	if ((success == false) || c->aborted) {
		html_destroy_objects(c);
		if (success == false)
			msg_data.error = messages_get("NoMemory");
		else
			msg_data.error = messages_get("Stopped");
		content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data);
		content_set_error(&c->base);
		return;
	}

#if ALWAYS_DUMP_BOX
	box_dump(stderr, c->layout->children, 0);
#endif
#if ALWAYS_DUMP_FRAMESET
	if (c->frameset)
		html_dump_frameset(c->frameset, 0);
#endif

	exc = dom_document_get_document_element(c->document, (void *) &html);
	if ((exc != DOM_NO_ERR) || (html == NULL)) {
		LOG(("error retrieving html element from dom"));
		msg_data.error = messages_get("ParsingFail");
		content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data);
		content_set_error(&c->base);
		return;
	}

	/* The element is fetched only to confirm the document has a root;
	 * nothing below uses it. The getter hands back a counted reference
	 * (as the other libdom getters in this file do), so drop it here
	 * rather than leaking it. */
	dom_node_unref(html);

	/* extract image maps - can't do this sensibly in xml_to_box */
	if (imagemap_extract(c) == false) {
		LOG(("imagemap extraction failed"));
		html_destroy_objects(c);
		msg_data.error = messages_get("NoMemory");
		content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data);
		content_set_error(&c->base);
		return;
	}
	/*imagemap_dump(c);*/

	/* Destroy the parser binding */
	binding_destroy_tree(c->parser_binding);
	c->parser_binding = NULL;

	content_set_ready(&c->base);

	/* with no fetches outstanding the content is complete already */
	if (c->base.active == 0)
		content_set_done(&c->base);

	html_set_status(c, "");
}
/**
 * Complete conversion of an HTML document
 *
 * Builds the CSS selection context from the document's stylesheets and then
 * starts box tree construction with xml_to_box(), which reports completion
 * through html_box_convert_done(). Any failure is broadcast as a
 * CONTENT_MSG_ERROR and puts the content in the error state.
 *
 * \param c  Content to convert
 */
static void html_finish_conversion(html_content *c)
{
	union content_msg_data msg_data;
	dom_exception exc; /* returned by libdom functions */
	dom_node *html;
	uint32_t i;
	css_error error;

	/* Bail out if we've been aborted */
	if (c->aborted) {
		msg_data.error = messages_get("Stopped");
		content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data);
		content_set_error(&c->base);
		return;
	}

	/* check that the base stylesheet loaded; layout fails without it */
	if (c->stylesheets[STYLESHEET_BASE].data.external == NULL) {
		msg_data.error = "Base stylesheet failed to load";
		content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data);
		content_set_error(&c->base);
		return;
	}

	/* Create selection context */
	error = css_select_ctx_create(ns_realloc, c, &c->select_ctx);
	if (error != CSS_OK) {
		msg_data.error = messages_get("NoMemory");
		content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data);
		content_set_error(&c->base);
		return;
	}

	/* Add sheets to it */
	for (i = STYLESHEET_BASE; i != c->stylesheet_count; i++) {
		const struct html_stylesheet *hsheet = &c->stylesheets[i];
		css_stylesheet *sheet;
		css_origin origin = CSS_ORIGIN_AUTHOR;

		/* the slot index decides the cascade origin */
		if (i < STYLESHEET_USER)
			origin = CSS_ORIGIN_UA;
		else if (i < STYLESHEET_START)
			origin = CSS_ORIGIN_USER;

		if (hsheet->type == HTML_STYLESHEET_EXTERNAL &&
				hsheet->data.external != NULL) {
			sheet = nscss_get_stylesheet(hsheet->data.external);
		} else if (hsheet->type == HTML_STYLESHEET_INTERNAL) {
			sheet = hsheet->data.internal->sheet;
		} else {
			sheet = NULL;
		}

		if (sheet != NULL) {
			error = css_select_ctx_append_sheet(
					c->select_ctx, sheet,
					origin, CSS_MEDIA_SCREEN);
			if (error != CSS_OK) {
				msg_data.error = messages_get("NoMemory");
				content_broadcast(&c->base, CONTENT_MSG_ERROR,
						msg_data);
				content_set_error(&c->base);
				return;
			}
		}
	}

	/* convert xml tree to box tree */
	LOG(("XML to box (%p)", c));
	content_set_status(&c->base, messages_get("Processing"));
	/* the status message carries no payload; give the union a defined
	 * value instead of passing uninitialised stack contents */
	memset(&msg_data, 0, sizeof(msg_data));
	content_broadcast(&c->base, CONTENT_MSG_STATUS, msg_data);

	exc = dom_document_get_document_element(c->document, (void *) &html);
	if ((exc != DOM_NO_ERR) || (html == NULL)) {
		LOG(("error retrieving html element from dom"));
		msg_data.error = messages_get("ParsingFail");
		content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data);
		content_set_error(&c->base);
		return;
	}

	if (xml_to_box(html, c, html_box_convert_done) == false) {
		/* release the reference returned by the getter */
		dom_node_unref(html);
		html_destroy_objects(c);
		msg_data.error = messages_get("NoMemory");
		content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data);
		content_set_error(&c->base);
		return;
	}

	/* Release the reference returned by the getter. The element stays
	 * alive because it is still attached to c->document.
	 * NOTE(review): assumes xml_to_box() does not rely on borrowing this
	 * particular reference -- confirm against its implementation. */
	dom_node_unref(html);
}
/**
 * Callback for hlcache_handle_retrieve() for external scripts.
 *
 * Registered by html_process_script() with the owning html_content as the
 * private word. Tracks the fetch state of one external script, retries
 * script execution when a script finishes or fails, and completes the
 * document conversion once no fetches remain active.
 *
 * \param script  handle of the script the event concerns
 * \param event   event being delivered
 * \param pw      the html_content which requested the script
 * \return NSERROR_OK always
 */
static nserror
html_convert_script_callback(hlcache_handle *script,
		const hlcache_event *event,
		void *pw)
{
	html_content *parent = pw;
	unsigned int i;
	struct html_script *s;

	/* Find script */
	for (i = 0, s = parent->scripts; i != parent->scripts_count; i++, s++) {
		if (s->type == HTML_SCRIPT_EXTERNAL &&
				s->data.external == script)
			break;
	}

	assert(i != parent->scripts_count);

	switch (event->type) {
	case CONTENT_MSG_LOADING:
		break;

	case CONTENT_MSG_READY:
		break;

	case CONTENT_MSG_DONE:
		/* i is unsigned, so it must be printed with %u */
		LOG(("script %u done '%s'", i,
				nsurl_access(hlcache_handle_get_url(script))));
		parent->base.active--;
		LOG(("%d fetches active", parent->base.active));

		/* script finished loading so try and continue execution */
		html_scripts_exec(parent);
		break;

	case CONTENT_MSG_ERROR:
		LOG(("script %s failed: %s",
				nsurl_access(hlcache_handle_get_url(script)),
				event->data.error));
		hlcache_handle_release(script);
		/* mark the entry as having no content so it is skipped */
		s->data.external = NULL;
		parent->base.active--;
		LOG(("%d fetches active", parent->base.active));
		content_add_error(&parent->base, "?", 0);

		/* script failed loading so try and continue execution */
		html_scripts_exec(parent);
		break;

	case CONTENT_MSG_STATUS:
		html_set_status(parent, content_get_status_message(script));
		content_broadcast(&parent->base, CONTENT_MSG_STATUS,
				event->data);
		break;

	default:
		assert(0);
	}

	/* nothing left outstanding: the document can be converted */
	if (parent->base.active == 0)
		html_finish_conversion(parent);

	return NSERROR_OK;
}
/**
 * Process a script node encountered by the parser.
 *
 * Registered with the parser binding as its script callback. Makes sure a
 * javascript context exists, records the script in the document's script
 * table and then either executes it immediately (inline scripts) or starts
 * fetching it (scripts with a src attribute).
 *
 * \param ctx   the html_content being parsed
 * \param node  the script element
 * \return DOM_HUBBUB_OK on success or when the script is skipped,
 *         DOM_HUBBUB_NOMEM if the script could not be recorded or fetched
 *         (an error is broadcast in that case)
 */
static dom_hubbub_error
html_process_script(void *ctx, dom_node *node)
{
	html_content *c = (html_content *)ctx;
	dom_exception exc; /* returned by libdom functions */
	dom_string *src, *script, *mimetype;
	struct html_script *nscript;
	union content_msg_data msg_data;

	/* ensure javascript context is available */
	if (c->jscontext == NULL) {
		/* ask whoever is listening to supply a context */
		msg_data.jscontext = &c->jscontext;
		content_broadcast(&c->base, CONTENT_MSG_GETCTX, msg_data);
		LOG(("javascript context %p ", c->jscontext));
		if (c->jscontext == NULL) {
			/* no context and it could not be created, abort */
			return DOM_HUBBUB_OK;
		}
	}

	LOG(("content %p parser %p node %p",c,c->parser_binding, node));

	/* scripts without a type attribute default to text/javascript */
	exc = dom_element_get_attribute(node, html_dom_string_type, &mimetype);
	if (exc != DOM_NO_ERR || mimetype == NULL) {
		mimetype = dom_string_ref(html_dom_string_text_javascript);
	}

	exc = dom_element_get_attribute(node, html_dom_string_src, &src);
	if (exc != DOM_NO_ERR || src == NULL) {
		struct lwc_string_s *lwcmimetype;
		script_handler_t *script_handler = NULL;

		/* does not appear to be a src so script is inline content */
		exc = dom_node_get_text_content(node, &script);
		if ((exc != DOM_NO_ERR) || (script == NULL)) {
			dom_string_unref(mimetype);
			return DOM_HUBBUB_OK; /* no contents, skip */
		}

		nscript = html_process_new_script(c, HTML_SCRIPT_INTERNAL);
		if (nscript == NULL) {
			dom_string_unref(mimetype);
			dom_string_unref(script);
			goto html_process_script_no_memory;
		}

		/* the entry takes over both string references */
		nscript->data.internal = script;
		nscript->mimetype = mimetype;
		nscript->already_started = true;

		/* charset (encoding) */

		/* ensure script handler for content type; if the mime type
		 * cannot be interned there is nothing valid to look up */
		exc = dom_string_intern(mimetype, &lwcmimetype);
		if (exc == DOM_NO_ERR) {
			script_handler = select_script_handler(content_factory_type_from_mime_type(lwcmimetype));
			lwc_string_unref(lwcmimetype);
		}

		if (script_handler != NULL) {
			script_handler(c->jscontext,
					dom_string_data(script),
					dom_string_byte_length(script));
		}
	} else {
		/* script with a src tag */
		nserror ns_error;
		nsurl *joined;
		hlcache_child_context child;

		nscript = html_process_new_script(c, HTML_SCRIPT_EXTERNAL);
		if (nscript == NULL) {
			dom_string_unref(src);
			dom_string_unref(mimetype);
			goto html_process_script_no_memory;
		}

		/* Fully initialise the entry before anything can fail. A
		 * NULL content handle is the state the fetch callback leaves
		 * a failed script in, and html_scripts_exec() skips it. The
		 * entry keeps the mimetype reference from here on. */
		nscript->data.external = NULL;
		nscript->mimetype = mimetype;

		/* charset (encoding) */

		ns_error = nsurl_join(c->base_url, dom_string_data(src), &joined);
		dom_string_unref(src);
		if (ns_error != NSERROR_OK) {
			goto html_process_script_no_memory;
		}

		LOG(("script %i '%s'", c->scripts_count, nsurl_access(joined)));

		child.charset = c->encoding;
		child.quirks = c->base.quirks;

		ns_error = hlcache_handle_retrieve(joined,
				0,
				content_get_url(&c->base),
				NULL,
				html_convert_script_callback,
				c,
				&child,
				CONTENT_SCRIPT,
				&nscript->data.external);
		nsurl_unref(joined);
		if (ns_error != NSERROR_OK) {
			/* do not trust whatever a failed retrieve left in
			 * the handle slot */
			nscript->data.external = NULL;
			goto html_process_script_no_memory;
		}

		c->base.active++; /* ensure base content knows the fetch is active */
		LOG(("%d fetches active", c->base.active));
	}

	html_scripts_exec(c);

	return DOM_HUBBUB_OK;

html_process_script_no_memory:
	msg_data.error = messages_get("NoMemory");
	content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data);
	return DOM_HUBBUB_NOMEM;
}
/**
 * Initialise the HTML-specific part of a content and create its parser.
 *
 * All html_content members are reset, the universal selector string is
 * interned, any charset given in the HTTP parameters is recorded, and the
 * parser binding is created. If the declared encoding is not supported the
 * binding is created again with no encoding so that it is autodetected.
 *
 * \param c       HTML content to initialise
 * \param params  HTTP parameters of the content (searched for a charset)
 * \return NSERROR_OK on success, NSERROR_BAD_ENCODING or NSERROR_NOMEM on
 *         failure (an error is broadcast and the references taken here are
 *         released)
 */
static nserror
html_create_html_data(html_content *c, const http_parameter *params)
{
	union content_msg_data msg_data;
	lwc_string *charset;
	binding_error berr;
	nserror result;

	/* start from a known state */
	c->parser_binding = NULL;
	c->document = NULL;
	c->quirks = BINDING_QUIRKS_MODE_NONE;
	c->encoding = NULL;
	c->base_url = nsurl_ref(content_get_url(&c->base));
	c->base_target = NULL;
	c->aborted = false;
	c->layout = NULL;
	c->background_colour = NS_TRANSPARENT;
	c->stylesheet_count = 0;
	c->stylesheets = NULL;
	c->select_ctx = NULL;
	c->universal = NULL;
	c->num_objects = 0;
	c->object_list = NULL;
	c->forms = NULL;
	c->imagemaps = NULL;
	c->bw = NULL;
	c->frameset = NULL;
	c->iframe = NULL;
	c->page = NULL;
	c->box = NULL;
	c->font_func = &nsfont;
	c->scrollbar = NULL;
	c->scripts_count = 0;
	c->scripts = NULL;
	c->jscontext = NULL;

	if (lwc_intern_string("*", SLEN("*"), &c->universal) != lwc_error_ok) {
		berr = BINDING_NOMEM;
		goto fail;
	}

	selection_prepare(&c->sel, (struct content *)c, true);

	/* a charset in the HTTP parameters takes effect from the start */
	result = http_parameter_list_find_item(params, html_charset, &charset);
	if (result == NSERROR_OK) {
		c->encoding = talloc_strdup(c, lwc_string_data(charset));
		lwc_string_unref(charset);

		if (c->encoding == NULL) {
			berr = BINDING_NOMEM;
			goto fail;
		}

		c->encoding_source = ENCODING_SOURCE_HEADER;
	}

	/* Create the parser binding */
	berr = binding_create_tree(&c->parser_binding,
			c->encoding,
			nsoption_bool(enable_javascript),
			html_process_script,
			c);

	if (berr == BINDING_BADENCODING && c->encoding != NULL) {
		/* Ok, we don't support the declared encoding. Bailing out
		 * isn't exactly user-friendly, so fall back to autodetect */
		talloc_free(c->encoding);
		c->encoding = NULL;

		berr = binding_create_tree(&c->parser_binding,
				c->encoding,
				nsoption_bool(enable_javascript),
				html_process_script,
				c);
	}

	if (berr == BINDING_OK) {
		return NSERROR_OK;
	}

fail:
	if (berr == BINDING_BADENCODING) {
		LOG(("Bad encoding: %s", c->encoding ? c->encoding : ""));
		msg_data.error = messages_get("ParsingFail");
		result = NSERROR_BAD_ENCODING;
	} else {
		msg_data.error = messages_get("NoMemory");
		result = NSERROR_NOMEM;
	}

	content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data);

	/* give back the references taken above */
	if (c->universal != NULL) {
		lwc_string_unref(c->universal);
		c->universal = NULL;
	}

	if (c->base_url != NULL) {
		nsurl_unref(c->base_url);
		c->base_url = NULL;
	}

	return result;
}
/**
 * Create a CONTENT_HTML.
 *
 * Allocates the html_content, initialises the generic content part and then
 * the HTML specific data (which creates the HTML parser). The allocation is
 * freed again if either initialisation step fails.
 *
 * \param handler           content handler
 * \param imime_type        MIME type of the content
 * \param params            HTTP parameters
 * \param llcache           low-level cache handle for the source data
 * \param fallback_charset  charset to fall back to
 * \param quirks            whether quirks mode is requested
 * \param c                 updated to the new content on success
 * \return NSERROR_OK on success, appropriate error otherwise
 */
static nserror
html_create(const content_handler *handler,
		lwc_string *imime_type,
		const http_parameter *params,
		llcache_handle *llcache,
		const char *fallback_charset,
		bool quirks,
		struct content **c)
{
	nserror error;
	html_content *html = talloc_zero(0, html_content);

	if (html == NULL) {
		return NSERROR_NOMEM;
	}

	error = content__init(&html->base, handler, imime_type, params,
			llcache, fallback_charset, quirks);
	if (error == NSERROR_OK) {
		error = html_create_html_data(html, params);
	}

	if (error != NSERROR_OK) {
		talloc_free(html);
		return error;
	}

	*c = (struct content *) html;

	return NSERROR_OK;
}
/**
 * Process data for CONTENT_HTML.
 *
 * Feeds a chunk of source data to the parser. If the parser reports that
 * the document encoding has changed, the parser binding is thrown away,
 * recreated with the new encoding (or Windows-1252 if that encoding is not
 * supported), and all source data received so far is parsed again.
 *
 * \param c     content being loaded
 * \param data  chunk of source data
 * \param size  length of the chunk in bytes
 * \return true on success, false on failure (an error is broadcast)
 */
static bool
html_process_data(struct content *c, const char *data, unsigned int size)
{
	html_content *html = (html_content *) c;
	union content_msg_data msg_data;
	binding_error err;
	const char *encoding;
	const char *source_data;
	unsigned long source_size;

	err = binding_parse_chunk(html->parser_binding,
			(const uint8_t *) data, size);
	if (err == BINDING_OK) {
		return true;
	}

	if (err != BINDING_ENCODINGCHANGE) {
		msg_data.error = messages_get("NoMemory");
		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
		return false;
	}

	/* The encoding changed. Retrieve the new one and take a copy,
	 * because the binding which reported it is about to be destroyed. */
	encoding = binding_get_encoding(
			html->parser_binding,
			&html->encoding_source);

	if (html->encoding != NULL) {
		talloc_free(html->encoding);
	}

	html->encoding = talloc_strdup(c, encoding);
	if (html->encoding == NULL) {
		msg_data.error = messages_get("NoMemory");
		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
		return false;
	}

	/* Destroy binding */
	binding_destroy_tree(html->parser_binding);
	html->parser_binding = NULL;

	/* Create new binding, using the new encoding */
	err = binding_create_tree(&html->parser_binding,
			html->encoding,
			nsoption_bool(enable_javascript),
			html_process_script,
			html);

	if (err == BINDING_BADENCODING) {
		/* Ok, we don't support the declared encoding. Bailing out
		 * isn't exactly user-friendly, so fall back to Windows-1252 */
		talloc_free(html->encoding);
		html->encoding = talloc_strdup(c, "Windows-1252");
		if (html->encoding == NULL) {
			msg_data.error = messages_get("NoMemory");
			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
			return false;
		}

		err = binding_create_tree(&html->parser_binding,
				html->encoding,
				nsoption_bool(enable_javascript),
				html_process_script,
				html);
	}

	if (err != BINDING_OK) {
		if (err == BINDING_BADENCODING) {
			LOG(("Bad encoding: %s", html->encoding
					? html->encoding : ""));
			msg_data.error = messages_get("ParsingFail");
		} else {
			msg_data.error = messages_get("NoMemory");
		}

		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
		return false;
	}

	source_data = content__get_source_data(c, &source_size);

	/* Recurse to reprocess all the data. This is safe because
	 * the encoding is now specified at parser start which means
	 * it cannot be changed again. */
	return html_process_data(c, source_data, source_size);
}
/** process link node */
static bool html_process_link(html_content *c, dom_node *node)
{
struct content_rfc5988_link link; /* the link added to the content */
dom_exception exc; /* returned by libdom functions */
dom_string *atr_string;
nserror error;
memset(&link, 0, sizeof(struct content_rfc5988_link));
/* check that the relation exists - w3c spec says must be present */
exc = dom_element_get_attribute(node, html_dom_string_rel, &atr_string);
if ((exc != DOM_NO_ERR) || (atr_string == NULL)) {
return false;
}
/* get a lwc string containing the link relation */
exc = dom_string_intern(atr_string, &link.rel);
dom_string_unref(atr_string);
if (exc != DOM_NO_ERR) {
return false;
}
/* check that the href exists - w3c spec says must be present */
exc = dom_element_get_attribute(node, html_dom_string_href, &atr_string);
if ((exc != DOM_NO_ERR) || (atr_string == NULL)) {
lwc_string_unref(link.rel);
return false;
}
/* get nsurl */
error = nsurl_join(c->base_url, dom_string_data(atr_string), &link.href);
dom_string_unref(atr_string);
if (error != NSERROR_OK) {
lwc_string_unref(link.rel);
return false;
}
/* look for optional properties -- we don't care if internment fails */
exc = dom_element_get_attribute(node, html_dom_string_hreflang, &atr_string);
if ((exc == DOM_NO_ERR) && (atr_string != NULL)) {
/* get a lwc string containing the href lang */
exc = dom_string_intern(atr_string, &link.hreflang);
dom_string_unref(atr_string);
}
exc = dom_element_get_attribute(node, html_dom_string_type, &atr_string);
if ((exc == DOM_NO_ERR) && (atr_string != NULL)) {
/* get a lwc string containing the type */
exc = dom_string_intern(atr_string, &link.type);
dom_string_unref(atr_string);
}
exc = dom_element_get_attribute(node, html_dom_string_media, &atr_string);
if ((exc == DOM_NO_ERR) && (atr_string != NULL)) {
/* get a lwc string containing the media */
exc = dom_string_intern(atr_string, &link.media);
dom_string_unref(atr_string);
}
exc = dom_element_get_attribute(node, html_dom_string_sizes, &atr_string);
if ((exc == DOM_NO_ERR) && (atr_string != NULL)) {
/* get a lwc string containing the sizes */
exc = dom_string_intern(atr_string, &link.sizes);
dom_string_unref(atr_string);
}
/* add to content */
content__add_rfc5988_link(&c->base, &link);
if (link.sizes != NULL)
lwc_string_unref(link.sizes);
if (link.media != NULL)
lwc_string_unref(link.media);
if (link.type != NULL)
lwc_string_unref(link.type);
if (link.hreflang != NULL)
lwc_string_unref(link.hreflang);
nsurl_unref(link.href);
lwc_string_unref(link.rel);
return true;
}
/**
 * Process a title node.
 *
 * Sets the content title from the node's text, with whitespace squashed.
 * A title which has already been set is left alone.
 *
 * \param c     HTML content to set the title of
 * \param node  the title element
 * \return true if a title is already set or was set successfully, false
 *         otherwise
 */
static bool html_process_title(html_content *c, dom_node *node)
{
	dom_string *text;
	char *squashed;
	bool stored;

	/* only the first title counts */
	if (c->base.title != NULL) {
		return true;
	}

	if ((dom_node_get_text_content(node, &text) != DOM_NO_ERR) ||
			(text == NULL)) {
		return false;
	}

	squashed = squash_whitespace(dom_string_data(text));
	dom_string_unref(text);
	if (squashed == NULL) {
		return false;
	}

	stored = content__set_title(&c->base, squashed);
	free(squashed);

	return stored;
}
/**
 * Process a base node.
 *
 * A usable href attribute replaces the document base URL. A target
 * attribute is recorded as the base target if none has been recorded yet
 * and its value is acceptable.
 *
 * \param c     HTML content to update
 * \param node  the base element
 * \return true always
 */
static bool html_process_base(html_content *c, dom_node *node)
{
	dom_string *value;
	dom_exception exc; /* returned by libdom functions */
	bool acceptable;

	/* href, if present and parseable, becomes the new base URL */
	exc = dom_element_get_attribute(node, html_dom_string_href, &value);
	if ((exc == DOM_NO_ERR) && (value != NULL)) {
		nsurl *parsed;
		nserror status;

		status = nsurl_create(dom_string_data(value), &parsed);
		dom_string_unref(value);

		if (status == NSERROR_OK) {
			if (c->base_url != NULL) {
				nsurl_unref(c->base_url);
			}
			c->base_url = parsed;
		}
	}

	/* the first base target found wins */
	if (c->base_target != NULL) {
		return true;
	}

	exc = dom_element_get_attribute(node, html_dom_string_target, &value);
	if ((exc != DOM_NO_ERR) || (value == NULL)) {
		return true;
	}

	/* Validation rules from the HTML5 spec for the base element:
	 * The target must be one of _blank, _self, _parent, or
	 * _top or any identifier which does not begin with an
	 * underscore
	 */
	acceptable = *dom_string_data(value) != '_' ||
		dom_string_caseless_isequal(value, html_dom_string__blank) ||
		dom_string_caseless_isequal(value, html_dom_string__self) ||
		dom_string_caseless_isequal(value, html_dom_string__parent) ||
		dom_string_caseless_isequal(value, html_dom_string__top);

	if (acceptable) {
		c->base_target = strdup(dom_string_data(value));
	}

	dom_string_unref(value);

	return true;
}
/**
 * Process elements in <head>.
 *
 * \param c     content structure
 * \param head  xml node of head element
 * \return true once the children have been walked, false if the first
 *         child of the head element could not be retrieved
 *
 * The title, base and link elements are processed if present. Failures in
 * processing an individual element are ignored.
 */
static bool html_head(html_content *c, dom_node *head)
{
	dom_node *node;
	dom_exception exc; /* returned by libdom functions */
	dom_node_type node_type;
	dom_node *next_node;

	exc = dom_node_get_first_child(head, &node);
	if (exc != DOM_NO_ERR) {
		return false;
	}

	while (node != NULL) {
		exc = dom_node_get_node_type(node, &node_type);
		if ((exc == DOM_NO_ERR) && (node_type == DOM_ELEMENT_NODE)) {
			dom_string *node_name = NULL;

			exc = dom_node_get_node_name(node, &node_name);
			/* both conditions must hold: the name is only valid
			 * when the call succeeded, and it must be non-NULL
			 * before it can be compared */
			if ((exc == DOM_NO_ERR) && (node_name != NULL)) {
				if (dom_string_caseless_isequal(node_name,
						html_dom_string_title)) {
					html_process_title(c, node);
				} else if (dom_string_caseless_isequal(node_name,
						html_dom_string_base)) {
					html_process_base(c, node);
				} else if (dom_string_caseless_isequal(node_name,
						html_dom_string_link)) {
					html_process_link(c, node);
				}

				/* release the reference the getter returned */
				dom_string_unref(node_name);
			}
		}

		/* move to next node */
		exc = dom_node_get_next_sibling(node, &next_node);
		dom_node_unref(node);
		if (exc == DOM_NO_ERR) {
			node = next_node;
		} else {
			node = NULL;
		}
	}

	return true;
}
/**
 * Skip linear white space.
 *
 * \param p    current position
 * \param end  one past the last byte that may be examined
 * \return first position at or after p which is not white space, or end
 */
static const char *html_meta_refresh_skip_lws(const char *p, const char *end)
{
	/* <ctype.h> functions need a value representable as unsigned char;
	 * plain char may be negative for bytes above 0x7f */
	while (p < end && isspace((unsigned char) *p)) {
		p++;
	}

	return p;
}

/**
 * Process a meta element, acting on it if it is a meta refresh.
 *
 * Elements whose http-equiv attribute is not "refresh", or whose content
 * attribute is missing or malformed, are ignored. For a valid refresh,
 * the refresh URL is stored in the content and CONTENT_MSG_REFRESH is
 * broadcast with the delay in seconds (never less than one).
 *
 * \param c  content structure
 * \param n  the meta element
 * \return true if the element was processed or ignored, false on error
 */
static bool html_meta_refresh_process_element(html_content *c, dom_node *n)
{
	union content_msg_data msg_data;
	const char *url, *end, *refresh = NULL;
	char *new_url;
	char quote = '\0';
	dom_string *equiv, *content;
	dom_exception exc;
	nsurl *nsurl;
	nserror error;

	exc = dom_element_get_attribute(n, html_dom_string_http_equiv, &equiv);
	if (exc != DOM_NO_ERR)
		return false;

	if (equiv == NULL)
		return true;

	if (strcasecmp(dom_string_data(equiv), "refresh") != 0) {
		dom_string_unref(equiv);
		return true;
	}

	dom_string_unref(equiv);

	exc = dom_element_get_attribute(n, html_dom_string_content, &content);
	if (exc != DOM_NO_ERR)
		return false;

	if (content == NULL)
		return true;

	end = dom_string_data(content) + dom_string_byte_length(content);

	/* content := *LWS intpart fracpart? *LWS [';' *LWS *1url *LWS]
	 * intpart := 1*DIGIT
	 * fracpart := 1*('.' | DIGIT)
	 * url := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq)
	 * url-nq := *urlchar
	 * url-sq := "'" *(urlchar | '"') "'"
	 * url-dq := '"' *(urlchar | "'") '"'
	 * urlchar := [#x9#x21#x23-#x26#x28-#x7E] | nonascii
	 * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
	 */

	/* *LWS */
	url = html_meta_refresh_skip_lws(dom_string_data(content), end);

	/* intpart */
	if (url == end || (*url < '0' || '9' < *url)) {
		/* Empty content, or invalid timeval */
		dom_string_unref(content);
		return true;
	}

	msg_data.delay = (int) strtol(url, &new_url, 10);
	/* a very small delay and self-referencing URL can cause a loop
	 * that grinds machines to a halt. To prevent this we set a
	 * minimum refresh delay of 1s. */
	if (msg_data.delay < 1)
		msg_data.delay = 1;

	url = new_url;

	/* fracpart? (ignored, as delay is integer only) */
	while (url < end && (('0' <= *url && *url <= '9') ||
			*url == '.')) {
		url++;
	}

	/* *LWS */
	url = html_meta_refresh_skip_lws(url, end);

	/* ';' */
	if (url < end && *url == ';')
		url++;

	/* *LWS */
	url = html_meta_refresh_skip_lws(url, end);

	if (url == end) {
		/* Just delay specified, so refresh current page */
		dom_string_unref(content);

		c->base.refresh = nsurl_ref(
				content_get_url(&c->base));

		content_broadcast(&c->base, CONTENT_MSG_REFRESH,
				msg_data);
		return true;
	}

	/* "url" -- compare the remaining length rather than forming
	 * end - 3, which could point before the start of the string */
	if ((end - url) >= 3) {
		if (strncasecmp(url, "url", 3) == 0) {
			url += 3;
		} else {
			/* Unexpected input, ignore this header */
			dom_string_unref(content);
			return true;
		}
	} else {
		/* Insufficient input, ignore this header */
		dom_string_unref(content);
		return true;
	}

	/* *LWS */
	url = html_meta_refresh_skip_lws(url, end);

	/* '=' */
	if (url < end) {
		if (*url == '=') {
			url++;
		} else {
			/* Unexpected input, ignore this header */
			dom_string_unref(content);
			return true;
		}
	} else {
		/* Insufficient input, ignore this header */
		dom_string_unref(content);
		return true;
	}

	/* *LWS */
	url = html_meta_refresh_skip_lws(url, end);

	/* '"' or "'" */
	if (url < end && (*url == '"' || *url == '\'')) {
		quote = *url;
		url++;
	}

	/* Start of URL */
	refresh = url;

	if (quote != 0) {
		/* url-sq | url-dq */
		while (url < end && *url != quote)
			url++;
	} else {
		/* url-nq */
		while (url < end && !isspace((unsigned char) *url))
			url++;
	}

	/* '"' or "'" or *LWS (we don't care) */
	if (url > refresh) {
		/* There's a URL */
		new_url = strndup(refresh, url - refresh);
		if (new_url == NULL) {
			dom_string_unref(content);
			return false;
		}

		error = nsurl_join(c->base_url, new_url, &nsurl);
		if (error != NSERROR_OK) {
			free(new_url);

			dom_string_unref(content);

			msg_data.error = messages_get("NoMemory");
			content_broadcast(&c->base, CONTENT_MSG_ERROR,
					msg_data);

			return false;
		}

		free(new_url);

		c->base.refresh = nsurl;

		content_broadcast(&c->base, CONTENT_MSG_REFRESH, msg_data);
	}

	dom_string_unref(content);

	return true;
}
/**
 * Search for meta refresh
 *
 * http://wp.netscape.com/assist/net_sites/pushpull.html
 *
 * The children of the given node are examined in order. noscript elements
 * are searched recursively and meta elements are handed to
 * html_meta_refresh_process_element(). The search stops as soon as a
 * refresh has been recorded in the content.
 *
 * \param c     content structure
 * \param head  xml node of head element
 * \return true on success, false otherwise (error reported)
 */
static bool html_meta_refresh(html_content *c, dom_node *head)
{
	dom_node *n, *next;
	dom_exception exc;

	if (head == NULL)
		return true;

	exc = dom_node_get_first_child(head, &n);
	if (exc != DOM_NO_ERR)
		return false;

	while (n != NULL) {
		dom_node_type type;

		exc = dom_node_get_node_type(n, &type);
		if (exc != DOM_NO_ERR) {
			dom_node_unref(n);
			return false;
		}

		if (type == DOM_ELEMENT_NODE) {
			dom_string *name;
			bool is_noscript = false;
			bool is_meta = false;
			bool ok = true;

			exc = dom_node_get_node_name(n, &name);
			if (exc != DOM_NO_ERR) {
				dom_node_unref(n);
				return false;
			}

			/* Classify the element, then release the name at
			 * once so that no exit path below can leak it. */
			if (name != NULL) {
				const char *tag = dom_string_data(name);

				if (strcmp(tag, "noscript") == 0)
					is_noscript = true;
				else if (strcmp(tag, "meta") == 0)
					is_meta = true;

				dom_string_unref(name);
			}

			if (is_noscript) {
				/* Recurse into noscript elements */
				ok = html_meta_refresh(c, n);
			} else if (is_meta) {
				ok = html_meta_refresh_process_element(c, n);
			}

			if (ok == false) {
				/* Some error occurred */
				dom_node_unref(n);
				return false;
			}

			if ((is_noscript || is_meta) &&
					(c->base.refresh != NULL)) {
				/* Meta refresh found - stop */
				dom_node_unref(n);
				return true;
			}
		}

		exc = dom_node_get_next_sibling(n, &next);
		if (exc != DOM_NO_ERR) {
			dom_node_unref(n);
			return false;
		}

		dom_node_unref(n);
		n = next;
	}

	return true;
}
/**
 * Update a box whose content has completed rendering.
 *
 * A background object is simply attached to the box. Any other object
 * becomes the box's object and, unless the box has the REPLACE_DIM flag,
 * the cached maximum widths of the box and all its ancestors are
 * invalidated and any clone boxes directly following it are unlinked.
 *
 * \param box         box the object belongs to
 * \param object      the object's content handle
 * \param background  the object is the background image of the box
 */
static void
html_object_done(struct box *box,
		hlcache_handle *object,
		bool background)
{
	struct box *ancestor;

	if (background) {
		box->background = object;
		return;
	}

	box->object = object;

	if (box->flags & REPLACE_DIM) {
		return;
	}

	/* invalidate parent min, max widths */
	ancestor = box;
	while (ancestor != NULL) {
		ancestor->max_width = UNKNOWN_MAX_WIDTH;
		ancestor = ancestor->parent;
	}

	/* drop any clones of this box from the sibling chain; they are
	 * only unlinked here, not freed */
	while ((box->next != NULL) && ((box->next->flags & CLONE) != 0)) {
		box->next = box->next->next;
	}
}
/**
 * Handle object fetching or loading failure.
 *
 * Currently a placeholder: no action is taken when an object fails.
 *
 * \param box         box containing object which failed to load
 * \param content     document of type CONTENT_HTML
 * \param background  the object was the background image for the box
 */
static void
html_object_failed(struct box *box, html_content *content, bool background)
{
	/* Nothing to do; the parameters are intentionally unused */
	(void) box;
	(void) content;
	(void) background;
}
/**
* Callback for hlcache_handle_retrieve() for objects.
*
* Dispatches on the event type reported for an embedded object and keeps the
* owning HTML content (its active fetch count, status text, layout and
* redraw requests) in step with the object's progress.
*
* \param object handle of the object the event refers to
* \param event the event being reported
* \param pw the struct content_html_object registered for this fetch
* \return NSERROR_OK in all cases
*/
static nserror
html_object_callback(hlcache_handle *object,
const hlcache_event *event,
void *pw)
{
struct content_html_object *o = pw;
html_content *c = (html_content *) o->parent;
int x, y;
struct box *box;
assert(c->base.status != CONTENT_STATUS_ERROR);
box = o->box;
switch (event->type) {
case CONTENT_MSG_LOADING:
/* Open the object in the parent's window, but only once the parent
* itself has left the LOADING state and has a window attached */
if (c->base.status != CONTENT_STATUS_LOADING && c->bw != NULL)
content_open(object,
c->bw, &c->base,
box,
box->object_params);
break;
case CONTENT_MSG_READY:
/* Only nested HTML objects are attached at READY; the parent is
* reformatted if it is already READY or DONE */
if (content_get_type(object) == CONTENT_HTML) {
html_object_done(box, object, o->background);
if (c->base.status == CONTENT_STATUS_READY ||
c->base.status == CONTENT_STATUS_DONE)
content__reformat(&c->base, false,
c->base.available_width,
c->base.height);
}
break;
case CONTENT_MSG_DONE:
/* One fewer outstanding fetch for the parent */
c->base.active--;
LOG(("%d fetches active", c->base.active));
html_object_done(box, object, o->background);
/* Boxes flagged REPLACE_DIM only get a redraw of the box area
* (when visible) rather than a reformat */
if (c->base.status != CONTENT_STATUS_LOADING &&
box->flags & REPLACE_DIM) {
union content_msg_data data;
if (!box_visible(box))
break;
box_coords(box, &x, &y);
data.redraw.x = x + box->padding[LEFT];
data.redraw.y = y + box->padding[TOP];
data.redraw.width = box->width;
data.redraw.height = box->height;
data.redraw.full_redraw = true;
content_broadcast(&c->base, CONTENT_MSG_REDRAW, data);
}
break;
case CONTENT_MSG_ERROR:
/* Drop the failed object's handle, account for the finished fetch
* and surface the error text as the parent's status */
hlcache_handle_release(object);
o->content = NULL;
c->base.active--;
LOG(("%d fetches active", c->base.active));
content_add_error(&c->base, "?", 0);
html_set_status(c, event->data.error);
content_broadcast(&c->base, CONTENT_MSG_STATUS, event->data);
html_object_failed(box, c, o->background);
break;
case CONTENT_MSG_STATUS:
/* Mirror the object's status message on the parent */
html_set_status(c, content_get_status_message(object));
/* content_broadcast(&c->base, CONTENT_MSG_STATUS, 0); */
break;
case CONTENT_MSG_REFORMAT:
break;
case CONTENT_MSG_REDRAW:
/* Translate the object's redraw request into the parent's
* coordinate space and forward it */
if (c->base.status != CONTENT_STATUS_LOADING) {
union content_msg_data data = event->data;
if (!box_visible(box))
break;
box_coords(box, &x, &y);
/* When the redraw is for this object's own content, scale the
* rectangle from object dimensions to box dimensions.
* NOTE(review): divides by content_get_width()/content_get_height()
* with no zero check visible here - confirm these cannot be 0 */
if (hlcache_handle_get_content(object) ==
event->data.redraw.object) {
data.redraw.x = data.redraw.x *
box->width / content_get_width(object);
data.redraw.y = data.redraw.y *
box->height /
content_get_height(object);
data.redraw.width = data.redraw.width *
box->width / content_get_width(object);
data.redraw.height = data.redraw.height *
box->height /
content_get_height(object);
data.redraw.object_width = box->width;
data.redraw.object_height = box->height;
}
/* Offset by the box position plus its left/top padding */
data.redraw.x += x + box->padding[LEFT];
data.redraw.y += y + box->padding[TOP];
data.redraw.object_x += x + box->padding[LEFT];
data.redraw.object_y += y + box->padding[TOP];
content_broadcast(&c->base, CONTENT_MSG_REDRAW, data);
}
break;
case CONTENT_MSG_REFRESH:
if (content_get_type(object) == CONTENT_HTML) {
/* only for HTML objects */
/* NOTE(review): the factor 100 presumably converts the delay to the
* scheduler's time unit - confirm against schedule() */
schedule(event->data.delay * 100,
html_object_refresh, o);
}
break;
case CONTENT_MSG_LINK:
/* Don't care about favicons */
break;
default:
assert(0);
}
/* Completion check: only events that can change the active count or
* mark the start of loading are considered */
if (c->base.status == CONTENT_STATUS_READY && c->base.active == 0 &&
(event->type == CONTENT_MSG_LOADING ||
event->type == CONTENT_MSG_DONE ||
event->type == CONTENT_MSG_ERROR)) {
/* all objects have arrived */
content__reformat(&c->base, false, c->base.available_width,
c->base.height);
html_set_status(c, "");
content_set_done(&c->base);
}
/* If 1) the configuration option to reflow pages while objects are
* fetched is set
* 2) an object is newly fetched & converted,
* 3) the box's dimensions need to change due to being replaced
* 4) the object's parent HTML is ready for reformat,
* 5) the time since the previous reformat is more than the
* configured minimum time between reformats
* then reformat the page to display newly fetched objects */
else if (nsoption_bool(incremental_reflow) &&
event->type == CONTENT_MSG_DONE &&
!(box->flags & REPLACE_DIM) &&
(c->base.status == CONTENT_STATUS_READY ||
c->base.status == CONTENT_STATUS_DONE) &&
(wallclock() > c->base.reformat_time)) {
content__reformat(&c->base, false, c->base.available_width,
c->base.height);
}
return NSERROR_OK;
}
/**
 * Start a fetch for an object required by a page, replacing an existing object.
 *
 * Any object currently held is released first; if it had not yet reached
 * CONTENT_STATUS_DONE the owning content's active fetch count is reduced
 * to account for the abandoned fetch.  On a successful retrieve, the
 * active count of the owning content and of every enclosing page is
 * incremented and each is put back into CONTENT_STATUS_READY.
 *
 * \param object  Object to replace
 * \param url     URL of object to fetch (copied)
 * \return true on success, false if the retrieve could not be started
 */
static bool html_replace_object(struct content_html_object *object, nsurl *url)
{
	html_content *c;
	hlcache_child_context child;
	html_content *page;
	nserror error;

	assert(object != NULL);

	c = (html_content *) object->parent;

	child.charset = c->encoding;
	child.quirks = c->base.quirks;

	if (object->content != NULL) {
		/* remove existing object */
		if (content_get_status(object->content) != CONTENT_STATUS_DONE) {
			c->base.active--;
			LOG(("%d fetches active", c->base.active));
		}

		hlcache_handle_release(object->content);
		object->content = NULL;

		object->box->object = NULL;
	}

	/* initialise fetch */
	error = hlcache_handle_retrieve(url, HLCACHE_RETRIEVE_SNIFF_TYPE,
			content_get_url(&c->base), NULL,
			html_object_callback, object, &child,
			object->permitted_types,
			&object->content);
	if (error != NSERROR_OK)
		return false;

	/* Propagate the new outstanding fetch up through enclosing pages */
	for (page = c; page != NULL; page = page->page) {
		page->base.active++;
		/* log the counter that was just changed, not always c's */
		LOG(("%d fetches active", page->base.active));

		page->base.status = CONTENT_STATUS_READY;
	}

	return true;
}
/**
 * schedule() callback: re-fetch an HTML object from its refresh URL.
 *
 * \param p  the struct content_html_object to refresh
 */
static void html_object_refresh(void *p)
{
	struct content_html_object *obj = p;
	nsurl *target;

	assert(content_get_type(obj->content) == CONTENT_HTML);

	target = content_get_refresh_url(obj->content);

	/* The refresh URL may have gone (e.g. if the fetch errored);
	 * in that case there is nothing to do */
	if (target != NULL) {
		content_invalidate_reuse_data(obj->content);

		/** \todo handle memory exhaustion */
		(void) html_replace_object(obj, target);
	}
}
/**
 * Callback for hlcache_handle_retrieve() for linked stylesheets.
 *
 * Locates the external stylesheet slot that owns \a css, updates the
 * parent's active fetch count on completion or failure, and finishes the
 * parent's conversion once no fetches remain active.
 *
 * \param css    handle of the stylesheet the event refers to
 * \param event  the event being reported
 * \param pw     the html_content that requested the stylesheet
 * \return NSERROR_OK in all cases
 */
static nserror
html_convert_css_callback(hlcache_handle *css,
		const hlcache_event *event,
		void *pw)
{
	html_content *parent = pw;
	struct html_stylesheet *s = parent->stylesheets;
	unsigned int i = 0;

	/* Find sheet */
	while (i != parent->stylesheet_count) {
		if (s->type == HTML_STYLESHEET_EXTERNAL &&
				s->data.external == css)
			break;
		i++;
		s++;
	}

	assert(i != parent->stylesheet_count);

	switch (event->type) {
	case CONTENT_MSG_LOADING:
	case CONTENT_MSG_READY:
		/* nothing to do until the sheet completes or fails */
		break;

	case CONTENT_MSG_DONE:
		LOG(("done stylesheet slot %d '%s'", i,
				nsurl_access(hlcache_handle_get_url(css))));
		parent->base.active--;
		LOG(("%d fetches active", parent->base.active));
		break;

	case CONTENT_MSG_ERROR:
		LOG(("stylesheet %s failed: %s",
				nsurl_access(hlcache_handle_get_url(css)),
				event->data.error));
		/* give up the handle and leave the slot empty */
		hlcache_handle_release(css);
		s->data.external = NULL;
		parent->base.active--;
		LOG(("%d fetches active", parent->base.active));
		content_add_error(&parent->base, "?", 0);
		break;

	case CONTENT_MSG_STATUS:
		html_set_status(parent, content_get_status_message(css));
		content_broadcast(&parent->base, CONTENT_MSG_STATUS,
				event->data);
		break;

	default:
		assert(0);
	}

	if (parent->base.active == 0)
		html_finish_conversion(parent);

	return NSERROR_OK;
}
/**
 * Notification that an inline stylesheet has completed.
 *
 * Accounts for the finished sheet in the owning content's active count
 * and finishes conversion when that count reaches zero.
 *
 * \param css  Inline style object (unused)
 * \param pw   Private data: the owning html_content
 */
static void html_inline_style_done(struct content_css_data *css, void *pw)
{
	html_content *html = pw;

	html->base.active--;

	if (html->base.active == 0)
		html_finish_conversion(html);
}
/**
 * Process an inline stylesheet in the document.
 *
 * STYLE elements whose type attribute is present and not "text/css", or
 * whose media attribute is present and mentions neither "screen" nor
 * "all", are skipped without error.  Otherwise a new internal stylesheet
 * slot is appended, the text of every child node of the element is fed
 * to the CSS parser and the sheet is converted.  A sheet that fails to
 * convert leaves its slot holding NULL but still consumes the index.
 *
 * \param c      content structure
 * \param index  Index of stylesheet in stylesheet_content array,
 *               updated if successful
 * \param style  xml node of style element
 * \return true on success, false if an error occurred
 */
static bool
html_process_style_element(html_content *c,
		unsigned int *index,
		dom_node *style)
{
	dom_node *child, *next;
	dom_string *val;
	dom_exception exc;
	union content_msg_data msg_data;
	struct html_stylesheet *stylesheets;
	struct content_css_data *sheet;
	nserror error;

	/* type='text/css', or not present (invalid but common) */
	exc = dom_element_get_attribute(style, html_dom_string_type, &val);
	if (exc == DOM_NO_ERR && val != NULL) {
		bool is_css = (strcmp(dom_string_data(val), "text/css") == 0);

		dom_string_unref(val);
		if (!is_css)
			return true;
	}

	/* media contains 'screen' or 'all' or not present */
	exc = dom_element_get_attribute(style, html_dom_string_media, &val);
	if (exc == DOM_NO_ERR && val != NULL) {
		bool wanted =
			strcasestr(dom_string_data(val), "screen") != NULL ||
			strcasestr(dom_string_data(val), "all") != NULL;

		dom_string_unref(val);
		if (!wanted)
			return true;
	}

	/* Extend array */
	stylesheets = talloc_realloc(c, c->stylesheets,
			struct html_stylesheet, *index + 1);
	if (stylesheets == NULL)
		goto no_memory;

	c->stylesheets = stylesheets;
	c->stylesheet_count++;

	c->stylesheets[*index].type = HTML_STYLESHEET_INTERNAL;
	c->stylesheets[*index].data.internal = NULL;

	/* create stylesheet */
	sheet = talloc(c, struct content_css_data);
	if (sheet == NULL)
		goto undo_slot;

	error = nscss_create_css_data(sheet,
			nsurl_access(c->base_url), NULL, c->quirks,
			html_inline_style_done, c);
	if (error != NSERROR_OK)
		goto undo_sheet;

	/* can't just use xmlNodeGetContent(style), because that won't
	 * give the content of comments which may be used to 'hide'
	 * the content */
	exc = dom_node_get_first_child(style, &child);
	if (exc != DOM_NO_ERR)
		goto undo_css;

	for (; child != NULL; child = next) {
		dom_string *data;
		bool processed;

		exc = dom_node_get_text_content(child, &data);
		if (exc != DOM_NO_ERR)
			goto undo_child;

		processed = nscss_process_css_data(sheet,
				dom_string_data(data),
				dom_string_byte_length(data));
		dom_string_unref(data);
		if (processed == false)
			goto undo_child;

		exc = dom_node_get_next_sibling(child, &next);
		if (exc != DOM_NO_ERR)
			goto undo_child;

		dom_node_unref(child);
	}

	c->base.active++;
	LOG(("%d fetches active", c->base.active));

	/* Convert the content -- manually, as we want the result */
	if (nscss_convert_css_data(sheet) != CSS_OK) {
		/* conversion failed */
		c->base.active--;
		LOG(("%d fetches active", c->base.active));
		nscss_destroy_css_data(sheet);
		talloc_free(sheet);
		sheet = NULL;
	}

	/* Update index */
	c->stylesheets[*index].data.internal = sheet;
	(*index)++;

	return true;

	/* Error unwinding: each label undoes one more acquisition and
	 * falls through to the ones taken before it */
undo_child:
	dom_node_unref(child);
undo_css:
	nscss_destroy_css_data(sheet);
undo_sheet:
	talloc_free(sheet);
undo_slot:
	c->stylesheet_count--;
no_memory:
	msg_data.error = messages_get("NoMemory");
	content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data);
	return false;
}
/**
 * Depth-first walk of the DOM, calling a callback for each element node.
 *
 * The walk starts by stepping off \a root, so the callback is invoked for
 * nodes reached from root (children first, then siblings, then siblings
 * of ancestors) but not for root itself.  When climbing back up through
 * ancestors no comparison against \a root is made; the climb ends when a
 * next sibling is found or the parent chain runs out.
 *
 * The walker holds exactly one reference on the current node; it is
 * released whenever the walk advances, aborts on a DOM error, or is
 * terminated early by the callback.
 *
 * \param root      the dom node to use as the root of the tree walk
 * \param callback  called with each element node, its name and \a ctx;
 *                  returning false stops the walk
 * \param ctx       opaque pointer handed to the callback
 * \return false if the callback terminated the walk early, otherwise the
 *         result of the last callback made (true if none was made);
 *         a DOM error ends the walk without changing the result
 */
static bool
html_treewalk_dom(dom_node *root,
		bool (*callback)(dom_node *node, dom_string *name, void *ctx),
		void *ctx)
{
	dom_node *node;
	bool result = true;

	node = dom_node_ref(root); /* tree root */

	while (node != NULL) {
		dom_node *next = NULL;
		dom_node_type type;
		dom_string *name;
		dom_exception exc;

		exc = dom_node_get_first_child(node, &next);
		if (exc != DOM_NO_ERR) {
			dom_node_unref(node);
			break;
		}

		if (next != NULL) { /* 1. children */
			dom_node_unref(node);
			node = next;
		} else {
			exc = dom_node_get_next_sibling(node, &next);
			if (exc != DOM_NO_ERR) {
				dom_node_unref(node);
				break;
			}

			if (next != NULL) { /* 2. siblings */
				dom_node_unref(node);
				node = next;
			} else { /* 3. ancestor siblings */
				/* climb until some ancestor has a next
				 * sibling, or there are no more parents */
				while (node != NULL) {
					exc = dom_node_get_next_sibling(node,
							&next);
					if (exc != DOM_NO_ERR) {
						dom_node_unref(node);
						node = NULL;
						break;
					}

					if (next != NULL) {
						/* only probing here; the
						 * sibling is re-fetched below */
						dom_node_unref(next);
						break;
					}

					exc = dom_node_get_parent_node(node,
							&next);
					if (exc != DOM_NO_ERR) {
						dom_node_unref(node);
						node = NULL;
						break;
					}

					dom_node_unref(node);
					node = next;
				}

				if (node == NULL)
					break;

				exc = dom_node_get_next_sibling(node, &next);
				if (exc != DOM_NO_ERR) {
					dom_node_unref(node);
					break;
				}

				dom_node_unref(node);
				node = next;
			}
		}

		assert(node != NULL);

		/* only element nodes are reported to the callback */
		exc = dom_node_get_node_type(node, &type);
		if ((exc != DOM_NO_ERR) || (type != DOM_ELEMENT_NODE))
			continue;

		exc = dom_node_get_node_name(node, &name);
		if (exc != DOM_NO_ERR)
			continue;

		result = callback(node, name, ctx);

		dom_string_unref(name);

		if (result == false) {
			/* callback caused early termination; drop the
			 * reference still held on the current node */
			dom_node_unref(node);
			break;
		}
	}

	return result;
}
/** Context handed to html_process_stylesheet() through html_treewalk_dom() */
struct find_stylesheet_ctx {
/* index of the next slot to fill in c->stylesheets; advanced as sheets
* are added */
unsigned int count;
/* HTML content whose stylesheets are being collected */
html_content *c;
};
/**
 * Tree-walk callback to process stylesheet elements.
 *
 * STYLE elements are handed to html_process_style_element().  LINK
 * elements are fetched as external stylesheets when their rel attribute
 * mentions "stylesheet" but not "alternate", their type (if present) is
 * "text/css", their media (if present) mentions "screen" or "all", and
 * they carry an href.  Every other element is ignored.
 *
 * \param node  element node being visited
 * \param name  name of the element
 * \param vctx  struct find_stylesheet_ctx for the walk
 * \return true to continue the walk, false if an error occurred
 *         (error already broadcast)
 */
static bool
html_process_stylesheet(dom_node *node, dom_string *name, void *vctx)
{
	struct find_stylesheet_ctx *ctx = (struct find_stylesheet_ctx *)vctx;
	dom_string *rel, *type_attr, *media, *href;
	struct html_stylesheet *stylesheets;
	nsurl *joined;
	union content_msg_data msg_data;
	dom_exception exc;
	nserror ns_error;
	hlcache_child_context child;

	/* deal with style nodes */
	if (strcmp(dom_string_data(name), "style") == 0) {
		if (!html_process_style_element(ctx->c, &ctx->count, node))
			return false;
		return true;
	}

	/* if it is not a link node skip it */
	if (strcmp(dom_string_data(name), "link") != 0) {
		return true;
	}

	/* rel=<space separated list, including 'stylesheet'> */
	exc = dom_element_get_attribute(node,
			html_dom_string_rel, &rel);
	if (exc != DOM_NO_ERR || rel == NULL)
		return true;

	if (strcasestr(dom_string_data(rel), "stylesheet") == NULL) {
		dom_string_unref(rel);
		return true;
	} else if (strcasestr(dom_string_data(rel), "alternate") != NULL) {
		/* Ignore alternate stylesheets */
		dom_string_unref(rel);
		return true;
	}
	dom_string_unref(rel);

	/* type='text/css' or not present */
	exc = dom_element_get_attribute(node, html_dom_string_type, &type_attr);
	if (exc == DOM_NO_ERR && type_attr != NULL) {
		if (strcmp(dom_string_data(type_attr), "text/css") != 0) {
			dom_string_unref(type_attr);
			return true;
		}
		dom_string_unref(type_attr);
	}

	/* media contains 'screen' or 'all' or not present */
	exc = dom_element_get_attribute(node, html_dom_string_media, &media);
	if (exc == DOM_NO_ERR && media != NULL) {
		if (strcasestr(dom_string_data(media), "screen") == NULL &&
		    strcasestr(dom_string_data(media), "all") == NULL) {
			dom_string_unref(media);
			return true;
		}
		dom_string_unref(media);
	}

	/* href='...' */
	exc = dom_element_get_attribute(node, html_dom_string_href, &href);
	if (exc != DOM_NO_ERR || href == NULL)
		return true;

	/* TODO: only the first preferred stylesheets (ie.
	 * those with a title attribute) should be loaded
	 * (see HTML4 14.3) */

	ns_error = nsurl_join(ctx->c->base_url, dom_string_data(href), &joined);
	if (ns_error != NSERROR_OK) {
		dom_string_unref(href);
		goto no_memory;
	}
	dom_string_unref(href);

	LOG(("linked stylesheet %i '%s'", ctx->count, nsurl_access(joined)));

	/* start fetch */
	stylesheets = talloc_realloc(ctx->c,
			ctx->c->stylesheets,
			struct html_stylesheet,
			ctx->count + 1);
	if (stylesheets == NULL) {
		nsurl_unref(joined);
		goto no_memory;
	}

	ctx->c->stylesheets = stylesheets;
	ctx->c->stylesheet_count++;
	ctx->c->stylesheets[ctx->count].type = HTML_STYLESHEET_EXTERNAL;
	/* make sure the new slot never holds an indeterminate handle */
	ctx->c->stylesheets[ctx->count].data.external = NULL;

	child.charset = ctx->c->encoding;
	child.quirks = ctx->c->base.quirks;

	ns_error = hlcache_handle_retrieve(joined,
			0,
			content_get_url(&ctx->c->base),
			NULL,
			html_convert_css_callback,
			ctx->c,
			&child,
			CONTENT_CSS,
			&ctx->c->stylesheets[ctx->count].data.external);

	nsurl_unref(joined);

	if (ns_error != NSERROR_OK) {
		/* The slot was never filled: take it back out of the count
		 * so it stays in step with ctx->count, as
		 * html_process_style_element() does on its error paths */
		ctx->c->stylesheet_count--;
		goto no_memory;
	}

	ctx->c->base.active++;
	LOG(("%d fetches active", ctx->c->base.active));

	ctx->count++;

	return true;

no_memory:
	msg_data.error = messages_get("NoMemory");
	content_broadcast(&ctx->c->base, CONTENT_MSG_ERROR, msg_data);
	return false;
}
/**
 * Process inline stylesheets and fetch linked stylesheets.
 *
 * Reserves the leading STYLESHEET_START slots for the built-in sheets,
 * starts fetches for the base sheet, the quirks sheet (full quirks mode
 * only), the ad-blocking sheet (when the block_ads option is set) and the
 * user sheet, then walks the document for STYLE and LINK elements, both
 * inside and outside HEAD.
 *
 * \param c     content structure
 * \param html  dom node of html element
 * \return true on success, false if an error occurred
 */
static bool html_find_stylesheets(html_content *c, dom_node *html)
{
	union content_msg_data msg_data;
	nserror ns_error;
	bool result;
	struct find_stylesheet_ctx ctx;
	hlcache_child_context child;
	struct html_stylesheet *sheets;

	/* stylesheet 0 is the base style sheet,
	 * stylesheet 1 is the quirks mode style sheet,
	 * stylesheet 2 is the adblocking stylesheet,
	 * stylesheet 3 is the user stylesheet */
	sheets = talloc_array(c, struct html_stylesheet, STYLESHEET_START);
	c->stylesheets = sheets;
	if (sheets == NULL)
		goto html_find_stylesheets_no_memory;

	/* every reserved slot starts out as an empty external sheet */
	sheets[STYLESHEET_BASE].type = HTML_STYLESHEET_EXTERNAL;
	sheets[STYLESHEET_BASE].data.external = NULL;
	sheets[STYLESHEET_QUIRKS].type = HTML_STYLESHEET_EXTERNAL;
	sheets[STYLESHEET_QUIRKS].data.external = NULL;
	sheets[STYLESHEET_ADBLOCK].type = HTML_STYLESHEET_EXTERNAL;
	sheets[STYLESHEET_ADBLOCK].data.external = NULL;
	sheets[STYLESHEET_USER].type = HTML_STYLESHEET_EXTERNAL;
	sheets[STYLESHEET_USER].data.external = NULL;
	c->stylesheet_count = STYLESHEET_START;

	child.charset = c->encoding;
	child.quirks = c->base.quirks;

	/* base sheet: always fetched */
	ns_error = hlcache_handle_retrieve(html_default_stylesheet_url, 0,
			content_get_url(&c->base), NULL,
			html_convert_css_callback, c, &child, CONTENT_CSS,
			&sheets[STYLESHEET_BASE].data.external);
	if (ns_error != NSERROR_OK)
		goto html_find_stylesheets_no_memory;

	c->base.active++;
	LOG(("%d fetches active", c->base.active));

	/* quirks sheet: full quirks mode only */
	if (c->quirks == BINDING_QUIRKS_MODE_FULL) {
		ns_error = hlcache_handle_retrieve(html_quirks_stylesheet_url,
				0, content_get_url(&c->base), NULL,
				html_convert_css_callback, c, &child,
				CONTENT_CSS,
				&sheets[STYLESHEET_QUIRKS].data.external);
		if (ns_error != NSERROR_OK)
			goto html_find_stylesheets_no_memory;

		c->base.active++;
		LOG(("%d fetches active", c->base.active));
	}

	/* ad-blocking sheet: only when the option is enabled */
	if (nsoption_bool(block_ads)) {
		ns_error = hlcache_handle_retrieve(html_adblock_stylesheet_url,
				0, content_get_url(&c->base), NULL,
				html_convert_css_callback, c, &child,
				CONTENT_CSS,
				&sheets[STYLESHEET_ADBLOCK].data.external);
		if (ns_error != NSERROR_OK)
			goto html_find_stylesheets_no_memory;

		c->base.active++;
		LOG(("%d fetches active", c->base.active));
	}

	/* user sheet: always fetched */
	ns_error = hlcache_handle_retrieve(html_user_stylesheet_url, 0,
			content_get_url(&c->base), NULL,
			html_convert_css_callback, c, &child, CONTENT_CSS,
			&sheets[STYLESHEET_USER].data.external);
	if (ns_error != NSERROR_OK)
		goto html_find_stylesheets_no_memory;

	c->base.active++;
	LOG(("%d fetches active", c->base.active));

	/* document sheets are appended after the reserved slots; the walk
	 * may reallocate c->stylesheets, so 'sheets' is not used again */
	ctx.c = c;
	ctx.count = STYLESHEET_START;

	result = html_treewalk_dom(html, html_process_stylesheet, &ctx);

	assert(c->stylesheet_count == ctx.count);

	return result;

html_find_stylesheets_no_memory:
	msg_data.error = messages_get("NoMemory");
	content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data);
	return false;
}
/**
* Convert a CONTENT_HTML for display.
*
* The following steps are carried out in order:
*
* - parsing to an XML tree is completed
* - stylesheets are fetched
* - the XML tree is converted to a box tree and object fetches are started
*
* On exit, the content status will be either CONTENT_STATUS_DONE if the
* document is completely loaded or CONTENT_STATUS_READY if objects are still
* being fetched.
*/
static bool html_convert(struct content *c)
{
html_content *htmlc = (html_content *) c;
binding_error err;
dom_node *html, *head;
union content_msg_data msg_data;
unsigned long size;
struct form *f;
dom_exception exc; /* returned by libdom functions */
dom_string *node_name = NULL;
/* finish parsing */
content__get_source_data(c, &size);
err = binding_parse_completed(htmlc->parser_binding);
if (err != BINDING_OK) {
union content_msg_data msg_data;
msg_data.error = messages_get("NoMemory");
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
return false;
}
htmlc->document = binding_get_document(htmlc->parser_binding,
&htmlc->quirks);
if (htmlc->document == NULL) {
LOG(("Parsing failed"));
msg_data.error = messages_get("ParsingFail");
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
return false;
}
if (htmlc->encoding == NULL) {
const char *encoding = binding_get_encoding(
htmlc->parser_binding,
&htmlc->encoding_source);
htmlc->encoding = talloc_strdup(c, encoding);
if (htmlc->encoding == NULL) {
msg_data.error = messages_get("NoMemory");
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
return false;
}
}
/* Give up processing if we've been aborted */
if (htmlc->aborted) {
msg_data.error = messages_get("Stopped");
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
return false;
}
/* locate root element and ensure it is html */
exc = dom_document_get_document_element(htmlc->document, (void *) &html);
if ((exc != DOM_NO_ERR) || (html == NULL)) {
LOG(("error retrieving html element from dom"));
msg_data.error = messages_get("ParsingFail");
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
return false;
}
exc = dom_node_get_node_name(html, &node_name);
if ((exc != DOM_NO_ERR) ||
(node_name == NULL) ||
(!dom_string_caseless_isequal(node_name, html_dom_string_html))) {
LOG(("root element not html"));
msg_data.error = messages_get("ParsingFail");
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
return false;
}
dom_string_unref(node_name);
/* ensure the head element is found */
exc = dom_node_get_first_child(html, &head);
if ((exc != DOM_NO_ERR) || (head == NULL)) {
head = NULL;
LOG(("head element not found"));
} else {
dom_node_type node_type;
dom_node *next_node;
/* find first node thats a element */
do {
exc = dom_node_get_node_type(head, &node_type);
if ((exc != DOM_NO_ERR) ||
(node_type == DOM_ELEMENT_NODE))
break;
exc = dom_node_get_next_sibling(head, &next_node);
dom_node_unref(head);
if (exc == DOM_NO_ERR) {
head = next_node;
} else {
head = NULL;
}
} while (head != NULL);
if (head != NULL) {
exc = dom_node_get_node_name(head, &node_name);
if ((exc == DOM_NO_ERR) || (node_name != NULL)) {
if (!dom_string_caseless_isequal(node_name,
html_dom_string_head)) {
dom_node_unref(head);
LOG(("head element not found"));
head = NULL;
}
dom_string_unref(node_name);
}
}
}
if (head != NULL) {
if (html_head(htmlc, head) == false) {
msg_data.error = messages_get("NoMemory");
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
return false;
}
/* handle meta refresh */
if (html_meta_refresh(htmlc, head) == false)
return false;
}
/* Retrieve forms from parser */
htmlc->forms = binding_get_forms(htmlc->parser_binding);
for (f = htmlc->forms; f != NULL; f = f->prev) {
char *action;
url_func_result res;
/* Make all actions absolute */
if (f->action == NULL || f->action[0] == '\0') {
/* HTML5 4.10.22.3 step 11 */
res = url_join(nsurl_access(content_get_url(c)),
nsurl_access(htmlc->base_url), &action);
} else {
res = url_join(f->action, nsurl_access(htmlc->base_url),
&action);
}
if (res != URL_FUNC_OK) {
msg_data.error = messages_get("NoMemory");
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
return false;
}
free(f->action);
f->action = action;
/* Ensure each form has a document encoding */
if (f->document_charset == NULL) {
f->document_charset = strdup(htmlc->encoding);
if (f->document_charset == NULL) {
msg_data.error = messages_get("NoMemory");
content_broadcast(c, CONTENT_MSG_ERROR,
msg_data);
return false;
}
}
}
/* get stylesheets */
if (html_find_stylesheets(htmlc, html) == false)
return false;
return true;
}
/**
 * Start a fetch for an object required by a page.
 *
 * Nothing is fetched (and success is reported) when the content has
 * already been aborted.  A retrieve failure is reported as failure only
 * when it was caused by memory exhaustion.
 *
 * \param c                 content of type CONTENT_HTML
 * \param url               URL of object to fetch (copied)
 * \param box               box that will contain the object
 * \param permitted_types   bitmap of acceptable types
 * \param available_width   estimate of width of object
 * \param available_height  estimate of height of object
 * \param background        this is a background image
 * \return true on success, false on memory exhaustion
 */
bool html_fetch_object(html_content *c, nsurl *url, struct box *box,
		content_type permitted_types,
		int available_width, int available_height,
		bool background)
{
	struct content_html_object *entry;
	hlcache_child_context child;
	nserror error;

	/* If we've already been aborted, don't bother attempting the fetch */
	if (c->aborted)
		return true;

	child.charset = c->encoding;
	child.quirks = c->base.quirks;

	entry = talloc(c, struct content_html_object);
	if (entry == NULL)
		return false;

	entry->parent = (struct content *) c;
	entry->next = NULL;
	entry->content = NULL;
	entry->box = box;
	entry->permitted_types = permitted_types;
	entry->background = background;

	error = hlcache_handle_retrieve(url,
			HLCACHE_RETRIEVE_SNIFF_TYPE,
			content_get_url(&c->base), NULL,
			html_object_callback, entry, &child,
			entry->permitted_types, &entry->content);
	if (error != NSERROR_OK) {
		talloc_free(entry);
		/* only running out of memory counts as a failure */
		return (error == NSERROR_NOMEM) ? false : true;
	}

	/* push onto the front of the content's object list */
	entry->next = c->object_list;
	c->object_list = entry;

	c->num_objects++;
	c->base.active++;
	LOG(("%d fetches active", c->base.active));

	return true;
}
/**
 * Stop loading a CONTENT_HTML.
 *
 * While the document itself is still loading only the aborted flag is
 * raised.  Once it is READY, every object that has not finished is
 * aborted, and the content moves to DONE if no fetches remain active.
 *
 * \param c  content of type CONTENT_HTML
 */
static void html_stop(struct content *c)
{
	html_content *htmlc = (html_content *) c;
	struct content_html_object *object;

	switch (c->status) {
	case CONTENT_STATUS_LOADING:
		/* Still loading; simply flag that we've been aborted
		 * html_convert/html_finish_conversion will do the rest */
		htmlc->aborted = true;
		break;

	case CONTENT_STATUS_READY:
		for (object = htmlc->object_list; object != NULL;
				object = object->next) {
			if (object->content == NULL)
				continue;

			switch (content_get_status(object->content)) {
			case CONTENT_STATUS_DONE:
				/* already loaded: do nothing */
				break;

			case CONTENT_STATUS_READY:
				/* Active count will be updated when
				 * html_object_callback receives
				 * CONTENT_MSG_DONE from this object */
				hlcache_handle_abort(object->content);
				break;

			default:
				/* not yet usable: abort and discard it */
				hlcache_handle_abort(object->content);
				hlcache_handle_release(object->content);
				object->content = NULL;

				c->active--;
				LOG(("%d fetches active", c->active));
				break;
			}
		}

		/* If there are no further active fetches and we're still
		 * in the READY state, transition to the DONE state. */
		if (c->status == CONTENT_STATUS_READY && c->active == 0) {
			html_set_status(htmlc, "");
			content_set_done(c);
		}
		break;

	case CONTENT_STATUS_DONE:
		/* Nothing to do */
		break;

	default:
		LOG(("Unexpected status %d", c->status));
		assert(0);
	}
}
/**
* Reformat a CONTENT_HTML to a new width.
*/
static void html_reformat(struct content *c, int width, int height)
{
html_content *htmlc = (html_content *) c;
struct box *layout;
unsigned int time_before, time_taken;
time_before = wallclock();
layout_document(htmlc, width, height);
layout = htmlc->layout;
/* width and height are at least margin box of document */
c->width = layout->x + layout->padding[LEFT] + layout->width +
layout->padding[RIGHT] + layout->border[RIGHT].width +
layout->margin[RIGHT];
c->height = layout->y + layout->padding[TOP] + layout->height +
layout->padding[BOTTOM] + layout->border[BOTTOM].width +
layout->margin[BOTTOM];
/* if boxes overflow right or bottom edge, expand to contain it */
if (c->width < layout->x + layout->descendant_x1)
c->width = layout->x + layout->descendant_x1;
if (c->height < layout->y + layout->descendant_y1)
c->height = layout->y + layout->descendant_y1;
selection_reinit(&htmlc->sel, htmlc->layout);
time_taken = wallclock() - time_before;
c->reformat_time = wallclock() +
((time_taken * 3 < nsoption_int(min_reflow_period) ?
nsoption_int(min_reflow_period) : time_taken * 3));
}
/**
 * Request a redraw of the padding box of a box, via a content handle.
 *
 * \param h    content containing the box, of type CONTENT_HTML
 * \param box  box to redraw
 */
void html_redraw_a_box(hlcache_handle *h, struct box *box)
{
	int x, y;
	int width, height;

	box_coords(box, &x, &y);

	width = box->padding[LEFT] + box->width + box->padding[RIGHT];
	height = box->padding[TOP] + box->height + box->padding[BOTTOM];

	content_request_redraw(h, x, y, width, height);
}
/**
 * Request a redraw of the padding box of a box, via a content pointer.
 *
 * \param c    content containing the box, of type CONTENT_HTML
 * \param box  box to redraw
 */
void html__redraw_a_box(struct content *c, struct box *box)
{
	int x, y;
	int width, height;

	box_coords(box, &x, &y);

	width = box->padding[LEFT] + box->width + box->padding[RIGHT];
	height = box->padding[TOP] + box->height + box->padding[BOTTOM];

	content__request_redraw(c, x, y, width, height);
}
/**
 * Release everything owned by a frameset, recursing into child framesets.
 *
 * The frameset structure itself is not freed; its name, URL and array of
 * children are released and the corresponding fields cleared.
 *
 * \param frameset  frameset to tear down
 */
static void html_destroy_frameset(struct content_html_frames *frameset)
{
	int i;

	if (frameset->name) {
		talloc_free(frameset->name);
		frameset->name = NULL;
	}

	if (frameset->url) {
		/* the URL is released with nsurl_unref(), exactly as is done
		 * for the same field of each child below */
		nsurl_unref(frameset->url);
		frameset->url = NULL;
	}

	if (frameset->children) {
		for (i = 0; i < (frameset->rows * frameset->cols); i++) {
			if (frameset->children[i].name) {
				talloc_free(frameset->children[i].name);
				frameset->children[i].name = NULL;
			}

			if (frameset->children[i].url) {
				nsurl_unref(frameset->children[i].url);
				frameset->children[i].url = NULL;
			}

			/* nested frameset: tear down its own children */
			if (frameset->children[i].children)
				html_destroy_frameset(&frameset->children[i]);
		}

		talloc_free(frameset->children);
		frameset->children = NULL;
	}
}
/**
 * Free a linked list of iframes, including each entry's name and URL.
 *
 * \param iframe  first entry of the list (may be NULL)
 */
static void html_destroy_iframe(struct content_html_iframe *iframe)
{
	while (iframe != NULL) {
		/* remember the successor before this entry is freed */
		struct content_html_iframe *following = iframe->next;

		if (iframe->name)
			talloc_free(iframe->name);

		if (iframe->url) {
			nsurl_unref(iframe->url);
			iframe->url = NULL;
		}

		talloc_free(iframe);
		iframe = following;
	}
}
/**
* Destroy a CONTENT_HTML and free all resources it owns.
*/
static void html_destroy(struct content *c)
{
html_content *html = (html_content *) c;
unsigned int i;
struct form *f, *g;
LOG(("content %p", c));
/* Destroy forms */
for (f = html->forms; f != NULL; f = g) {
g = f->prev;
form_free(f);
}
imagemap_destroy(html);
if (c->refresh)
nsurl_unref(c->refresh);
if (html->base_url)
nsurl_unref(html->base_url);
if (html->parser_binding != NULL) {
binding_destroy_tree(html->parser_binding);
html->parser_binding = NULL;
}
if (html->document != NULL)
binding_destroy_document(html->document);
/* Free base target */
if (html->base_target != NULL) {
free(html->base_target);
html->base_target = NULL;
}
/* Free frameset */
if (html->frameset != NULL) {
html_destroy_frameset(html->frameset);
talloc_free(html->frameset);
html->frameset = NULL;
}
/* Free iframes */
if (html->iframe != NULL) {
html_destroy_iframe(html->iframe);
html->iframe = NULL;
}
/* Destroy selection context */
if (html->select_ctx != NULL) {
css_select_ctx_destroy(html->select_ctx);
html->select_ctx = NULL;
}
if (html->universal != NULL) {
lwc_string_unref(html->universal);
html->universal = NULL;
}
/* Free stylesheets */
for (i = 0; i != html->stylesheet_count; i++) {
if (html->stylesheets[i].type == HTML_STYLESHEET_EXTERNAL &&
html->stylesheets[i].data.external != NULL) {
hlcache_handle_release(
html->stylesheets[i].data.external);
} else if (html->stylesheets[i].type ==
HTML_STYLESHEET_INTERNAL &&
html->stylesheets[i].data.internal != NULL) {
nscss_destroy_css_data(
html->stylesheets[i].data.internal);
}
}
/* Free scripts */
for (i = 0; i != html->scripts_count; i++) {
if (html->scripts[i].mimetype != NULL) {
dom_string_unref(html->scripts[i].mimetype);
}
if (html->scripts[i].type == HTML_SCRIPT_EXTERNAL &&
html->scripts[i].data.external != NULL) {
hlcache_handle_release(
html->scripts[i].data.external);
} else if (html->scripts[i].type ==
HTML_SCRIPT_INTERNAL &&
html->scripts[i].data.internal != NULL) {
dom_string_unref(html->scripts[i].data.internal);
}
}
free(html->scripts);
/* Free objects */
html_destroy_objects(html);
}
/**
* Clone a CONTENT_HTML (not implemented).
*
* This function asserts unconditionally; it only returns when assertions
* are compiled out.
*
* \param old content to clone (unused)
* \param newc would receive the clone (never written)
* \return see the note on the return statement
*/
static nserror html_clone(const struct content *old, struct content **newc)
{
/** \todo Clone HTML specifics */
/* In the meantime, we should never be called, as HTML contents
* cannot be shared and we're not intending to fix printing's
* cloning of documents. */
assert(0 && "html_clone should never be called");
/* NOTE(review): a boolean is returned from a function declared to return
* nserror; which error code this maps to depends on the nserror
* enumeration - confirm and replace with an explicit error value */
return true;
}
/**
 * Set the status text of an HTML content.
 *
 * The text is passed as an argument to a fixed "%s" format, so it is
 * never itself interpreted as a format string.
 *
 * \param c      HTML content whose status is to be set
 * \param extra  status text
 */
void html_set_status(html_content *c, const char *extra)
{
	struct content *base = &c->base;

	content_set_status(base, "%s", extra);
}
/**
 * Handle a window containing a CONTENT_HTML being opened.
 *
 * Records the window, enclosing page and box on the content, initialises
 * text selection, and opens every embedded object that has a content of
 * a known type in the same window.
 *
 * \param c       content of type CONTENT_HTML
 * \param bw      browser window the content is being opened in
 * \param page    content containing this one
 * \param box     box in \a page that holds this content
 * \param params  object parameters (unused here)
 */
static void
html_open(struct content *c,
		struct browser_window *bw,
		struct content *page,
		struct box *box,
		struct object_params *params)
{
	html_content *html = (html_content *) c;
	struct content_html_object *object, *next;

	html->bw = bw;
	html->page = (html_content *) page;
	html->box = box;

	/* text selection */
	selection_init(&html->sel, html->layout);

	for (object = html->object_list; object != NULL; object = next) {
		/* fetch the successor before opening the object */
		next = object->next;

		if (object->content != NULL &&
				content_get_type(object->content) !=
				CONTENT_NONE) {
			content_open(object->content,
					bw, c,
					object->box,
					object->box->object_params);
		}
	}
}
/**
 * Handle a window containing a CONTENT_HTML being closed.
 *
 * Destroys any search context attached to the content, clears the content's
 * browser window, and closes every object that has a content of a type
 * other than CONTENT_NONE.  For objects whose content is HTML, the
 * html_object_refresh callback scheduled against the object is removed
 * before the object is closed.
 *
 * \param c  content of type html
 */
static void html_close(struct content *c)
{
	html_content *html = (html_content *) c;
	struct content_html_object *pending;

	if (html->search != NULL) {
		search_destroy_context(html->search);
	}

	html->bw = NULL;

	pending = html->object_list;
	while (pending != NULL) {
		struct content_html_object *current = pending;

		/* Step to the successor before touching the current entry */
		pending = current->next;

		if (current->content == NULL) {
			continue;
		}

		if (content_get_type(current->content) != CONTENT_NONE) {
			if (content_get_type(current->content) ==
					CONTENT_HTML) {
				schedule_remove(html_object_refresh, current);
			}

			content_close(current->content);
		}
	}
}
/**
 * Obtain the selection object embedded in an HTML content.
 *
 * \param c  content of type html
 * \return pointer to the content's selection
 */
static struct selection *html_get_selection(struct content *c)
{
	return &((html_content *) c)->sel;
}
/**
* Get access to any content, link URLs and objects (images) currently
* at the given (x, y) coordinates.
*
* \param c html content to look inside
* \param x x-coordinate of point of interest
* \param y y-coordinate of point of interest
* \param data pointer to contextual_content struct. Its fields are updated
* with pointers to any relevent content, or set to NULL if none.
*/
static void
html_get_contextual_content(struct content *c,
int x,
int y,
struct contextual_content *data)
{
html_content *html = (html_content *) c;
struct box *box = html->layout;
struct box *next;
int box_x = 0, box_y = 0;
hlcache_handle *containing_content = NULL;
while ((next = box_at_point(box, x, y, &box_x, &box_y,
&containing_content)) != NULL) {
box = next;
if (box->style && css_computed_visibility(box->style) ==
CSS_VISIBILITY_HIDDEN)
continue;
if (box->iframe)
browser_window_get_contextual_content(box->iframe,
x - box_x, y - box_y, data);
if (box->object)
data->object = box->object;
if (box->href)
data->link_url = nsurl_access(box->href);
if (box->usemap) {
const char *target = NULL;
data->link_url = nsurl_access(imagemap_get(html,
box->usemap, box_x, box_y, x, y,
&target));
}
}
}
/**
 * Offer a scroll to the things found at a point within the content.
 *
 * Boxes are visited in the order box_at_point() yields them; boxes whose
 * computed visibility is hidden are skipped.  For each box the scroll is
 * first offered to any iframe window, then to the box's own vertical and
 * horizontal scrollbars.  The walk ends at the first box that consumes it.
 *
 * \param c	html content to look inside
 * \param x	x-coordinate of point of interest
 * \param y	y-coordinate of point of interest
 * \param scrx	horizontal scroll amount, passed on to iframe windows and
 *		horizontal scrollbars
 * \param scry	vertical scroll amount, passed on to iframe windows and
 *		vertical scrollbars
 * \return true iff scroll was consumed by something in the content
 */
static bool
html_scroll_at_point(struct content *c, int x, int y, int scrx, int scry)
{
	html_content *html = (html_content *) c;
	struct box *box = html->layout;
	int off_x = 0;
	int off_y = 0;
	hlcache_handle *owner = NULL;

	/* TODO: invert order; visit deepest box first */
	while ((box = box_at_point(box, x, y, &off_x, &off_y,
			&owner)) != NULL) {
		bool consumed = false;

		if (box->style != NULL &&
		    css_computed_visibility(box->style) ==
				CSS_VISIBILITY_HIDDEN) {
			continue;
		}

		/* Pass into iframe */
		if (box->iframe != NULL &&
		    browser_window_scroll_at_point(box->iframe,
				x - off_x, y - off_y, scrx, scry) == true) {
			return true;
		}

		/* Handle box scrollbars: both axes are always attempted */
		if (box->scroll_y != NULL &&
		    scrollbar_scroll(box->scroll_y, scry)) {
			consumed = true;
		}
		if (box->scroll_x != NULL &&
		    scrollbar_scroll(box->scroll_x, scrx)) {
			consumed = true;
		}

		if (consumed) {
			return true;
		}
	}

	return false;
}
/**
 * Drop a file onto a content at a particular point.
 *
 * The boxes at the point are scanned for form gadgets.  A file input takes
 * priority: its value is replaced by the file's path converted to UTF-8.
 * Otherwise, if a text, textarea or password input was found and the
 * content has a browser window, the file's contents are read, converted to
 * UTF-8 and pasted into the window after a simulated click at (x, y).  If
 * an iframe is met during the scan, the drop is passed to its window
 * instead.
 *
 * \param c	html content to look inside
 * \param x	x-coordinate of point of interest
 * \param y	y-coordinate of point of interest
 * \param file	path to file to be dropped
 * \return false if no suitable gadget lies at the point; true if one was
 *	   found, even when reading or converting the file then failed;
 *	   or the result from the iframe's window when the drop is passed on
 */
static bool html_drop_file_at_point(struct content *c, int x, int y, char *file)
{
	html_content *html = (html_content *) c;

	struct box *box = html->layout;
	struct box *next;
	struct box *file_box = NULL;
	struct box *text_box = NULL;
	int box_x = 0, box_y = 0;
	hlcache_handle *containing_content = NULL;

	/* Scan box tree for boxes that can handle drop */
	while ((next = box_at_point(box, x, y, &box_x, &box_y,
			&containing_content)) != NULL) {
		box = next;

		if (box->style && css_computed_visibility(box->style) ==
				CSS_VISIBILITY_HIDDEN)
			continue;

		if (box->iframe)
			return browser_window_drop_file_at_point(box->iframe,
					x - box_x, y - box_y, file);

		if (box->gadget) {
			switch (box->gadget->type) {
			case GADGET_FILE:
				file_box = box;
				break;

			case GADGET_TEXTBOX:
			case GADGET_TEXTAREA:
			case GADGET_PASSWORD:
				text_box = box;
				break;

			default:	/* appease compiler */
				break;
			}
		}
	}

	if (!file_box && !text_box)
		/* No box capable of handling drop */
		return false;

	/* Handle the drop */
	if (file_box) {
		/* File dropped on file input */
		utf8_convert_ret ret;
		char *utf8_fn;

		ret = utf8_from_local_encoding(file, 0,
				&utf8_fn);
		if (ret != UTF8_CONVERT_OK) {
			/* A bad encoding should never happen */
			assert(ret != UTF8_CONVERT_BADENC);
			LOG(("utf8_from_local_encoding failed"));

			/* Load was for us - just no memory */
			return true;
		}

		/* Found: update form input */
		free(file_box->gadget->value);
		file_box->gadget->value = utf8_fn;

		/* Redraw box. */
		if (containing_content == NULL)
			html__redraw_a_box(c, file_box);
		else
			html_redraw_a_box(containing_content, file_box);

	} else if (html->bw != NULL) {
		/* File dropped on text input */

		size_t file_len;
		long file_end;
		FILE *fp = NULL;
		char *buffer;
		char *utf8_buff;
		utf8_convert_ret ret;
		unsigned int size;
		struct browser_window *bw;

		/* Open file */
		fp = fopen(file, "rb");
		if (fp == NULL) {
			/* Couldn't open file, but drop was for us */
			return true;
		}

		/* Get filesize.  ftell() reports failure as -1, which must
		 * not be allowed to wrap into an enormous size_t; treat any
		 * positioning failure like any other unreadable file. */
		if (fseek(fp, 0, SEEK_END) != 0 ||
				(file_end = ftell(fp)) < 0 ||
				fseek(fp, 0, SEEK_SET) != 0) {
			/* Couldn't size file, but drop was for us */
			fclose(fp);
			return true;
		}
		file_len = (size_t) file_end;

		/* Allocate buffer for file data */
		buffer = malloc(file_len + 1);
		if (buffer == NULL) {
			/* No memory, but drop was for us */
			fclose(fp);
			return true;
		}

		/* Stick file into buffer */
		if (file_len != fread(buffer, 1, file_len, fp)) {
			/* Failed, but drop was for us */
			free(buffer);
			fclose(fp);
			return true;
		}

		/* Done with file */
		fclose(fp);

		/* Ensure buffer's string termination */
		buffer[file_len] = '\0';

		/* TODO: Sniff for text? */

		/* Convert to UTF-8 */
		ret = utf8_from_local_encoding(buffer, file_len, &utf8_buff);
		if (ret != UTF8_CONVERT_OK) {
			/* bad encoding shouldn't happen */
			assert(ret != UTF8_CONVERT_BADENC);
			LOG(("utf8_from_local_encoding failed"));
			free(buffer);
			warn_user("NoMemory", NULL);
			return true;
		}

		/* Done with buffer */
		free(buffer);

		/* Get new length */
		size = strlen(utf8_buff);

		/* Simulate a click over the input box, to place caret */
		browser_window_mouse_click(html->bw,
				BROWSER_MOUSE_PRESS_1, x, y);

		bw = browser_window_get_root(html->bw);

		/* Paste the file as text */
		browser_window_paste_text(bw, utf8_buff, size, true);

		free(utf8_buff);
	}

	return true;
}
/**
 * Attach a search context to an HTML content.
 *
 * Any previously stored context pointer is simply overwritten.
 *
 * \param c  content of type html
 * \param s  search context to store, or NULL to clear it
 */
void html_set_search(struct content *c, struct search_context *s)
{
	((html_content *) c)->search = s;
}
/**
 * Fetch the search context stored on an HTML content.
 *
 * \param c  content of type html
 * \return the stored search context, or NULL if none
 */
struct search_context *html_get_search(struct content *c)
{
	return ((html_content *) c)->search;
}
#if ALWAYS_DUMP_FRAMESET
/**
 * Print a frameset tree to stderr.
 *
 * One line is written for the given frame, followed (recursively) by one
 * line per child, each prefixed with one space per level of depth and its
 * (row col) position.
 *
 * \param frame  frame to dump; must not be NULL
 * \param depth  nesting depth used to indent the children of this frame
 */
static void
html_dump_frameset(struct content_html_frames *frame, unsigned int depth)
{
	unsigned int i;
	int row, col, child;
	const char *unit[] = {"px", "%", "*"};
	const char *scrolling[] = {"auto", "yes", "no"};

	assert(frame);

	/* %p requires a void pointer argument */
	fprintf(stderr, "%p ", (void *) frame);

	fprintf(stderr, "(%i %i) ", frame->rows, frame->cols);

	fprintf(stderr, "w%g%s ", frame->width.value, unit[frame->width.unit]);
	fprintf(stderr, "h%g%s ", frame->height.value,unit[frame->height.unit]);
	fprintf(stderr, "(margin w%i h%i) ",
			frame->margin_width, frame->margin_height);

	if (frame->name)
		fprintf(stderr, "'%s' ", frame->name);
	if (frame->url)
		fprintf(stderr, "<%s> ", frame->url);

	if (frame->no_resize)
		fprintf(stderr, "noresize ");
	fprintf(stderr, "(scrolling %s) ", scrolling[frame->scrolling]);
	if (frame->border)
		fprintf(stderr, "border %x ",
				(unsigned int) frame->border_colour);

	fprintf(stderr, "\n");

	if (frame->children) {
		for (row = 0; row != frame->rows; row++) {
			for (col = 0; col != frame->cols; col++) {
				for (i = 0; i != depth; i++)
					fprintf(stderr, " ");
				fprintf(stderr, "(%i %i): ", row, col);
				/* Children are stored row by row */
				child = (row * frame->cols) + col;
				html_dump_frameset(&frame->children[child],
						depth + 1);
			}
		}
	}
}
#endif
/**
 * Obtain the DOM document held by an HTML content.
 *
 * \param h  handle whose content is an HTML content; the content obtained
 *	     from it is asserted to be non-NULL
 * \return the content's document pointer
 */
dom_document *html_get_document(hlcache_handle *h)
{
	html_content *c;

	c = (html_content *) hlcache_handle_get_content(h);
	assert(c != NULL);

	return c->document;
}
/**
 * Obtain the root of the box tree held by an HTML content.
 *
 * \param h  handle whose content is an HTML content; the content obtained
 *	     from it is asserted to be non-NULL
 * \return the content's layout pointer
 *
 * \todo This API must die, as must all use of the box tree outside render/
 */
struct box *html_get_box_tree(hlcache_handle *h)
{
	html_content *c;

	c = (html_content *) hlcache_handle_get_content(h);
	assert(c != NULL);

	return c->layout;
}
/**
 * Obtain the character encoding recorded for an HTML document.
 *
 * \param h  handle whose content is an HTML content; the content obtained
 *	     from it is asserted to be non-NULL
 * \return the content's encoding string, or NULL
 */
const char *html_get_encoding(hlcache_handle *h)
{
	html_content *c;

	c = (html_content *) hlcache_handle_get_content(h);
	assert(c != NULL);

	return c->encoding;
}
/**
 * Obtain the recorded source of an HTML document's character encoding.
 *
 * \param h  handle whose content is an HTML content; the content obtained
 *	     from it is asserted to be non-NULL
 * \return the content's encoding source value
 */
binding_encoding_source html_get_encoding_source(hlcache_handle *h)
{
	html_content *c;

	c = (html_content *) hlcache_handle_get_content(h);
	assert(c != NULL);

	return c->encoding_source;
}
/**
 * Obtain the frameset description held by an HTML content.
 *
 * \param h  handle whose content is an HTML content; the content obtained
 *	     from it is asserted to be non-NULL
 * \return the content's frameset pointer, or NULL if none
 */
struct content_html_frames *html_get_frameset(hlcache_handle *h)
{
	html_content *c;

	c = (html_content *) hlcache_handle_get_content(h);
	assert(c != NULL);

	return c->frameset;
}
/**
 * Obtain the iframe description held by an HTML content.
 *
 * \param h  handle whose content is an HTML content; the content obtained
 *	     from it is asserted to be non-NULL
 * \return the content's iframe pointer, or NULL if none
 */
struct content_html_iframe *html_get_iframe(hlcache_handle *h)
{
	html_content *c;

	c = (html_content *) hlcache_handle_get_content(h);
	assert(c != NULL);

	return c->iframe;
}
/**
 * Obtain the base URL of an HTML content.
 *
 * \param h  handle whose content is an HTML content; the content obtained
 *	     from it is asserted to be non-NULL
 * \return the content's base URL pointer
 */
nsurl *html_get_base_url(hlcache_handle *h)
{
	html_content *c;

	c = (html_content *) hlcache_handle_get_content(h);
	assert(c != NULL);

	return c->base_url;
}
/**
 * Obtain the base target of an HTML content.
 *
 * \param h  handle whose content is an HTML content; the content obtained
 *	     from it is asserted to be non-NULL
 * \return the content's base target string, or NULL if none
 */
const char *html_get_base_target(hlcache_handle *h)
{
	html_content *c;

	c = (html_content *) hlcache_handle_get_content(h);
	assert(c != NULL);

	return c->base_target;
}
/**
 * Obtain the stylesheet array of an HTML content, and its length.
 *
 * \param h  handle whose content is an HTML content; the content obtained
 *	     from it is asserted to be non-NULL
 * \param n  location to receive the stylesheet count; must not be NULL
 * \return the content's stylesheet array
 */
struct html_stylesheet *html_get_stylesheets(hlcache_handle *h, unsigned int *n)
{
	html_content *c;

	c = (html_content *) hlcache_handle_get_content(h);

	assert(c != NULL);
	assert(n != NULL);

	*n = c->stylesheet_count;

	return c->stylesheets;
}
/**
 * Obtain the object list of an HTML content, and its length.
 *
 * \param h  handle whose content is an HTML content; the content obtained
 *	     from it is asserted to be non-NULL
 * \param n  location to receive the object count; must not be NULL
 * \return head of the content's object list
 */
struct content_html_object *html_get_objects(hlcache_handle *h, unsigned int *n)
{
	html_content *c;

	c = (html_content *) hlcache_handle_get_content(h);

	assert(c != NULL);
	assert(n != NULL);

	*n = c->num_objects;

	return c->object_list;
}
/**
 * Look up the layout coordinates of the box carrying a given id.
 *
 * \param h	   handle to search; anything other than an HTML content
 *		   yields false
 * \param frag_id  string containing an element id
 * \param x	   updated with the box's x coord iff id found
 * \param y	   updated with the box's y coord iff id found
 * \return true iff id found
 */
bool html_get_id_offset(hlcache_handle *h, lwc_string *frag_id, int *x, int *y)
{
	struct box *root;
	struct box *match;

	if (content_get_type(h) != CONTENT_HTML) {
		return false;
	}

	root = html_get_box_tree(h);
	match = box_find_by_id(root, frag_id);
	if (match == 0) {
		return false;
	}

	box_coords(match, x, y);

	return true;
}
/**
 * Report the content type implemented by this handler.
 *
 * \return CONTENT_HTML, always
 */
static content_type html_content_type(void)
{
	const content_type handled = CONTENT_HTML;

	return handled;
}
static void html_fini(void)
{
box_construct_fini();
#define HTML_DOM_STRING_UNREF(NAME) \
do { \
if (html_dom_string_##NAME != NULL) { \
dom_string_unref(html_dom_string_##NAME); \
html_dom_string_##NAME = NULL; \
} \
} while (0) \
HTML_DOM_STRING_UNREF(html);
HTML_DOM_STRING_UNREF(head);
HTML_DOM_STRING_UNREF(rel);
HTML_DOM_STRING_UNREF(href);
HTML_DOM_STRING_UNREF(hreflang);
HTML_DOM_STRING_UNREF(type);
HTML_DOM_STRING_UNREF(media);
HTML_DOM_STRING_UNREF(sizes);
HTML_DOM_STRING_UNREF(title);
HTML_DOM_STRING_UNREF(base);
HTML_DOM_STRING_UNREF(src);
HTML_DOM_STRING_UNREF(text_javascript);
HTML_DOM_STRING_UNREF(script);
HTML_DOM_STRING_UNREF(link);
HTML_DOM_STRING_UNREF(target);
HTML_DOM_STRING_UNREF(_blank);
HTML_DOM_STRING_UNREF(_self);
HTML_DOM_STRING_UNREF(_parent);
HTML_DOM_STRING_UNREF(_top);
HTML_DOM_STRING_UNREF(content);
HTML_DOM_STRING_UNREF(map);
HTML_DOM_STRING_UNREF(id);
HTML_DOM_STRING_UNREF(name);
HTML_DOM_STRING_UNREF(area);
HTML_DOM_STRING_UNREF(a);
HTML_DOM_STRING_UNREF(nohref);
HTML_DOM_STRING_UNREF(shape);
HTML_DOM_STRING_UNREF(default);
HTML_DOM_STRING_UNREF(rect);
HTML_DOM_STRING_UNREF(rectangle);
HTML_DOM_STRING_UNREF(coords);
HTML_DOM_STRING_UNREF(circle);
HTML_DOM_STRING_UNREF(poly);
HTML_DOM_STRING_UNREF(polygon);
#undef HTML_DOM_STRING_UNREF
if (html_dom_string_http_equiv != NULL) {
dom_string_unref(html_dom_string_http_equiv);
html_dom_string_http_equiv = NULL;
}
if (html_user_stylesheet_url != NULL) {
nsurl_unref(html_user_stylesheet_url);
html_user_stylesheet_url = NULL;
}
if (html_quirks_stylesheet_url != NULL) {
nsurl_unref(html_quirks_stylesheet_url);
html_quirks_stylesheet_url = NULL;
}
if (html_adblock_stylesheet_url != NULL) {
nsurl_unref(html_adblock_stylesheet_url);
html_adblock_stylesheet_url = NULL;
}
if (html_default_stylesheet_url != NULL) {
nsurl_unref(html_default_stylesheet_url);
html_default_stylesheet_url = NULL;
}
if (html_charset != NULL) {
lwc_string_unref(html_charset);
html_charset = NULL;
}
}
static const content_handler html_content_handler = {
.fini = html_fini,
.create = html_create,
.process_data = html_process_data,
.data_complete = html_convert,
.reformat = html_reformat,
.destroy = html_destroy,
.stop = html_stop,
.mouse_track = html_mouse_track,
.mouse_action = html_mouse_action,
.redraw = html_redraw,
.open = html_open,
.close = html_close,
.get_selection = html_get_selection,
.get_contextual_content = html_get_contextual_content,
.scroll_at_point = html_scroll_at_point,
.drop_file_at_point = html_drop_file_at_point,
.clone = html_clone,
.type = html_content_type,
.no_share = true,
};
/**
 * Initialise the HTML content handler.
 *
 * Interns the charset string, creates the stylesheet resource URLs,
 * interns the DOM strings used by this file, initialises box construction
 * and registers the handler for each MIME type in html_types.
 *
 * On any failure, everything set up so far is released via html_fini().
 *
 * \return NSERROR_OK on success, appropriate error otherwise
 */
nserror html_init(void)
{
	uint32_t i;
	lwc_error lerror;
	nserror error;
	dom_exception exc; /* returned by libdom functions */

	lerror = lwc_intern_string("charset", SLEN("charset"), &html_charset);
	if (lerror != lwc_error_ok) {
		error = NSERROR_NOMEM;
		goto error;
	}

	error = nsurl_create("resource:default.css",
			&html_default_stylesheet_url);
	if (error != NSERROR_OK)
		goto error;

	error = nsurl_create("resource:adblock.css",
			&html_adblock_stylesheet_url);
	if (error != NSERROR_OK)
		goto error;

	error = nsurl_create("resource:quirks.css",
			&html_quirks_stylesheet_url);
	if (error != NSERROR_OK)
		goto error;

	error = nsurl_create("resource:user.css",
			&html_user_stylesheet_url);
	if (error != NSERROR_OK)
		goto error;

	/* Intern a DOM string whose text is the macro argument.  A failure
	 * must set the error code before bailing out: at this point `error`
	 * still holds NSERROR_OK from the last successful nsurl_create(),
	 * and returning that would report a failed initialisation as
	 * success. */
#define HTML_DOM_STRING_INTERN(NAME)					\
	do {								\
		exc = dom_string_create_interned(			\
				(const uint8_t *) #NAME,		\
				sizeof(#NAME) - 1,			\
				&html_dom_string_##NAME);		\
		if ((exc != DOM_NO_ERR) ||				\
				(html_dom_string_##NAME == NULL)) {	\
			error = NSERROR_NOMEM;				\
			goto error;					\
		}							\
	} while (0)

	HTML_DOM_STRING_INTERN(html);
	HTML_DOM_STRING_INTERN(head);
	HTML_DOM_STRING_INTERN(rel);
	HTML_DOM_STRING_INTERN(href);
	HTML_DOM_STRING_INTERN(hreflang);
	HTML_DOM_STRING_INTERN(type);
	HTML_DOM_STRING_INTERN(media);
	HTML_DOM_STRING_INTERN(sizes);
	HTML_DOM_STRING_INTERN(title);
	HTML_DOM_STRING_INTERN(base);
	HTML_DOM_STRING_INTERN(link);
	HTML_DOM_STRING_INTERN(script);
	HTML_DOM_STRING_INTERN(src);
	HTML_DOM_STRING_INTERN(target);
	HTML_DOM_STRING_INTERN(_blank);
	HTML_DOM_STRING_INTERN(_self);
	HTML_DOM_STRING_INTERN(_parent);
	HTML_DOM_STRING_INTERN(_top);
	HTML_DOM_STRING_INTERN(content);
	HTML_DOM_STRING_INTERN(map);
	HTML_DOM_STRING_INTERN(id);
	HTML_DOM_STRING_INTERN(name);
	HTML_DOM_STRING_INTERN(area);
	HTML_DOM_STRING_INTERN(a);
	HTML_DOM_STRING_INTERN(nohref);
	HTML_DOM_STRING_INTERN(shape);
	HTML_DOM_STRING_INTERN(default);
	HTML_DOM_STRING_INTERN(rect);
	HTML_DOM_STRING_INTERN(rectangle);
	HTML_DOM_STRING_INTERN(coords);
	HTML_DOM_STRING_INTERN(circle);
	HTML_DOM_STRING_INTERN(poly);
	HTML_DOM_STRING_INTERN(polygon);

#undef HTML_DOM_STRING_INTERN

	/* The remaining strings contain characters that cannot form part
	 * of an identifier, so they are interned by hand. */
	exc = dom_string_create_interned((const uint8_t *) "text/javascript",
			SLEN("text/javascript"),
			&html_dom_string_text_javascript);
	if ((exc != DOM_NO_ERR) || (html_dom_string_text_javascript == NULL)) {
		error = NSERROR_NOMEM;
		goto error;
	}

	exc = dom_string_create_interned((const uint8_t *) "http-equiv",
			SLEN("http-equiv"),
			&html_dom_string_http_equiv);
	if ((exc != DOM_NO_ERR) || (html_dom_string_http_equiv == NULL)) {
		error = NSERROR_NOMEM;
		goto error;
	}

	error = box_construct_init();
	if (error != NSERROR_OK)
		goto error;

	for (i = 0; i < NOF_ELEMENTS(html_types); i++) {
		error = content_factory_register_handler(html_types[i],
				&html_content_handler);
		if (error != NSERROR_OK)
			goto error;
	}

	return NSERROR_OK;

error:
	html_fini();

	return error;
}
/**
 * Obtain the browser window recorded on an HTML content.
 *
 * \param c  HTML content; must be non-NULL and use the HTML content handler
 * \return the content's browser window pointer
 */
struct browser_window *html_get_browser_window(struct content *c)
{
	assert(c != NULL);
	assert(c->handler == &html_content_handler);

	return ((html_content *) c)->bw;
}