mirror of
https://github.com/netsurf-browser/netsurf
synced 2024-12-19 18:52:39 +03:00
1413 lines
34 KiB
C
1413 lines
34 KiB
C
/*
 * Copyright 2005 James Bursa <bursa@users.sourceforge.net>
 * Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
 * Copyright 2005 John M Bell <jmb202@ecs.soton.ac.uk>
 * Copyright 2006 Richard Wilson <info@tinct.net>
 * Copyright 2008 Michael Drake <tlsa@netsurf-browser.org>
 *
 * This file is part of NetSurf, http://www.netsurf-browser.org/
 *
 * NetSurf is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * NetSurf is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
/**
 * \file
 * Implementation of conversion from DOM tree to box tree.
 */
|
|
|
|
#include <string.h>
|
|
#include <dom/dom.h>
|
|
|
|
#include "utils/errors.h"
|
|
#include "utils/nsoption.h"
|
|
#include "utils/corestrings.h"
|
|
#include "utils/talloc.h"
|
|
#include "utils/string.h"
|
|
#include "utils/ascii.h"
|
|
#include "utils/nsurl.h"
|
|
#include "netsurf/misc.h"
|
|
#include "css/select.h"
|
|
#include "desktop/gui_internal.h"
|
|
|
|
#include "html/private.h"
|
|
#include "html/object.h"
|
|
#include "html/box.h"
|
|
#include "html/box_manipulate.h"
|
|
#include "html/box_construct.h"
|
|
#include "html/box_special.h"
|
|
#include "html/box_normalise.h"
|
|
#include "html/form_internal.h"
|
|
|
|
/**
|
|
* Context for box tree construction
|
|
*/
|
|
struct box_construct_ctx {
|
|
html_content *content; /**< Content we're constructing for */
|
|
|
|
dom_node *n; /**< Current node to process */
|
|
|
|
struct box *root_box; /**< Root box in the tree */
|
|
|
|
box_construct_complete_cb cb; /**< Callback to invoke on completion */
|
|
|
|
int *bctx; /**< talloc context */
|
|
};
|
|
|
|
/**
|
|
* Transient properties for construction of current node
|
|
*/
|
|
struct box_construct_props {
|
|
/** Style from which to inherit, or NULL if none */
|
|
const css_computed_style *parent_style;
|
|
/** Current link target, or NULL if none */
|
|
struct nsurl *href;
|
|
/** Current frame target, or NULL if none */
|
|
const char *target;
|
|
/** Current title attribute, or NULL if none */
|
|
const char *title;
|
|
/** Identity of the current block-level container */
|
|
struct box *containing_block;
|
|
/** Current container for inlines, or NULL if none
|
|
* \note If non-NULL, will be the last child of containing_block */
|
|
struct box *inline_container;
|
|
/** Whether the current node is the root of the DOM tree */
|
|
bool node_is_root;
|
|
};
|
|
|
|
/** Content types accepted for list marker and background image fetches */
static const content_type image_types = CONTENT_IMAGE;
|
|
|
|
/**
|
|
* mapping from CSS display to box type this table must be in sync
|
|
* with libcss' css_display enum
|
|
*/
|
|
static const box_type box_map[] = {
|
|
0, /* CSS_DISPLAY_INHERIT, */
|
|
BOX_INLINE, /* CSS_DISPLAY_INLINE, */
|
|
BOX_BLOCK, /* CSS_DISPLAY_BLOCK, */
|
|
BOX_BLOCK, /* CSS_DISPLAY_LIST_ITEM, */
|
|
BOX_INLINE, /* CSS_DISPLAY_RUN_IN, */
|
|
BOX_INLINE_BLOCK, /* CSS_DISPLAY_INLINE_BLOCK, */
|
|
BOX_TABLE, /* CSS_DISPLAY_TABLE, */
|
|
BOX_TABLE, /* CSS_DISPLAY_INLINE_TABLE, */
|
|
BOX_TABLE_ROW_GROUP, /* CSS_DISPLAY_TABLE_ROW_GROUP, */
|
|
BOX_TABLE_ROW_GROUP, /* CSS_DISPLAY_TABLE_HEADER_GROUP, */
|
|
BOX_TABLE_ROW_GROUP, /* CSS_DISPLAY_TABLE_FOOTER_GROUP, */
|
|
BOX_TABLE_ROW, /* CSS_DISPLAY_TABLE_ROW, */
|
|
BOX_NONE, /* CSS_DISPLAY_TABLE_COLUMN_GROUP, */
|
|
BOX_NONE, /* CSS_DISPLAY_TABLE_COLUMN, */
|
|
BOX_TABLE_CELL, /* CSS_DISPLAY_TABLE_CELL, */
|
|
BOX_INLINE, /* CSS_DISPLAY_TABLE_CAPTION, */
|
|
BOX_NONE /* CSS_DISPLAY_NONE */
|
|
};
|
|
|
|
|
|
/**
|
|
* determine if a box is the root node
|
|
*
|
|
* \param n node to check
|
|
* \return true if node is root else false.
|
|
*/
|
|
static inline bool box_is_root(dom_node *n)
|
|
{
|
|
dom_node *parent;
|
|
dom_node_type type;
|
|
dom_exception err;
|
|
|
|
err = dom_node_get_parent_node(n, &parent);
|
|
if (err != DOM_NO_ERR)
|
|
return false;
|
|
|
|
if (parent != NULL) {
|
|
err = dom_node_get_node_type(parent, &type);
|
|
|
|
dom_node_unref(parent);
|
|
|
|
if (err != DOM_NO_ERR)
|
|
return false;
|
|
|
|
if (type != DOM_DOCUMENT_NODE)
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/**
|
|
* Extract transient construction properties
|
|
*
|
|
* \param n Current DOM node to convert
|
|
* \param props Property object to populate
|
|
*/
|
|
static void
|
|
box_extract_properties(dom_node *n, struct box_construct_props *props)
|
|
{
|
|
memset(props, 0, sizeof(*props));
|
|
|
|
props->node_is_root = box_is_root(n);
|
|
|
|
/* Extract properties from containing DOM node */
|
|
if (props->node_is_root == false) {
|
|
dom_node *current_node = n;
|
|
dom_node *parent_node = NULL;
|
|
struct box *parent_box;
|
|
dom_exception err;
|
|
|
|
/* Find ancestor node containing parent box */
|
|
while (true) {
|
|
err = dom_node_get_parent_node(current_node,
|
|
&parent_node);
|
|
if (err != DOM_NO_ERR || parent_node == NULL)
|
|
break;
|
|
|
|
parent_box = box_for_node(parent_node);
|
|
|
|
if (parent_box != NULL) {
|
|
props->parent_style = parent_box->style;
|
|
props->href = parent_box->href;
|
|
props->target = parent_box->target;
|
|
props->title = parent_box->title;
|
|
|
|
dom_node_unref(parent_node);
|
|
break;
|
|
} else {
|
|
if (current_node != n)
|
|
dom_node_unref(current_node);
|
|
current_node = parent_node;
|
|
parent_node = NULL;
|
|
}
|
|
}
|
|
|
|
/* Find containing block (may be parent) */
|
|
while (true) {
|
|
struct box *b;
|
|
|
|
err = dom_node_get_parent_node(current_node,
|
|
&parent_node);
|
|
if (err != DOM_NO_ERR || parent_node == NULL) {
|
|
if (current_node != n)
|
|
dom_node_unref(current_node);
|
|
break;
|
|
}
|
|
|
|
if (current_node != n)
|
|
dom_node_unref(current_node);
|
|
|
|
b = box_for_node(parent_node);
|
|
|
|
/* Children of nodes that created an inline box
|
|
* will generate boxes which are attached as
|
|
* _siblings_ of the box generated for their
|
|
* parent node. Note, however, that we'll still
|
|
* use the parent node's styling as the parent
|
|
* style, above. */
|
|
if (b != NULL && b->type != BOX_INLINE &&
|
|
b->type != BOX_BR) {
|
|
props->containing_block = b;
|
|
|
|
dom_node_unref(parent_node);
|
|
break;
|
|
} else {
|
|
current_node = parent_node;
|
|
parent_node = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Compute current inline container, if any */
|
|
if (props->containing_block != NULL &&
|
|
props->containing_block->last != NULL &&
|
|
props->containing_block->last->type ==
|
|
BOX_INLINE_CONTAINER)
|
|
props->inline_container = props->containing_block->last;
|
|
}
|
|
|
|
|
|
/**
|
|
* Get the style for an element.
|
|
*
|
|
* \param c content of type CONTENT_HTML that is being processed
|
|
* \param parent_style style at this point in xml tree, or NULL for root
|
|
* \param root_style root node's style, or NULL for root
|
|
* \param n node in xml tree
|
|
* \return the new style, or NULL on memory exhaustion
|
|
*/
|
|
static css_select_results *
|
|
box_get_style(html_content *c,
|
|
const css_computed_style *parent_style,
|
|
const css_computed_style *root_style,
|
|
dom_node *n)
|
|
{
|
|
dom_string *s;
|
|
dom_exception err;
|
|
css_stylesheet *inline_style = NULL;
|
|
css_select_results *styles;
|
|
nscss_select_ctx ctx;
|
|
|
|
/* Firstly, construct inline stylesheet, if any */
|
|
err = dom_element_get_attribute(n, corestring_dom_style, &s);
|
|
if (err != DOM_NO_ERR)
|
|
return NULL;
|
|
|
|
if (s != NULL) {
|
|
inline_style = nscss_create_inline_style(
|
|
(const uint8_t *) dom_string_data(s),
|
|
dom_string_byte_length(s),
|
|
c->encoding,
|
|
nsurl_access(c->base_url),
|
|
c->quirks != DOM_DOCUMENT_QUIRKS_MODE_NONE);
|
|
|
|
dom_string_unref(s);
|
|
|
|
if (inline_style == NULL)
|
|
return NULL;
|
|
}
|
|
|
|
/* Populate selection context */
|
|
ctx.ctx = c->select_ctx;
|
|
ctx.quirks = (c->quirks == DOM_DOCUMENT_QUIRKS_MODE_FULL);
|
|
ctx.base_url = c->base_url;
|
|
ctx.universal = c->universal;
|
|
ctx.root_style = root_style;
|
|
ctx.parent_style = parent_style;
|
|
|
|
/* Select style for element */
|
|
styles = nscss_get_style(&ctx, n, &c->media, inline_style);
|
|
|
|
/* No longer need inline style */
|
|
if (inline_style != NULL)
|
|
css_stylesheet_destroy(inline_style);
|
|
|
|
return styles;
|
|
}
|
|
|
|
|
|
/**
|
|
* Construct the box required for a generated element.
|
|
*
|
|
* \param n XML node of type XML_ELEMENT_NODE
|
|
* \param content Content of type CONTENT_HTML that is being processed
|
|
* \param box Box which may have generated content
|
|
* \param style Complete computed style for pseudo element, or NULL
|
|
*
|
|
* \todo This is currently incomplete. It just does enough to support
|
|
* the clearfix hack. (http://www.positioniseverything.net/easyclearing.html )
|
|
*/
|
|
static void
|
|
box_construct_generate(dom_node *n,
|
|
html_content *content,
|
|
struct box *box,
|
|
const css_computed_style *style)
|
|
{
|
|
struct box *gen = NULL;
|
|
enum css_display_e computed_display;
|
|
const css_computed_content_item *c_item;
|
|
|
|
/* Nothing to generate if the parent box is not a block */
|
|
if (box->type != BOX_BLOCK)
|
|
return;
|
|
|
|
/* To determine if an element has a pseudo element, we select
|
|
* for it and test to see if the returned style's content
|
|
* property is set to normal. */
|
|
if (style == NULL ||
|
|
css_computed_content(style, &c_item) ==
|
|
CSS_CONTENT_NORMAL) {
|
|
/* No pseudo element */
|
|
return;
|
|
}
|
|
|
|
/* create box for this element */
|
|
computed_display = ns_computed_display(style, box_is_root(n));
|
|
if (computed_display == CSS_DISPLAY_BLOCK ||
|
|
computed_display == CSS_DISPLAY_TABLE) {
|
|
/* currently only support block level boxes */
|
|
|
|
/** \todo Not wise to drop const from the computed style */
|
|
gen = box_create(NULL, (css_computed_style *) style,
|
|
false, NULL, NULL, NULL, NULL, content->bctx);
|
|
if (gen == NULL) {
|
|
return;
|
|
}
|
|
|
|
/* set box type from computed display */
|
|
gen->type = box_map[ns_computed_display(
|
|
style, box_is_root(n))];
|
|
|
|
box_add_child(box, gen);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Construct a list marker box
|
|
*
|
|
* \param box Box to attach marker to
|
|
* \param title Current title attribute
|
|
* \param ctx Box construction context
|
|
* \param parent Current block-level container
|
|
* \return true on success, false on memory exhaustion
|
|
*/
|
|
static bool
|
|
box_construct_marker(struct box *box,
|
|
const char *title,
|
|
struct box_construct_ctx *ctx,
|
|
struct box *parent)
|
|
{
|
|
lwc_string *image_uri;
|
|
struct box *marker;
|
|
enum css_list_style_type_e list_style_type;
|
|
|
|
marker = box_create(NULL, box->style, false, NULL, NULL, title,
|
|
NULL, ctx->bctx);
|
|
if (marker == false)
|
|
return false;
|
|
|
|
marker->type = BOX_BLOCK;
|
|
|
|
list_style_type = css_computed_list_style_type(box->style);
|
|
|
|
/** \todo marker content (list-style-type) */
|
|
switch (list_style_type) {
|
|
case CSS_LIST_STYLE_TYPE_DISC:
|
|
/* 2022 BULLET */
|
|
marker->text = (char *) "\342\200\242";
|
|
marker->length = 3;
|
|
break;
|
|
|
|
case CSS_LIST_STYLE_TYPE_CIRCLE:
|
|
/* 25CB WHITE CIRCLE */
|
|
marker->text = (char *) "\342\227\213";
|
|
marker->length = 3;
|
|
break;
|
|
|
|
case CSS_LIST_STYLE_TYPE_SQUARE:
|
|
/* 25AA BLACK SMALL SQUARE */
|
|
marker->text = (char *) "\342\226\252";
|
|
marker->length = 3;
|
|
break;
|
|
|
|
default:
|
|
/* Numerical list counters get handled in layout. */
|
|
/* Fall through. */
|
|
case CSS_LIST_STYLE_TYPE_NONE:
|
|
marker->text = NULL;
|
|
marker->length = 0;
|
|
break;
|
|
}
|
|
|
|
if (css_computed_list_style_image(box->style, &image_uri) == CSS_LIST_STYLE_IMAGE_URI &&
|
|
(image_uri != NULL) &&
|
|
(nsoption_bool(foreground_images) == true)) {
|
|
nsurl *url;
|
|
nserror error;
|
|
|
|
/* TODO: we get a url out of libcss as a lwc string, but
|
|
* earlier we already had it as a nsurl after we
|
|
* nsurl_joined it. Can this be improved?
|
|
* For now, just making another nsurl. */
|
|
error = nsurl_create(lwc_string_data(image_uri), &url);
|
|
if (error != NSERROR_OK)
|
|
return false;
|
|
|
|
if (html_fetch_object(ctx->content,
|
|
url,
|
|
marker,
|
|
image_types,
|
|
false) == false) {
|
|
nsurl_unref(url);
|
|
return false;
|
|
}
|
|
nsurl_unref(url);
|
|
}
|
|
|
|
box->list_marker = marker;
|
|
marker->parent = box;
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/**
|
|
* Construct the box tree for an XML element.
|
|
*
|
|
* \param ctx Tree construction context
|
|
* \param convert_children Whether to convert children
|
|
* \return true on success, false on memory exhaustion
|
|
*/
|
|
static bool
|
|
box_construct_element(struct box_construct_ctx *ctx, bool *convert_children)
|
|
{
|
|
dom_string *title0, *s;
|
|
lwc_string *id = NULL;
|
|
struct box *box = NULL, *old_box;
|
|
css_select_results *styles = NULL;
|
|
lwc_string *bgimage_uri;
|
|
dom_exception err;
|
|
struct box_construct_props props;
|
|
const css_computed_style *root_style = NULL;
|
|
|
|
assert(ctx->n != NULL);
|
|
|
|
box_extract_properties(ctx->n, &props);
|
|
|
|
if (props.containing_block != NULL) {
|
|
/* In case the containing block is a pre block, we clear
|
|
* the PRE_STRIP flag since it is not used if we follow
|
|
* the pre with a tag */
|
|
props.containing_block->flags &= ~PRE_STRIP;
|
|
}
|
|
|
|
if (props.node_is_root == false) {
|
|
root_style = ctx->root_box->style;
|
|
}
|
|
|
|
styles = box_get_style(ctx->content, props.parent_style, root_style,
|
|
ctx->n);
|
|
if (styles == NULL)
|
|
return false;
|
|
|
|
/* Extract title attribute, if present */
|
|
err = dom_element_get_attribute(ctx->n, corestring_dom_title, &title0);
|
|
if (err != DOM_NO_ERR)
|
|
return false;
|
|
|
|
if (title0 != NULL) {
|
|
char *t = squash_whitespace(dom_string_data(title0));
|
|
|
|
dom_string_unref(title0);
|
|
|
|
if (t == NULL)
|
|
return false;
|
|
|
|
props.title = talloc_strdup(ctx->bctx, t);
|
|
|
|
free(t);
|
|
|
|
if (props.title == NULL)
|
|
return false;
|
|
}
|
|
|
|
/* Extract id attribute, if present */
|
|
err = dom_element_get_attribute(ctx->n, corestring_dom_id, &s);
|
|
if (err != DOM_NO_ERR)
|
|
return false;
|
|
|
|
if (s != NULL) {
|
|
err = dom_string_intern(s, &id);
|
|
if (err != DOM_NO_ERR)
|
|
id = NULL;
|
|
|
|
dom_string_unref(s);
|
|
}
|
|
|
|
box = box_create(styles, styles->styles[CSS_PSEUDO_ELEMENT_NONE], false,
|
|
props.href, props.target, props.title, id,
|
|
ctx->bctx);
|
|
if (box == NULL)
|
|
return false;
|
|
|
|
/* If this is the root box, add it to the context */
|
|
if (props.node_is_root)
|
|
ctx->root_box = box;
|
|
|
|
/* Deal with colspan/rowspan */
|
|
err = dom_element_get_attribute(ctx->n, corestring_dom_colspan, &s);
|
|
if (err != DOM_NO_ERR)
|
|
return false;
|
|
|
|
if (s != NULL) {
|
|
const char *val = dom_string_data(s);
|
|
|
|
if ('0' <= val[0] && val[0] <= '9')
|
|
box->columns = strtol(val, NULL, 10);
|
|
|
|
dom_string_unref(s);
|
|
}
|
|
|
|
err = dom_element_get_attribute(ctx->n, corestring_dom_rowspan, &s);
|
|
if (err != DOM_NO_ERR)
|
|
return false;
|
|
|
|
if (s != NULL) {
|
|
const char *val = dom_string_data(s);
|
|
|
|
if ('0' <= val[0] && val[0] <= '9')
|
|
box->rows = strtol(val, NULL, 10);
|
|
|
|
dom_string_unref(s);
|
|
}
|
|
|
|
/* Set box type from computed display */
|
|
if ((css_computed_position(box->style) == CSS_POSITION_ABSOLUTE ||
|
|
css_computed_position(box->style) ==
|
|
CSS_POSITION_FIXED) &&
|
|
(ns_computed_display_static(box->style) ==
|
|
CSS_DISPLAY_INLINE ||
|
|
ns_computed_display_static(box->style) ==
|
|
CSS_DISPLAY_INLINE_BLOCK ||
|
|
ns_computed_display_static(box->style) ==
|
|
CSS_DISPLAY_INLINE_TABLE)) {
|
|
/* Special case for absolute positioning: make absolute inlines
|
|
* into inline block so that the boxes are constructed in an
|
|
* inline container as if they were not absolutely positioned.
|
|
* Layout expects and handles this. */
|
|
box->type = box_map[CSS_DISPLAY_INLINE_BLOCK];
|
|
} else if (props.node_is_root) {
|
|
/* Special case for root element: force it to BLOCK, or the
|
|
* rest of the layout will break. */
|
|
box->type = BOX_BLOCK;
|
|
} else {
|
|
/* Normal mapping */
|
|
box->type = box_map[ns_computed_display(box->style,
|
|
props.node_is_root)];
|
|
}
|
|
|
|
if (convert_special_elements(ctx->n,
|
|
ctx->content,
|
|
box,
|
|
convert_children) == false) {
|
|
return false;
|
|
}
|
|
|
|
/* Handle the :before pseudo element */
|
|
if (!(box->flags & IS_REPLACED)) {
|
|
box_construct_generate(ctx->n, ctx->content, box,
|
|
box->styles->styles[CSS_PSEUDO_ELEMENT_BEFORE]);
|
|
}
|
|
|
|
if (box->type == BOX_NONE ||
|
|
(ns_computed_display(box->style,
|
|
props.node_is_root) == CSS_DISPLAY_NONE &&
|
|
props.node_is_root == false)) {
|
|
css_select_results_destroy(styles);
|
|
box->styles = NULL;
|
|
box->style = NULL;
|
|
|
|
/* Invalidate associated gadget, if any */
|
|
if (box->gadget != NULL) {
|
|
box->gadget->box = NULL;
|
|
box->gadget = NULL;
|
|
}
|
|
|
|
/* Can't do this, because the lifetimes of boxes and gadgets
|
|
* are inextricably linked. Fortunately, talloc will save us
|
|
* (for now) */
|
|
/* box_free_box(box); */
|
|
|
|
*convert_children = false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Attach DOM node to box */
|
|
err = dom_node_set_user_data(ctx->n,
|
|
corestring_dom___ns_key_box_node_data, box, NULL,
|
|
(void *) &old_box);
|
|
if (err != DOM_NO_ERR)
|
|
return false;
|
|
|
|
/* Attach box to DOM node */
|
|
box->node = dom_node_ref(ctx->n);
|
|
|
|
if (props.inline_container == NULL &&
|
|
(box->type == BOX_INLINE ||
|
|
box->type == BOX_BR ||
|
|
box->type == BOX_INLINE_BLOCK ||
|
|
css_computed_float(box->style) == CSS_FLOAT_LEFT ||
|
|
css_computed_float(box->style) == CSS_FLOAT_RIGHT) &&
|
|
props.node_is_root == false) {
|
|
/* Found an inline child of a block without a current container
|
|
* (i.e. this box is the first child of its parent, or was
|
|
* preceded by block-level siblings) */
|
|
assert(props.containing_block != NULL &&
|
|
"Box must have containing block.");
|
|
|
|
props.inline_container = box_create(NULL, NULL, false, NULL,
|
|
NULL, NULL, NULL, ctx->bctx);
|
|
if (props.inline_container == NULL)
|
|
return false;
|
|
|
|
props.inline_container->type = BOX_INLINE_CONTAINER;
|
|
|
|
box_add_child(props.containing_block, props.inline_container);
|
|
}
|
|
|
|
/* Kick off fetch for any background image */
|
|
if (css_computed_background_image(box->style, &bgimage_uri) ==
|
|
CSS_BACKGROUND_IMAGE_IMAGE && bgimage_uri != NULL &&
|
|
nsoption_bool(background_images) == true) {
|
|
nsurl *url;
|
|
nserror error;
|
|
|
|
/* TODO: we get a url out of libcss as a lwc string, but
|
|
* earlier we already had it as a nsurl after we
|
|
* nsurl_joined it. Can this be improved?
|
|
* For now, just making another nsurl. */
|
|
error = nsurl_create(lwc_string_data(bgimage_uri), &url);
|
|
if (error == NSERROR_OK) {
|
|
/* Fetch image if we got a valid URL */
|
|
if (html_fetch_object(ctx->content,
|
|
url,
|
|
box,
|
|
image_types,
|
|
true) == false) {
|
|
nsurl_unref(url);
|
|
return false;
|
|
}
|
|
nsurl_unref(url);
|
|
}
|
|
}
|
|
|
|
if (*convert_children)
|
|
box->flags |= CONVERT_CHILDREN;
|
|
|
|
if (box->type == BOX_INLINE || box->type == BOX_BR ||
|
|
box->type == BOX_INLINE_BLOCK) {
|
|
/* Inline container must exist, as we'll have
|
|
* created it above if it didn't */
|
|
assert(props.inline_container != NULL);
|
|
|
|
box_add_child(props.inline_container, box);
|
|
} else {
|
|
if (ns_computed_display(box->style, props.node_is_root) ==
|
|
CSS_DISPLAY_LIST_ITEM) {
|
|
/* List item: compute marker */
|
|
if (box_construct_marker(box, props.title, ctx,
|
|
props.containing_block) == false)
|
|
return false;
|
|
}
|
|
|
|
if (props.node_is_root == false &&
|
|
(css_computed_float(box->style) ==
|
|
CSS_FLOAT_LEFT ||
|
|
css_computed_float(box->style) ==
|
|
CSS_FLOAT_RIGHT)) {
|
|
/* Float: insert a float between the parent and box. */
|
|
struct box *flt = box_create(NULL, NULL, false,
|
|
props.href, props.target, props.title,
|
|
NULL, ctx->bctx);
|
|
if (flt == NULL)
|
|
return false;
|
|
|
|
if (css_computed_float(box->style) == CSS_FLOAT_LEFT)
|
|
flt->type = BOX_FLOAT_LEFT;
|
|
else
|
|
flt->type = BOX_FLOAT_RIGHT;
|
|
|
|
box_add_child(props.inline_container, flt);
|
|
box_add_child(flt, box);
|
|
} else {
|
|
/* Non-floated block-level box: add to containing block
|
|
* if there is one. If we're the root box, then there
|
|
* won't be. */
|
|
if (props.containing_block != NULL)
|
|
box_add_child(props.containing_block, box);
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/**
|
|
* Complete construction of the box tree for an element.
|
|
*
|
|
* \param n DOM node to construct for
|
|
* \param content Containing document
|
|
*
|
|
* This will be called after all children of an element have been processed
|
|
*/
|
|
static void box_construct_element_after(dom_node *n, html_content *content)
|
|
{
|
|
struct box_construct_props props;
|
|
struct box *box = box_for_node(n);
|
|
|
|
assert(box != NULL);
|
|
|
|
box_extract_properties(n, &props);
|
|
|
|
if (box->type == BOX_INLINE || box->type == BOX_BR) {
|
|
/* Insert INLINE_END into containing block */
|
|
struct box *inline_end;
|
|
bool has_children;
|
|
dom_exception err;
|
|
|
|
err = dom_node_has_child_nodes(n, &has_children);
|
|
if (err != DOM_NO_ERR)
|
|
return;
|
|
|
|
if (has_children == false ||
|
|
(box->flags & CONVERT_CHILDREN) == 0) {
|
|
/* No children, or didn't want children converted */
|
|
return;
|
|
}
|
|
|
|
if (props.inline_container == NULL) {
|
|
/* Create inline container if we don't have one */
|
|
props.inline_container = box_create(NULL, NULL, false,
|
|
NULL, NULL, NULL, NULL, content->bctx);
|
|
if (props.inline_container == NULL)
|
|
return;
|
|
|
|
props.inline_container->type = BOX_INLINE_CONTAINER;
|
|
|
|
box_add_child(props.containing_block,
|
|
props.inline_container);
|
|
}
|
|
|
|
inline_end = box_create(NULL, box->style, false,
|
|
box->href, box->target, box->title,
|
|
box->id == NULL ? NULL :
|
|
lwc_string_ref(box->id), content->bctx);
|
|
if (inline_end != NULL) {
|
|
inline_end->type = BOX_INLINE_END;
|
|
|
|
assert(props.inline_container != NULL);
|
|
|
|
box_add_child(props.inline_container, inline_end);
|
|
|
|
box->inline_end = inline_end;
|
|
inline_end->inline_end = box;
|
|
}
|
|
} else if (!(box->flags & IS_REPLACED)) {
|
|
/* Handle the :after pseudo element */
|
|
box_construct_generate(n, content, box,
|
|
box->styles->styles[CSS_PSEUDO_ELEMENT_AFTER]);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Find the next node in the DOM tree, completing element construction
|
|
* where appropriate.
|
|
*
|
|
* \param n Current node
|
|
* \param content Containing content
|
|
* \param convert_children Whether to consider children of \a n
|
|
* \return Next node to process, or NULL if complete
|
|
*
|
|
* \note \a n will be unreferenced
|
|
*/
|
|
static dom_node *
|
|
next_node(dom_node *n, html_content *content, bool convert_children)
|
|
{
|
|
dom_node *next = NULL;
|
|
bool has_children;
|
|
dom_exception err;
|
|
|
|
err = dom_node_has_child_nodes(n, &has_children);
|
|
if (err != DOM_NO_ERR) {
|
|
dom_node_unref(n);
|
|
return NULL;
|
|
}
|
|
|
|
if (convert_children && has_children) {
|
|
err = dom_node_get_first_child(n, &next);
|
|
if (err != DOM_NO_ERR) {
|
|
dom_node_unref(n);
|
|
return NULL;
|
|
}
|
|
dom_node_unref(n);
|
|
} else {
|
|
err = dom_node_get_next_sibling(n, &next);
|
|
if (err != DOM_NO_ERR) {
|
|
dom_node_unref(n);
|
|
return NULL;
|
|
}
|
|
|
|
if (next != NULL) {
|
|
if (box_for_node(n) != NULL)
|
|
box_construct_element_after(n, content);
|
|
dom_node_unref(n);
|
|
} else {
|
|
if (box_for_node(n) != NULL)
|
|
box_construct_element_after(n, content);
|
|
|
|
while (box_is_root(n) == false) {
|
|
dom_node *parent = NULL;
|
|
dom_node *parent_next = NULL;
|
|
|
|
err = dom_node_get_parent_node(n, &parent);
|
|
if (err != DOM_NO_ERR) {
|
|
dom_node_unref(n);
|
|
return NULL;
|
|
}
|
|
|
|
assert(parent != NULL);
|
|
|
|
err = dom_node_get_next_sibling(parent,
|
|
&parent_next);
|
|
if (err != DOM_NO_ERR) {
|
|
dom_node_unref(parent);
|
|
dom_node_unref(n);
|
|
return NULL;
|
|
}
|
|
|
|
if (parent_next != NULL) {
|
|
dom_node_unref(parent_next);
|
|
dom_node_unref(parent);
|
|
break;
|
|
}
|
|
|
|
dom_node_unref(n);
|
|
n = parent;
|
|
parent = NULL;
|
|
|
|
if (box_for_node(n) != NULL) {
|
|
box_construct_element_after(
|
|
n, content);
|
|
}
|
|
}
|
|
|
|
if (box_is_root(n) == false) {
|
|
dom_node *parent = NULL;
|
|
|
|
err = dom_node_get_parent_node(n, &parent);
|
|
if (err != DOM_NO_ERR) {
|
|
dom_node_unref(n);
|
|
return NULL;
|
|
}
|
|
|
|
assert(parent != NULL);
|
|
|
|
err = dom_node_get_next_sibling(parent, &next);
|
|
if (err != DOM_NO_ERR) {
|
|
dom_node_unref(parent);
|
|
dom_node_unref(n);
|
|
return NULL;
|
|
}
|
|
|
|
if (box_for_node(parent) != NULL) {
|
|
box_construct_element_after(parent,
|
|
content);
|
|
}
|
|
|
|
dom_node_unref(parent);
|
|
}
|
|
|
|
dom_node_unref(n);
|
|
}
|
|
}
|
|
|
|
return next;
|
|
}
|
|
|
|
|
|
/**
|
|
* Apply the CSS text-transform property to given text for its ASCII chars.
|
|
*
|
|
* \param s string to transform
|
|
* \param len length of s
|
|
* \param tt transform type
|
|
*/
|
|
static void
|
|
box_text_transform(char *s, unsigned int len, enum css_text_transform_e tt)
|
|
{
|
|
unsigned int i;
|
|
if (len == 0)
|
|
return;
|
|
switch (tt) {
|
|
case CSS_TEXT_TRANSFORM_UPPERCASE:
|
|
for (i = 0; i < len; ++i)
|
|
if ((unsigned char) s[i] < 0x80)
|
|
s[i] = ascii_to_upper(s[i]);
|
|
break;
|
|
case CSS_TEXT_TRANSFORM_LOWERCASE:
|
|
for (i = 0; i < len; ++i)
|
|
if ((unsigned char) s[i] < 0x80)
|
|
s[i] = ascii_to_lower(s[i]);
|
|
break;
|
|
case CSS_TEXT_TRANSFORM_CAPITALIZE:
|
|
if ((unsigned char) s[0] < 0x80)
|
|
s[0] = ascii_to_upper(s[0]);
|
|
for (i = 1; i < len; ++i)
|
|
if ((unsigned char) s[i] < 0x80 &&
|
|
ascii_is_space(s[i - 1]))
|
|
s[i] = ascii_to_upper(s[i]);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Construct the box tree for an XML text node.
|
|
*
|
|
* \param ctx Tree construction context
|
|
* \return true on success, false on memory exhaustion
|
|
*/
|
|
static bool box_construct_text(struct box_construct_ctx *ctx)
|
|
{
|
|
struct box_construct_props props;
|
|
struct box *box = NULL;
|
|
dom_string *content;
|
|
dom_exception err;
|
|
|
|
assert(ctx->n != NULL);
|
|
|
|
box_extract_properties(ctx->n, &props);
|
|
|
|
assert(props.containing_block != NULL);
|
|
|
|
err = dom_characterdata_get_data(ctx->n, &content);
|
|
if (err != DOM_NO_ERR || content == NULL)
|
|
return false;
|
|
|
|
if (css_computed_white_space(props.parent_style) ==
|
|
CSS_WHITE_SPACE_NORMAL ||
|
|
css_computed_white_space(props.parent_style) ==
|
|
CSS_WHITE_SPACE_NOWRAP) {
|
|
char *text;
|
|
|
|
text = squash_whitespace(dom_string_data(content));
|
|
|
|
dom_string_unref(content);
|
|
|
|
if (text == NULL)
|
|
return false;
|
|
|
|
/* if the text is just a space, combine it with the preceding
|
|
* text node, if any */
|
|
if (text[0] == ' ' && text[1] == 0) {
|
|
if (props.inline_container != NULL) {
|
|
assert(props.inline_container->last != NULL);
|
|
|
|
props.inline_container->last->space =
|
|
UNKNOWN_WIDTH;
|
|
}
|
|
|
|
free(text);
|
|
|
|
return true;
|
|
}
|
|
|
|
if (props.inline_container == NULL) {
|
|
/* Child of a block without a current container
|
|
* (i.e. this box is the first child of its parent, or
|
|
* was preceded by block-level siblings) */
|
|
props.inline_container = box_create(NULL, NULL, false,
|
|
NULL, NULL, NULL, NULL, ctx->bctx);
|
|
if (props.inline_container == NULL) {
|
|
free(text);
|
|
return false;
|
|
}
|
|
|
|
props.inline_container->type = BOX_INLINE_CONTAINER;
|
|
|
|
box_add_child(props.containing_block,
|
|
props.inline_container);
|
|
}
|
|
|
|
/** \todo Dropping const here is not clever */
|
|
box = box_create(NULL,
|
|
(css_computed_style *) props.parent_style,
|
|
false, props.href, props.target, props.title,
|
|
NULL, ctx->bctx);
|
|
if (box == NULL) {
|
|
free(text);
|
|
return false;
|
|
}
|
|
|
|
box->type = BOX_TEXT;
|
|
|
|
box->text = talloc_strdup(ctx->bctx, text);
|
|
free(text);
|
|
if (box->text == NULL)
|
|
return false;
|
|
|
|
box->length = strlen(box->text);
|
|
|
|
/* strip ending space char off */
|
|
if (box->length > 1 && box->text[box->length - 1] == ' ') {
|
|
box->space = UNKNOWN_WIDTH;
|
|
box->length--;
|
|
}
|
|
|
|
if (css_computed_text_transform(props.parent_style) !=
|
|
CSS_TEXT_TRANSFORM_NONE)
|
|
box_text_transform(box->text, box->length,
|
|
css_computed_text_transform(
|
|
props.parent_style));
|
|
|
|
box_add_child(props.inline_container, box);
|
|
|
|
if (box->text[0] == ' ') {
|
|
box->length--;
|
|
|
|
memmove(box->text, &box->text[1], box->length);
|
|
|
|
if (box->prev != NULL)
|
|
box->prev->space = UNKNOWN_WIDTH;
|
|
}
|
|
} else {
|
|
/* white-space: pre */
|
|
char *text;
|
|
size_t text_len = dom_string_byte_length(content);
|
|
size_t i;
|
|
char *current;
|
|
enum css_white_space_e white_space =
|
|
css_computed_white_space(props.parent_style);
|
|
|
|
/* note: pre-wrap/pre-line are unimplemented */
|
|
assert(white_space == CSS_WHITE_SPACE_PRE ||
|
|
white_space == CSS_WHITE_SPACE_PRE_LINE ||
|
|
white_space == CSS_WHITE_SPACE_PRE_WRAP);
|
|
|
|
text = malloc(text_len + 1);
|
|
dom_string_unref(content);
|
|
|
|
if (text == NULL)
|
|
return false;
|
|
|
|
memcpy(text, dom_string_data(content), text_len);
|
|
text[text_len] = '\0';
|
|
|
|
/* TODO: Handle tabs properly */
|
|
for (i = 0; i < text_len; i++)
|
|
if (text[i] == '\t')
|
|
text[i] = ' ';
|
|
|
|
if (css_computed_text_transform(props.parent_style) !=
|
|
CSS_TEXT_TRANSFORM_NONE)
|
|
box_text_transform(text, strlen(text),
|
|
css_computed_text_transform(
|
|
props.parent_style));
|
|
|
|
current = text;
|
|
|
|
/* swallow a single leading new line */
|
|
if (props.containing_block->flags & PRE_STRIP) {
|
|
switch (*current) {
|
|
case '\n':
|
|
current++;
|
|
break;
|
|
case '\r':
|
|
current++;
|
|
if (*current == '\n')
|
|
current++;
|
|
break;
|
|
}
|
|
props.containing_block->flags &= ~PRE_STRIP;
|
|
}
|
|
|
|
do {
|
|
size_t len = strcspn(current, "\r\n");
|
|
|
|
char old = current[len];
|
|
|
|
current[len] = 0;
|
|
|
|
if (props.inline_container == NULL) {
|
|
/* Child of a block without a current container
|
|
* (i.e. this box is the first child of its
|
|
* parent, or was preceded by block-level
|
|
* siblings) */
|
|
props.inline_container = box_create(NULL, NULL,
|
|
false, NULL, NULL, NULL, NULL,
|
|
ctx->bctx);
|
|
if (props.inline_container == NULL) {
|
|
free(text);
|
|
return false;
|
|
}
|
|
|
|
props.inline_container->type =
|
|
BOX_INLINE_CONTAINER;
|
|
|
|
box_add_child(props.containing_block,
|
|
props.inline_container);
|
|
}
|
|
|
|
/** \todo Dropping const isn't clever */
|
|
box = box_create(NULL,
|
|
(css_computed_style *) props.parent_style,
|
|
false, props.href, props.target, props.title,
|
|
NULL, ctx->bctx);
|
|
if (box == NULL) {
|
|
free(text);
|
|
return false;
|
|
}
|
|
|
|
box->type = BOX_TEXT;
|
|
|
|
box->text = talloc_strdup(ctx->bctx, current);
|
|
if (box->text == NULL) {
|
|
free(text);
|
|
return false;
|
|
}
|
|
|
|
box->length = strlen(box->text);
|
|
|
|
box_add_child(props.inline_container, box);
|
|
|
|
current[len] = old;
|
|
|
|
current += len;
|
|
|
|
if (current[0] != '\0') {
|
|
/* Linebreak: create new inline container */
|
|
props.inline_container = box_create(NULL, NULL,
|
|
false, NULL, NULL, NULL, NULL,
|
|
ctx->bctx);
|
|
if (props.inline_container == NULL) {
|
|
free(text);
|
|
return false;
|
|
}
|
|
|
|
props.inline_container->type =
|
|
BOX_INLINE_CONTAINER;
|
|
|
|
box_add_child(props.containing_block,
|
|
props.inline_container);
|
|
|
|
if (current[0] == '\r' && current[1] == '\n')
|
|
current += 2;
|
|
else
|
|
current++;
|
|
}
|
|
} while (*current);
|
|
|
|
free(text);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/**
|
|
* Convert an ELEMENT node to a box tree fragment,
|
|
* then schedule conversion of the next ELEMENT node
|
|
*/
|
|
static void convert_xml_to_box(struct box_construct_ctx *ctx)
|
|
{
|
|
dom_node *next;
|
|
bool convert_children;
|
|
uint32_t num_processed = 0;
|
|
const uint32_t max_processed_before_yield = 10;
|
|
|
|
do {
|
|
convert_children = true;
|
|
|
|
assert(ctx->n != NULL);
|
|
|
|
if (box_construct_element(ctx, &convert_children) == false) {
|
|
ctx->cb(ctx->content, false);
|
|
dom_node_unref(ctx->n);
|
|
free(ctx);
|
|
return;
|
|
}
|
|
|
|
/* Find next element to process, converting text nodes as we go */
|
|
next = next_node(ctx->n, ctx->content, convert_children);
|
|
while (next != NULL) {
|
|
dom_node_type type;
|
|
dom_exception err;
|
|
|
|
err = dom_node_get_node_type(next, &type);
|
|
if (err != DOM_NO_ERR) {
|
|
ctx->cb(ctx->content, false);
|
|
dom_node_unref(next);
|
|
free(ctx);
|
|
return;
|
|
}
|
|
|
|
if (type == DOM_ELEMENT_NODE)
|
|
break;
|
|
|
|
if (type == DOM_TEXT_NODE) {
|
|
ctx->n = next;
|
|
if (box_construct_text(ctx) == false) {
|
|
ctx->cb(ctx->content, false);
|
|
dom_node_unref(ctx->n);
|
|
free(ctx);
|
|
return;
|
|
}
|
|
}
|
|
|
|
next = next_node(next, ctx->content, true);
|
|
}
|
|
|
|
ctx->n = next;
|
|
|
|
if (next == NULL) {
|
|
/* Conversion complete */
|
|
struct box root;
|
|
|
|
memset(&root, 0, sizeof(root));
|
|
|
|
root.type = BOX_BLOCK;
|
|
root.children = root.last = ctx->root_box;
|
|
root.children->parent = &root;
|
|
|
|
/** \todo Remove box_normalise_block */
|
|
if (box_normalise_block(&root, ctx->root_box,
|
|
ctx->content) == false) {
|
|
ctx->cb(ctx->content, false);
|
|
} else {
|
|
ctx->content->layout = root.children;
|
|
ctx->content->layout->parent = NULL;
|
|
|
|
ctx->cb(ctx->content, true);
|
|
}
|
|
|
|
assert(ctx->n == NULL);
|
|
|
|
free(ctx);
|
|
return;
|
|
}
|
|
} while (++num_processed < max_processed_before_yield);
|
|
|
|
/* More work to do: schedule a continuation */
|
|
guit->misc->schedule(0, (void *)convert_xml_to_box, ctx);
|
|
}
|
|
|
|
|
|
/* exported function documented in html/box_construct.h */
|
|
nserror
|
|
dom_to_box(dom_node *n,
|
|
html_content *c,
|
|
box_construct_complete_cb cb,
|
|
void **box_conversion_context)
|
|
{
|
|
struct box_construct_ctx *ctx;
|
|
|
|
assert(box_conversion_context != NULL);
|
|
|
|
if (c->bctx == NULL) {
|
|
/* create a context allocation for this box tree */
|
|
c->bctx = talloc_zero(0, int);
|
|
if (c->bctx == NULL) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
}
|
|
|
|
ctx = malloc(sizeof(*ctx));
|
|
if (ctx == NULL) {
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
ctx->content = c;
|
|
ctx->n = dom_node_ref(n);
|
|
ctx->root_box = NULL;
|
|
ctx->cb = cb;
|
|
ctx->bctx = c->bctx;
|
|
|
|
*box_conversion_context = ctx;
|
|
|
|
return guit->misc->schedule(0, (void *)convert_xml_to_box, ctx);
|
|
}
|
|
|
|
|
|
/* exported function documented in html/box_construct.h */
|
|
nserror cancel_dom_to_box(void *box_conversion_context)
|
|
{
|
|
struct box_construct_ctx *ctx = box_conversion_context;
|
|
nserror err;
|
|
|
|
err = guit->misc->schedule(-1, (void *)convert_xml_to_box, ctx);
|
|
if (err != NSERROR_OK) {
|
|
return err;
|
|
}
|
|
|
|
dom_node_unref(ctx->n);
|
|
free(ctx);
|
|
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
|
|
/* exported function documented in html/box_construct.h */
|
|
struct box *box_for_node(dom_node *n)
|
|
{
|
|
struct box *box = NULL;
|
|
dom_exception err;
|
|
|
|
err = dom_node_get_user_data(n, corestring_dom___ns_key_box_node_data,
|
|
(void *) &box);
|
|
if (err != DOM_NO_ERR)
|
|
return NULL;
|
|
|
|
return box;
|
|
}
|
|
|
|
|
|
/* exported function documented in html/box_construct.h */
|
|
bool
|
|
box_extract_link(const html_content *content,
|
|
const dom_string *dsrel,
|
|
nsurl *base,
|
|
nsurl **result)
|
|
{
|
|
char *s, *s1, *apos0 = 0, *apos1 = 0, *quot0 = 0, *quot1 = 0;
|
|
unsigned int i, j, end;
|
|
nserror error;
|
|
const char *rel;
|
|
|
|
rel = dom_string_data(dsrel);
|
|
|
|
s1 = s = malloc(3 * strlen(rel) + 1);
|
|
if (!s)
|
|
return false;
|
|
|
|
/* copy to s, removing white space and control characters */
|
|
for (i = 0; rel[i] && ascii_is_space(rel[i]); i++)
|
|
;
|
|
for (end = strlen(rel);
|
|
(end != i) && ascii_is_space(rel[end - 1]);
|
|
end--)
|
|
;
|
|
for (j = 0; i != end; i++) {
|
|
if ((unsigned char) rel[i] < 0x20) {
|
|
; /* skip control characters */
|
|
} else if (rel[i] == ' ') {
|
|
s[j++] = '%';
|
|
s[j++] = '2';
|
|
s[j++] = '0';
|
|
} else {
|
|
s[j++] = rel[i];
|
|
}
|
|
}
|
|
s[j] = 0;
|
|
|
|
if (content->enable_scripting == false) {
|
|
/* extract first quoted string out of "javascript:" link */
|
|
if (strncmp(s, "javascript:", 11) == 0) {
|
|
apos0 = strchr(s, '\'');
|
|
if (apos0)
|
|
apos1 = strchr(apos0 + 1, '\'');
|
|
quot0 = strchr(s, '"');
|
|
if (quot0)
|
|
quot1 = strchr(quot0 + 1, '"');
|
|
if (apos0 && apos1 &&
|
|
(!quot0 || !quot1 || apos0 < quot0)) {
|
|
*apos1 = 0;
|
|
s1 = apos0 + 1;
|
|
} else if (quot0 && quot1) {
|
|
*quot1 = 0;
|
|
s1 = quot0 + 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* construct absolute URL */
|
|
error = nsurl_join(base, s1, result);
|
|
free(s);
|
|
if (error != NSERROR_OK) {
|
|
*result = NULL;
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|