netsurf/content/handlers/html/box_construct.c
2020-05-02 20:47:53 +01:00

1464 lines
36 KiB
C

/*
* Copyright 2005 James Bursa <bursa@users.sourceforge.net>
* Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
* Copyright 2005 John M Bell <jmb202@ecs.soton.ac.uk>
* Copyright 2006 Richard Wilson <info@tinct.net>
* Copyright 2008 Michael Drake <tlsa@netsurf-browser.org>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file
* Implementation of conversion from DOM tree to box tree.
*/
#include <dom/dom.h>
#include "utils/errors.h"
#include "utils/nsoption.h"
#include "utils/corestrings.h"
#include "utils/talloc.h"
#include "utils/string.h"
#include "utils/ascii.h"
#include "netsurf/misc.h"
#include "css/select.h"
#include "desktop/gui_internal.h"
#include "html/private.h"
#include "html/object.h"
#include "html/box.h"
#include "html/box_manipulate.h"
#include "html/box_construct.h"
#include "html/box_special.h"
#include "html/box_normalise.h"
#include "html/form_internal.h"
/**
* Context for box tree construction
*/
struct box_construct_ctx {
html_content *content; /**< Content we're constructing for */
dom_node *n; /**< Current node to process */
struct box *root_box; /**< Root box in the tree */
box_construct_complete_cb cb; /**< Callback to invoke on completion */
int *bctx; /**< talloc context */
};
/**
* Transient properties for construction of current node
*/
struct box_construct_props {
/** Style from which to inherit, or NULL if none */
const css_computed_style *parent_style;
/** Current link target, or NULL if none */
nsurl *href;
/** Current frame target, or NULL if none */
const char *target;
/** Current title attribute, or NULL if none */
const char *title;
/** Identity of the current block-level container */
struct box *containing_block;
/** Current container for inlines, or NULL if none
* \note If non-NULL, will be the last child of containing_block */
struct box *inline_container;
/** Whether the current node is the root of the DOM tree */
bool node_is_root;
};
static const content_type image_types = CONTENT_IMAGE;
/* the strings are not important, since we just compare the pointers */
const char *TARGET_SELF = "_self";
const char *TARGET_PARENT = "_parent";
const char *TARGET_TOP = "_top";
const char *TARGET_BLANK = "_blank";
/**
* mapping from CSS display to box type this table must be in sync
* with libcss' css_display enum
*/
static const box_type box_map[] = {
0, /* CSS_DISPLAY_INHERIT, */
BOX_INLINE, /* CSS_DISPLAY_INLINE, */
BOX_BLOCK, /* CSS_DISPLAY_BLOCK, */
BOX_BLOCK, /* CSS_DISPLAY_LIST_ITEM, */
BOX_INLINE, /* CSS_DISPLAY_RUN_IN, */
BOX_INLINE_BLOCK, /* CSS_DISPLAY_INLINE_BLOCK, */
BOX_TABLE, /* CSS_DISPLAY_TABLE, */
BOX_TABLE, /* CSS_DISPLAY_INLINE_TABLE, */
BOX_TABLE_ROW_GROUP, /* CSS_DISPLAY_TABLE_ROW_GROUP, */
BOX_TABLE_ROW_GROUP, /* CSS_DISPLAY_TABLE_HEADER_GROUP, */
BOX_TABLE_ROW_GROUP, /* CSS_DISPLAY_TABLE_FOOTER_GROUP, */
BOX_TABLE_ROW, /* CSS_DISPLAY_TABLE_ROW, */
BOX_NONE, /* CSS_DISPLAY_TABLE_COLUMN_GROUP, */
BOX_NONE, /* CSS_DISPLAY_TABLE_COLUMN, */
BOX_TABLE_CELL, /* CSS_DISPLAY_TABLE_CELL, */
BOX_INLINE, /* CSS_DISPLAY_TABLE_CAPTION, */
BOX_NONE /* CSS_DISPLAY_NONE */
};
/**
* determine if a box is the root node
*
* \param n node to check
* \return true if node is root else false.
*/
static inline bool box_is_root(dom_node *n)
{
dom_node *parent;
dom_node_type type;
dom_exception err;
err = dom_node_get_parent_node(n, &parent);
if (err != DOM_NO_ERR)
return false;
if (parent != NULL) {
err = dom_node_get_node_type(parent, &type);
dom_node_unref(parent);
if (err != DOM_NO_ERR)
return false;
if (type != DOM_DOCUMENT_NODE)
return false;
}
return true;
}
/**
* Extract transient construction properties
*
* \param n Current DOM node to convert
* \param props Property object to populate
*/
static void
box_extract_properties(dom_node *n, struct box_construct_props *props)
{
memset(props, 0, sizeof(*props));
props->node_is_root = box_is_root(n);
/* Extract properties from containing DOM node */
if (props->node_is_root == false) {
dom_node *current_node = n;
dom_node *parent_node = NULL;
struct box *parent_box;
dom_exception err;
/* Find ancestor node containing parent box */
while (true) {
err = dom_node_get_parent_node(current_node,
&parent_node);
if (err != DOM_NO_ERR || parent_node == NULL)
break;
parent_box = box_for_node(parent_node);
if (parent_box != NULL) {
props->parent_style = parent_box->style;
props->href = parent_box->href;
props->target = parent_box->target;
props->title = parent_box->title;
dom_node_unref(parent_node);
break;
} else {
if (current_node != n)
dom_node_unref(current_node);
current_node = parent_node;
parent_node = NULL;
}
}
/* Find containing block (may be parent) */
while (true) {
struct box *b;
err = dom_node_get_parent_node(current_node,
&parent_node);
if (err != DOM_NO_ERR || parent_node == NULL) {
if (current_node != n)
dom_node_unref(current_node);
break;
}
if (current_node != n)
dom_node_unref(current_node);
b = box_for_node(parent_node);
/* Children of nodes that created an inline box
* will generate boxes which are attached as
* _siblings_ of the box generated for their
* parent node. Note, however, that we'll still
* use the parent node's styling as the parent
* style, above. */
if (b != NULL && b->type != BOX_INLINE &&
b->type != BOX_BR) {
props->containing_block = b;
dom_node_unref(parent_node);
break;
} else {
current_node = parent_node;
parent_node = NULL;
}
}
}
/* Compute current inline container, if any */
if (props->containing_block != NULL &&
props->containing_block->last != NULL &&
props->containing_block->last->type ==
BOX_INLINE_CONTAINER)
props->inline_container = props->containing_block->last;
}
/**
* Get the style for an element.
*
* \param c content of type CONTENT_HTML that is being processed
* \param parent_style style at this point in xml tree, or NULL for root
* \param root_style root node's style, or NULL for root
* \param n node in xml tree
* \return the new style, or NULL on memory exhaustion
*/
static css_select_results *
box_get_style(html_content *c,
const css_computed_style *parent_style,
const css_computed_style *root_style,
dom_node *n)
{
dom_string *s;
dom_exception err;
css_stylesheet *inline_style = NULL;
css_select_results *styles;
nscss_select_ctx ctx;
/* Firstly, construct inline stylesheet, if any */
err = dom_element_get_attribute(n, corestring_dom_style, &s);
if (err != DOM_NO_ERR)
return NULL;
if (s != NULL) {
inline_style = nscss_create_inline_style(
(const uint8_t *) dom_string_data(s),
dom_string_byte_length(s),
c->encoding,
nsurl_access(c->base_url),
c->quirks != DOM_DOCUMENT_QUIRKS_MODE_NONE);
dom_string_unref(s);
if (inline_style == NULL)
return NULL;
}
/* Populate selection context */
ctx.ctx = c->select_ctx;
ctx.quirks = (c->quirks == DOM_DOCUMENT_QUIRKS_MODE_FULL);
ctx.base_url = c->base_url;
ctx.universal = c->universal;
ctx.root_style = root_style;
ctx.parent_style = parent_style;
/* Select style for element */
styles = nscss_get_style(&ctx, n, &c->media, inline_style);
/* No longer need inline style */
if (inline_style != NULL)
css_stylesheet_destroy(inline_style);
return styles;
}
/**
* Construct the box required for a generated element.
*
* \param n XML node of type XML_ELEMENT_NODE
* \param content Content of type CONTENT_HTML that is being processed
* \param box Box which may have generated content
* \param style Complete computed style for pseudo element, or NULL
*
* \todo This is currently incomplete. It just does enough to support
* the clearfix hack. (http://www.positioniseverything.net/easyclearing.html )
*/
static void
box_construct_generate(dom_node *n,
html_content *content,
struct box *box,
const css_computed_style *style)
{
struct box *gen = NULL;
enum css_display_e computed_display;
const css_computed_content_item *c_item;
/* Nothing to generate if the parent box is not a block */
if (box->type != BOX_BLOCK)
return;
/* To determine if an element has a pseudo element, we select
* for it and test to see if the returned style's content
* property is set to normal. */
if (style == NULL ||
css_computed_content(style, &c_item) ==
CSS_CONTENT_NORMAL) {
/* No pseudo element */
return;
}
/* create box for this element */
computed_display = ns_computed_display(style, box_is_root(n));
if (computed_display == CSS_DISPLAY_BLOCK ||
computed_display == CSS_DISPLAY_TABLE) {
/* currently only support block level boxes */
/** \todo Not wise to drop const from the computed style */
gen = box_create(NULL, (css_computed_style *) style,
false, NULL, NULL, NULL, NULL, content->bctx);
if (gen == NULL) {
return;
}
/* set box type from computed display */
gen->type = box_map[ns_computed_display(
style, box_is_root(n))];
box_add_child(box, gen);
}
}
/**
* Construct a list marker box
*
* \param box Box to attach marker to
* \param title Current title attribute
* \param ctx Box construction context
* \param parent Current block-level container
* \return true on success, false on memory exhaustion
*/
static bool
box_construct_marker(struct box *box,
const char *title,
struct box_construct_ctx *ctx,
struct box *parent)
{
lwc_string *image_uri;
struct box *marker;
marker = box_create(NULL, box->style, false, NULL, NULL, title,
NULL, ctx->bctx);
if (marker == false)
return false;
marker->type = BOX_BLOCK;
/** \todo marker content (list-style-type) */
switch (css_computed_list_style_type(box->style)) {
case CSS_LIST_STYLE_TYPE_DISC:
/* 2022 BULLET */
marker->text = (char *) "\342\200\242";
marker->length = 3;
break;
case CSS_LIST_STYLE_TYPE_CIRCLE:
/* 25CB WHITE CIRCLE */
marker->text = (char *) "\342\227\213";
marker->length = 3;
break;
case CSS_LIST_STYLE_TYPE_SQUARE:
/* 25AA BLACK SMALL SQUARE */
marker->text = (char *) "\342\226\252";
marker->length = 3;
break;
case CSS_LIST_STYLE_TYPE_DECIMAL:
case CSS_LIST_STYLE_TYPE_LOWER_ALPHA:
case CSS_LIST_STYLE_TYPE_LOWER_ROMAN:
case CSS_LIST_STYLE_TYPE_UPPER_ALPHA:
case CSS_LIST_STYLE_TYPE_UPPER_ROMAN:
default:
if (parent->last) {
struct box *last = parent->last;
/* Drill down into last child of parent
* to find the list marker (if any)
*
* Floated list boxes end up as:
*
* parent
* BOX_INLINE_CONTAINER
* BOX_FLOAT_{LEFT,RIGHT}
* BOX_BLOCK <-- list box
* ...
*/
while (last != NULL && last->list_marker == NULL) {
struct box *last_inner = last;
while (last_inner != NULL) {
if (last_inner->list_marker != NULL)
break;
if (last_inner->type ==
BOX_INLINE_CONTAINER ||
last_inner->type ==
BOX_FLOAT_LEFT ||
last_inner->type ==
BOX_FLOAT_RIGHT) {
last_inner = last_inner->last;
} else {
last_inner = NULL;
}
}
if (last_inner != NULL) {
last = last_inner;
} else {
last = last->prev;
}
}
if (last && last->list_marker) {
marker->rows = last->list_marker->rows + 1;
}
}
marker->text = talloc_array(ctx->bctx, char, 20);
if (marker->text == NULL)
return false;
snprintf(marker->text, 20, "%u.", marker->rows);
marker->length = strlen(marker->text);
break;
case CSS_LIST_STYLE_TYPE_NONE:
marker->text = 0;
marker->length = 0;
break;
}
if (css_computed_list_style_image(box->style, &image_uri) == CSS_LIST_STYLE_IMAGE_URI &&
(image_uri != NULL) &&
(nsoption_bool(foreground_images) == true)) {
nsurl *url;
nserror error;
/* TODO: we get a url out of libcss as a lwc string, but
* earlier we already had it as a nsurl after we
* nsurl_joined it. Can this be improved?
* For now, just making another nsurl. */
error = nsurl_create(lwc_string_data(image_uri), &url);
if (error != NSERROR_OK)
return false;
if (html_fetch_object(ctx->content,
url,
marker,
image_types,
false) == false) {
nsurl_unref(url);
return false;
}
nsurl_unref(url);
}
box->list_marker = marker;
marker->parent = box;
return true;
}
/**
* Construct the box tree for an XML element.
*
* \param ctx Tree construction context
* \param convert_children Whether to convert children
* \return true on success, false on memory exhaustion
*/
static bool
box_construct_element(struct box_construct_ctx *ctx, bool *convert_children)
{
dom_string *title0, *s;
lwc_string *id = NULL;
struct box *box = NULL, *old_box;
css_select_results *styles = NULL;
lwc_string *bgimage_uri;
dom_exception err;
struct box_construct_props props;
const css_computed_style *root_style = NULL;
assert(ctx->n != NULL);
box_extract_properties(ctx->n, &props);
if (props.containing_block != NULL) {
/* In case the containing block is a pre block, we clear
* the PRE_STRIP flag since it is not used if we follow
* the pre with a tag */
props.containing_block->flags &= ~PRE_STRIP;
}
if (props.node_is_root == false) {
root_style = ctx->root_box->style;
}
styles = box_get_style(ctx->content, props.parent_style, root_style,
ctx->n);
if (styles == NULL)
return false;
/* Extract title attribute, if present */
err = dom_element_get_attribute(ctx->n, corestring_dom_title, &title0);
if (err != DOM_NO_ERR)
return false;
if (title0 != NULL) {
char *t = squash_whitespace(dom_string_data(title0));
dom_string_unref(title0);
if (t == NULL)
return false;
props.title = talloc_strdup(ctx->bctx, t);
free(t);
if (props.title == NULL)
return false;
}
/* Extract id attribute, if present */
err = dom_element_get_attribute(ctx->n, corestring_dom_id, &s);
if (err != DOM_NO_ERR)
return false;
if (s != NULL) {
err = dom_string_intern(s, &id);
if (err != DOM_NO_ERR)
id = NULL;
dom_string_unref(s);
}
box = box_create(styles, styles->styles[CSS_PSEUDO_ELEMENT_NONE], false,
props.href, props.target, props.title, id,
ctx->bctx);
if (box == NULL)
return false;
/* If this is the root box, add it to the context */
if (props.node_is_root)
ctx->root_box = box;
/* Deal with colspan/rowspan */
err = dom_element_get_attribute(ctx->n, corestring_dom_colspan, &s);
if (err != DOM_NO_ERR)
return false;
if (s != NULL) {
const char *val = dom_string_data(s);
if ('0' <= val[0] && val[0] <= '9')
box->columns = strtol(val, NULL, 10);
dom_string_unref(s);
}
err = dom_element_get_attribute(ctx->n, corestring_dom_rowspan, &s);
if (err != DOM_NO_ERR)
return false;
if (s != NULL) {
const char *val = dom_string_data(s);
if ('0' <= val[0] && val[0] <= '9')
box->rows = strtol(val, NULL, 10);
dom_string_unref(s);
}
/* Set box type from computed display */
if ((css_computed_position(box->style) == CSS_POSITION_ABSOLUTE ||
css_computed_position(box->style) ==
CSS_POSITION_FIXED) &&
(ns_computed_display_static(box->style) ==
CSS_DISPLAY_INLINE ||
ns_computed_display_static(box->style) ==
CSS_DISPLAY_INLINE_BLOCK ||
ns_computed_display_static(box->style) ==
CSS_DISPLAY_INLINE_TABLE)) {
/* Special case for absolute positioning: make absolute inlines
* into inline block so that the boxes are constructed in an
* inline container as if they were not absolutely positioned.
* Layout expects and handles this. */
box->type = box_map[CSS_DISPLAY_INLINE_BLOCK];
} else if (props.node_is_root) {
/* Special case for root element: force it to BLOCK, or the
* rest of the layout will break. */
box->type = BOX_BLOCK;
} else {
/* Normal mapping */
box->type = box_map[ns_computed_display(box->style,
props.node_is_root)];
}
if (convert_special_elements(ctx->n,
ctx->content,
box,
convert_children) == false) {
return false;
}
/* Handle the :before pseudo element */
if (!(box->flags & IS_REPLACED)) {
box_construct_generate(ctx->n, ctx->content, box,
box->styles->styles[CSS_PSEUDO_ELEMENT_BEFORE]);
}
if (box->type == BOX_NONE ||
(ns_computed_display(box->style,
props.node_is_root) == CSS_DISPLAY_NONE &&
props.node_is_root == false)) {
css_select_results_destroy(styles);
box->styles = NULL;
box->style = NULL;
/* Invalidate associated gadget, if any */
if (box->gadget != NULL) {
box->gadget->box = NULL;
box->gadget = NULL;
}
/* Can't do this, because the lifetimes of boxes and gadgets
* are inextricably linked. Fortunately, talloc will save us
* (for now) */
/* box_free_box(box); */
*convert_children = false;
return true;
}
/* Attach DOM node to box */
err = dom_node_set_user_data(ctx->n,
corestring_dom___ns_key_box_node_data, box, NULL,
(void *) &old_box);
if (err != DOM_NO_ERR)
return false;
/* Attach box to DOM node */
box->node = dom_node_ref(ctx->n);
if (props.inline_container == NULL &&
(box->type == BOX_INLINE ||
box->type == BOX_BR ||
box->type == BOX_INLINE_BLOCK ||
css_computed_float(box->style) == CSS_FLOAT_LEFT ||
css_computed_float(box->style) == CSS_FLOAT_RIGHT) &&
props.node_is_root == false) {
/* Found an inline child of a block without a current container
* (i.e. this box is the first child of its parent, or was
* preceded by block-level siblings) */
assert(props.containing_block != NULL &&
"Box must have containing block.");
props.inline_container = box_create(NULL, NULL, false, NULL,
NULL, NULL, NULL, ctx->bctx);
if (props.inline_container == NULL)
return false;
props.inline_container->type = BOX_INLINE_CONTAINER;
box_add_child(props.containing_block, props.inline_container);
}
/* Kick off fetch for any background image */
if (css_computed_background_image(box->style, &bgimage_uri) ==
CSS_BACKGROUND_IMAGE_IMAGE && bgimage_uri != NULL &&
nsoption_bool(background_images) == true) {
nsurl *url;
nserror error;
/* TODO: we get a url out of libcss as a lwc string, but
* earlier we already had it as a nsurl after we
* nsurl_joined it. Can this be improved?
* For now, just making another nsurl. */
error = nsurl_create(lwc_string_data(bgimage_uri), &url);
if (error == NSERROR_OK) {
/* Fetch image if we got a valid URL */
if (html_fetch_object(ctx->content,
url,
box,
image_types,
true) == false) {
nsurl_unref(url);
return false;
}
nsurl_unref(url);
}
}
if (*convert_children)
box->flags |= CONVERT_CHILDREN;
if (box->type == BOX_INLINE || box->type == BOX_BR ||
box->type == BOX_INLINE_BLOCK) {
/* Inline container must exist, as we'll have
* created it above if it didn't */
assert(props.inline_container != NULL);
box_add_child(props.inline_container, box);
} else {
if (ns_computed_display(box->style, props.node_is_root) ==
CSS_DISPLAY_LIST_ITEM) {
/* List item: compute marker */
if (box_construct_marker(box, props.title, ctx,
props.containing_block) == false)
return false;
}
if (props.node_is_root == false &&
(css_computed_float(box->style) ==
CSS_FLOAT_LEFT ||
css_computed_float(box->style) ==
CSS_FLOAT_RIGHT)) {
/* Float: insert a float between the parent and box. */
struct box *flt = box_create(NULL, NULL, false,
props.href, props.target, props.title,
NULL, ctx->bctx);
if (flt == NULL)
return false;
if (css_computed_float(box->style) == CSS_FLOAT_LEFT)
flt->type = BOX_FLOAT_LEFT;
else
flt->type = BOX_FLOAT_RIGHT;
box_add_child(props.inline_container, flt);
box_add_child(flt, box);
} else {
/* Non-floated block-level box: add to containing block
* if there is one. If we're the root box, then there
* won't be. */
if (props.containing_block != NULL)
box_add_child(props.containing_block, box);
}
}
return true;
}
/**
* Complete construction of the box tree for an element.
*
* \param n DOM node to construct for
* \param content Containing document
*
* This will be called after all children of an element have been processed
*/
static void box_construct_element_after(dom_node *n, html_content *content)
{
struct box_construct_props props;
struct box *box = box_for_node(n);
assert(box != NULL);
box_extract_properties(n, &props);
if (box->type == BOX_INLINE || box->type == BOX_BR) {
/* Insert INLINE_END into containing block */
struct box *inline_end;
bool has_children;
dom_exception err;
err = dom_node_has_child_nodes(n, &has_children);
if (err != DOM_NO_ERR)
return;
if (has_children == false ||
(box->flags & CONVERT_CHILDREN) == 0) {
/* No children, or didn't want children converted */
return;
}
if (props.inline_container == NULL) {
/* Create inline container if we don't have one */
props.inline_container = box_create(NULL, NULL, false,
NULL, NULL, NULL, NULL, content->bctx);
if (props.inline_container == NULL)
return;
props.inline_container->type = BOX_INLINE_CONTAINER;
box_add_child(props.containing_block,
props.inline_container);
}
inline_end = box_create(NULL, box->style, false,
box->href, box->target, box->title,
box->id == NULL ? NULL :
lwc_string_ref(box->id), content->bctx);
if (inline_end != NULL) {
inline_end->type = BOX_INLINE_END;
assert(props.inline_container != NULL);
box_add_child(props.inline_container, inline_end);
box->inline_end = inline_end;
inline_end->inline_end = box;
}
} else if (!(box->flags & IS_REPLACED)) {
/* Handle the :after pseudo element */
box_construct_generate(n, content, box,
box->styles->styles[CSS_PSEUDO_ELEMENT_AFTER]);
}
}
/**
* Find the next node in the DOM tree, completing element construction
* where appropriate.
*
* \param n Current node
* \param content Containing content
* \param convert_children Whether to consider children of \a n
* \return Next node to process, or NULL if complete
*
* \note \a n will be unreferenced
*/
static dom_node *
next_node(dom_node *n, html_content *content, bool convert_children)
{
dom_node *next = NULL;
bool has_children;
dom_exception err;
err = dom_node_has_child_nodes(n, &has_children);
if (err != DOM_NO_ERR) {
dom_node_unref(n);
return NULL;
}
if (convert_children && has_children) {
err = dom_node_get_first_child(n, &next);
if (err != DOM_NO_ERR) {
dom_node_unref(n);
return NULL;
}
dom_node_unref(n);
} else {
err = dom_node_get_next_sibling(n, &next);
if (err != DOM_NO_ERR) {
dom_node_unref(n);
return NULL;
}
if (next != NULL) {
if (box_for_node(n) != NULL)
box_construct_element_after(n, content);
dom_node_unref(n);
} else {
if (box_for_node(n) != NULL)
box_construct_element_after(n, content);
while (box_is_root(n) == false) {
dom_node *parent = NULL;
dom_node *parent_next = NULL;
err = dom_node_get_parent_node(n, &parent);
if (err != DOM_NO_ERR) {
dom_node_unref(n);
return NULL;
}
assert(parent != NULL);
err = dom_node_get_next_sibling(parent,
&parent_next);
if (err != DOM_NO_ERR) {
dom_node_unref(parent);
dom_node_unref(n);
return NULL;
}
if (parent_next != NULL) {
dom_node_unref(parent_next);
dom_node_unref(parent);
break;
}
dom_node_unref(n);
n = parent;
parent = NULL;
if (box_for_node(n) != NULL) {
box_construct_element_after(
n, content);
}
}
if (box_is_root(n) == false) {
dom_node *parent = NULL;
err = dom_node_get_parent_node(n, &parent);
if (err != DOM_NO_ERR) {
dom_node_unref(n);
return NULL;
}
assert(parent != NULL);
err = dom_node_get_next_sibling(parent, &next);
if (err != DOM_NO_ERR) {
dom_node_unref(parent);
dom_node_unref(n);
return NULL;
}
if (box_for_node(parent) != NULL) {
box_construct_element_after(parent,
content);
}
dom_node_unref(parent);
}
dom_node_unref(n);
}
}
return next;
}
/**
* Apply the CSS text-transform property to given text for its ASCII chars.
*
* \param s string to transform
* \param len length of s
* \param tt transform type
*/
static void
box_text_transform(char *s, unsigned int len, enum css_text_transform_e tt)
{
unsigned int i;
if (len == 0)
return;
switch (tt) {
case CSS_TEXT_TRANSFORM_UPPERCASE:
for (i = 0; i < len; ++i)
if ((unsigned char) s[i] < 0x80)
s[i] = toupper(s[i]);
break;
case CSS_TEXT_TRANSFORM_LOWERCASE:
for (i = 0; i < len; ++i)
if ((unsigned char) s[i] < 0x80)
s[i] = tolower(s[i]);
break;
case CSS_TEXT_TRANSFORM_CAPITALIZE:
if ((unsigned char) s[0] < 0x80)
s[0] = toupper(s[0]);
for (i = 1; i < len; ++i)
if ((unsigned char) s[i] < 0x80 &&
isspace(s[i - 1]))
s[i] = toupper(s[i]);
break;
default:
break;
}
}
/**
* Construct the box tree for an XML text node.
*
* \param ctx Tree construction context
* \return true on success, false on memory exhaustion
*/
static bool box_construct_text(struct box_construct_ctx *ctx)
{
struct box_construct_props props;
struct box *box = NULL;
dom_string *content;
dom_exception err;
assert(ctx->n != NULL);
box_extract_properties(ctx->n, &props);
assert(props.containing_block != NULL);
err = dom_characterdata_get_data(ctx->n, &content);
if (err != DOM_NO_ERR || content == NULL)
return false;
if (css_computed_white_space(props.parent_style) ==
CSS_WHITE_SPACE_NORMAL ||
css_computed_white_space(props.parent_style) ==
CSS_WHITE_SPACE_NOWRAP) {
char *text;
text = squash_whitespace(dom_string_data(content));
dom_string_unref(content);
if (text == NULL)
return false;
/* if the text is just a space, combine it with the preceding
* text node, if any */
if (text[0] == ' ' && text[1] == 0) {
if (props.inline_container != NULL) {
assert(props.inline_container->last != NULL);
props.inline_container->last->space =
UNKNOWN_WIDTH;
}
free(text);
return true;
}
if (props.inline_container == NULL) {
/* Child of a block without a current container
* (i.e. this box is the first child of its parent, or
* was preceded by block-level siblings) */
props.inline_container = box_create(NULL, NULL, false,
NULL, NULL, NULL, NULL, ctx->bctx);
if (props.inline_container == NULL) {
free(text);
return false;
}
props.inline_container->type = BOX_INLINE_CONTAINER;
box_add_child(props.containing_block,
props.inline_container);
}
/** \todo Dropping const here is not clever */
box = box_create(NULL,
(css_computed_style *) props.parent_style,
false, props.href, props.target, props.title,
NULL, ctx->bctx);
if (box == NULL) {
free(text);
return false;
}
box->type = BOX_TEXT;
box->text = talloc_strdup(ctx->bctx, text);
free(text);
if (box->text == NULL)
return false;
box->length = strlen(box->text);
/* strip ending space char off */
if (box->length > 1 && box->text[box->length - 1] == ' ') {
box->space = UNKNOWN_WIDTH;
box->length--;
}
if (css_computed_text_transform(props.parent_style) !=
CSS_TEXT_TRANSFORM_NONE)
box_text_transform(box->text, box->length,
css_computed_text_transform(
props.parent_style));
box_add_child(props.inline_container, box);
if (box->text[0] == ' ') {
box->length--;
memmove(box->text, &box->text[1], box->length);
if (box->prev != NULL)
box->prev->space = UNKNOWN_WIDTH;
}
} else {
/* white-space: pre */
char *text;
size_t text_len = dom_string_byte_length(content);
size_t i;
char *current;
enum css_white_space_e white_space =
css_computed_white_space(props.parent_style);
/* note: pre-wrap/pre-line are unimplemented */
assert(white_space == CSS_WHITE_SPACE_PRE ||
white_space == CSS_WHITE_SPACE_PRE_LINE ||
white_space == CSS_WHITE_SPACE_PRE_WRAP);
text = malloc(text_len + 1);
dom_string_unref(content);
if (text == NULL)
return false;
memcpy(text, dom_string_data(content), text_len);
text[text_len] = '\0';
/* TODO: Handle tabs properly */
for (i = 0; i < text_len; i++)
if (text[i] == '\t')
text[i] = ' ';
if (css_computed_text_transform(props.parent_style) !=
CSS_TEXT_TRANSFORM_NONE)
box_text_transform(text, strlen(text),
css_computed_text_transform(
props.parent_style));
current = text;
/* swallow a single leading new line */
if (props.containing_block->flags & PRE_STRIP) {
switch (*current) {
case '\n':
current++;
break;
case '\r':
current++;
if (*current == '\n')
current++;
break;
}
props.containing_block->flags &= ~PRE_STRIP;
}
do {
size_t len = strcspn(current, "\r\n");
char old = current[len];
current[len] = 0;
if (props.inline_container == NULL) {
/* Child of a block without a current container
* (i.e. this box is the first child of its
* parent, or was preceded by block-level
* siblings) */
props.inline_container = box_create(NULL, NULL,
false, NULL, NULL, NULL, NULL,
ctx->bctx);
if (props.inline_container == NULL) {
free(text);
return false;
}
props.inline_container->type =
BOX_INLINE_CONTAINER;
box_add_child(props.containing_block,
props.inline_container);
}
/** \todo Dropping const isn't clever */
box = box_create(NULL,
(css_computed_style *) props.parent_style,
false, props.href, props.target, props.title,
NULL, ctx->bctx);
if (box == NULL) {
free(text);
return false;
}
box->type = BOX_TEXT;
box->text = talloc_strdup(ctx->bctx, current);
if (box->text == NULL) {
free(text);
return false;
}
box->length = strlen(box->text);
box_add_child(props.inline_container, box);
current[len] = old;
current += len;
if (current[0] != '\0') {
/* Linebreak: create new inline container */
props.inline_container = box_create(NULL, NULL,
false, NULL, NULL, NULL, NULL,
ctx->bctx);
if (props.inline_container == NULL) {
free(text);
return false;
}
props.inline_container->type =
BOX_INLINE_CONTAINER;
box_add_child(props.containing_block,
props.inline_container);
if (current[0] == '\r' && current[1] == '\n')
current += 2;
else
current++;
}
} while (*current);
free(text);
}
return true;
}
/**
* Convert an ELEMENT node to a box tree fragment,
* then schedule conversion of the next ELEMENT node
*/
static void convert_xml_to_box(struct box_construct_ctx *ctx)
{
dom_node *next;
bool convert_children;
uint32_t num_processed = 0;
const uint32_t max_processed_before_yield = 10;
do {
convert_children = true;
assert(ctx->n != NULL);
if (box_construct_element(ctx, &convert_children) == false) {
ctx->cb(ctx->content, false);
dom_node_unref(ctx->n);
free(ctx);
return;
}
/* Find next element to process, converting text nodes as we go */
next = next_node(ctx->n, ctx->content, convert_children);
while (next != NULL) {
dom_node_type type;
dom_exception err;
err = dom_node_get_node_type(next, &type);
if (err != DOM_NO_ERR) {
ctx->cb(ctx->content, false);
dom_node_unref(next);
free(ctx);
return;
}
if (type == DOM_ELEMENT_NODE)
break;
if (type == DOM_TEXT_NODE) {
ctx->n = next;
if (box_construct_text(ctx) == false) {
ctx->cb(ctx->content, false);
dom_node_unref(ctx->n);
free(ctx);
return;
}
}
next = next_node(next, ctx->content, true);
}
ctx->n = next;
if (next == NULL) {
/* Conversion complete */
struct box root;
memset(&root, 0, sizeof(root));
root.type = BOX_BLOCK;
root.children = root.last = ctx->root_box;
root.children->parent = &root;
/** \todo Remove box_normalise_block */
if (box_normalise_block(&root, ctx->root_box,
ctx->content) == false) {
ctx->cb(ctx->content, false);
} else {
ctx->content->layout = root.children;
ctx->content->layout->parent = NULL;
ctx->cb(ctx->content, true);
}
assert(ctx->n == NULL);
free(ctx);
return;
}
} while (++num_processed < max_processed_before_yield);
/* More work to do: schedule a continuation */
guit->misc->schedule(0, (void *)convert_xml_to_box, ctx);
}
/* exported function documented in html/box_construct.h */
nserror
dom_to_box(dom_node *n,
html_content *c,
box_construct_complete_cb cb,
void **box_conversion_context)
{
struct box_construct_ctx *ctx;
assert(box_conversion_context != NULL);
if (c->bctx == NULL) {
/* create a context allocation for this box tree */
c->bctx = talloc_zero(0, int);
if (c->bctx == NULL) {
return NSERROR_NOMEM;
}
}
ctx = malloc(sizeof(*ctx));
if (ctx == NULL) {
return NSERROR_NOMEM;
}
ctx->content = c;
ctx->n = dom_node_ref(n);
ctx->root_box = NULL;
ctx->cb = cb;
ctx->bctx = c->bctx;
*box_conversion_context = ctx;
return guit->misc->schedule(0, (void *)convert_xml_to_box, ctx);
}
/* exported function documented in html/box_construct.h */
nserror cancel_dom_to_box(void *box_conversion_context)
{
struct box_construct_ctx *ctx = box_conversion_context;
nserror err;
err = guit->misc->schedule(-1, (void *)convert_xml_to_box, ctx);
if (err != NSERROR_OK) {
return err;
}
dom_node_unref(ctx->n);
free(ctx);
return NSERROR_OK;
}
/* exported function documented in html/box_construct.h */
struct box *box_for_node(dom_node *n)
{
struct box *box = NULL;
dom_exception err;
err = dom_node_get_user_data(n, corestring_dom___ns_key_box_node_data,
(void *) &box);
if (err != DOM_NO_ERR)
return NULL;
return box;
}
/* exported function documented in html/box_construct.h */
bool
box_extract_link(const html_content *content,
const dom_string *dsrel,
nsurl *base,
nsurl **result)
{
char *s, *s1, *apos0 = 0, *apos1 = 0, *quot0 = 0, *quot1 = 0;
unsigned int i, j, end;
nserror error;
const char *rel;
rel = dom_string_data(dsrel);
s1 = s = malloc(3 * strlen(rel) + 1);
if (!s)
return false;
/* copy to s, removing white space and control characters */
for (i = 0; rel[i] && ascii_is_space(rel[i]); i++)
;
for (end = strlen(rel);
(end != i) && ascii_is_space(rel[end - 1]);
end--)
;
for (j = 0; i != end; i++) {
if ((unsigned char) rel[i] < 0x20) {
; /* skip control characters */
} else if (rel[i] == ' ') {
s[j++] = '%';
s[j++] = '2';
s[j++] = '0';
} else {
s[j++] = rel[i];
}
}
s[j] = 0;
if (content->enable_scripting == false) {
/* extract first quoted string out of "javascript:" link */
if (strncmp(s, "javascript:", 11) == 0) {
apos0 = strchr(s, '\'');
if (apos0)
apos1 = strchr(apos0 + 1, '\'');
quot0 = strchr(s, '"');
if (quot0)
quot1 = strchr(quot0 + 1, '"');
if (apos0 && apos1 &&
(!quot0 || !quot1 || apos0 < quot0)) {
*apos1 = 0;
s1 = apos0 + 1;
} else if (quot0 && quot1) {
*quot1 = 0;
s1 = quot0 + 1;
}
}
}
/* construct absolute URL */
error = nsurl_join(base, s1, result);
free(s);
if (error != NSERROR_OK) {
*result = NULL;
return false;
}
return true;
}