netsurf/content/handlers/html/box_construct.c

1750 lines
42 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright 2005 James Bursa <bursa@users.sourceforge.net>
* Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
* Copyright 2005 John M Bell <jmb202@ecs.soton.ac.uk>
* Copyright 2006 Richard Wilson <info@tinct.net>
* Copyright 2008 Michael Drake <tlsa@netsurf-browser.org>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file
* Implementation of conversion from DOM tree to box tree.
*/
#include <string.h>
#include <dom/dom.h>
#include "utils/errors.h"
#include "utils/nsoption.h"
#include "utils/corestrings.h"
#include "utils/talloc.h"
#include "utils/string.h"
#include "utils/ascii.h"
#include "utils/nsurl.h"
#include "netsurf/misc.h"
#include "css/select.h"
#include "desktop/gui_internal.h"
#include "html/private.h"
#include "html/object.h"
#include "html/box.h"
#include "html/box_manipulate.h"
#include "html/box_construct.h"
#include "html/box_special.h"
#include "html/box_normalise.h"
#include "html/form_internal.h"
/**
 * Context for box tree construction
 *
 * Holds the state that is threaded through the construction routines in
 * this file while a DOM tree is converted into a box tree.
 */
struct box_construct_ctx {
html_content *content; /**< Content we're constructing for */
dom_node *n; /**< Current node to process */
struct box *root_box; /**< Root box in the tree (set when the root element's box is created) */
box_construct_complete_cb cb; /**< Callback to invoke on completion */
int *bctx; /**< talloc context passed to box_create() and talloc allocations */
};
/**
 * Transient properties for construction of current node
 *
 * Populated afresh for each node by box_extract_properties(), which
 * zeroes the structure before filling it in.
 */
struct box_construct_props {
/** Style from which to inherit, or NULL if none */
const css_computed_style *parent_style;
/** Current link target, or NULL if none */
struct nsurl *href;
/** Current frame target, or NULL if none */
const char *target;
/** Current title attribute, or NULL if none */
const char *title;
/** Identity of the current block-level container
 * (nearest ancestor box that is neither BOX_INLINE nor BOX_BR) */
struct box *containing_block;
/** Current container for inlines, or NULL if none
 * \note If non-NULL, will be the last child of containing_block */
struct box *inline_container;
/** Whether the current node is the root of the DOM tree */
bool node_is_root;
};
/** Content types accepted when fetching list marker and background images */
static const content_type image_types = CONTENT_IMAGE;
/**
 * Mapping from CSS display value to box type.
 *
 * The table is indexed directly by the computed display value, so its
 * entries must be kept in the same order as libcss' css_display enum.
 */
static const box_type box_map[] = {
0, /* CSS_DISPLAY_INHERIT, */
BOX_INLINE, /* CSS_DISPLAY_INLINE, */
BOX_BLOCK, /* CSS_DISPLAY_BLOCK, */
BOX_BLOCK, /* CSS_DISPLAY_LIST_ITEM, */
BOX_INLINE, /* CSS_DISPLAY_RUN_IN, */
BOX_INLINE_BLOCK, /* CSS_DISPLAY_INLINE_BLOCK, */
BOX_TABLE, /* CSS_DISPLAY_TABLE, */
BOX_TABLE, /* CSS_DISPLAY_INLINE_TABLE, */
BOX_TABLE_ROW_GROUP, /* CSS_DISPLAY_TABLE_ROW_GROUP, */
BOX_TABLE_ROW_GROUP, /* CSS_DISPLAY_TABLE_HEADER_GROUP, */
BOX_TABLE_ROW_GROUP, /* CSS_DISPLAY_TABLE_FOOTER_GROUP, */
BOX_TABLE_ROW, /* CSS_DISPLAY_TABLE_ROW, */
BOX_NONE, /* CSS_DISPLAY_TABLE_COLUMN_GROUP, */
BOX_NONE, /* CSS_DISPLAY_TABLE_COLUMN, */
BOX_TABLE_CELL, /* CSS_DISPLAY_TABLE_CELL, */
BOX_INLINE, /* CSS_DISPLAY_TABLE_CAPTION, */
BOX_NONE /* CSS_DISPLAY_NONE */
};
/**
 * Test whether a DOM node is the root of its tree.
 *
 * A node counts as root when it has no parent at all, or when its parent
 * is the document node. Any DOM error yields false.
 *
 * \param n  node to check
 * \return true if the node is the root, false otherwise
 */
static inline bool box_is_root(dom_node *n)
{
	dom_node *up = NULL;
	dom_node_type up_type;
	dom_exception exc;

	if (dom_node_get_parent_node(n, &up) != DOM_NO_ERR) {
		return false;
	}

	if (up == NULL) {
		/* Parentless node: treated as root */
		return true;
	}

	exc = dom_node_get_node_type(up, &up_type);
	dom_node_unref(up);

	return (exc == DOM_NO_ERR) && (up_type == DOM_DOCUMENT_NODE);
}
/**
 * Extract transient construction properties
 *
 * Zeroes \a props and then fills it in from the ancestors of \a n:
 *  - parent_style, href, target and title are copied from the box of the
 *    nearest ancestor node that has a box;
 *  - containing_block is the box of the nearest ancestor whose box is
 *    neither BOX_INLINE nor BOX_BR;
 *  - inline_container is the containing block's last child, if that child
 *    is a BOX_INLINE_CONTAINER.
 * For the root node only node_is_root is set; everything else stays zero.
 *
 * \param n Current DOM node to convert
 * \param props Property object to populate
 */
static void
box_extract_properties(dom_node *n, struct box_construct_props *props)
{
memset(props, 0, sizeof(*props));
props->node_is_root = box_is_root(n);
/* Extract properties from containing DOM node */
if (props->node_is_root == false) {
dom_node *current_node = n;
dom_node *parent_node = NULL;
struct box *parent_box;
dom_exception err;
/* Find ancestor node containing parent box */
while (true) {
err = dom_node_get_parent_node(current_node,
&parent_node);
if (err != DOM_NO_ERR || parent_node == NULL)
break;
parent_box = box_for_node(parent_node);
if (parent_box != NULL) {
props->parent_style = parent_box->style;
props->href = parent_box->href;
props->target = parent_box->target;
props->title = parent_box->title;
/* current_node is deliberately left unchanged, so the
 * loop below starts by re-examining this same parent */
dom_node_unref(parent_node);
break;
} else {
/* Only nodes obtained during this walk are released here;
 * n itself is never unreferenced by this function */
if (current_node != n)
dom_node_unref(current_node);
current_node = parent_node;
parent_node = NULL;
}
}
/* Find containing block (may be parent) */
while (true) {
struct box *b;
err = dom_node_get_parent_node(current_node,
&parent_node);
if (err != DOM_NO_ERR || parent_node == NULL) {
/* Ran out of ancestors: drop the walk's reference and
 * leave containing_block as NULL */
if (current_node != n)
dom_node_unref(current_node);
break;
}
if (current_node != n)
dom_node_unref(current_node);
b = box_for_node(parent_node);
/* Children of nodes that created an inline box
* will generate boxes which are attached as
* _siblings_ of the box generated for their
* parent node. Note, however, that we'll still
* use the parent node's styling as the parent
* style, above. */
if (b != NULL && b->type != BOX_INLINE &&
b->type != BOX_BR) {
props->containing_block = b;
dom_node_unref(parent_node);
break;
} else {
/* Keep climbing; parent_node's reference is carried
 * forward in current_node */
current_node = parent_node;
parent_node = NULL;
}
}
}
/* Compute current inline container, if any */
if (props->containing_block != NULL &&
props->containing_block->last != NULL &&
props->containing_block->last->type ==
BOX_INLINE_CONTAINER)
props->inline_container = props->containing_block->last;
}
/**
 * Select the style for an element.
 *
 * Any "style" attribute on the element is first turned into an inline
 * stylesheet, which is then fed into style selection alongside the
 * content's selection context. The inline sheet is destroyed again before
 * returning.
 *
 * \param c             content of type CONTENT_HTML that is being processed
 * \param parent_style  style at this point in xml tree, or NULL for root
 * \param root_style    root node's style, or NULL for root
 * \param n             node in xml tree
 * \return the selected styles, or NULL on failure (attribute lookup error,
 *         inline style creation failure, or selection failure)
 */
static css_select_results *
box_get_style(html_content *c,
	      const css_computed_style *parent_style,
	      const css_computed_style *root_style,
	      dom_node *n)
{
	dom_string *style_attr;
	css_stylesheet *inline_sheet = NULL;
	css_select_results *selected;
	nscss_select_ctx sel;

	/* Build the inline stylesheet from the style attribute, if present */
	if (dom_element_get_attribute(n, corestring_dom_style,
			&style_attr) != DOM_NO_ERR) {
		return NULL;
	}

	if (style_attr != NULL) {
		inline_sheet = nscss_create_inline_style(
				(const uint8_t *) dom_string_data(style_attr),
				dom_string_byte_length(style_attr),
				c->encoding,
				nsurl_access(c->base_url),
				c->quirks != DOM_DOCUMENT_QUIRKS_MODE_NONE);
		dom_string_unref(style_attr);

		if (inline_sheet == NULL) {
			return NULL;
		}
	}

	/* Describe the selection environment */
	sel.parent_style = parent_style;
	sel.root_style = root_style;
	sel.universal = c->universal;
	sel.base_url = c->base_url;
	sel.quirks = (c->quirks == DOM_DOCUMENT_QUIRKS_MODE_FULL);
	sel.ctx = c->select_ctx;

	selected = nscss_get_style(&sel, n, &c->media, inline_sheet);

	/* The inline sheet is only needed for the duration of selection */
	if (inline_sheet != NULL) {
		css_stylesheet_destroy(inline_sheet);
	}

	return selected;
}
/**
 * Construct the box required for a generated (pseudo) element.
 *
 * A box is only generated when the parent box is a block, the pseudo
 * element's content property is something other than normal, and the
 * pseudo element computes to display block or table. The generated box is
 * appended as a child of \a box.
 *
 * \param n        XML node of type XML_ELEMENT_NODE
 * \param content  Content of type CONTENT_HTML that is being processed
 * \param box      Box which may have generated content
 * \param style    Complete computed style for pseudo element, or NULL
 *
 * \todo This is currently incomplete. It just does enough to support
 * the clearfix hack. (http://www.positioniseverything.net/easyclearing.html )
 */
static void
box_construct_generate(dom_node *n,
		       html_content *content,
		       struct box *box,
		       const css_computed_style *style)
{
	const css_computed_content_item *content_item;
	enum css_display_e display;
	struct box *generated = NULL;

	/* Only block parents can carry generated content */
	if (box->type != BOX_BLOCK) {
		return;
	}

	/* A pseudo element exists only if its style was selected and its
	 * content property is not normal */
	if (style == NULL) {
		return;
	}
	if (css_computed_content(style, &content_item) == CSS_CONTENT_NORMAL) {
		return;
	}

	/* Currently only block level boxes are supported */
	display = ns_computed_display(style, box_is_root(n));
	if (display != CSS_DISPLAY_BLOCK && display != CSS_DISPLAY_TABLE) {
		return;
	}

	/** \todo Not wise to drop const from the computed style */
	generated = box_create(NULL, (css_computed_style *) style,
			false, NULL, NULL, NULL, NULL, content->bctx);
	if (generated == NULL) {
		return;
	}

	/* Box type follows the computed display */
	generated->type = box_map[ns_computed_display(style, box_is_root(n))];

	box_add_child(box, generated);
}
/**
 * Compute the index for a new list marker.
 *
 * Walks backwards through the sibling chain starting at \a last looking
 * for the most recent box that carries a list marker, and returns that
 * marker's index plus one.
 *
 * \param last  last existing child of the new item's parent, or NULL
 * \return one based index for the new list item (1 if no earlier marker)
 */
static unsigned int compute_list_marker_index(struct box *last)
{
	struct box *sibling;

	for (sibling = last; sibling != NULL; sibling = sibling->prev) {
		struct box *probe = sibling;

		/* Floated list boxes end up as:
		 *
		 * parent
		 *   BOX_INLINE_CONTAINER
		 *     BOX_FLOAT_{LEFT,RIGHT}
		 *       BOX_BLOCK <-- list box
		 *
		 * so descend through the last child of such wrappers until
		 * a marker is found or something else is reached.
		 */
		while (probe != NULL && probe->list_marker == NULL) {
			bool is_wrapper =
				probe->type == BOX_INLINE_CONTAINER ||
				probe->type == BOX_FLOAT_LEFT ||
				probe->type == BOX_FLOAT_RIGHT;

			probe = is_wrapper ? probe->last : NULL;
		}

		if (probe != NULL) {
			/* Found the previous list item */
			return probe->list_marker->rows + 1;
		}
	}

	/* No earlier marker: this is the first item */
	return 1;
}
/**
 * Roman numeral conversion
 *
 * Writes the roman numeral for \a value into \a buf using the seven
 * symbols in \a C, ordered from the symbol for 1000 down to the symbol
 * for 1. At most \a maxlen characters are written; the output is NUL
 * terminated only if there is room left after the last character.
 * Values of zero or less produce an empty result.
 *
 * \param buf     Output buffer
 * \param maxlen  Size of \a buf in bytes
 * \param value   Value to convert
 * \param C       Seven numeral symbols, largest denomination first
 * \return The number of characters that are necessary for full output,
 *         which may exceed \a maxlen when the output was truncated
 */
static int
ntoromannumeral(char *buf, const size_t maxlen, int value, const char *C)
{
	/* S[i] is the repeat count at which denomination i must switch to
	 * subtractive notation (0 means it may repeat without limit) */
	const int S[] = { 0, 2, 4, 2, 4, 2, 4 };
	const int D[] = { 1000, 500, 100, 50, 10, 5, 1 };
	const size_t L = sizeof(D) / sizeof(D[0]) - 1;
	size_t k = 0; /* characters required so far */
	size_t i = 0; /* index into maps */

	while (value > 0) {
		if (D[i] <= value) {
			int r = value / D[i];
			int r2 = 0;

			value = value - (r * D[i]);

			if (i < L) {
				/* lookahead */
				r2 = value / D[i + 1];
			}

			/* Neither subtractive branch can trigger for i == 0:
			 * S[0] is 0 and the remainder is below 1000, so the
			 * lookahead count is at most 1. C[i - 1] is thus
			 * always in range. */
			if (i < L && r2 >= S[i + 1]) {
				/* will violate repeat boundary on next pass */
				value = value - (r2 * D[i + 1]);
				if (k < maxlen) buf[k] = C[i + 1];
				k++;
				if (k < maxlen) buf[k] = C[i - 1];
				k++;
			} else if (S[i] && r >= S[i]) {
				/* violated repeat boundary on this pass */
				if (k < maxlen) buf[k] = C[i];
				k++;
				if (k < maxlen) buf[k] = C[i - 1];
				k++;
			} else {
				/* keep counting past the end of the buffer so
				 * the caller can detect truncation */
				for (; r > 0; r--) {
					if (k < maxlen) buf[k] = C[i];
					k++;
				}
			}
		}
		i++;
	}

	if (k < maxlen) {
		buf[k] = '\0';
	}

	return (int) k;
}
/**
 * Lower case roman numeral conversion.
 *
 * Thin wrapper around ntoromannumeral() supplying the lower case symbols.
 *
 * \param buf     Output buffer
 * \param maxlen  Size of \a buf
 * \param value   Value to convert
 * \return whatever ntoromannumeral() returns for these arguments
 */
static int ntolcromannumeral(char *buf, const size_t maxlen, int value)
{
	static const char lower_numerals[] = {
		'm', 'd', 'c', 'l', 'x', 'v', 'i'
	};

	return ntoromannumeral(buf, maxlen, value, lower_numerals);
}
/**
 * Upper case roman numeral conversion.
 *
 * Thin wrapper around ntoromannumeral() supplying the upper case symbols.
 *
 * \param buf     Output buffer
 * \param maxlen  Size of \a buf
 * \param value   Value to convert
 * \return whatever ntoromannumeral() returns for these arguments
 */
static int ntoucromannumeral(char *buf, const size_t maxlen, int value)
{
	static const char upper_numerals[] = {
		'M', 'D', 'C', 'L', 'X', 'V', 'I'
	};

	return ntoromannumeral(buf, maxlen, value, upper_numerals);
}
/**
 * Generate alphabet symbol values for latin and greek labelling
 *
 * Converts \a value (one based) into a sequence of zero based symbol
 * indices, most significant first, in the bijective numbering used for
 * alphabetic list labels (1 -> {0}, slen -> {slen-1}, slen+1 -> {0,0}).
 *
 * If the conversion needs more than \a alen entries only the first
 * \a alen generated (least significant) values are stored; callers should
 * treat a return value larger than \a alen as "did not fit".
 *
 * \param ares   Buffer to receive the converted values
 * \param alen   The length of the \a ares buffer
 * \param value  The value to convert; values of zero or less yield nothing
 * \param slen   The number of symbols in the alphabet
 * \return The length of a complete conversion, which may be larger than
 *         \a alen; 0 if \a slen is zero
 */
static size_t
calc_alphabet_values(uint8_t *ares,
		     const size_t alen,
		     int value,
		     unsigned char slen)
{
	size_t idx = 0;
	size_t stored;
	size_t lo;
	size_t hi;

	if (slen == 0) {
		/* an empty alphabet cannot represent anything (and would
		 * divide by zero below) */
		return 0;
	}

	/* generate alphabet values in ascending order */
	while (value > 0) {
		--value;
		if (idx < alen) {
			ares[idx] = (uint8_t) (value % slen);
		}
		idx++;
		value = value / slen;
	}

	/* put the stored values in descending order; nothing to do for
	 * zero or one stored value, which also avoids forming a pointer
	 * before the start of the buffer */
	stored = (idx < alen) ? idx : alen;
	if (stored > 1) {
		for (lo = 0, hi = stored - 1; lo < hi; lo++, hi--) {
			uint8_t tmp = ares[lo];
			ares[lo] = ares[hi];
			ares[hi] = tmp;
		}
	}

	return idx;
}
/**
 * Maps alphabet values to output values with a symbol table
 *
 * Takes a list of alphabet values and for each one outputs the
 * complete symbol (in utf8) to an output buffer. Each table entry holds
 * up to four bytes; shorter symbols are zero padded. The output is not
 * NUL terminated. Values that fall outside the symbol table are skipped.
 *
 * \param buf        The output buffer
 * \param buflen     The length of \a buf
 * \param aval       Array of alphabet values
 * \param alen       The number of values in \a aval
 * \param symtab     The symbol table
 * \param symtablen  The number of symbols in \a symtab
 * \return The number of bytes needed in the output buffer, which may be
 *         larger than \a buflen, but the buffer will not be overrun
 */
static int
map_aval_to_symbols(char *buf, const size_t buflen,
		    const uint8_t *aval, const size_t alen,
		    const char symtab[][4], const size_t symtablen)
{
	size_t oidx = 0;
	size_t aidx;

	for (aidx = 0; aidx < alen; aidx++) {
		const char *sym;
		size_t sidx;

		if (aval[aidx] >= symtablen) {
			/* no such symbol: never read past the table */
			continue;
		}

		sym = symtab[aval[aidx]];
		for (sidx = 0; sidx < 4 && sym[sidx] != 0; sidx++) {
			if (oidx < buflen) {
				buf[oidx] = sym[sidx];
			}
			oidx++;
		}
	}

	return (int) oidx;
}
/**
 * Format a value as a lower case latin alphabetic label.
 *
 * \param buf     Output buffer
 * \param buflen  Size of \a buf
 * \param value   One based value to format
 * \return result of map_aval_to_symbols(), or 1 with \a buf holding '?'
 *         when the value needs too many symbols
 */
static int ntolcalpha(char *buf, const size_t buflen, int value)
{
	static const char letters[][4] = {
		"a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
		"k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
		"u", "v", "w", "x", "y", "z"
	};
	const size_t nletters = sizeof(letters) / sizeof(letters[0]);
	uint8_t digits[20];
	size_t ndigits;

	ndigits = calc_alphabet_values(digits, sizeof(digits), value, nletters);
	if (ndigits < sizeof(digits)) {
		return map_aval_to_symbols(buf, buflen, digits, ndigits,
				letters, nletters);
	}

	/* too many symbols required: emit a placeholder instead */
	*buf = '?';
	return 1;
}
/**
 * Format a value as an upper case latin alphabetic label.
 *
 * \param buf     Output buffer
 * \param buflen  Size of \a buf
 * \param value   One based value to format
 * \return result of map_aval_to_symbols(), or 1 with \a buf holding '?'
 *         when the value needs too many symbols
 */
static int ntoucalpha(char *buf, const size_t buflen, int value)
{
	static const char letters[][4] = {
		"A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
		"K", "L", "M", "N", "O", "P", "Q", "R", "S", "T",
		"U", "V", "W", "X", "Y", "Z"
	};
	const size_t nletters = sizeof(letters) / sizeof(letters[0]);
	uint8_t digits[20];
	size_t ndigits;

	ndigits = calc_alphabet_values(digits, sizeof(digits), value, nletters);
	if (ndigits < sizeof(digits)) {
		return map_aval_to_symbols(buf, buflen, digits, ndigits,
				letters, nletters);
	}

	/* too many symbols required: emit a placeholder instead */
	*buf = '?';
	return 1;
}
/**
 * Format a value as a lower case greek alphabetic label.
 *
 * \param buf     Output buffer
 * \param buflen  Size of \a buf
 * \param value   One based value to format
 * \return result of map_aval_to_symbols(), or 1 with \a buf holding '?'
 *         when the value needs too many symbols
 */
static int ntolcgreek(char *buf, const size_t buflen, int value)
{
	static const char letters[][4] = {
		"α", "β", "γ", "δ", "ε", "ζ", "η", "θ", "ι", "κ",
		"λ", "μ", "ν", "ξ", "ο", "π", "ρ", "σ", "τ", "υ",
		"φ", "χ", "ψ", "ω"
	};
	const size_t nletters = sizeof(letters) / sizeof(letters[0]);
	uint8_t digits[20];
	size_t ndigits;

	ndigits = calc_alphabet_values(digits, sizeof(digits), value, nletters);
	if (ndigits < sizeof(digits)) {
		return map_aval_to_symbols(buf, buflen, digits, ndigits,
				letters, nletters);
	}

	/* too many symbols required: emit a placeholder instead */
	*buf = '?';
	return 1;
}
/**
 * Format a value into list marker text for a given list style
 *
 * The value is the one based index of the item in its list and is
 * rendered as-is in the requested numbering system, followed by a '.'
 * and a NUL terminator. Output that does not fit is cut short so that
 * the '.' and terminator always fit.
 *
 * \param text             Buffer to receive the marker text
 * \param text_len         Size of \a text in bytes
 * \param list_style_type  Numbering style to use
 * \param value            One based list index
 * \return number of bytes written to \a text, counting both the '.' and
 *         the terminator, or 0 on error or if \a text_len is below 2
 */
static size_t
format_list_marker_value(char *text,
			 size_t text_len,
			 enum css_list_style_type_e list_style_type,
			 unsigned int value)
{
	int res = -1;

	/* There must be room for at least the '.' and the terminator;
	 * without this check text_len - 2 below would wrap around */
	if (text_len < 2) {
		if (text_len == 1) {
			text[0] = 0;
		}
		return 0;
	}

	switch (list_style_type) {
	case CSS_LIST_STYLE_TYPE_DECIMAL_LEADING_ZERO:
		res = snprintf(text, text_len, "%02u", value);
		break;
	case CSS_LIST_STYLE_TYPE_LOWER_ROMAN:
		res = ntolcromannumeral(text, text_len, value);
		break;
	case CSS_LIST_STYLE_TYPE_UPPER_ROMAN:
		res = ntoucromannumeral(text, text_len, value);
		break;
	case CSS_LIST_STYLE_TYPE_LOWER_ALPHA:
	case CSS_LIST_STYLE_TYPE_LOWER_LATIN:
		res = ntolcalpha(text, text_len, value);
		break;
	case CSS_LIST_STYLE_TYPE_UPPER_ALPHA:
	case CSS_LIST_STYLE_TYPE_UPPER_LATIN:
		res = ntoucalpha(text, text_len, value);
		break;
	case CSS_LIST_STYLE_TYPE_LOWER_GREEK:
		res = ntolcgreek(text, text_len, value);
		break;
	case CSS_LIST_STYLE_TYPE_ARMENIAN:
	case CSS_LIST_STYLE_TYPE_GEORGIAN:
	case CSS_LIST_STYLE_TYPE_DECIMAL:
	default:
		/* styles without a dedicated formatter fall back to decimal */
		res = snprintf(text, text_len, "%u", value);
		break;
	}

	/* deal with error */
	if (res < 0) {
		text[0] = 0;
		return 0;
	}

	/* deal with overflow: keep room for the suffix */
	if ((size_t)res >= (text_len - 2)) {
		res = text_len - 2;
	}

	text[res++] = '.';
	text[res++] = 0;

	return res;
}
/**
* Construct a list marker box
*
* \param box Box to attach marker to
* \param title Current title attribute
* \param ctx Box construction context
* \param parent Current block-level container
* \return true on success, false on memory exhaustion
*/
static bool
box_construct_marker(struct box *box,
const char *title,
struct box_construct_ctx *ctx,
struct box *parent)
{
lwc_string *image_uri;
struct box *marker;
enum css_list_style_type_e list_style_type;
marker = box_create(NULL, box->style, false, NULL, NULL, title,
NULL, ctx->bctx);
if (marker == false)
return false;
marker->type = BOX_BLOCK;
list_style_type = css_computed_list_style_type(box->style);
/** \todo marker content (list-style-type) */
switch (list_style_type) {
case CSS_LIST_STYLE_TYPE_DISC:
/* 2022 BULLET */
marker->text = (char *) "\342\200\242";
marker->length = 3;
break;
case CSS_LIST_STYLE_TYPE_CIRCLE:
/* 25CB WHITE CIRCLE */
marker->text = (char *) "\342\227\213";
marker->length = 3;
break;
case CSS_LIST_STYLE_TYPE_SQUARE:
/* 25AA BLACK SMALL SQUARE */
marker->text = (char *) "\342\226\252";
marker->length = 3;
break;
case CSS_LIST_STYLE_TYPE_NONE:
marker->text = 0;
marker->length = 0;
break;
default:
marker->rows = compute_list_marker_index(parent->last);
marker->text = talloc_array(ctx->bctx, char, 20);
if (marker->text == NULL)
return false;
marker->length = format_list_marker_value(marker->text,
20,
list_style_type,
marker->rows);
break;
}
if (css_computed_list_style_image(box->style, &image_uri) == CSS_LIST_STYLE_IMAGE_URI &&
(image_uri != NULL) &&
(nsoption_bool(foreground_images) == true)) {
nsurl *url;
nserror error;
/* TODO: we get a url out of libcss as a lwc string, but
* earlier we already had it as a nsurl after we
* nsurl_joined it. Can this be improved?
* For now, just making another nsurl. */
error = nsurl_create(lwc_string_data(image_uri), &url);
if (error != NSERROR_OK)
return false;
if (html_fetch_object(ctx->content,
url,
marker,
image_types,
false) == false) {
nsurl_unref(url);
return false;
}
nsurl_unref(url);
}
box->list_marker = marker;
marker->parent = box;
return true;
}
/**
 * Construct the box tree for an XML element.
 *
 * Selects the element's style, creates its box, works out the box type
 * from the computed display, lets convert_special_elements() handle
 * special elements, generates any :before box, and finally links the box
 * into the tree (creating an inline container or float wrapper where
 * needed). Elements that end up with no box type or display: none get no
 * place in the tree and have \a convert_children cleared.
 *
 * \param ctx               Tree construction context
 * \param convert_children  Whether to convert children
 * \return true on success, false on failure
 */
static bool
box_construct_element(struct box_construct_ctx *ctx, bool *convert_children)
{
	dom_string *title0, *s;
	lwc_string *id = NULL;
	struct box *box = NULL, *old_box;
	css_select_results *styles = NULL;
	lwc_string *bgimage_uri;
	dom_exception err;
	struct box_construct_props props;
	const css_computed_style *root_style = NULL;

	assert(ctx->n != NULL);

	box_extract_properties(ctx->n, &props);

	if (props.containing_block != NULL) {
		/* In case the containing block is a pre block, we clear
		 * the PRE_STRIP flag since it is not used if we follow
		 * the pre with a tag */
		props.containing_block->flags &= ~PRE_STRIP;
	}

	if (props.node_is_root == false) {
		root_style = ctx->root_box->style;
	}

	styles = box_get_style(ctx->content, props.parent_style, root_style,
			ctx->n);
	if (styles == NULL)
		return false;

	/* Until a box has been created nothing else refers to the
	 * selection results, so every early exit below must destroy them. */

	/* Extract title attribute, if present */
	err = dom_element_get_attribute(ctx->n, corestring_dom_title, &title0);
	if (err != DOM_NO_ERR) {
		css_select_results_destroy(styles);
		return false;
	}

	if (title0 != NULL) {
		char *t = squash_whitespace(dom_string_data(title0));

		dom_string_unref(title0);

		if (t == NULL) {
			css_select_results_destroy(styles);
			return false;
		}

		props.title = talloc_strdup(ctx->bctx, t);

		free(t);

		if (props.title == NULL) {
			css_select_results_destroy(styles);
			return false;
		}
	}

	/* Extract id attribute, if present */
	err = dom_element_get_attribute(ctx->n, corestring_dom_id, &s);
	if (err != DOM_NO_ERR) {
		css_select_results_destroy(styles);
		return false;
	}

	if (s != NULL) {
		err = dom_string_intern(s, &id);
		if (err != DOM_NO_ERR)
			id = NULL;

		dom_string_unref(s);
	}

	box = box_create(styles, styles->styles[CSS_PSEUDO_ELEMENT_NONE], false,
			props.href, props.target, props.title, id,
			ctx->bctx);
	if (box == NULL) {
		/* No box was created to take over the interned id or the
		 * selection results, so release both here.
		 * NOTE(review): assumes box_create() does not release id
		 * itself on failure - confirm against box_manipulate.c */
		if (id != NULL) {
			lwc_string_unref(id);
		}
		css_select_results_destroy(styles);
		return false;
	}

	/* If this is the root box, add it to the context */
	if (props.node_is_root)
		ctx->root_box = box;

	/* Deal with colspan/rowspan */
	err = dom_element_get_attribute(ctx->n, corestring_dom_colspan, &s);
	if (err != DOM_NO_ERR)
		return false;

	if (s != NULL) {
		const char *val = dom_string_data(s);

		/* only accept values that start with a digit */
		if ('0' <= val[0] && val[0] <= '9')
			box->columns = strtol(val, NULL, 10);

		dom_string_unref(s);
	}

	err = dom_element_get_attribute(ctx->n, corestring_dom_rowspan, &s);
	if (err != DOM_NO_ERR)
		return false;

	if (s != NULL) {
		const char *val = dom_string_data(s);

		/* only accept values that start with a digit */
		if ('0' <= val[0] && val[0] <= '9')
			box->rows = strtol(val, NULL, 10);

		dom_string_unref(s);
	}

	/* Set box type from computed display */
	if ((css_computed_position(box->style) == CSS_POSITION_ABSOLUTE ||
	     css_computed_position(box->style) ==
	     CSS_POSITION_FIXED) &&
	    (ns_computed_display_static(box->style) ==
	     CSS_DISPLAY_INLINE ||
	     ns_computed_display_static(box->style) ==
	     CSS_DISPLAY_INLINE_BLOCK ||
	     ns_computed_display_static(box->style) ==
	     CSS_DISPLAY_INLINE_TABLE)) {
		/* Special case for absolute positioning: make absolute inlines
		 * into inline block so that the boxes are constructed in an
		 * inline container as if they were not absolutely positioned.
		 * Layout expects and handles this. */
		box->type = box_map[CSS_DISPLAY_INLINE_BLOCK];
	} else if (props.node_is_root) {
		/* Special case for root element: force it to BLOCK, or the
		 * rest of the layout will break. */
		box->type = BOX_BLOCK;
	} else {
		/* Normal mapping */
		box->type = box_map[ns_computed_display(box->style,
				props.node_is_root)];
	}

	if (convert_special_elements(ctx->n,
				     ctx->content,
				     box,
				     convert_children) == false) {
		return false;
	}

	/* Handle the :before pseudo element */
	if (!(box->flags & IS_REPLACED)) {
		box_construct_generate(ctx->n, ctx->content, box,
				box->styles->styles[CSS_PSEUDO_ELEMENT_BEFORE]);
	}

	if (box->type == BOX_NONE ||
	    (ns_computed_display(box->style,
				 props.node_is_root) == CSS_DISPLAY_NONE &&
	     props.node_is_root == false)) {
		/* The element generates no box: drop its styles and do not
		 * descend into its children */
		css_select_results_destroy(styles);
		box->styles = NULL;
		box->style = NULL;

		/* Invalidate associated gadget, if any */
		if (box->gadget != NULL) {
			box->gadget->box = NULL;
			box->gadget = NULL;
		}

		/* Can't do this, because the lifetimes of boxes and gadgets
		 * are inextricably linked. Fortunately, talloc will save us
		 * (for now) */
		/* box_free_box(box); */

		*convert_children = false;

		return true;
	}

	/* Attach DOM node to box */
	err = dom_node_set_user_data(ctx->n,
			corestring_dom___ns_key_box_node_data, box, NULL,
			(void *) &old_box);
	if (err != DOM_NO_ERR)
		return false;

	/* Attach box to DOM node */
	box->node = dom_node_ref(ctx->n);

	if (props.inline_container == NULL &&
	    (box->type == BOX_INLINE ||
	     box->type == BOX_BR ||
	     box->type == BOX_INLINE_BLOCK ||
	     css_computed_float(box->style) == CSS_FLOAT_LEFT ||
	     css_computed_float(box->style) == CSS_FLOAT_RIGHT) &&
	    props.node_is_root == false) {
		/* Found an inline child of a block without a current container
		 * (i.e. this box is the first child of its parent, or was
		 * preceded by block-level siblings) */
		assert(props.containing_block != NULL &&
		       "Box must have containing block.");

		props.inline_container = box_create(NULL, NULL, false, NULL,
				NULL, NULL, NULL, ctx->bctx);
		if (props.inline_container == NULL)
			return false;

		props.inline_container->type = BOX_INLINE_CONTAINER;

		box_add_child(props.containing_block, props.inline_container);
	}

	/* Kick off fetch for any background image */
	if (css_computed_background_image(box->style, &bgimage_uri) ==
	    CSS_BACKGROUND_IMAGE_IMAGE && bgimage_uri != NULL &&
	    nsoption_bool(background_images) == true) {
		nsurl *url;
		nserror error;

		/* TODO: we get a url out of libcss as a lwc string, but
		 * earlier we already had it as a nsurl after we
		 * nsurl_joined it. Can this be improved?
		 * For now, just making another nsurl. */
		error = nsurl_create(lwc_string_data(bgimage_uri), &url);
		if (error == NSERROR_OK) {
			/* Fetch image if we got a valid URL */
			if (html_fetch_object(ctx->content,
					      url,
					      box,
					      image_types,
					      true) == false) {
				nsurl_unref(url);
				return false;
			}
			nsurl_unref(url);
		}
	}

	/* Remember the decision so that completion of this element knows
	 * whether its children were converted */
	if (*convert_children)
		box->flags |= CONVERT_CHILDREN;

	if (box->type == BOX_INLINE || box->type == BOX_BR ||
	    box->type == BOX_INLINE_BLOCK) {
		/* Inline container must exist, as we'll have
		 * created it above if it didn't */
		assert(props.inline_container != NULL);

		box_add_child(props.inline_container, box);
	} else {
		if (ns_computed_display(box->style, props.node_is_root) ==
		    CSS_DISPLAY_LIST_ITEM) {
			/* List item: compute marker */
			if (box_construct_marker(box, props.title, ctx,
					props.containing_block) == false)
				return false;
		}

		if (props.node_is_root == false &&
		    (css_computed_float(box->style) ==
		     CSS_FLOAT_LEFT ||
		     css_computed_float(box->style) ==
		     CSS_FLOAT_RIGHT)) {
			/* Float: insert a float between the parent and box. */
			struct box *flt = box_create(NULL, NULL, false,
					props.href, props.target, props.title,
					NULL, ctx->bctx);
			if (flt == NULL)
				return false;

			if (css_computed_float(box->style) == CSS_FLOAT_LEFT)
				flt->type = BOX_FLOAT_LEFT;
			else
				flt->type = BOX_FLOAT_RIGHT;

			box_add_child(props.inline_container, flt);
			box_add_child(flt, box);
		} else {
			/* Non-floated block-level box: add to containing block
			 * if there is one. If we're the root box, then there
			 * won't be. */
			if (props.containing_block != NULL)
				box_add_child(props.containing_block, box);
		}
	}

	return true;
}
/**
 * Complete construction of the box tree for an element.
 *
 * For inline (and BR) boxes whose children were converted, a matching
 * BOX_INLINE_END box is appended to the current inline container and
 * cross-linked with the opening box. For other, non-replaced boxes the
 * :after pseudo element is generated.
 *
 * \param n        DOM node to construct for
 * \param content  Containing document
 *
 * This will be called after all children of an element have been processed
 */
static void box_construct_element_after(dom_node *n, html_content *content)
{
	struct box_construct_props props;
	struct box *box = box_for_node(n);

	assert(box != NULL);

	box_extract_properties(n, &props);

	if (box->type == BOX_INLINE || box->type == BOX_BR) {
		/* Insert INLINE_END into containing block */
		struct box *inline_end;
		lwc_string *end_id = NULL;
		bool has_children;
		dom_exception err;

		err = dom_node_has_child_nodes(n, &has_children);
		if (err != DOM_NO_ERR)
			return;

		if (has_children == false ||
		    (box->flags & CONVERT_CHILDREN) == 0) {
			/* No children, or didn't want children converted */
			return;
		}

		if (props.inline_container == NULL) {
			/* Create inline container if we don't have one */
			props.inline_container = box_create(NULL, NULL, false,
					NULL, NULL, NULL, NULL, content->bctx);
			if (props.inline_container == NULL)
				return;

			props.inline_container->type = BOX_INLINE_CONTAINER;

			box_add_child(props.containing_block,
					props.inline_container);
		}

		/* The end box gets its own reference to the id */
		if (box->id != NULL) {
			end_id = lwc_string_ref(box->id);
		}

		inline_end = box_create(NULL, box->style, false,
				box->href, box->target, box->title,
				end_id, content->bctx);
		if (inline_end == NULL) {
			/* No box took over the reference, so give it back.
			 * NOTE(review): assumes box_create() does not release
			 * the id itself on failure - confirm against
			 * box_manipulate.c */
			if (end_id != NULL) {
				lwc_string_unref(end_id);
			}
			return;
		}

		inline_end->type = BOX_INLINE_END;

		assert(props.inline_container != NULL);

		box_add_child(props.inline_container, inline_end);

		/* Link the start and end boxes to each other */
		box->inline_end = inline_end;
		inline_end->inline_end = box;
	} else if (!(box->flags & IS_REPLACED)) {
		/* Handle the :after pseudo element */
		box_construct_generate(n, content, box,
				box->styles->styles[CSS_PSEUDO_ELEMENT_AFTER]);
	}
}
/**
 * Find the next node in the DOM tree, completing element construction
 * where appropriate.
 *
 * The traversal is depth first: the first child is taken when children
 * are wanted and present, otherwise the next sibling, otherwise the next
 * sibling of the nearest ancestor that has one. Every node that is left
 * behind for good (it has no further descendants to visit) and that has a
 * box is passed to box_construct_element_after().
 *
 * \param n Current node
 * \param content Containing content
 * \param convert_children Whether to consider children of \a n
 * \return Next node to process, or NULL if complete (also NULL on DOM error)
 *
 * \note \a n will be unreferenced
 */
static dom_node *
next_node(dom_node *n, html_content *content, bool convert_children)
{
dom_node *next = NULL;
bool has_children;
dom_exception err;
err = dom_node_has_child_nodes(n, &has_children);
if (err != DOM_NO_ERR) {
dom_node_unref(n);
return NULL;
}
if (convert_children && has_children) {
/* Descend: n is not finished yet, so no completion call here */
err = dom_node_get_first_child(n, &next);
if (err != DOM_NO_ERR) {
dom_node_unref(n);
return NULL;
}
dom_node_unref(n);
} else {
err = dom_node_get_next_sibling(n, &next);
if (err != DOM_NO_ERR) {
dom_node_unref(n);
return NULL;
}
if (next != NULL) {
/* Move sideways: n is finished */
if (box_for_node(n) != NULL)
box_construct_element_after(n, content);
dom_node_unref(n);
} else {
/* Last child of its parent: n is finished, and so is every
 * ancestor that has no following sibling */
if (box_for_node(n) != NULL)
box_construct_element_after(n, content);
while (box_is_root(n) == false) {
dom_node *parent = NULL;
dom_node *parent_next = NULL;
err = dom_node_get_parent_node(n, &parent);
if (err != DOM_NO_ERR) {
dom_node_unref(n);
return NULL;
}
assert(parent != NULL);
err = dom_node_get_next_sibling(parent,
&parent_next);
if (err != DOM_NO_ERR) {
dom_node_unref(parent);
dom_node_unref(n);
return NULL;
}
if (parent_next != NULL) {
/* Parent has a following sibling: stop climbing with n
 * still pointing at the child; the block after the loop
 * fetches that sibling again and completes the parent */
dom_node_unref(parent_next);
dom_node_unref(parent);
break;
}
/* Climb one level; n takes over the reference to parent */
dom_node_unref(n);
n = parent;
parent = NULL;
if (box_for_node(n) != NULL) {
box_construct_element_after(
n, content);
}
}
if (box_is_root(n) == false) {
/* Loop stopped below the root: continue with the parent's
 * next sibling and complete the parent */
dom_node *parent = NULL;
err = dom_node_get_parent_node(n, &parent);
if (err != DOM_NO_ERR) {
dom_node_unref(n);
return NULL;
}
assert(parent != NULL);
err = dom_node_get_next_sibling(parent, &next);
if (err != DOM_NO_ERR) {
dom_node_unref(parent);
dom_node_unref(n);
return NULL;
}
if (box_for_node(parent) != NULL) {
box_construct_element_after(parent,
content);
}
dom_node_unref(parent);
}
/* If the root was reached, next is still NULL: traversal complete */
dom_node_unref(n);
}
}
return next;
}
/**
 * Apply the CSS text-transform property to the ASCII characters of a text.
 *
 * Bytes with the top bit set (0x80 and above) are left untouched, so
 * multi-byte UTF-8 sequences pass through unchanged. The text is
 * modified in place.
 *
 * \param s    string to transform
 * \param len  length of s
 * \param tt   transform type
 */
static void
box_text_transform(char *s, unsigned int len, enum css_text_transform_e tt)
{
	char *cur;
	char *end;

	if (len == 0)
		return;

	end = s + len;

	if (tt == CSS_TEXT_TRANSFORM_UPPERCASE) {
		for (cur = s; cur != end; cur++) {
			if ((unsigned char) *cur < 0x80) {
				*cur = ascii_to_upper(*cur);
			}
		}
	} else if (tt == CSS_TEXT_TRANSFORM_LOWERCASE) {
		for (cur = s; cur != end; cur++) {
			if ((unsigned char) *cur < 0x80) {
				*cur = ascii_to_lower(*cur);
			}
		}
	} else if (tt == CSS_TEXT_TRANSFORM_CAPITALIZE) {
		/* The first character, and any character that follows a
		 * space, starts a word */
		bool word_start = true;

		for (cur = s; cur != end; cur++) {
			if (word_start && (unsigned char) *cur < 0x80) {
				*cur = ascii_to_upper(*cur);
			}
			word_start = ascii_is_space(*cur);
		}
	}
	/* any other transform: leave the text as it is */
}
/**
* Construct the box tree for an XML text node.
*
* \param ctx Tree construction context
* \return true on success, false on memory exhaustion
*/
static bool box_construct_text(struct box_construct_ctx *ctx)
{
struct box_construct_props props;
struct box *box = NULL;
dom_string *content;
dom_exception err;
assert(ctx->n != NULL);
box_extract_properties(ctx->n, &props);
assert(props.containing_block != NULL);
err = dom_characterdata_get_data(ctx->n, &content);
if (err != DOM_NO_ERR || content == NULL)
return false;
if (css_computed_white_space(props.parent_style) ==
CSS_WHITE_SPACE_NORMAL ||
css_computed_white_space(props.parent_style) ==
CSS_WHITE_SPACE_NOWRAP) {
char *text;
text = squash_whitespace(dom_string_data(content));
dom_string_unref(content);
if (text == NULL)
return false;
/* if the text is just a space, combine it with the preceding
* text node, if any */
if (text[0] == ' ' && text[1] == 0) {
if (props.inline_container != NULL) {
assert(props.inline_container->last != NULL);
props.inline_container->last->space =
UNKNOWN_WIDTH;
}
free(text);
return true;
}
if (props.inline_container == NULL) {
/* Child of a block without a current container
* (i.e. this box is the first child of its parent, or
* was preceded by block-level siblings) */
props.inline_container = box_create(NULL, NULL, false,
NULL, NULL, NULL, NULL, ctx->bctx);
if (props.inline_container == NULL) {
free(text);
return false;
}
props.inline_container->type = BOX_INLINE_CONTAINER;
box_add_child(props.containing_block,
props.inline_container);
}
/** \todo Dropping const here is not clever */
box = box_create(NULL,
(css_computed_style *) props.parent_style,
false, props.href, props.target, props.title,
NULL, ctx->bctx);
if (box == NULL) {
free(text);
return false;
}
box->type = BOX_TEXT;
box->text = talloc_strdup(ctx->bctx, text);
free(text);
if (box->text == NULL)
return false;
box->length = strlen(box->text);
/* strip ending space char off */
if (box->length > 1 && box->text[box->length - 1] == ' ') {
box->space = UNKNOWN_WIDTH;
box->length--;
}
if (css_computed_text_transform(props.parent_style) !=
CSS_TEXT_TRANSFORM_NONE)
box_text_transform(box->text, box->length,
css_computed_text_transform(
props.parent_style));
box_add_child(props.inline_container, box);
if (box->text[0] == ' ') {
box->length--;
memmove(box->text, &box->text[1], box->length);
if (box->prev != NULL)
box->prev->space = UNKNOWN_WIDTH;
}
} else {
/* white-space: pre */
char *text;
size_t text_len = dom_string_byte_length(content);
size_t i;
char *current;
enum css_white_space_e white_space =
css_computed_white_space(props.parent_style);
/* note: pre-wrap/pre-line are unimplemented */
assert(white_space == CSS_WHITE_SPACE_PRE ||
white_space == CSS_WHITE_SPACE_PRE_LINE ||
white_space == CSS_WHITE_SPACE_PRE_WRAP);
text = malloc(text_len + 1);
dom_string_unref(content);
if (text == NULL)
return false;
memcpy(text, dom_string_data(content), text_len);
text[text_len] = '\0';
/* TODO: Handle tabs properly */
for (i = 0; i < text_len; i++)
if (text[i] == '\t')
text[i] = ' ';
if (css_computed_text_transform(props.parent_style) !=
CSS_TEXT_TRANSFORM_NONE)
box_text_transform(text, strlen(text),
css_computed_text_transform(
props.parent_style));
current = text;
/* swallow a single leading new line */
if (props.containing_block->flags & PRE_STRIP) {
switch (*current) {
case '\n':
current++;
break;
case '\r':
current++;
if (*current == '\n')
current++;
break;
}
props.containing_block->flags &= ~PRE_STRIP;
}
do {
size_t len = strcspn(current, "\r\n");
char old = current[len];
current[len] = 0;
if (props.inline_container == NULL) {
/* Child of a block without a current container
* (i.e. this box is the first child of its
* parent, or was preceded by block-level
* siblings) */
props.inline_container = box_create(NULL, NULL,
false, NULL, NULL, NULL, NULL,
ctx->bctx);
if (props.inline_container == NULL) {
free(text);
return false;
}
props.inline_container->type =
BOX_INLINE_CONTAINER;
box_add_child(props.containing_block,
props.inline_container);
}
/** \todo Dropping const isn't clever */
box = box_create(NULL,
(css_computed_style *) props.parent_style,
false, props.href, props.target, props.title,
NULL, ctx->bctx);
if (box == NULL) {
free(text);
return false;
}
box->type = BOX_TEXT;
box->text = talloc_strdup(ctx->bctx, current);
if (box->text == NULL) {
free(text);
return false;
}
box->length = strlen(box->text);
box_add_child(props.inline_container, box);
current[len] = old;
current += len;
if (current[0] != '\0') {
/* Linebreak: create new inline container */
props.inline_container = box_create(NULL, NULL,
false, NULL, NULL, NULL, NULL,
ctx->bctx);
if (props.inline_container == NULL) {
free(text);
return false;
}
props.inline_container->type =
BOX_INLINE_CONTAINER;
box_add_child(props.containing_block,
props.inline_container);
if (current[0] == '\r' && current[1] == '\n')
current += 2;
else
current++;
}
} while (*current);
free(text);
}
return true;
}
/**
 * Convert an ELEMENT node to a box tree fragment,
 * then schedule conversion of the next ELEMENT node
 *
 * Each invocation converts at most ten element nodes (together with any
 * text nodes found between them) and then re-schedules itself, so that a
 * large document is converted in slices rather than in one go.
 *
 * The function owns \a ctx: on completion or on any failure it invokes the
 * completion callback (true on success, false on failure) and frees the
 * context.  On failure paths the node reference currently held is released
 * first.
 *
 * \param ctx  Conversion context; ctx->n is the element to convert next.
 */
static void convert_xml_to_box(struct box_construct_ctx *ctx)
{
	dom_node *next;
	bool convert_children;
	uint32_t num_processed = 0;
	const uint32_t max_processed_before_yield = 10;

	do {
		convert_children = true;

		assert(ctx->n != NULL);

		if (box_construct_element(ctx, &convert_children) == false) {
			ctx->cb(ctx->content, false);
			dom_node_unref(ctx->n);
			free(ctx);
			return;
		}

		/* Find next element to process, converting text nodes as we go.
		 * NOTE(review): next_node() appears to hand back a referenced
		 * node and to consume the reference on the node passed in;
		 * confirm against its definition. */
		next = next_node(ctx->n, ctx->content, convert_children);
		while (next != NULL) {
			dom_node_type type;
			dom_exception err;

			err = dom_node_get_node_type(next, &type);
			if (err != DOM_NO_ERR) {
				ctx->cb(ctx->content, false);
				dom_node_unref(next);
				free(ctx);
				return;
			}

			/* Elements are handled by the outer loop */
			if (type == DOM_ELEMENT_NODE)
				break;

			if (type == DOM_TEXT_NODE) {
				ctx->n = next;
				if (box_construct_text(ctx) == false) {
					ctx->cb(ctx->content, false);
					dom_node_unref(ctx->n);
					free(ctx);
					return;
				}
			}

			/* Any other node type is stepped over */
			next = next_node(next, ctx->content, true);
		}

		ctx->n = next;

		if (next == NULL) {
			/* Conversion complete */
			struct box root;

			/* Temporary parent on the stack so that the root box
			 * can be normalised like any other child */
			memset(&root, 0, sizeof(root));

			root.type = BOX_BLOCK;
			root.children = root.last = ctx->root_box;
			root.children->parent = &root;

			/** \todo Remove box_normalise_block */
			if (box_normalise_block(&root, ctx->root_box,
					ctx->content) == false) {
				ctx->cb(ctx->content, false);
			} else {
				/* Detach from the temporary parent before
				 * publishing the layout */
				ctx->content->layout = root.children;
				ctx->content->layout->parent = NULL;

				ctx->cb(ctx->content, true);
			}

			assert(ctx->n == NULL);

			free(ctx);
			return;
		}
	} while (++num_processed < max_processed_before_yield);

	/* More work to do: schedule a continuation.  If that cannot be done
	 * nothing would ever resume the conversion, so treat it like every
	 * other failure: report it, drop the node reference and free the
	 * context. */
	if (guit->misc->schedule(0, (void *)convert_xml_to_box, ctx) !=
			NSERROR_OK) {
		ctx->cb(ctx->content, false);
		dom_node_unref(ctx->n);
		free(ctx);
	}
}
/* exported function documented in html/box_construct.h */
nserror
dom_to_box(dom_node *n,
	   html_content *c,
	   box_construct_complete_cb cb,
	   void **box_conversion_context)
{
	struct box_construct_ctx *conv;

	assert(box_conversion_context != NULL);

	/* The content's box allocation context is created on first use */
	if (c->bctx == NULL) {
		c->bctx = talloc_zero(0, int);
	}
	if (c->bctx == NULL) {
		return NSERROR_NOMEM;
	}

	conv = malloc(sizeof *conv);
	if (conv == NULL) {
		return NSERROR_NOMEM;
	}

	/* Populate the conversion state; it takes its own reference on the
	 * starting node */
	conv->content = c;
	conv->bctx = c->bctx;
	conv->cb = cb;
	conv->root_box = NULL;
	conv->n = dom_node_ref(n);

	/* Hand the state back to the caller before queueing the first slice
	 * of conversion work */
	*box_conversion_context = conv;

	return guit->misc->schedule(0, (void *)convert_xml_to_box, conv);
}
/* exported function documented in html/box_construct.h */
nserror cancel_dom_to_box(void *box_conversion_context)
{
	struct box_construct_ctx *pending = box_conversion_context;
	nserror res;

	/* Ask the scheduler to drop the queued conversion step (negative
	 * delay); the state is only torn down if that succeeds */
	res = guit->misc->schedule(-1, (void *)convert_xml_to_box, pending);

	if (res == NSERROR_OK) {
		dom_node_unref(pending->n);
		free(pending);
	}

	return res;
}
/* exported function documented in html/box_construct.h */
struct box *box_for_node(dom_node *n)
{
	struct box *found = NULL;

	/* The box is looked up in the node's user data under the box key;
	 * a failed lookup yields NULL */
	if (dom_node_get_user_data(n, corestring_dom___ns_key_box_node_data,
			(void *) &found) != DOM_NO_ERR) {
		return NULL;
	}

	return found;
}
/* exported function documented in html/box_construct.h */
bool
box_extract_link(const html_content *content,
		 const dom_string *dsrel,
		 nsurl *base,
		 nsurl **result)
{
	char *s, *s1;
	char *apos0 = NULL, *apos1 = NULL, *quot0 = NULL, *quot1 = NULL;
	size_t i, j, end, rel_len;
	nserror error;
	const char *rel;

	rel = dom_string_data(dsrel);
	rel_len = strlen(rel);

	/* Worst case every input byte is a space and expands to "%20" */
	s1 = s = malloc(3 * rel_len + 1);
	if (s == NULL)
		return false;

	/* Copy to s: trim leading and trailing white space, drop control
	 * characters (bytes below 0x20) and percent-encode interior spaces */
	for (i = 0; rel[i] && ascii_is_space(rel[i]); i++)
		;
	for (end = rel_len;
			(end != i) && ascii_is_space(rel[end - 1]);
			end--)
		;
	for (j = 0; i != end; i++) {
		if ((unsigned char) rel[i] < 0x20) {
			; /* skip control characters */
		} else if (rel[i] == ' ') {
			s[j++] = '%';
			s[j++] = '2';
			s[j++] = '0';
		} else {
			s[j++] = rel[i];
		}
	}
	s[j] = 0;

	if (content->enable_scripting == false) {
		/* extract first quoted string out of "javascript:" link */
		if (strncmp(s, "javascript:", 11) == 0) {
			apos0 = strchr(s, '\'');
			if (apos0)
				apos1 = strchr(apos0 + 1, '\'');
			quot0 = strchr(s, '"');
			if (quot0)
				quot1 = strchr(quot0 + 1, '"');

			/* Use the single-quoted string when it is complete
			 * and either no complete double-quoted string exists
			 * or the apostrophe comes first; otherwise fall back
			 * to a complete double-quoted string.  With neither,
			 * the whole cleaned string is used. */
			if (apos0 && apos1 &&
					(!quot0 || !quot1 || apos0 < quot0)) {
				*apos1 = 0;
				s1 = apos0 + 1;
			} else if (quot0 && quot1) {
				*quot1 = 0;
				s1 = quot0 + 1;
			}
		}
	}

	/* construct absolute URL */
	error = nsurl_join(base, s1, result);
	/* s1 may point into s, so s is only released after the join */
	free(s);
	if (error != NSERROR_OK) {
		*result = NULL;
		return false;
	}

	return true;
}