/* * Copyright 2007 James Bursa * Copyright 2010 Michael Drake * * This file is part of NetSurf, http://www.netsurf-browser.org/ * * NetSurf is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * NetSurf is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /** \file * Content for text/html (implementation). */ #include #include #include #include #include #include #include "utils/config.h" #include "content/content_protected.h" #include "content/fetch.h" #include "content/hlcache.h" #include "desktop/browser.h" #include "desktop/gui.h" #include "desktop/options.h" #include "image/bitmap.h" #include "render/box.h" #include "render/favicon.h" #include "render/font.h" #include "render/form.h" #include "render/html.h" #include "render/imagemap.h" #include "render/layout.h" #include "utils/http.h" #include "utils/log.h" #include "utils/messages.h" #include "utils/schedule.h" #include "utils/talloc.h" #include "utils/url.h" #include "utils/utils.h" #define CHUNK 4096 /* Change these to 1 to cause a dump to stderr of the frameset or box * when the trees have been built. 
*/ #define ALWAYS_DUMP_FRAMESET 0 #define ALWAYS_DUMP_BOX 0 static void html_finish_conversion(struct content *c); static nserror html_convert_css_callback(hlcache_handle *css, const hlcache_event *event, void *pw); static bool html_meta_refresh(struct content *c, xmlNode *head); static bool html_head(struct content *c, xmlNode *head); static bool html_find_stylesheets(struct content *c, xmlNode *html); static bool html_process_style_element(struct content *c, unsigned int *index, xmlNode *style); static void html_inline_style_done(struct content_css_data *css, void *pw); static bool html_replace_object(struct content_html_object *object, const char *url); static nserror html_object_callback(hlcache_handle *object, const hlcache_event *event, void *pw); static void html_object_done(struct box *box, hlcache_handle *object, bool background); static void html_object_failed(struct box *box, struct content *content, bool background); static bool html_object_type_permitted(const content_type type, const content_type *permitted_types); static void html_object_refresh(void *p); static void html_destroy_frameset(struct content_html_frames *frameset); static void html_destroy_iframe(struct content_html_iframe *iframe); #if ALWAYS_DUMP_FRAMESET static void html_dump_frameset(struct content_html_frames *frame, unsigned int depth); #endif static const char empty_document[] = "" "" "" "Empty document" "" "" "

Empty document

" "

The document sent by the server is empty.

" "" ""; /** * Create a CONTENT_HTML. * * The content_html_data structure is initialized and the HTML parser is * created. */ bool html_create(struct content *c, const http_parameter *params) { struct content_html_data *html = &c->data.html; const char *charset; union content_msg_data msg_data; binding_error error; nserror nerror; html->parser_binding = NULL; html->document = NULL; html->quirks = BINDING_QUIRKS_MODE_NONE; html->encoding = NULL; html->base_url = (char *) content__get_url(c); html->base_target = NULL; html->layout = NULL; html->background_colour = NS_TRANSPARENT; html->stylesheet_count = 0; html->stylesheets = NULL; html->select_ctx = NULL; html->num_objects = 0; html->object_list = NULL; html->forms = NULL; html->imagemaps = NULL; html->bw = NULL; html->frameset = NULL; html->iframe = NULL; html->page = NULL; html->box = NULL; html->font_func = &nsfont; nerror = http_parameter_list_find_item(params, "charset", &charset); if (nerror == NSERROR_OK) { html->encoding = talloc_strdup(c, charset); if (!html->encoding) { error = BINDING_NOMEM; goto error; } html->encoding_source = ENCODING_SOURCE_HEADER; } /* Create the parser binding */ error = binding_create_tree(c, html->encoding, &html->parser_binding); if (error == BINDING_BADENCODING && html->encoding != NULL) { /* Ok, we don't support the declared encoding. Bailing out * isn't exactly user-friendly, so fall back to autodetect */ talloc_free(html->encoding); html->encoding = NULL; error = binding_create_tree(c, html->encoding, &html->parser_binding); } if (error != BINDING_OK) goto error; return true; error: if (error == BINDING_BADENCODING) { LOG(("Bad encoding: %s", html->encoding ? html->encoding : "")); msg_data.error = messages_get("ParsingFail"); } else msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /** * Process data for CONTENT_HTML. * * The data is parsed in chunks of size CHUNK, multitasking in between. 
*/ bool html_process_data(struct content *c, const char *data, unsigned int size) { unsigned long x; binding_error err; const char *encoding; for (x = 0; x + CHUNK <= size; x += CHUNK) { err = binding_parse_chunk(c->data.html.parser_binding, (const uint8_t *) data + x, CHUNK); if (err == BINDING_ENCODINGCHANGE) { goto encoding_change; } else if (err != BINDING_OK) { union content_msg_data msg_data; msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } gui_multitask(); } err = binding_parse_chunk(c->data.html.parser_binding, (const uint8_t *) data + x, (size - x)); if (err == BINDING_ENCODINGCHANGE) { goto encoding_change; } else if (err != BINDING_OK) { union content_msg_data msg_data; msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } return true; encoding_change: /* Retrieve new encoding */ encoding = binding_get_encoding( c->data.html.parser_binding, &c->data.html.encoding_source); if (c->data.html.encoding != NULL) talloc_free(c->data.html.encoding); c->data.html.encoding = talloc_strdup(c, encoding); if (c->data.html.encoding == NULL) { union content_msg_data msg_data; msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /* Destroy binding */ binding_destroy_tree(c->data.html.parser_binding); /* Create new binding, using the new encoding */ err = binding_create_tree(c, c->data.html.encoding, &c->data.html.parser_binding); if (err == BINDING_BADENCODING) { /* Ok, we don't support the declared encoding. 
Bailing out * isn't exactly user-friendly, so fall back to Windows-1252 */ talloc_free(c->data.html.encoding); c->data.html.encoding = talloc_strdup(c, "Windows-1252"); if (c->data.html.encoding == NULL) { union content_msg_data msg_data; msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } err = binding_create_tree(c, c->data.html.encoding, &c->data.html.parser_binding); } if (err != BINDING_OK) { union content_msg_data msg_data; if (err == BINDING_BADENCODING) { LOG(("Bad encoding: %s", c->data.html.encoding ? c->data.html.encoding : "")); msg_data.error = messages_get("ParsingFail"); } else msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } { const char *source_data; unsigned long source_size; source_data = content__get_source_data(c, &source_size); /* Recurse to reprocess all that data. This is safe because * the encoding is now specified at parser-start which means * it cannot be changed again. */ return html_process_data(c, source_data, source_size); } } /** * Convert a CONTENT_HTML for display. * * The following steps are carried out in order: * * - parsing to an XML tree is completed * - stylesheets are fetched * - favicon is retrieved * - the XML tree is converted to a box tree and object fetches are started * * On exit, the content status will be either CONTENT_STATUS_DONE if the * document is completely loaded or CONTENT_STATUS_READY if objects are still * being fetched. */ bool html_convert(struct content *c) { binding_error err; xmlNode *html, *head; union content_msg_data msg_data; unsigned long size; struct form *f; /* finish parsing */ content__get_source_data(c, &size); if (size == 0) { /* Destroy current binding */ binding_destroy_tree(c->data.html.parser_binding); /* Also, any existing encoding information, * as it's not guaranteed to match the error page. 
*/ talloc_free(c->data.html.encoding); c->data.html.encoding = NULL; /* Create new binding, using default charset */ err = binding_create_tree(c, NULL, &c->data.html.parser_binding); if (err != BINDING_OK) { union content_msg_data msg_data; if (err == BINDING_BADENCODING) { LOG(("Bad encoding: %s", c->data.html.encoding ? c->data.html.encoding : "")); msg_data.error = messages_get("ParsingFail"); } else msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /* Process the error page */ if (html_process_data(c, (char *) empty_document, SLEN(empty_document)) == false) return false; } err = binding_parse_completed(c->data.html.parser_binding); if (err != BINDING_OK) { union content_msg_data msg_data; msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } c->data.html.document = binding_get_document(c->data.html.parser_binding, &c->data.html.quirks); /*xmlDebugDumpDocument(stderr, c->data.html.document);*/ if (!c->data.html.document) { LOG(("Parsing failed")); msg_data.error = messages_get("ParsingFail"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } if (c->data.html.encoding == NULL) { const char *encoding = binding_get_encoding( c->data.html.parser_binding, &c->data.html.encoding_source); c->data.html.encoding = talloc_strdup(c, encoding); if (c->data.html.encoding == NULL) { msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } } /* locate html and head elements */ html = xmlDocGetRootElement(c->data.html.document); if (html == 0 || strcmp((const char *) html->name, "html") != 0) { LOG(("html element not found")); msg_data.error = messages_get("ParsingFail"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } for (head = html->children; head != 0 && head->type != XML_ELEMENT_NODE; head = head->next) ; if (head && strcmp((const char *) head->name, "head") != 0) { head 
= 0; LOG(("head element not found")); } if (head) { if (!html_head(c, head)) { msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /* handle meta refresh */ if (!html_meta_refresh(c, head)) return false; } /* Retrieve forms from parser */ c->data.html.forms = binding_get_forms(c->data.html.parser_binding); for (f = c->data.html.forms; f != NULL; f = f->prev) { char *action; url_func_result res; /* Make all actions absolute */ if (f->action == NULL || f->action[0] == '\0') { /* HTML5 4.10.22.3 step 11 */ res = url_join(content__get_url(c), c->data.html.base_url, &action); } else { res = url_join(f->action, c->data.html.base_url, &action); } if (res != URL_FUNC_OK) { msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } free(f->action); f->action = action; /* Ensure each form has a document encoding */ if (f->document_charset == NULL) { f->document_charset = strdup(c->data.html.encoding); if (f->document_charset == NULL) { msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } } } /* get stylesheets */ if (!html_find_stylesheets(c, html)) return false; return true; } /** * Complete conversion of an HTML document * * \param c Content to convert */ void html_finish_conversion(struct content *c) { union content_msg_data msg_data; xmlNode *html; uint32_t i; css_error error; html = xmlDocGetRootElement(c->data.html.document); assert(html != NULL); /* check that the base stylesheet loaded; layout fails without it */ if (c->data.html.stylesheets[STYLESHEET_BASE].data.external == NULL) { msg_data.error = "Base stylesheet failed to load"; content_broadcast(c, CONTENT_MSG_ERROR, msg_data); c->status = CONTENT_STATUS_ERROR; return; } /* Create selection context */ error = css_select_ctx_create(ns_realloc, c, &c->data.html.select_ctx); if (error != CSS_OK) { msg_data.error = messages_get("NoMemory"); 
content_broadcast(c, CONTENT_MSG_ERROR, msg_data); c->status = CONTENT_MSG_ERROR; return; } /* Add sheets to it */ for (i = STYLESHEET_BASE; i != c->data.html.stylesheet_count; i++) { const struct html_stylesheet *hsheet = &c->data.html.stylesheets[i]; css_stylesheet *sheet; css_origin origin = CSS_ORIGIN_AUTHOR; if (i < STYLESHEET_START) origin = CSS_ORIGIN_UA; if (hsheet->type == HTML_STYLESHEET_EXTERNAL && hsheet->data.external != NULL) { struct content *s = hlcache_handle_get_content( hsheet->data.external); sheet = s->data.css.sheet; } else if (hsheet->type == HTML_STYLESHEET_INTERNAL) { sheet = hsheet->data.internal->sheet; } else { sheet = NULL; } if (sheet != NULL) { error = css_select_ctx_append_sheet( c->data.html.select_ctx, sheet, origin, CSS_MEDIA_SCREEN); if (error != CSS_OK) { msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); c->status = CONTENT_STATUS_ERROR; return; } } } #ifdef FAVICONS_ARE_NOT_A_PERFORMANCE_HOLE /* get icon */ favicon_get_icon(c, html); #endif /* convert xml tree to box tree */ LOG(("XML to box")); content_set_status(c, messages_get("Processing")); content_broadcast(c, CONTENT_MSG_STATUS, msg_data); if (!xml_to_box(html, c)) { msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); c->status = CONTENT_STATUS_ERROR; return; } #if ALWAYS_DUMP_BOX box_dump(stderr, c->data.html.layout->children, 0); #endif #if ALWAYS_DUMP_FRAMESET if (c->data.html.frameset) html_dump_frameset(c->data.html.frameset, 0); #endif /* extract image maps - can't do this sensibly in xml_to_box */ if (!imagemap_extract(html, c)) { LOG(("imagemap extraction failed")); msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); c->status = CONTENT_STATUS_ERROR; return; } /*imagemap_dump(c);*/ /* Destroy the parser binding */ binding_destroy_tree(c->data.html.parser_binding); c->data.html.parser_binding = NULL; content_set_ready(c); if (c->active == 
0) content_set_done(c); html_set_status(c, ""); } /** * Process elements in . * * \param c content structure * \param head xml node of head element * \return true on success, false on memory exhaustion * * The title and base href are extracted if present. */ bool html_head(struct content *c, xmlNode *head) { xmlNode *node; xmlChar *s; for (node = head->children; node != 0; node = node->next) { if (node->type != XML_ELEMENT_NODE) continue; if (c->title == NULL && strcmp((const char *) node->name, "title") == 0) { xmlChar *title = xmlNodeGetContent(node); char *title2; if (!title) return false; title2 = squash_whitespace((const char *) title); xmlFree(title); if (!title2) return false; if (content__set_title(c, title2) == false) { free(title2); return false; } free(title2); } else if (strcmp((const char *) node->name, "base") == 0) { char *href = (char *) xmlGetProp(node, (const xmlChar *) "href"); if (href) { char *url; url_func_result res; res = url_normalize(href, &url); if (res == URL_FUNC_OK) { c->data.html.base_url = talloc_strdup(c, url); free(url); } xmlFree(href); } /* don't use the central values to ease freeing later on */ if ((s = xmlGetProp(node, (const xmlChar *) "target"))) { if ((!strcasecmp((const char *) s, "_blank")) || (!strcasecmp((const char *) s, "_top")) || (!strcasecmp((const char *) s, "_parent")) || (!strcasecmp((const char *) s, "_self")) || ('a' <= s[0] && s[0] <= 'z') || ('A' <= s[0] && s[0] <= 'Z')) { /* [6.16] */ c->data.html.base_target = talloc_strdup(c, (const char *) s); if (!c->data.html.base_target) { xmlFree(s); return false; } } xmlFree(s); } } } return true; } /** * Search for meta refresh * * http://wp.netscape.com/assist/net_sites/pushpull.html * * \param c content structure * \param head xml node of head element * \return true on success, false otherwise (error reported) */ bool html_meta_refresh(struct content *c, xmlNode *head) { xmlNode *n; xmlChar *equiv, *content; union content_msg_data msg_data; char *url, *end, 
*refresh = NULL, quote = 0; url_func_result res; for (n = head == 0 ? 0 : head->children; n; n = n->next) { if (n->type != XML_ELEMENT_NODE) continue; /* Recurse into noscript elements */ if (strcmp((const char *) n->name, "noscript") == 0) { if (!html_meta_refresh(c, n)) { /* Some error occurred */ return false; } else if (c->refresh) { /* Meta refresh found - stop */ return true; } } if (strcmp((const char *) n->name, "meta")) { continue; } equiv = xmlGetProp(n, (const xmlChar *) "http-equiv"); if (!equiv) continue; if (strcasecmp((const char *) equiv, "refresh")) { xmlFree(equiv); continue; } xmlFree(equiv); content = xmlGetProp(n, (const xmlChar *) "content"); if (!content) continue; end = (char *) content + strlen((const char *) content); /* content := *LWS intpart fracpart? *LWS [';' *LWS *1url *LWS] * intpart := 1*DIGIT * fracpart := 1*('.' | DIGIT) * url := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq) * url-nq := *urlchar * url-sq := "'" *(urlchar | '"') "'" * url-dq := '"' *(urlchar | "'") '"' * urlchar := [#x9#x21#x23-#x26#x28-#x7E] | nonascii * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF] */ /* *LWS intpart */ msg_data.delay = (int)strtol((char *) content, &url, 10); /* a very small delay and self-referencing URL can cause a loop * that grinds machines to a halt. To prevent this we set a * minimum refresh delay of 1s. */ if (msg_data.delay < 1) msg_data.delay = 1; /* fracpart? 
(ignored, as delay is integer only) */ while (url < end && (('0' <= *url && *url <= '9') || *url == '.')) { url++; } /* *LWS */ while (url < end && isspace(*url)) { url++; } /* ';' */ if (url < end && *url == ';') url++; /* *LWS */ while (url < end && isspace(*url)) { url++; } if (url == end) { /* Just delay specified, so refresh current page */ xmlFree(content); c->refresh = talloc_strdup(c, content__get_url(c)); if (!c->refresh) { msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } content_broadcast(c, CONTENT_MSG_REFRESH, msg_data); break; } /* "url" */ if (url <= end - 3) { if (strncasecmp(url, "url", 3) == 0) { url += 3; } else { /* Unexpected input, ignore this header */ continue; } } else { /* Insufficient input, ignore this header */ continue; } /* *LWS */ while (url < end && isspace(*url)) { url++; } /* '=' */ if (url < end) { if (*url == '=') { url++; } else { /* Unexpected input, ignore this header */ continue; } } else { /* Insufficient input, ignore this header */ continue; } /* *LWS */ while (url < end && isspace(*url)) { url++; } /* '"' or "'" */ if (url < end && (*url == '"' || *url == '\'')) { quote = *url; url++; } /* Start of URL */ refresh = url; if (quote != 0) { /* url-sq | url-dq */ while (url < end && *url != quote) url++; } else { /* url-nq */ while (url < end && !isspace(*url)) url++; } /* '"' or "'" or *LWS (we don't care) */ if (url < end) *url = '\0'; res = url_join(refresh, c->data.html.base_url, &refresh); xmlFree(content); if (res == URL_FUNC_NOMEM) { msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } else if (res == URL_FUNC_FAILED) { /* This isn't fatal so carry on looking */ continue; } c->refresh = talloc_strdup(c, refresh); free(refresh); if (!c->refresh) { msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } content_broadcast(c, CONTENT_MSG_REFRESH, 
msg_data); } return true; } /** * Process inline stylesheets and fetch linked stylesheets. * * Uses STYLE and LINK elements inside and outside HEAD * * \param c content structure * \param html xml node of html element * \return true on success, false if an error occurred */ bool html_find_stylesheets(struct content *c, xmlNode *html) { static const content_type accept[] = { CONTENT_CSS, CONTENT_UNKNOWN }; xmlNode *node; char *rel, *type, *media, *href, *url, *url2; unsigned int i = STYLESHEET_START; union content_msg_data msg_data; url_func_result res; struct html_stylesheet *stylesheets; hlcache_child_context child; nserror ns_error; child.charset = c->data.html.encoding; child.quirks = c->quirks; /* stylesheet 0 is the base style sheet, * stylesheet 1 is the quirks mode style sheet, * stylesheet 2 is the adblocking stylesheet */ c->data.html.stylesheets = talloc_array(c, struct html_stylesheet, STYLESHEET_START); if (c->data.html.stylesheets == NULL) goto no_memory; c->data.html.stylesheets[STYLESHEET_BASE].type = HTML_STYLESHEET_EXTERNAL; c->data.html.stylesheets[STYLESHEET_BASE].data.external = NULL; c->data.html.stylesheets[STYLESHEET_QUIRKS].type = HTML_STYLESHEET_EXTERNAL; c->data.html.stylesheets[STYLESHEET_QUIRKS].data.external = NULL; c->data.html.stylesheets[STYLESHEET_ADBLOCK].type = HTML_STYLESHEET_EXTERNAL; c->data.html.stylesheets[STYLESHEET_ADBLOCK].data.external = NULL; c->data.html.stylesheet_count = STYLESHEET_START; c->active = 0; ns_error = hlcache_handle_retrieve(default_stylesheet_url, 0, content__get_url(c), NULL, html_convert_css_callback, c, &child, accept, &c->data.html.stylesheets[ STYLESHEET_BASE].data.external); if (ns_error != NSERROR_OK) goto no_memory; c->active++; if (c->data.html.quirks == BINDING_QUIRKS_MODE_FULL) { ns_error = hlcache_handle_retrieve(quirks_stylesheet_url, 0, content__get_url(c), NULL, html_convert_css_callback, c, &child, accept, &c->data.html.stylesheets[ STYLESHEET_QUIRKS].data.external); if (ns_error != 
NSERROR_OK) goto no_memory; c->active++; } if (option_block_ads) { ns_error = hlcache_handle_retrieve(adblock_stylesheet_url, 0, content__get_url(c), NULL, html_convert_css_callback, c, &child, accept, &c->data.html.stylesheets[ STYLESHEET_ADBLOCK].data.external); if (ns_error != NSERROR_OK) goto no_memory; c->active++; } node = html; /* depth-first search the tree for link elements */ while (node) { if (node->children) { /* 1. children */ node = node->children; } else if (node->next) { /* 2. siblings */ node = node->next; } else { /* 3. ancestor siblings */ while (node && !node->next) node = node->parent; if (!node) break; node = node->next; } assert(node); if (node->type != XML_ELEMENT_NODE) continue; if (strcmp((const char *) node->name, "link") == 0) { /* rel= */ if ((rel = (char *) xmlGetProp(node, (const xmlChar *) "rel")) == NULL) continue; if (strcasestr(rel, "stylesheet") == 0) { xmlFree(rel); continue; } else if (strcasestr(rel, "alternate")) { /* Ignore alternate stylesheets */ xmlFree(rel); continue; } xmlFree(rel); /* type='text/css' or not present */ if ((type = (char *) xmlGetProp(node, (const xmlChar *) "type")) != NULL) { if (strcmp(type, "text/css") != 0) { xmlFree(type); continue; } xmlFree(type); } /* media contains 'screen' or 'all' or not present */ if ((media = (char *) xmlGetProp(node, (const xmlChar *) "media")) != NULL) { if (strcasestr(media, "screen") == NULL && strcasestr(media, "all") == NULL) { xmlFree(media); continue; } xmlFree(media); } /* href='...' */ if ((href = (char *) xmlGetProp(node, (const xmlChar *) "href")) == NULL) continue; /* TODO: only the first preferred stylesheets (ie. 
* those with a title attribute) should be loaded * (see HTML4 14.3) */ res = url_join(href, c->data.html.base_url, &url); xmlFree(href); if (res != URL_FUNC_OK) continue; LOG(("linked stylesheet %i '%s'", i, url)); res = url_normalize(url, &url2); free(url); if (res != URL_FUNC_OK) { if (res == URL_FUNC_NOMEM) goto no_memory; continue; } /* start fetch */ stylesheets = talloc_realloc(c, c->data.html.stylesheets, struct html_stylesheet, i + 1); if (stylesheets == NULL) { free(url2); goto no_memory; } c->data.html.stylesheets = stylesheets; c->data.html.stylesheet_count++; c->data.html.stylesheets[i].type = HTML_STYLESHEET_EXTERNAL; ns_error = hlcache_handle_retrieve(url2, 0, content__get_url(c), NULL, html_convert_css_callback, c, &child, accept, &c->data.html.stylesheets[i]. data.external); free(url2); if (ns_error != NSERROR_OK) goto no_memory; c->active++; i++; } else if (strcmp((const char *) node->name, "style") == 0) { if (!html_process_style_element(c, &i, node)) return false; } } assert(c->data.html.stylesheet_count == i); return true; no_memory: msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /** * Process an inline stylesheet in the document. 
* * \param c content structure * \param index Index of stylesheet in stylesheet_content array, * updated if successful * \param style xml node of style element * \return true on success, false if an error occurred */ bool html_process_style_element(struct content *c, unsigned int *index, xmlNode *style) { xmlNode *child; char *type, *media, *data; union content_msg_data msg_data; struct html_stylesheet *stylesheets; struct content_css_data *sheet; nserror error; /* type='text/css', or not present (invalid but common) */ if ((type = (char *) xmlGetProp(style, (const xmlChar *) "type"))) { if (strcmp(type, "text/css") != 0) { xmlFree(type); return true; } xmlFree(type); } /* media contains 'screen' or 'all' or not present */ if ((media = (char *) xmlGetProp(style, (const xmlChar *) "media"))) { if (strcasestr(media, "screen") == NULL && strcasestr(media, "all") == NULL) { xmlFree(media); return true; } xmlFree(media); } /* Extend array */ stylesheets = talloc_realloc(c, c->data.html.stylesheets, struct html_stylesheet, *index + 1); if (stylesheets == NULL) goto no_memory; c->data.html.stylesheets = stylesheets; c->data.html.stylesheet_count++; c->data.html.stylesheets[(*index)].type = HTML_STYLESHEET_INTERNAL; c->data.html.stylesheets[(*index)].data.internal = NULL; /* create stylesheet */ sheet = talloc(c, struct content_css_data); if (sheet == NULL) { c->data.html.stylesheet_count--; goto no_memory; } error = nscss_create_css_data(sheet, c->data.html.base_url, NULL, c->data.html.quirks, html_inline_style_done, c); if (error != NSERROR_OK) { c->data.html.stylesheet_count--; goto no_memory; } /* can't just use xmlNodeGetContent(style), because that won't * give the content of comments which may be used to 'hide' * the content */ for (child = style->children; child != 0; child = child->next) { data = (char *) xmlNodeGetContent(child); if (nscss_process_css_data(sheet, data, strlen(data)) == false) { xmlFree(data); nscss_destroy_css_data(sheet); talloc_free(sheet); 
c->data.html.stylesheet_count--; /** \todo not necessarily caused by * memory exhaustion */ goto no_memory; } xmlFree(data); } c->active++; /* Convert the content -- manually, as we want the result */ if (nscss_convert_css_data(sheet) != CSS_OK) { /* conversion failed */ c->active--; nscss_destroy_css_data(sheet); talloc_free(sheet); sheet = NULL; } /* Update index */ c->data.html.stylesheets[(*index)].data.internal = sheet; (*index)++; return true; no_memory: msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /** * Handle notification of inline style completion * * \param css Inline style object * \param pw Private data */ void html_inline_style_done(struct content_css_data *css, void *pw) { struct content *html = pw; if (--html->active == 0) html_finish_conversion(html); } /** * Callback for fetchcache() for linked stylesheets. */ nserror html_convert_css_callback(hlcache_handle *css, const hlcache_event *event, void *pw) { struct content *parent = pw; unsigned int i; struct html_stylesheet *s; /* Find sheet */ for (i = 0, s = parent->data.html.stylesheets; i != parent->data.html.stylesheet_count; i++, s++) { if (s->type == HTML_STYLESHEET_EXTERNAL && s->data.external == css) break; } assert(i != parent->data.html.stylesheet_count); switch (event->type) { case CONTENT_MSG_LOADING: /* check that the stylesheet is really CSS */ if (content_get_type(css) != CONTENT_CSS) { assert(0 && "Non-CSS type unexpected"); } break; case CONTENT_MSG_READY: break; case CONTENT_MSG_DONE: LOG(("got stylesheet '%s'", content_get_url(css))); parent->active--; break; case CONTENT_MSG_ERROR: LOG(("stylesheet %s failed: %s", content_get_url(css), event->data.error)); hlcache_handle_release(css); s->data.external = NULL; parent->active--; content_add_error(parent, "?", 0); break; case CONTENT_MSG_STATUS: html_set_status(parent, content_get_status_message(css)); content_broadcast(parent, CONTENT_MSG_STATUS, event->data); break; 
default: assert(0); } if (parent->active == 0) html_finish_conversion(parent); return NSERROR_OK; } /** * Start a fetch for an object required by a page. * * \param c content of type CONTENT_HTML * \param url URL of object to fetch (copied) * \param box box that will contain the object * \param permitted_types array of types, terminated by CONTENT_UNKNOWN, * or 0 if all types except OTHER and UNKNOWN acceptable * \param available_width estimate of width of object * \param available_height estimate of height of object * \param background this is a background image * \return true on success, false on memory exhaustion */ bool html_fetch_object(struct content *c, const char *url, struct box *box, const content_type *permitted_types, int available_width, int available_height, bool background) { struct content_html_object *object; hlcache_child_context child; char *url2; url_func_result res; nserror error; child.charset = c->data.html.encoding; child.quirks = c->quirks; /* Normalize the URL */ res = url_normalize(url, &url2); if (res != URL_FUNC_OK) { LOG(("failed to normalize url '%s'", url)); return res != URL_FUNC_NOMEM; } object = talloc(c, struct content_html_object); if (object == NULL) { free(url2); return false; } object->parent = c; object->next = NULL; object->content = NULL; object->box = box; object->permitted_types = permitted_types; object->background = background; error = hlcache_handle_retrieve(url2, 0, content__get_url(c), NULL, html_object_callback, object, &child, permitted_types, &object->content); /* No longer need normalized url */ free(url2); if (error != NSERROR_OK) { talloc_free(object); return error != NSERROR_NOMEM; } /* add to object list */ object->next = c->data.html.object_list; c->data.html.object_list = object; c->data.html.num_objects++; c->active++; return error != NSERROR_NOMEM; } /** * Start a fetch for an object required by a page, replacing an existing object. 
* * \param object Object to replace * \param url URL of object to fetch (copied) * \return true on success, false on memory exhaustion */ bool html_replace_object(struct content_html_object *object, const char *url) { struct content *c; hlcache_child_context child; struct content *page; char *url2; url_func_result res; nserror error; assert(object != NULL); c = object->parent; child.charset = c->data.html.encoding; child.quirks = c->quirks; if (object->content != NULL) { /* remove existing object */ if (content_get_status(object->content) != CONTENT_STATUS_DONE) c->active--; hlcache_handle_release(object->content); object->content = NULL; object->box->object = NULL; } res = url_normalize(url, &url2); if (res != URL_FUNC_OK) return res != URL_FUNC_NOMEM; /* initialise fetch */ error = hlcache_handle_retrieve(url2, 0, content__get_url(c), NULL, html_object_callback, c, &child, object->permitted_types, &object->content); free(url2); if (error != NSERROR_OK) return false; for (page = c; page; page = page->data.html.page) { assert(page->type == CONTENT_HTML); page->active++; page->status = CONTENT_STATUS_READY; } return true; } /** * Callback for hlcache_handle_retrieve() for objects. 
*/
nserror html_object_callback(hlcache_handle *object,
		const hlcache_event *event, void *pw)
{
	/* pw is the content_html_object record registered for this fetch; it
	 * ties the fetched object to its parent HTML content and its box. */
	struct content_html_object *o = pw;
	struct content *c = o->parent;
	struct box *box = o->box;

	switch (event->type) {
	case CONTENT_MSG_LOADING:
		/* check if the type is acceptable for this object */
		if (html_object_type_permitted(content_get_type(object),
				o->permitted_types)) {
			/* only open the object if the parent is in a window */
			if (c->data.html.bw != NULL)
				content_open(object, c->data.html.bw, c,
						box, box->object_params);
			break;
		}

		/* not acceptable: drop the handle and report the failure */
		hlcache_handle_release(object);
		o->content = NULL;

		c->active--;

		content_add_error(c, "?", 0);
		html_set_status(c, messages_get("BadObject"));
		content_broadcast(c, CONTENT_MSG_STATUS, event->data);
		html_object_failed(box, c, o->background);
		break;

	case CONTENT_MSG_READY:
		/* Only HTML objects are attached to their box before they
		 * are completely done. */
		if (content_get_type(object) == CONTENT_HTML) {
			html_object_done(box, object, o->background);
			if (c->status == CONTENT_STATUS_READY ||
					c->status == CONTENT_STATUS_DONE)
				content__reformat(c, c->available_width,
						c->height);
		}
		break;

	case CONTENT_MSG_DONE:
		html_object_done(box, object, o->background);
		c->active--;
		break;

	case CONTENT_MSG_ERROR:
		hlcache_handle_release(object);
		o->content = NULL;

		c->active--;

		content_add_error(c, "?", 0);
		html_set_status(c, event->data.error);
		content_broadcast(c, CONTENT_MSG_STATUS, event->data);
		html_object_failed(box, c, o->background);
		break;

	case CONTENT_MSG_STATUS:
		html_set_status(c, content_get_status_message(object));
		/* content_broadcast(c, CONTENT_MSG_STATUS, 0); */
		break;

	case CONTENT_MSG_REFORMAT:
		break;

	case CONTENT_MSG_REDRAW:
	{
		union content_msg_data data = event->data;
		int x, y;

		if (!box_visible(box))
			break;

		box_coords(box, &x, &y);

		if (hlcache_handle_get_content(object) ==
				event->data.redraw.object) {
			/* The redraw area is given in the object's own
			 * coordinates; scale it to the size of the box.
			 * An axis on which the object reports a size of
			 * zero is left unscaled, as scaling it would
			 * divide by zero. */
			if (content_get_width(object) != 0) {
				data.redraw.x = data.redraw.x *
						box->width /
						content_get_width(object);
				data.redraw.width = data.redraw.width *
						box->width /
						content_get_width(object);
			}
			if (content_get_height(object) != 0) {
				data.redraw.y = data.redraw.y *
						box->height /
						content_get_height(object);
				data.redraw.height = data.redraw.height *
						box->height /
						content_get_height(object);
			}
			data.redraw.object_width = box->width;
			data.redraw.object_height = box->height;
		}

		/* translate from box-relative to document coordinates */
		data.redraw.x += x + box->padding[LEFT];
		data.redraw.y += y + box->padding[TOP];
		data.redraw.object_x += x + box->padding[LEFT];
		data.redraw.object_y += y + box->padding[TOP];

		content_broadcast(c, CONTENT_MSG_REDRAW, data);
	}
		break;

	case CONTENT_MSG_REFRESH:
		if (content_get_type(object) == CONTENT_HTML) {
			/* only for HTML objects */
			schedule(event->data.delay * 100,
					html_object_refresh, o);
		}
		break;

	default:
		assert(0);
	}

	if (c->status == CONTENT_STATUS_READY && c->active == 0 &&
			(event->type == CONTENT_MSG_LOADING ||
			event->type == CONTENT_MSG_DONE ||
			event->type == CONTENT_MSG_ERROR)) {
		/* all objects have arrived */
		content__reformat(c, c->available_width, c->height);
		html_set_status(c, "");
		content_set_done(c);
	}
	/* If  1) the configuration option to reflow pages while objects are
	 *        fetched is set
	 *     2) an object is newly fetched & converted,
	 *     3) the object's parent HTML is ready for reformat,
	 *     4) the time since the previous reformat is more than the
	 *        configured minimum time between reformats
	 * then reformat the page to display newly fetched objects */
	else if (option_incremental_reflow &&
			event->type == CONTENT_MSG_DONE &&
			(c->status == CONTENT_STATUS_READY ||
			c->status == CONTENT_STATUS_DONE) &&
			(wallclock() > c->reformat_time)) {
		content__reformat(c, c->available_width, c->height);
	}

	return NSERROR_OK;
}


/**
 * Update a box whose content has completed rendering.
 *
 * \param  box         box the object belongs to
 * \param  object      handle of the object to attach to the box
 * \param  background  true if the object is the box's background image,
 *                     in which case only box->background is updated
 */

void html_object_done(struct box *box, hlcache_handle *object,
		bool background)
{
	struct box *b;

	if (background) {
		box->background = object;
		return;
	}

	box->object = object;

	/* invalidate the cached max width of this box and all ancestors */
	for (b = box; b; b = b->parent)
		b->max_width = UNKNOWN_MAX_WIDTH;

	/* delete any clones of this box
	 * NOTE(review): the clones are only unlinked from the sibling list;
	 * the call that would free them is commented out. */
	while (box->next && (box->next->flags & CLONE)) {
		/* box_free_box(box->next); */
		box->next = box->next->next;
	}
}


/**
 * Handle object fetching or loading failure.
* * \param box box containing object which failed to load * \param content document of type CONTENT_HTML * \param background the object was the background image for the box */ void html_object_failed(struct box *box, struct content *content, bool background) { /* Nothing to do */ return; } /** * Check if a type is in a list. * * \param type the content_type to search for * \param permitted_types array of types, terminated by CONTENT_UNKNOWN, * or 0 if all types except OTHER and UNKNOWN acceptable * \return the type is in the list or acceptable */ bool html_object_type_permitted(const content_type type, const content_type *permitted_types) { if (permitted_types) { for (; *permitted_types != CONTENT_UNKNOWN; permitted_types++) if (*permitted_types == type) return true; } else if (type < CONTENT_OTHER) { return true; } return false; } /** * schedule() callback for object refresh */ void html_object_refresh(void *p) { struct content_html_object *object = p; const char *refresh_url; assert(content_get_type(object->content) == CONTENT_HTML); refresh_url = content_get_refresh_url(object->content); /* Ignore if refresh URL has gone * (may happen if fetch errored) */ if (refresh_url == NULL) return; content_invalidate_reuse_data(object->content); if (!html_replace_object(object, refresh_url)) { /** \todo handle memory exhaustion */ } } /** * Stop loading a CONTENT_HTML in state READY. 
*/ void html_stop(struct content *c) { struct content_html_object *object; assert(c->status == CONTENT_STATUS_READY); for (object = c->data.html.object_list; object != NULL; object = object->next) { if (object->content == NULL) continue; if (content_get_status(object->content) == CONTENT_STATUS_DONE) ; /* already loaded: do nothing */ else if (content_get_status(object->content) == CONTENT_STATUS_READY) hlcache_handle_abort(object->content); else { hlcache_handle_release(object->content); object->content = NULL; } } c->status = CONTENT_STATUS_DONE; } /** * Reformat a CONTENT_HTML to a new width. */ void html_reformat(struct content *c, int width, int height) { struct box *layout; unsigned int time_before, time_taken; time_before = wallclock(); layout_document(c, width, height); layout = c->data.html.layout; /* width and height are at least margin box of document */ c->width = layout->x + layout->padding[LEFT] + layout->width + layout->padding[RIGHT] + layout->border[RIGHT].width + layout->margin[RIGHT]; c->height = layout->y + layout->padding[TOP] + layout->height + layout->padding[BOTTOM] + layout->border[BOTTOM].width + layout->margin[BOTTOM]; /* if boxes overflow right or bottom edge, expand to contain it */ if (c->width < layout->x + layout->descendant_x1) c->width = layout->x + layout->descendant_x1; if (c->height < layout->y + layout->descendant_y1) c->height = layout->y + layout->descendant_y1; time_taken = wallclock() - time_before; c->reformat_time = wallclock() + ((time_taken * 3 < option_min_reflow_period ? option_min_reflow_period : time_taken * 3)); } /** * Redraw a box. 
* * \param h content containing the box, of type CONTENT_HTML * \param box box to redraw */ void html_redraw_a_box(hlcache_handle *h, struct box *box) { int x, y; box_coords(box, &x, &y); content_request_redraw(h, x, y, box->padding[LEFT] + box->width + box->padding[RIGHT], box->padding[TOP] + box->height + box->padding[BOTTOM]); } /** * Destroy a CONTENT_HTML and free all resources it owns. */ void html_destroy(struct content *c) { unsigned int i; struct form *f, *g; struct content_html_data *html; LOG(("content %p", c)); html = &c->data.html; /* Destroy forms */ for (f = html->forms; f != NULL; f = g) { g = f->prev; form_free(f); } if (html->favicon != NULL) { hlcache_handle_release(html->favicon); html->favicon = NULL; } imagemap_destroy(c); if (html->parser_binding != NULL) binding_destroy_tree(html->parser_binding); if (html->document != NULL) xmlFreeDoc(html->document); /* Free base target */ if (html->base_target != NULL) { talloc_free(html->base_target); html->base_target = NULL; } /* Free frameset */ if (html->frameset != NULL) { html_destroy_frameset(html->frameset); talloc_free(html->frameset); html->frameset = NULL; } /* Free iframes */ if (html->iframe != NULL) { html_destroy_iframe(html->iframe); html->iframe = NULL; } /* Destroy selection context */ if (html->select_ctx != NULL) { css_select_ctx_destroy(html->select_ctx); html->select_ctx = NULL; } /* Free stylesheets */ for (i = 0; i != html->stylesheet_count; i++) { if (html->stylesheets[i].type == HTML_STYLESHEET_EXTERNAL && html->stylesheets[i].data.external != NULL) { hlcache_handle_release( html->stylesheets[i].data.external); } else if (html->stylesheets[i].type == HTML_STYLESHEET_INTERNAL && html->stylesheets[i].data.internal != NULL) { nscss_destroy_css_data( html->stylesheets[i].data.internal); } } /* Free objects */ while (html->object_list != NULL) { struct content_html_object *victim = html->object_list; LOG(("object %p", victim->content)); if (victim->content != NULL) { if 
(content_get_type(victim->content) == CONTENT_HTML) schedule_remove(html_object_refresh, victim); hlcache_handle_release(victim->content); } html->object_list = victim->next; talloc_free(victim); } } void html_destroy_frameset(struct content_html_frames *frameset) { int i; if (frameset->name) { talloc_free(frameset->name); frameset->name = NULL; } if (frameset->url) { talloc_free(frameset->url); frameset->url = NULL; } if (frameset->children) { for (i = 0; i < (frameset->rows * frameset->cols); i++) { if (frameset->children[i].name) { talloc_free(frameset->children[i].name); frameset->children[i].name = NULL; } if (frameset->children[i].url) { talloc_free(frameset->children[i].url); frameset->children[i].url = NULL; } if (frameset->children[i].children) html_destroy_frameset(&frameset->children[i]); } talloc_free(frameset->children); frameset->children = NULL; } } void html_destroy_iframe(struct content_html_iframe *iframe) { struct content_html_iframe *next; next = iframe; while ((iframe = next) != NULL) { next = iframe->next; if (iframe->name) talloc_free(iframe->name); if (iframe->url) talloc_free(iframe->url); talloc_free(iframe); } } bool html_clone(const struct content *old, struct content *new_content) { /** \todo Clone HTML specifics */ /* In the meantime, we should never be called, as HTML contents * cannot be shared and we're not intending to fix printing's * cloning of documents. */ assert(0 && "html_clone should never be called"); return true; } /** * Set the content status. */ void html_set_status(struct content *c, const char *extra) { unsigned int stylesheets = 0, objects = 0; if (c->data.html.num_objects == 0) stylesheets = c->data.html.stylesheet_count - c->active; else { stylesheets = c->data.html.stylesheet_count; objects = c->data.html.num_objects - c->active; } content_set_status(c, "%u/%u %s %u/%u %s %s", stylesheets, c->data.html.stylesheet_count, messages_get((c->data.html.stylesheet_count == 1) ? 
"styl" : "styls"), objects, c->data.html.num_objects, messages_get((c->data.html.num_objects == 1) ? "obj" : "objs"), extra); } /** * Handle a window containing a CONTENT_HTML being opened. */ void html_open(struct content *c, struct browser_window *bw, struct content *page, struct box *box, struct object_params *params) { struct content_html_object *object, *next; c->data.html.bw = bw; c->data.html.page = page; c->data.html.box = box; for (object = c->data.html.object_list; object != NULL; object = next) { next = object->next; if (object->content == NULL) continue; if (content_get_type(object->content) == CONTENT_UNKNOWN) continue; content_open(object->content, bw, c, object->box, object->box->object_params); } } /** * Handle a window containing a CONTENT_HTML being closed. */ void html_close(struct content *c) { struct content_html_object *object, *next; c->data.html.bw = 0; for (object = c->data.html.object_list; object != NULL; object = next) { next = object->next; if (object->content == NULL) continue; if (content_get_type(object->content) == CONTENT_UNKNOWN) continue; if (content_get_type(object->content) == CONTENT_HTML) schedule_remove(html_object_refresh, object); content_close(object->content); } } #if ALWAYS_DUMP_FRAMESET /** * Print a frameset tree to stderr. 
*/ void html_dump_frameset(struct content_html_frames *frame, unsigned int depth) { unsigned int i; int row, col, index; const char *unit[] = {"px", "%", "*"}; const char *scrolling[] = {"auto", "yes", "no"}; assert(frame); fprintf(stderr, "%p ", frame); fprintf(stderr, "(%i %i) ", frame->rows, frame->cols); fprintf(stderr, "w%g%s ", frame->width.value, unit[frame->width.unit]); fprintf(stderr, "h%g%s ", frame->height.value,unit[frame->height.unit]); fprintf(stderr, "(margin w%i h%i) ", frame->margin_width, frame->margin_height); if (frame->name) fprintf(stderr, "'%s' ", frame->name); if (frame->url) fprintf(stderr, "<%s> ", frame->url); if (frame->no_resize) fprintf(stderr, "noresize "); fprintf(stderr, "(scrolling %s) ", scrolling[frame->scrolling]); if (frame->border) fprintf(stderr, "border %x ", (unsigned int) frame->border_colour); fprintf(stderr, "\n"); if (frame->children) { for (row = 0; row != frame->rows; row++) { for (col = 0; col != frame->cols; col++) { for (i = 0; i != depth; i++) fprintf(stderr, " "); fprintf(stderr, "(%i %i): ", row, col); index = (row * frame->cols) + col; html_dump_frameset(&frame->children[index], depth + 1); } } } } #endif /** * Retrieve HTML document tree * * \param h HTML content to retrieve document tree from * \return Pointer to document tree */ xmlDoc *html_get_document(hlcache_handle *h) { struct content *c = hlcache_handle_get_content(h); assert(c != NULL); assert(c->type == CONTENT_HTML); return c->data.html.document; } /** * Retrieve box tree * * \param h HTML content to retrieve tree from * \return Pointer to box tree * * \todo This API must die, as must all use of the box tree outside render/ */ struct box *html_get_box_tree(hlcache_handle *h) { struct content *c = hlcache_handle_get_content(h); assert(c != NULL); assert(c->type == CONTENT_HTML); return c->data.html.layout; } /** * Retrieve the charset of an HTML document * * \param h Content to retrieve charset from * \return Pointer to charset, or NULL */ const 
char *html_get_encoding(hlcache_handle *h) { struct content *c = hlcache_handle_get_content(h); assert(c != NULL); assert(c->type == CONTENT_HTML); return c->data.html.encoding; } /** * Retrieve the charset of an HTML document * * \param h Content to retrieve charset from * \return Pointer to charset, or NULL */ binding_encoding_source html_get_encoding_source(hlcache_handle *h) { struct content *c = hlcache_handle_get_content(h); assert(c != NULL); assert(c->type == CONTENT_HTML); return c->data.html.encoding_source; } /** * Retrieve framesets used in an HTML document * * \param h Content to inspect * \return Pointer to framesets, or NULL if none */ struct content_html_frames *html_get_frameset(hlcache_handle *h) { struct content *c = hlcache_handle_get_content(h); assert(c != NULL); assert(c->type == CONTENT_HTML); return c->data.html.frameset; } /** * Retrieve iframes used in an HTML document * * \param h Content to inspect * \return Pointer to iframes, or NULL if none */ struct content_html_iframe *html_get_iframe(hlcache_handle *h) { struct content *c = hlcache_handle_get_content(h); assert(c != NULL); assert(c->type == CONTENT_HTML); return c->data.html.iframe; } /** * Retrieve an HTML content's base URL * * \param h Content to retrieve base target from * \return Pointer to URL */ const char *html_get_base_url(hlcache_handle *h) { struct content *c = hlcache_handle_get_content(h); assert(c != NULL); assert(c->type == CONTENT_HTML); return c->data.html.base_url; } /** * Retrieve an HTML content's base target * * \param h Content to retrieve base target from * \return Pointer to target, or NULL if none */ const char *html_get_base_target(hlcache_handle *h) { struct content *c = hlcache_handle_get_content(h); assert(c != NULL); assert(c->type == CONTENT_HTML); return c->data.html.base_target; } /** * Retrieve stylesheets used by HTML document * * \param h Content to retrieve stylesheets from * \param n Pointer to location to receive number of sheets * \return 
Pointer to array of stylesheets
 */
struct html_stylesheet *html_get_stylesheets(hlcache_handle *h,
		unsigned int *n)
{
	struct content *c;

	c = hlcache_handle_get_content(h);

	assert(c != NULL);
	assert(c->type == CONTENT_HTML);
	assert(n != NULL);

	/* report the count through n and hand back the array itself */
	*n = c->data.html.stylesheet_count;
	return c->data.html.stylesheets;
}

/**
 * Retrieve objects used by HTML document
 *
 * \param h  Content to retrieve objects from
 * \param n  Pointer to location to receive number of objects
 * \return Pointer to list of objects
 */
struct content_html_object *html_get_objects(hlcache_handle *h,
		unsigned int *n)
{
	struct content *c;

	c = hlcache_handle_get_content(h);

	assert(c != NULL);
	assert(c->type == CONTENT_HTML);
	assert(n != NULL);

	/* report the count through n and hand back the list head */
	*n = c->data.html.num_objects;
	return c->data.html.object_list;
}

/**
 * Retrieve favicon associated with an HTML document
 *
 * \param h  HTML document to retrieve favicon from
 * \return Pointer to favicon, or NULL if none
 */
hlcache_handle *html_get_favicon(hlcache_handle *h)
{
	struct content *c;

	c = hlcache_handle_get_content(h);

	assert(c != NULL);
	assert(c->type == CONTENT_HTML);

	return c->data.html.favicon;
}

/**
 * Retrieve layout coordinates of box with given id
 *
 * \param h        HTML document to search
 * \param frag_id  String containing an element id
 * \param x        Updated to global x coord iff id found
 * \param y        Updated to global y coord iff id found
 * \return  true iff id found
 */
bool html_get_id_offset(hlcache_handle *h, const char *frag_id,
		int *x, int *y)
{
	struct box *root;
	struct box *target;

	/* only HTML contents have a box tree to search */
	if (content_get_type(h) != CONTENT_HTML)
		return false;

	root = html_get_box_tree(h);

	target = box_find_by_id(root, frag_id);
	if (target == NULL)
		return false;

	box_coords(target, x, y);
	return true;
}