From 5a312946c5a61fbc7ba1d6223769ebc05e143c64 Mon Sep 17 00:00:00 2001 From: Michael Drake Date: Wed, 22 May 2013 23:57:59 +0100 Subject: [PATCH] Move meta refresh handling to dom event handler. --- render/html.c | 496 +++++++++++++++++------------------------ render/html_internal.h | 3 + 2 files changed, 206 insertions(+), 293 deletions(-) diff --git a/render/html.c b/render/html.c index 3fdbc5beb..75cbf68f2 100644 --- a/render/html.c +++ b/render/html.c @@ -144,6 +144,198 @@ static void html_box_convert_done(html_content *c, bool success) dom_node_unref(html); } +static nserror html_meta_refresh_process_element(html_content *c, dom_node *n) +{ + union content_msg_data msg_data; + const char *url, *end, *refresh = NULL; + char *new_url; + char quote = '\0'; + dom_string *equiv, *content; + dom_exception exc; + nsurl *nsurl; + nserror error = NSERROR_OK; + + exc = dom_element_get_attribute(n, corestring_dom_http_equiv, &equiv); + if (exc != DOM_NO_ERR) { + return NSERROR_DOM; + } + + if (equiv == NULL) { + return NSERROR_OK; + } + + if (!dom_string_caseless_lwc_isequal(equiv, corestring_lwc_refresh)) { + dom_string_unref(equiv); + return NSERROR_OK; + } + + dom_string_unref(equiv); + + exc = dom_element_get_attribute(n, corestring_dom_content, &content); + if (exc != DOM_NO_ERR) { + return NSERROR_DOM; + } + + if (content == NULL) { + return NSERROR_OK; + } + + end = dom_string_data(content) + dom_string_byte_length(content); + + /* content := *LWS intpart fracpart? *LWS [';' *LWS *1url *LWS] + * intpart := 1*DIGIT + * fracpart := 1*('.' | DIGIT) + * url := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq) + * url-nq := *urlchar + * url-sq := "'" *(urlchar | '"') "'" + * url-dq := '"' *(urlchar | "'") '"' + * urlchar := [#x9#x21#x23-#x26#x28-#x7E] | nonascii + * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF] + */ + + url = dom_string_data(content); + + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } + + /* intpart */ + if (url == end || (*url < '0' || '9' < *url)) { + /* Empty content, or invalid timeval */ + dom_string_unref(content); + return NSERROR_OK; + } + + msg_data.delay = (int) strtol(url, &new_url, 10); + /* a very small delay and self-referencing URL can cause a loop + * that grinds machines to a halt. To prevent this we set a + * minimum refresh delay of 1s. */ + if (msg_data.delay < 1) { + msg_data.delay = 1; + } + + url = new_url; + + /* fracpart? (ignored, as delay is integer only) */ + while (url < end && (('0' <= *url && *url <= '9') || + *url == '.')) { + url++; + } + + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } + + /* ';' */ + if (url < end && *url == ';') + url++; + + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } + + if (url == end) { + /* Just delay specified, so refresh current page */ + dom_string_unref(content); + + c->base.refresh = nsurl_ref( + content_get_url(&c->base)); + + content_broadcast(&c->base, CONTENT_MSG_REFRESH, msg_data); + + return NSERROR_OK; + } + + /* "url" */ + if (url <= end - 3) { + if (strncasecmp(url, "url", 3) == 0) { + url += 3; + } else { + /* Unexpected input, ignore this header */ + dom_string_unref(content); + return NSERROR_OK; + } + } else { + /* Insufficient input, ignore this header */ + dom_string_unref(content); + return NSERROR_OK; + } + + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } + + /* '=' */ + if (url < end) { + if (*url == '=') { + url++; + } else { + /* Unexpected input, ignore this header */ + dom_string_unref(content); + return NSERROR_OK; + } + } else { + /* Insufficient input, ignore this header */ + dom_string_unref(content); + return NSERROR_OK; + } + + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } + + /* '"' or "'" */ + if (url < end && (*url == '"' || *url == '\'')) { + quote = *url; + url++; + } + + /* Start of URL */ + refresh = url; + + if (quote != 0) { + /* url-sq | url-dq */ + while (url < end && *url != quote) + url++; + } else { + /* url-nq */ + while (url < end && !isspace(*url)) + url++; + } + + /* '"' or "'" or *LWS (we don't care) */ + if (url > refresh) { + /* There's a URL */ + new_url = strndup(refresh, url - refresh); + if (new_url == NULL) { + dom_string_unref(content); + return NSERROR_NOMEM; + } + + error = nsurl_join(c->base_url, new_url, &nsurl); + if (error == NSERROR_OK) { + /* broadcast valid refresh url */ + + c->base.refresh = nsurl; + + content_broadcast(&c->base, CONTENT_MSG_REFRESH, msg_data); + c->refresh = true; + } + + free(new_url); + + } + + dom_string_unref(content); + + return error; +} + /** * Complete conversion of an HTML document @@ -223,8 +415,16 @@ dom_default_action_DOMNodeInserted_cb(struct dom_event *evt, void *pw) exc = dom_node_get_node_name(node, &name); if ((exc == DOM_NO_ERR) && (name != NULL)) { /* LOG(("element htmlc:%p node %p name:%s", htmlc, node, dom_string_data(name))); */ - if (dom_string_caseless_isequal(name, corestring_dom_link)) { - html_css_process_link(htmlc, (dom_node *)node); + if (dom_string_caseless_isequal(name, + corestring_dom_link)) { + html_css_process_link(htmlc, + (dom_node *)node); + + } else if (dom_string_caseless_lwc_isequal(name, + corestring_lwc_meta) && + htmlc->refresh == false) { + html_meta_refresh_process_element(htmlc, + (dom_node *)node); } dom_string_unref(name); @@ -308,6 +508,7 @@ html_create_html_data(html_content *c, const http_parameter *params) c->base_url = nsurl_ref(content_get_url(&c->base)); c->base_target = NULL; c->aborted = false; + c->refresh = false; c->bctx = NULL; c->layout = NULL; c->background_colour = NS_TRANSPARENT; @@ -797,287 +998,6 @@ static nserror html_head(html_content *c, dom_node *head) return NSERROR_OK; } -static nserror html_meta_refresh_process_element(html_content *c, dom_node *n) -{ - union content_msg_data msg_data; - const char *url, *end, *refresh = NULL; - char *new_url; - char quote = '\0'; - dom_string *equiv, *content; - dom_exception exc; - nsurl *nsurl; - nserror error = NSERROR_OK; - - exc = dom_element_get_attribute(n, corestring_dom_http_equiv, &equiv); - if (exc != DOM_NO_ERR) { - return NSERROR_DOM; - } - - if (equiv == NULL) { - return NSERROR_OK; - } - - if (!dom_string_caseless_lwc_isequal(equiv, corestring_lwc_refresh)) { - dom_string_unref(equiv); - return NSERROR_OK; - } - - dom_string_unref(equiv); - - exc = dom_element_get_attribute(n, corestring_dom_content, &content); - if (exc != DOM_NO_ERR) { - return NSERROR_DOM; - } - - if (content == NULL) { - return NSERROR_OK; - } - - end = dom_string_data(content) + dom_string_byte_length(content); - - /* content := *LWS intpart fracpart? *LWS [';' *LWS *1url *LWS] - * intpart := 1*DIGIT - * fracpart := 1*('.' | DIGIT) - * url := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq) - * url-nq := *urlchar - * url-sq := "'" *(urlchar | '"') "'" - * url-dq := '"' *(urlchar | "'") '"' - * urlchar := [#x9#x21#x23-#x26#x28-#x7E] | nonascii - * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF] - */ - - url = dom_string_data(content); - - /* *LWS */ - while (url < end && isspace(*url)) { - url++; - } - - /* intpart */ - if (url == end || (*url < '0' || '9' < *url)) { - /* Empty content, or invalid timeval */ - dom_string_unref(content); - return NSERROR_OK; - } - - msg_data.delay = (int) strtol(url, &new_url, 10); - /* a very small delay and self-referencing URL can cause a loop - * that grinds machines to a halt. To prevent this we set a - * minimum refresh delay of 1s. */ - if (msg_data.delay < 1) { - msg_data.delay = 1; - } - - url = new_url; - - /* fracpart? (ignored, as delay is integer only) */ - while (url < end && (('0' <= *url && *url <= '9') || - *url == '.')) { - url++; - } - - /* *LWS */ - while (url < end && isspace(*url)) { - url++; - } - - /* ';' */ - if (url < end && *url == ';') - url++; - - /* *LWS */ - while (url < end && isspace(*url)) { - url++; - } - - if (url == end) { - /* Just delay specified, so refresh current page */ - dom_string_unref(content); - - c->base.refresh = nsurl_ref( - content_get_url(&c->base)); - - content_broadcast(&c->base, CONTENT_MSG_REFRESH, msg_data); - - return NSERROR_OK; - } - - /* "url" */ - if (url <= end - 3) { - if (strncasecmp(url, "url", 3) == 0) { - url += 3; - } else { - /* Unexpected input, ignore this header */ - dom_string_unref(content); - return NSERROR_OK; - } - } else { - /* Insufficient input, ignore this header */ - dom_string_unref(content); - return NSERROR_OK; - } - - /* *LWS */ - while (url < end && isspace(*url)) { - url++; - } - - /* '=' */ - if (url < end) { - if (*url == '=') { - url++; - } else { - /* Unexpected input, ignore this header */ - dom_string_unref(content); - return NSERROR_OK; - } - } else { - /* Insufficient input, ignore this header */ - dom_string_unref(content); - return NSERROR_OK; - } - - /* *LWS */ - while (url < end && isspace(*url)) { - url++; - } - - /* '"' or "'" */ - if (url < end && (*url == '"' || *url == '\'')) { - quote = *url; - url++; - } - - /* Start of URL */ - refresh = url; - - if (quote != 0) { - /* url-sq | url-dq */ - while (url < end && *url != quote) - url++; - } else { - /* url-nq */ - while (url < end && !isspace(*url)) - url++; - } - - /* '"' or "'" or *LWS (we don't care) */ - if (url > refresh) { - /* There's a URL */ - new_url = strndup(refresh, url - refresh); - if (new_url == NULL) { - dom_string_unref(content); - return NSERROR_NOMEM; - } - - error = nsurl_join(c->base_url, new_url, &nsurl); - if (error == NSERROR_OK) { - /* broadcast valid refresh url */ - - c->base.refresh = nsurl; - - content_broadcast(&c->base, CONTENT_MSG_REFRESH, msg_data); - } - - free(new_url); - - } - - dom_string_unref(content); - - return error; -} - -/** - * Search for meta refresh - * - * http://wp.netscape.com/assist/net_sites/pushpull.html - * - * \param c content structure - * \param head xml node of head element - * \return true on success, false otherwise (error reported) - */ - -static nserror html_meta_refresh(html_content *c, dom_node *head) -{ - dom_node *n, *next; - dom_exception exc; - nserror ns_error = NSERROR_OK; - - if (head == NULL) { - return ns_error; - } - - exc = dom_node_get_first_child(head, &n); - if (exc != DOM_NO_ERR) { - return NSERROR_DOM; - } - - while (n != NULL) { - dom_node_type type; - - exc = dom_node_get_node_type(n, &type); - if (exc != DOM_NO_ERR) { - dom_node_unref(n); - return NSERROR_DOM; - } - - if (type == DOM_ELEMENT_NODE) { - dom_string *name; - - exc = dom_node_get_node_name(n, &name); - if (exc != DOM_NO_ERR) { - dom_node_unref(n); - return NSERROR_DOM; - } - - /* Recurse into noscript elements */ - if (dom_string_caseless_lwc_isequal(name, corestring_lwc_noscript)) { - ns_error = html_meta_refresh(c, n); - if (ns_error != NSERROR_OK) { - /* Some error occurred */ - dom_string_unref(name); - dom_node_unref(n); - return ns_error; - } else if (c->base.refresh != NULL) { - /* Meta refresh found - stop */ - dom_string_unref(name); - dom_node_unref(n); - return NSERROR_OK; - } - } else if (dom_string_caseless_lwc_isequal(name, corestring_lwc_meta)) { - ns_error = html_meta_refresh_process_element(c, n); - if (ns_error != NSERROR_OK) { - /* Some error occurred */ - dom_string_unref(name); - dom_node_unref(n); - return ns_error; - } else if (c->base.refresh != NULL) { - /* Meta refresh found - stop */ - dom_string_unref(name); - dom_node_unref(n); - return NSERROR_OK; - } - } - dom_string_unref(name); - } - - exc = dom_node_get_next_sibling(n, &next); - if (exc != DOM_NO_ERR) { - dom_node_unref(n); - return NSERROR_DOM; - } - - dom_node_unref(n); - n = next; - } - - return ns_error; -} - - - - /** @@ -1231,16 +1151,6 @@ html_begin_conversion(html_content *htmlc) dom_node_unref(head); return false; } - - /* handle meta refresh */ - ns_error = html_meta_refresh(htmlc, head); - if (ns_error != NSERROR_OK) { - content_broadcast_errorcode(&htmlc->base, ns_error); - - dom_node_unref(html); - dom_node_unref(head); - return false; - } } /* Retrieve forms from parser */ diff --git a/render/html_internal.h b/render/html_internal.h index b491f197a..7c30fdda0 100644 --- a/render/html_internal.h +++ b/render/html_internal.h @@ -90,6 +90,9 @@ typedef struct html_content { /** Content has been aborted in the LOADING state */ bool aborted; + /** Whether a meta refresh has been handled */ + bool refresh; + /** A talloc context purely for the render box tree */ int *bctx; /** Box tree, or NULL. */