From 3b40e0f5fc824bdcea46ee4fce609c2511c6b1fd Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Sat, 10 Feb 2007 21:34:22 +0000 Subject: [PATCH] Reparse entire document if meta charset resulting in changed document encoding is encountered (fixes 1389126) svn path=/trunk/netsurf/; revision=3176 --- render/html.c | 62 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/render/html.c b/render/html.c index b0d0aef7f..46d98b81d 100644 --- a/render/html.c +++ b/render/html.c @@ -177,6 +177,53 @@ bool html_process_data(struct content *c, char *data, unsigned int size) } htmlParseChunk(c->data.html.parser, data + x, (int) (size - x), 0); + if (!c->data.html.encoding && c->data.html.parser->input->encoding) { + /* The encoding was not in headers or detected, + * and the parser found a . */ + c->data.html.encoding = talloc_strdup(c, + c->data.html.parser->input->encoding); + if (!c->data.html.encoding) { + union content_msg_data msg_data; + + msg_data.error = messages_get("NoMemory"); + content_broadcast(c, CONTENT_MSG_ERROR, msg_data); + return false; + } + c->data.html.encoding_source = ENCODING_SOURCE_META; + + /* have the encoding; don't attempt to detect it */ + c->data.html.getenc = false; + + /* now, we must reset the parser such that it reparses + * using the correct charset, and then reparse any document + * source we've got. we achieve this by recreating the + * parser in its entirety as this is simpler than resetting + * the existing one and ensuring it's still set up correctly. + */ + if (c->data.html.parser->myDoc) + xmlFreeDoc(c->data.html.parser->myDoc); + htmlFreeParserCtxt(c->data.html.parser); + + c->data.html.parser = htmlCreatePushParserCtxt(0, 0, "", 0, + 0, XML_CHAR_ENCODING_NONE); + if (!c->data.html.parser) { + union content_msg_data msg_data; + + msg_data.error = messages_get("NoMemory"); + content_broadcast(c, CONTENT_MSG_ERROR, msg_data); + return false; + } + if (!html_set_parser_encoding(c, c->data.html.encoding)) + return false; + + /* and reparse received document source - the recursion + * is safe as we've just set c->data.html.encoding so + * we'll never get back in here. */ + if (!html_process_data(c, c->source_data, c->source_size)) + return false; + } + return true; } @@ -235,7 +282,7 @@ bool html_set_parser_encoding(struct content *c, const char *encoding) */ if (!html->parser->input->encoding) html->parser->input->encoding = - xmlStrdup((xmlChar *) encoding); + xmlStrdup((const xmlChar *) encoding); /* Ensure noone else attempts to reset the encoding */ html->getenc = false; @@ -316,19 +363,6 @@ bool html_convert(struct content *c, int width, int height) return false; } - if (!c->data.html.encoding && document->encoding) { - /* The encoding was not in headers or detected, and the parser - * found a . */ - c->data.html.encoding = talloc_strdup(c, document->encoding); - if (!c->data.html.encoding) { - msg_data.error = messages_get("NoMemory"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - return false; - } - c->data.html.encoding_source = ENCODING_SOURCE_META; - } - /* locate html and head elements */ for (html = document->children; html != 0 && html->type != XML_ELEMENT_NODE;