Reparse the entire document if a meta charset resulting in a changed document encoding is encountered (fixes 1389126)

svn path=/trunk/netsurf/; revision=3176
This commit is contained in:
John Mark Bell 2007-02-10 21:34:22 +00:00
parent a9b34bd990
commit 3b40e0f5fc
1 changed files with 48 additions and 14 deletions

View File

@ -177,6 +177,53 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
}
htmlParseChunk(c->data.html.parser, data + x, (int) (size - x), 0);
if (!c->data.html.encoding && c->data.html.parser->input->encoding) {
/* The encoding was not in headers or detected,
* and the parser found a <meta http-equiv="content-type"
* content="text/html; charset=...">. */
c->data.html.encoding = talloc_strdup(c,
c->data.html.parser->input->encoding);
if (!c->data.html.encoding) {
union content_msg_data msg_data;
msg_data.error = messages_get("NoMemory");
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
return false;
}
c->data.html.encoding_source = ENCODING_SOURCE_META;
/* have the encoding; don't attempt to detect it */
c->data.html.getenc = false;
/* now, we must reset the parser such that it reparses
* using the correct charset, and then reparse any document
* source we've got. we achieve this by recreating the
* parser in its entirety as this is simpler than resetting
* the existing one and ensuring it's still set up correctly.
*/
if (c->data.html.parser->myDoc)
xmlFreeDoc(c->data.html.parser->myDoc);
htmlFreeParserCtxt(c->data.html.parser);
c->data.html.parser = htmlCreatePushParserCtxt(0, 0, "", 0,
0, XML_CHAR_ENCODING_NONE);
if (!c->data.html.parser) {
union content_msg_data msg_data;
msg_data.error = messages_get("NoMemory");
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
return false;
}
if (!html_set_parser_encoding(c, c->data.html.encoding))
return false;
/* and reparse received document source - the recursion
* is safe as we've just set c->data.html.encoding so
* we'll never get back in here. */
if (!html_process_data(c, c->source_data, c->source_size))
return false;
}
return true;
}
@ -235,7 +282,7 @@ bool html_set_parser_encoding(struct content *c, const char *encoding)
*/
if (!html->parser->input->encoding)
html->parser->input->encoding =
xmlStrdup((xmlChar *) encoding);
xmlStrdup((const xmlChar *) encoding);
/* Ensure noone else attempts to reset the encoding */
html->getenc = false;
@ -316,19 +363,6 @@ bool html_convert(struct content *c, int width, int height)
return false;
}
if (!c->data.html.encoding && document->encoding) {
/* The encoding was not in headers or detected, and the parser
* found a <meta http-equiv="content-type"
* content="text/html; charset=...">. */
c->data.html.encoding = talloc_strdup(c, document->encoding);
if (!c->data.html.encoding) {
msg_data.error = messages_get("NoMemory");
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
return false;
}
c->data.html.encoding_source = ENCODING_SOURCE_META;
}
/* locate html and head elements */
for (html = document->children;
html != 0 && html->type != XML_ELEMENT_NODE;