From f2743a7d70e7b2891277632121bb51e739743a47 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 26 Jan 2024 10:15:32 +0900 Subject: [PATCH] Revert "Add support for parsing of large XML data (>= 10MB)" This reverts commit 2197d06224a1, following a discussion over a Coverity report where issues like the "Billion laughs attack" could cause the backend to waste CPU and memory even if a client applied checks on the size of the data given as input, and libxml2 does not offer guarantees that input limits are respected under XML_PARSE_HUGE. Discussion: https://postgr.es/m/ZbHlgrPLtBZyr_QW@paquier.xyz --- contrib/xml2/xpath.c | 4 ++-- contrib/xml2/xslt_proc.c | 4 ++-- src/backend/utils/adt/xml.c | 38 +++++++++---------------------------- 3 files changed, 13 insertions(+), 33 deletions(-) diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c index a2cec95f3f..a967257546 100644 --- a/contrib/xml2/xpath.c +++ b/contrib/xml2/xpath.c @@ -381,7 +381,7 @@ pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace) { workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document), docsize, NULL, NULL, - XML_PARSE_HUGE | XML_PARSE_NOENT); + XML_PARSE_NOENT); if (workspace->doctree != NULL) { workspace->ctxt = xmlXPathNewContext(workspace->doctree); @@ -626,7 +626,7 @@ xpath_table(PG_FUNCTION_ARGS) if (xmldoc) doctree = xmlReadMemory(xmldoc, strlen(xmldoc), NULL, NULL, - XML_PARSE_HUGE | XML_PARSE_NOENT); + XML_PARSE_NOENT); else /* treat NULL as not well-formed */ doctree = NULL; diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c index 9cbc05db1a..f30a3a42c0 100644 --- a/contrib/xml2/xslt_proc.c +++ b/contrib/xml2/xslt_proc.c @@ -87,7 +87,7 @@ xslt_process(PG_FUNCTION_ARGS) /* Parse document */ doctree = xmlReadMemory((char *) VARDATA_ANY(doct), VARSIZE_ANY_EXHDR(doct), NULL, NULL, - XML_PARSE_HUGE | XML_PARSE_NOENT); + XML_PARSE_NOENT); if (doctree == NULL) xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, @@ -96,7 +96,7 @@ 
xslt_process(PG_FUNCTION_ARGS) /* Same for stylesheet */ ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet), VARSIZE_ANY_EXHDR(ssheet), NULL, NULL, - XML_PARSE_HUGE | XML_PARSE_NOENT); + XML_PARSE_NOENT); if (ssdoc == NULL) xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index d3db75eb87..f869c680af 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -1688,8 +1688,8 @@ xml_doctype_in_content(const xmlChar *str) * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode). * * If parsed_nodes isn't NULL and the input is not an XML document, the list - * of parsed nodes from the xmlParseInNodeContext call will be returned to - * *parsed_nodes. + * of parsed nodes from the xmlParseBalancedChunkMemory call will be returned + * to *parsed_nodes. * * Errors normally result in ereport(ERROR), but if escontext is an * ErrorSaveContext, then "safe" errors are reported there instead, and the @@ -1795,7 +1795,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, doc = xmlCtxtReadDoc(ctxt, utf8string, NULL, "UTF-8", - XML_PARSE_NOENT | XML_PARSE_DTDATTR | XML_PARSE_HUGE + XML_PARSE_NOENT | XML_PARSE_DTDATTR | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS)); if (doc == NULL || xmlerrcxt->err_occurred) { @@ -1828,30 +1828,10 @@ xml_parse(text *data, XmlOptionType xmloption_arg, /* allow empty content */ if (*(utf8string + count)) { - const char *data; - xmlNodePtr root; - xmlNodePtr lst; - xmlParserErrors xml_error; - - data = (const char *) (utf8string + count); - - /* - * Create a fake root node. The xmlNewDoc() function creates - * an XML document without any nodes, and this is required for - * xmlParseInNodeContext() that is able to handle - * XML_PARSE_HUGE. 
- */ - root = xmlNewNode(NULL, (const xmlChar *) "content-root"); - if (root == NULL || xmlerrcxt->err_occurred) - xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, - "could not allocate xml node"); - xmlDocSetRootElement(doc, root); - - /* Try to parse string with using root node context. */ - xml_error = xmlParseInNodeContext(root, data, strlen(data), - XML_PARSE_HUGE, - parsed_nodes ? parsed_nodes : &lst); - if (xml_error != XML_ERR_OK || xmlerrcxt->err_occurred) + res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, + utf8string + count, + parsed_nodes); + if (res_code != 0 || xmlerrcxt->err_occurred) { xml_errsave(escontext, xmlerrcxt, ERRCODE_INVALID_XML_CONTENT, @@ -4364,7 +4344,7 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate parser context"); doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len, - len - xmldecl_len, NULL, NULL, XML_PARSE_HUGE); + len - xmldecl_len, NULL, NULL, 0); if (doc == NULL || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, "could not parse XML document"); @@ -4695,7 +4675,7 @@ XmlTableSetDocument(TableFuncScanState *state, Datum value) PG_TRY(); { - doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, XML_PARSE_HUGE); + doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0); if (doc == NULL || xtCxt->xmlerrcxt->err_occurred) xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, "could not parse XML document");