Revert "Add support for parsing of large XML data (>= 10MB)"
This reverts commit 2197d06224a1, following a discussion over a Coverity report: issues like the "billion laughs" attack could cause the backend to waste CPU and memory even if a client applied checks on the size of the input data, and libxml2 does not guarantee that input limits are respected under XML_PARSE_HUGE. Discussion: https://postgr.es/m/ZbHlgrPLtBZyr_QW@paquier.xyz
This commit is contained in:
parent
376c216138
commit
f2743a7d70
@ -381,7 +381,7 @@ pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace)
|
|||||||
{
|
{
|
||||||
workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document),
|
workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document),
|
||||||
docsize, NULL, NULL,
|
docsize, NULL, NULL,
|
||||||
XML_PARSE_HUGE | XML_PARSE_NOENT);
|
XML_PARSE_NOENT);
|
||||||
if (workspace->doctree != NULL)
|
if (workspace->doctree != NULL)
|
||||||
{
|
{
|
||||||
workspace->ctxt = xmlXPathNewContext(workspace->doctree);
|
workspace->ctxt = xmlXPathNewContext(workspace->doctree);
|
||||||
@ -626,7 +626,7 @@ xpath_table(PG_FUNCTION_ARGS)
|
|||||||
if (xmldoc)
|
if (xmldoc)
|
||||||
doctree = xmlReadMemory(xmldoc, strlen(xmldoc),
|
doctree = xmlReadMemory(xmldoc, strlen(xmldoc),
|
||||||
NULL, NULL,
|
NULL, NULL,
|
||||||
XML_PARSE_HUGE | XML_PARSE_NOENT);
|
XML_PARSE_NOENT);
|
||||||
else /* treat NULL as not well-formed */
|
else /* treat NULL as not well-formed */
|
||||||
doctree = NULL;
|
doctree = NULL;
|
||||||
|
|
||||||
|
@ -87,7 +87,7 @@ xslt_process(PG_FUNCTION_ARGS)
|
|||||||
/* Parse document */
|
/* Parse document */
|
||||||
doctree = xmlReadMemory((char *) VARDATA_ANY(doct),
|
doctree = xmlReadMemory((char *) VARDATA_ANY(doct),
|
||||||
VARSIZE_ANY_EXHDR(doct), NULL, NULL,
|
VARSIZE_ANY_EXHDR(doct), NULL, NULL,
|
||||||
XML_PARSE_HUGE | XML_PARSE_NOENT);
|
XML_PARSE_NOENT);
|
||||||
|
|
||||||
if (doctree == NULL)
|
if (doctree == NULL)
|
||||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
|
xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
|
||||||
@ -96,7 +96,7 @@ xslt_process(PG_FUNCTION_ARGS)
|
|||||||
/* Same for stylesheet */
|
/* Same for stylesheet */
|
||||||
ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet),
|
ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet),
|
||||||
VARSIZE_ANY_EXHDR(ssheet), NULL, NULL,
|
VARSIZE_ANY_EXHDR(ssheet), NULL, NULL,
|
||||||
XML_PARSE_HUGE | XML_PARSE_NOENT);
|
XML_PARSE_NOENT);
|
||||||
|
|
||||||
if (ssdoc == NULL)
|
if (ssdoc == NULL)
|
||||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
|
xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
|
||||||
|
@ -1688,8 +1688,8 @@ xml_doctype_in_content(const xmlChar *str)
|
|||||||
* xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
|
* xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
|
||||||
*
|
*
|
||||||
* If parsed_nodes isn't NULL and the input is not an XML document, the list
|
* If parsed_nodes isn't NULL and the input is not an XML document, the list
|
||||||
* of parsed nodes from the xmlParseInNodeContext call will be returned to
|
* of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
|
||||||
* *parsed_nodes.
|
* to *parsed_nodes.
|
||||||
*
|
*
|
||||||
* Errors normally result in ereport(ERROR), but if escontext is an
|
* Errors normally result in ereport(ERROR), but if escontext is an
|
||||||
* ErrorSaveContext, then "safe" errors are reported there instead, and the
|
* ErrorSaveContext, then "safe" errors are reported there instead, and the
|
||||||
@ -1795,7 +1795,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
|
|||||||
doc = xmlCtxtReadDoc(ctxt, utf8string,
|
doc = xmlCtxtReadDoc(ctxt, utf8string,
|
||||||
NULL,
|
NULL,
|
||||||
"UTF-8",
|
"UTF-8",
|
||||||
XML_PARSE_NOENT | XML_PARSE_DTDATTR | XML_PARSE_HUGE
|
XML_PARSE_NOENT | XML_PARSE_DTDATTR
|
||||||
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
|
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
|
||||||
if (doc == NULL || xmlerrcxt->err_occurred)
|
if (doc == NULL || xmlerrcxt->err_occurred)
|
||||||
{
|
{
|
||||||
@ -1828,30 +1828,10 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
|
|||||||
/* allow empty content */
|
/* allow empty content */
|
||||||
if (*(utf8string + count))
|
if (*(utf8string + count))
|
||||||
{
|
{
|
||||||
const char *data;
|
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
|
||||||
xmlNodePtr root;
|
utf8string + count,
|
||||||
xmlNodePtr lst;
|
parsed_nodes);
|
||||||
xmlParserErrors xml_error;
|
if (res_code != 0 || xmlerrcxt->err_occurred)
|
||||||
|
|
||||||
data = (const char *) (utf8string + count);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Create a fake root node. The xmlNewDoc() function creates
|
|
||||||
* an XML document without any nodes, and this is required for
|
|
||||||
* xmlParseInNodeContext() that is able to handle
|
|
||||||
* XML_PARSE_HUGE.
|
|
||||||
*/
|
|
||||||
root = xmlNewNode(NULL, (const xmlChar *) "content-root");
|
|
||||||
if (root == NULL || xmlerrcxt->err_occurred)
|
|
||||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
|
|
||||||
"could not allocate xml node");
|
|
||||||
xmlDocSetRootElement(doc, root);
|
|
||||||
|
|
||||||
/* Try to parse string with using root node context. */
|
|
||||||
xml_error = xmlParseInNodeContext(root, data, strlen(data),
|
|
||||||
XML_PARSE_HUGE,
|
|
||||||
parsed_nodes ? parsed_nodes : &lst);
|
|
||||||
if (xml_error != XML_ERR_OK || xmlerrcxt->err_occurred)
|
|
||||||
{
|
{
|
||||||
xml_errsave(escontext, xmlerrcxt,
|
xml_errsave(escontext, xmlerrcxt,
|
||||||
ERRCODE_INVALID_XML_CONTENT,
|
ERRCODE_INVALID_XML_CONTENT,
|
||||||
@ -4364,7 +4344,7 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
|
|||||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
|
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
|
||||||
"could not allocate parser context");
|
"could not allocate parser context");
|
||||||
doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
|
doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
|
||||||
len - xmldecl_len, NULL, NULL, XML_PARSE_HUGE);
|
len - xmldecl_len, NULL, NULL, 0);
|
||||||
if (doc == NULL || xmlerrcxt->err_occurred)
|
if (doc == NULL || xmlerrcxt->err_occurred)
|
||||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
|
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
|
||||||
"could not parse XML document");
|
"could not parse XML document");
|
||||||
@ -4695,7 +4675,7 @@ XmlTableSetDocument(TableFuncScanState *state, Datum value)
|
|||||||
|
|
||||||
PG_TRY();
|
PG_TRY();
|
||||||
{
|
{
|
||||||
doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, XML_PARSE_HUGE);
|
doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
|
||||||
if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
|
if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
|
||||||
xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
|
xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
|
||||||
"could not parse XML document");
|
"could not parse XML document");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user