Use xmlParseInNodeContext not xmlParseBalancedChunkMemory.

xmlParseInNodeContext has basically the same functionality with
a different API: we have to supply an xmlNode that's attached to a
document rather than just the document.  That's not hard though.
The benefits are two:

* Early 2.13.x releases of libxml2 contain a bug that causes
xmlParseBalancedChunkMemory to return the wrong status value in some
cases.  This breaks our regression tests.  While that bug is now fixed
upstream and will probably never be seen in any production-oriented
distro, it is currently a problem on some more-bleeding-edge-friendly
platforms.

* xmlParseBalancedChunkMemory is considered to depend on libxml2's
semi-deprecated SAX1 APIs, and will go away when and if they do.
There may already be libxml2 builds out there that lack this function.

So there are both short- and long-term reasons to make this change.

While here, avoid allocating an xmlParserCtxt in DOCUMENT parse mode,
since that code path is not going to use it.

Like 066e8ac6e, this will need to be back-patched.  This is just a
trial commit to see if the buildfarm agrees that we can use
xmlParseInNodeContext unconditionally.

Erik Wienhold and Tom Lane, per report from Frank Streitzig.

Discussion: https://postgr.es/m/trinity-b0161630-d230-4598-9ebc-7a23acdb37cb-1720186432160@3c-app-gmx-bap25
Discussion: https://postgr.es/m/trinity-361ba18b-541a-4fe7-bc63-655ae3a7d599-1720259822452@3c-app-gmx-bs01
This commit is contained in:
Tom Lane 2024-07-08 14:04:00 -04:00
parent 1ff39f4ff2
commit 6082b3d5d3
1 changed files with 52 additions and 23 deletions

View File

@ -1696,9 +1696,9 @@ xml_doctype_in_content(const xmlChar *str)
* XmlOptionType actually used to parse the input (typically the same as
* xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
*
* If parsed_nodes isn't NULL and the input is not an XML document, the list
* of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
* to *parsed_nodes.
* If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
* of parsed nodes from the xmlParseInNodeContext call will be returned
* to *parsed_nodes. (It is caller's responsibility to free that.)
*
* Errors normally result in ereport(ERROR), but if escontext is an
* ErrorSaveContext, then "safe" errors are reported there instead, and the
@ -1750,6 +1750,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
PG_TRY();
{
bool parse_as_document = false;
int options;
int res_code;
size_t count = 0;
xmlChar *version = NULL;
@ -1758,11 +1759,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
/* Any errors here are reported as hard ereport's */
xmlInitParser();
ctxt = xmlNewParserCtxt();
if (ctxt == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate parser context");
/* Decide whether to parse as document or content */
if (xmloption_arg == XMLOPTION_DOCUMENT)
parse_as_document = true;
@ -1785,6 +1781,18 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
parse_as_document = true;
}
/*
* Select parse options.
*
* Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
* according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
* internal DTD are applied'. As for external DTDs, we try to support
* them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
* happen because xmlPgEntityLoader prevents it.
*/
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
/* initialize output parameters */
if (parsed_xmloptiontype != NULL)
*parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
@ -1794,18 +1802,16 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
if (parse_as_document)
{
/*
* Note, that here we try to apply DTD defaults
* (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
* 'Default values defined by internal DTD are applied'. As for
* external DTDs, we try to support them too, (see SQL/XML:2008 GR
* 10.16.7.e)
*/
ctxt = xmlNewParserCtxt();
if (ctxt == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate parser context");
doc = xmlCtxtReadDoc(ctxt, utf8string,
NULL,
NULL, /* no URL */
"UTF-8",
XML_PARSE_NOENT | XML_PARSE_DTDATTR
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
options);
if (doc == NULL || xmlerrcxt->err_occurred)
{
/* Use original option to decide which error code to report */
@ -1822,6 +1828,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
}
else
{
xmlNodePtr root;
/* set up document with empty root node to be the context node */
doc = xmlNewDoc(version);
if (doc == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
@ -1834,19 +1843,38 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
"could not allocate XML document");
doc->standalone = standalone;
root = xmlNewNode(NULL, (const xmlChar *) "content-root");
if (root == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate xml node");
/* This attaches root to doc, so we need not free it separately. */
xmlDocSetRootElement(doc, root);
/* allow empty content */
if (*(utf8string + count))
{
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
utf8string + count,
parsed_nodes);
if (res_code != 0 || xmlerrcxt->err_occurred)
xmlNodePtr node_list = NULL;
xmlParserErrors res;
res = xmlParseInNodeContext(root,
(char *) utf8string + count,
strlen((char *) utf8string + count),
options,
&node_list);
if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
{
xmlFreeNodeList(node_list);
xml_errsave(escontext, xmlerrcxt,
ERRCODE_INVALID_XML_CONTENT,
"invalid XML content");
goto fail;
}
if (parsed_nodes != NULL)
*parsed_nodes = node_list;
else
xmlFreeNodeList(node_list);
}
}
@ -1866,7 +1894,8 @@ fail:
}
PG_END_TRY();
xmlFreeParserCtxt(ctxt);
if (ctxt != NULL)
xmlFreeParserCtxt(ctxt);
pg_xml_done(xmlerrcxt, false);