From a134baea7ab0cc907b9d9d3d16f9ac97ec8200be Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 10 Jul 2024 20:15:52 -0400 Subject: [PATCH] Make our back branches compatible with libxml2 2.13.x. This back-patches HEAD commits 066e8ac6e, 6082b3d5d, e7192486d, and 896cd266f into supported branches. Changes: * Use xmlAddChildList not xmlAddChild in XMLSERIALIZE (affects v16 and up only). This was a flat-out coding mistake that we got away with due to lax checking in previous versions of xmlAddChild. * Use xmlParseInNodeContext not xmlParseBalancedChunkMemory. This is to dodge a bug in xmlParseBalancedChunkMemory in libxml2 releases 2.13.0-2.13.2. While that bug is now fixed upstream and will probably never be seen in any production-oriented distro, it is currently a problem on some more-bleeding-edge-friendly platforms. * Suppress "chunk is not well balanced" errors from libxml2, unless it is the only error. This eliminates an error-reporting discrepancy between 2.13 and older releases. This error is almost always redundant with previous errors, if not flat-out inappropriate, which is why 2.13 changed the behavior and why nobody's likely to miss it. Erik Wienhold and Tom Lane, per report from Frank Streitzig. 
Discussion: https://postgr.es/m/trinity-b0161630-d230-4598-9ebc-7a23acdb37cb-1720186432160@3c-app-gmx-bap25 Discussion: https://postgr.es/m/trinity-361ba18b-541a-4fe7-bc63-655ae3a7d599-1720259822452@3c-app-gmx-bs01 --- src/backend/utils/adt/xml.c | 77 ++++++++++++++++++++++------- src/test/regress/expected/xml.out | 9 ---- src/test/regress/expected/xml_2.out | 3 -- 3 files changed, 58 insertions(+), 31 deletions(-) diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 51b16f2b73..bce3728c18 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -1547,6 +1547,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, PG_TRY(); { bool parse_as_document = false; + int options; int res_code; size_t count = 0; xmlChar *version = NULL; @@ -1554,11 +1555,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, xmlInitParser(); - ctxt = xmlNewParserCtxt(); - if (ctxt == NULL || xmlerrcxt->err_occurred) - xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, - "could not allocate parser context"); - /* Decide whether to parse as document or content */ if (xmloption_arg == XMLOPTION_DOCUMENT) parse_as_document = true; @@ -1577,20 +1573,30 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, parse_as_document = true; } + /* + * Select parse options. + * + * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR) + * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by + * internal DTD are applied'. As for external DTDs, we try to support + * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really + * happen because xmlPgEntityLoader prevents it. + */ + options = XML_PARSE_NOENT | XML_PARSE_DTDATTR + | (preserve_whitespace ? 
0 : XML_PARSE_NOBLANKS); + if (parse_as_document) { - /* - * Note, that here we try to apply DTD defaults - * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d: - * 'Default values defined by internal DTD are applied'. As for - * external DTDs, we try to support them too, (see SQL/XML:2008 GR - * 10.16.7.e) - */ + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate parser context"); + doc = xmlCtxtReadDoc(ctxt, utf8string, - NULL, + NULL, /* no URL */ "UTF-8", - XML_PARSE_NOENT | XML_PARSE_DTDATTR - | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS)); + options); + if (doc == NULL || xmlerrcxt->err_occurred) { /* Use original option to decide which error code to throw */ @@ -1604,17 +1610,36 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, } else { + xmlNodePtr root; + + /* set up document with empty root node to be the context node */ doc = xmlNewDoc(version); Assert(doc->encoding == NULL); doc->encoding = xmlStrdup((const xmlChar *) "UTF-8"); doc->standalone = standalone; + root = xmlNewNode(NULL, (const xmlChar *) "content-root"); + if (root == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xml node"); + /* This attaches root to doc, so we need not free it separately. 
*/ + xmlDocSetRootElement(doc, root); + /* allow empty content */ if (*(utf8string + count)) { - res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, - utf8string + count, NULL); - if (res_code != 0 || xmlerrcxt->err_occurred) + xmlNodePtr node_list = NULL; + xmlParserErrors res; + + res = xmlParseInNodeContext(root, + (char *) utf8string + count, + strlen((char *) utf8string + count), + options, + &node_list); + + xmlFreeNodeList(node_list); + + if (res != XML_ERR_OK || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT, "invalid XML content"); } @@ -1633,7 +1658,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, } PG_END_TRY(); - xmlFreeParserCtxt(ctxt); + if (ctxt != NULL) + xmlFreeParserCtxt(ctxt); pg_xml_done(xmlerrcxt, false); @@ -1814,6 +1840,19 @@ xml_errorHandler(void *data, PgXmlErrorPtr error) switch (domain) { case XML_FROM_PARSER: + + /* + * XML_ERR_NOT_WELL_BALANCED is typically reported after some + * other, more on-point error. Furthermore, libxml2 2.13 reports + * it under a completely different set of rules than prior + * versions. To avoid cross-version behavioral differences, + * suppress it so long as we already logged some error. 
+ */ + if (error->code == XML_ERR_NOT_WELL_BALANCED && + xmlerrcxt->err_occurred) + return; + /* fall through */ + case XML_FROM_NONE: case XML_FROM_MEMORY: case XML_FROM_IO: diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out index 55ac49be26..5b6ae62c04 100644 --- a/src/test/regress/expected/xml.out +++ b/src/test/regress/expected/xml.out @@ -223,17 +223,11 @@ ERROR: invalid XML content DETAIL: line 1: xmlParseEntityRef: no name & ^ -line 1: chunk is not well balanced -& - ^ SELECT xmlparse(content '&idontexist;'); ERROR: invalid XML content DETAIL: line 1: Entity 'idontexist' not defined &idontexist; ^ -line 1: chunk is not well balanced -&idontexist; - ^ SELECT xmlparse(content ''); xmlparse --------------------------- @@ -252,9 +246,6 @@ DETAIL: line 1: Entity 'idontexist' not defined &idontexist; ^ line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced -&idontexist; - ^ -line 1: chunk is not well balanced &idontexist; ^ SELECT xmlparse(content ''); diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out index 493c6186e1..6638458ba2 100644 --- a/src/test/regress/expected/xml_2.out +++ b/src/test/regress/expected/xml_2.out @@ -219,13 +219,11 @@ ERROR: invalid XML content DETAIL: line 1: xmlParseEntityRef: no name & ^ -line 1: chunk is not well balanced SELECT xmlparse(content '&idontexist;'); ERROR: invalid XML content DETAIL: line 1: Entity 'idontexist' not defined &idontexist; ^ -line 1: chunk is not well balanced SELECT xmlparse(content ''); xmlparse --------------------------- @@ -244,7 +242,6 @@ DETAIL: line 1: Entity 'idontexist' not defined &idontexist; ^ line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced -line 1: chunk is not well balanced SELECT xmlparse(content ''); xmlparse ---------------------