diff --git a/configure b/configure index 9b8cb48931..ba01548186 100755 --- a/configure +++ b/configure @@ -23655,6 +23655,75 @@ fi +# Older versions of libxml2 lack the xmlStructuredErrorContext variable +# (which could be a macro referring to a function, if threading is enabled) +if test "$with_libxml" = yes ; then + { $as_echo "$as_me:$LINENO: checking for xmlStructuredErrorContext" >&5 +$as_echo_n "checking for xmlStructuredErrorContext... " >&6; } +if test "${pgac_cv_libxml_structerrctx+set}" = set; then + $as_echo_n "(cached) " >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + void *globptr; +int +main () +{ +globptr = xmlStructuredErrorContext + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + $as_test_x conftest$ac_exeext + }; then + pgac_cv_libxml_structerrctx=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + pgac_cv_libxml_structerrctx=no +fi + +rm -rf conftest.dSYM +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:$LINENO: result: $pgac_cv_libxml_structerrctx" >&5 +$as_echo "$pgac_cv_libxml_structerrctx" >&6; } + if test x"$pgac_cv_libxml_structerrctx" = x"yes"; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_XMLSTRUCTUREDERRORCONTEXT 1 +_ACEOF + + fi +fi + + # This test makes sure that run tests work at all. Sometimes a shared # library is found by the linker, but the runtime linker can't find it. # This check should come after all modifications of compiler or linker diff --git a/configure.in b/configure.in index e873c7b157..b1f15f131f 100644 --- a/configure.in +++ b/configure.in @@ -1519,6 +1519,23 @@ AC_SUBST(LDAP_LIBS_FE) AC_SUBST(LDAP_LIBS_BE) +# Older versions of libxml2 lack the xmlStructuredErrorContext variable +# (which could be a macro referring to a function, if threading is enabled) +if test "$with_libxml" = yes ; then + AC_CACHE_CHECK([for xmlStructuredErrorContext], pgac_cv_libxml_structerrctx, + [AC_TRY_LINK([#include + void *globptr;], + [globptr = xmlStructuredErrorContext], + [pgac_cv_libxml_structerrctx=yes], + [pgac_cv_libxml_structerrctx=no])]) + if test x"$pgac_cv_libxml_structerrctx" = x"yes"; then + AC_DEFINE(HAVE_XMLSTRUCTUREDERRORCONTEXT, + 1, + [Define to 1 if your libxml has xmlStructuredErrorContext.]) + fi +fi + + # This test makes sure that run tests work at all. Sometimes a shared # library is found by the linker, but the runtime linker can't find it. # This check should come after all modifications of compiler or linker diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c index 44c600e134..2ddee59fcb 100644 --- a/contrib/xml2/xpath.c +++ b/contrib/xml2/xpath.c @@ -38,7 +38,7 @@ Datum xpath_table(PG_FUNCTION_ARGS); /* exported for use by xslt_proc.c */ -void pgxml_parser_init(void); +PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness); /* workspace for pgxml_xpath() */ @@ -68,18 +68,27 @@ static void cleanup_workspace(xpath_workspace *workspace); /* * Initialize for xml parsing. + * + * As with the underlying pg_xml_init function, calls to this MUST be followed + * by a PG_TRY block that guarantees that pg_xml_done is called. */ -void -pgxml_parser_init(void) +PgXmlErrorContext * +pgxml_parser_init(PgXmlStrictness strictness) { + PgXmlErrorContext *xmlerrcxt; + /* Set up error handling (we share the core's error handler) */ - pg_xml_init(); + xmlerrcxt = pg_xml_init(strictness); + + /* Note: we're assuming an elog cannot be thrown by the following calls */ /* Initialize libxml */ xmlInitParser(); xmlSubstituteEntitiesDefault(1); xmlLoadExtDtdDefaultValue = 1; + + return xmlerrcxt; } @@ -98,16 +107,33 @@ Datum xml_is_well_formed(PG_FUNCTION_ARGS) { text *t = PG_GETARG_TEXT_P(0); /* document buffer */ + bool result = false; int32 docsize = VARSIZE(t) - VARHDRSZ; xmlDocPtr doctree; + PgXmlErrorContext *xmlerrcxt; - pgxml_parser_init(); + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); - doctree = xmlParseMemory((char *) VARDATA(t), docsize); - if (doctree == NULL) - PG_RETURN_BOOL(false); /* i.e. not well-formed */ - xmlFreeDoc(doctree); - PG_RETURN_BOOL(true); + PG_TRY(); + { + doctree = xmlParseMemory((char *) VARDATA(t), docsize); + + result = (doctree != NULL); + + if (doctree != NULL) + xmlFreeDoc(doctree); + } + PG_CATCH(); + { + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + pg_xml_done(xmlerrcxt, false); + + PG_RETURN_BOOL(result); } @@ -399,41 +425,52 @@ static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace) { int32 docsize = VARSIZE(document) - VARHDRSZ; - xmlXPathObjectPtr res; + PgXmlErrorContext *xmlerrcxt; xmlXPathCompExprPtr comppath; workspace->doctree = NULL; workspace->ctxt = NULL; workspace->res = NULL; - pgxml_parser_init(); + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); - workspace->doctree = xmlParseMemory((char *) VARDATA(document), docsize); - if (workspace->doctree == NULL) - return NULL; /* not well-formed */ + PG_TRY(); + { + workspace->doctree = xmlParseMemory((char *) VARDATA(document), + docsize); + if (workspace->doctree != NULL) + { + workspace->ctxt = xmlXPathNewContext(workspace->doctree); + workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree); - workspace->ctxt = xmlXPathNewContext(workspace->doctree); - workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree); + /* compile the path */ + comppath = xmlXPathCompile(xpath); + if (comppath == NULL) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + "XPath Syntax Error"); - /* compile the path */ - comppath = xmlXPathCompile(xpath); - if (comppath == NULL) + /* Now evaluate the path expression. */ + workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt); + + xmlXPathFreeCompExpr(comppath); + } + } + PG_CATCH(); { cleanup_workspace(workspace); - xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, - "XPath Syntax Error"); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); } + PG_END_TRY(); - /* Now evaluate the path expression. */ - res = xmlXPathCompiledEval(comppath, workspace->ctxt); - workspace->res = res; - - xmlXPathFreeCompExpr(comppath); - - if (res == NULL) + if (workspace->res == NULL) cleanup_workspace(workspace); - return res; + pg_xml_done(xmlerrcxt, false); + + return workspace->res; } /* Clean up after processing the result of pgxml_xpath() */ @@ -534,6 +571,8 @@ xpath_table(PG_FUNCTION_ARGS) * document */ bool had_values; /* To determine end of nodeset results */ StringInfoData query_buf; + PgXmlErrorContext *xmlerrcxt; + volatile xmlDocPtr doctree = NULL; /* We only have a valid tuple description in table function mode */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) @@ -659,14 +698,15 @@ xpath_table(PG_FUNCTION_ARGS) * Setup the parser. This should happen after we are done evaluating the * query, in case it calls functions that set up libxml differently. */ - pgxml_parser_init(); + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); + PG_TRY(); + { /* For each row i.e. document returned from SPI */ for (i = 0; i < proc; i++) { char *pkey; char *xmldoc; - xmlDocPtr doctree; xmlXPathContextPtr ctxt; xmlXPathObjectPtr res; xmlChar *resstr; @@ -718,11 +758,9 @@ xpath_table(PG_FUNCTION_ARGS) /* compile the path */ comppath = xmlXPathCompile(xpaths[j]); if (comppath == NULL) - { - xmlFreeDoc(doctree); - xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + xml_ereport(xmlerrcxt, ERROR, + ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, "XPath Syntax Error"); - } /* Now evaluate the path expression. */ res = xmlXPathCompiledEval(comppath, ctxt); @@ -737,8 +775,7 @@ xpath_table(PG_FUNCTION_ARGS) if (res->nodesetval != NULL && rownr < res->nodesetval->nodeNr) { - resstr = - xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]); + resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]); had_values = true; } else @@ -776,13 +813,31 @@ xpath_table(PG_FUNCTION_ARGS) } while (had_values); } - xmlFreeDoc(doctree); + if (doctree != NULL) + xmlFreeDoc(doctree); + doctree = NULL; if (pkey) pfree(pkey); if (xmldoc) pfree(xmldoc); } + } + PG_CATCH(); + { + if (doctree != NULL) + xmlFreeDoc(doctree); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + if (doctree != NULL) + xmlFreeDoc(doctree); + + pg_xml_done(xmlerrcxt, false); tuplestore_donestoring(tupstore); diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c index f8f7d7263f..a8e481a3ce 100644 --- a/contrib/xml2/xslt_proc.c +++ b/contrib/xml2/xslt_proc.c @@ -38,7 +38,7 @@ Datum xslt_process(PG_FUNCTION_ARGS); #ifdef USE_LIBXSLT /* declarations to come from xpath.c */ -extern void pgxml_parser_init(void); +extern PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness); /* local defs */ static const char **parse_params(text *paramstr); @@ -56,13 +56,14 @@ xslt_process(PG_FUNCTION_ARGS) text *ssheet = PG_GETARG_TEXT_P(1); text *paramstr; const char **params; - xsltStylesheetPtr stylesheet = NULL; - xmlDocPtr doctree; - xmlDocPtr restree; - xmlDocPtr ssdoc = NULL; - xmlChar *resstr; - int resstat; - int reslen; + PgXmlErrorContext *xmlerrcxt; + volatile xsltStylesheetPtr stylesheet = NULL; + volatile xmlDocPtr doctree = NULL; + volatile xmlDocPtr restree = NULL; + volatile xmlDocPtr ssdoc = NULL; + volatile int resstat = -1; + xmlChar *resstr = NULL; + int reslen = 0; if (fcinfo->nargs == 3) { @@ -77,9 +78,11 @@ xslt_process(PG_FUNCTION_ARGS) } /* Setup parser */ - pgxml_parser_init(); + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); - /* Check to see if document is a file or a literal */ + PG_TRY(); + { + /* Check to see if document is a file or a literal */ if (VARDATA(doct)[0] == '<') doctree = xmlParseMemory((char *) VARDATA(doct), VARSIZE(doct) - VARHDRSZ); @@ -87,7 +90,7 @@ xslt_process(PG_FUNCTION_ARGS) doctree = xmlParseFile(text_to_cstring(doct)); if (doctree == NULL) - xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, "error parsing XML document"); /* Same for stylesheet */ @@ -96,35 +99,44 @@ xslt_process(PG_FUNCTION_ARGS) ssdoc = xmlParseMemory((char *) VARDATA(ssheet), VARSIZE(ssheet) - VARHDRSZ); if (ssdoc == NULL) - { - xmlFreeDoc(doctree); - xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, "error parsing stylesheet as XML document"); - } stylesheet = xsltParseStylesheetDoc(ssdoc); } else stylesheet = xsltParseStylesheetFile((xmlChar *) text_to_cstring(ssheet)); - if (stylesheet == NULL) - { - xmlFreeDoc(doctree); - xsltCleanupGlobals(); - xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, "failed to parse stylesheet"); - } restree = xsltApplyStylesheet(stylesheet, doctree, params); resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet); + } + PG_CATCH(); + { + if (stylesheet != NULL) + xsltFreeStylesheet(stylesheet); + if (restree != NULL) + xmlFreeDoc(restree); + if (doctree != NULL) + xmlFreeDoc(doctree); + xsltCleanupGlobals(); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); xsltFreeStylesheet(stylesheet); xmlFreeDoc(restree); xmlFreeDoc(doctree); - xsltCleanupGlobals(); + pg_xml_done(xmlerrcxt, false); + if (resstat < 0) PG_RETURN_NULL(); diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 945bc2901f..6786cd91bb 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -85,11 +85,27 @@ int xmloption; #ifdef USE_LIBXML -static StringInfo xml_err_buf = NULL; +/* random number to identify PgXmlErrorContext */ +#define ERRCXT_MAGIC 68275028 -static void xml_errorHandler(void *ctxt, const char *msg,...); +struct PgXmlErrorContext +{ + int magic; + /* strictness argument passed to pg_xml_init */ + PgXmlStrictness strictness; + /* current error status and accumulated message, if any */ + bool err_occurred; + StringInfoData err_buf; + /* previous libxml error handling state (saved by pg_xml_init) */ + xmlStructuredErrorFunc saved_errfunc; + void *saved_errcxt; +}; + +static void xml_errorHandler(void *data, xmlErrorPtr error); static void xml_ereport_by_code(int level, int sqlcode, const char *msg, int errcode); +static void chopStringInfoNewlines(StringInfo str); +static void appendStringInfoLineSeparator(StringInfo str); #ifdef USE_LIBXMLCONTEXT @@ -552,8 +568,9 @@ xmlelement(XmlExprState *xmlExpr, ExprContext *econtext) int i; ListCell *arg; ListCell *narg; - xmlBufferPtr buf = NULL; - xmlTextWriterPtr writer = NULL; + PgXmlErrorContext *xmlerrcxt; + volatile xmlBufferPtr buf = NULL; + volatile xmlTextWriterPtr writer = NULL; /* * We first evaluate all the arguments, then start up libxml and create @@ -598,17 +615,17 @@ xmlelement(XmlExprState *xmlExpr, ExprContext *econtext) } /* now safe to run libxml */ - pg_xml_init(); + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); PG_TRY(); { buf = xmlBufferCreate(); - if (!buf) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + if (buf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xmlBuffer"); writer = xmlNewTextWriterMemory(buf, 0); - if (!writer) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + if (writer == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xmlTextWriter"); xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name); @@ -645,12 +662,17 @@ xmlelement(XmlExprState *xmlExpr, ExprContext *econtext) xmlFreeTextWriter(writer); if (buf) xmlBufferFree(buf); + + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); } PG_END_TRY(); xmlBufferFree(buf); + pg_xml_done(xmlerrcxt, false); + return result; #else NO_XML_SUPPORT(); @@ -800,7 +822,7 @@ xml_is_document(xmltype *arg) { #ifdef USE_LIBXML bool result; - xmlDocPtr doc = NULL; + volatile xmlDocPtr doc = NULL; MemoryContext ccxt = CurrentMemoryContext; /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */ @@ -844,30 +866,24 @@ xml_is_document(xmltype *arg) #ifdef USE_LIBXML /* - * pg_xml_init --- set up for use of libxml + * pg_xml_init_library --- set up for use of libxml * * This should be called by each function that is about to use libxml - * facilities. It has two responsibilities: verify compatibility with the - * loaded libxml version (done on first call in a session) and establish - * or re-establish our libxml error handler. The latter needs to be done - * anytime we might have passed control to add-on modules (eg libperl) which - * might have set their own error handler for libxml. - * - * This is exported for use by contrib/xml2, as well as other code that might - * wish to share use of this module's libxml error handler. + * facilities but doesn't require error handling. It initializes libxml + * and verifies compatibility with the loaded libxml version. These are + * once-per-session activities. * * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and * check) */ void -pg_xml_init(void) +pg_xml_init_library(void) { static bool first_time = true; if (first_time) { /* Stuff we need do only once per session */ - MemoryContext oldcontext; /* * Currently, we have no pure UTF-8 support for internals -- check if @@ -879,16 +895,8 @@ pg_xml_init(void) errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.", (int) sizeof(char), (int) sizeof(xmlChar)))); - /* create error buffer in permanent context */ - oldcontext = MemoryContextSwitchTo(TopMemoryContext); - xml_err_buf = makeStringInfo(); - MemoryContextSwitchTo(oldcontext); - - /* Now that xml_err_buf exists, safe to call xml_errorHandler */ - xmlSetGenericErrorFunc(NULL, xml_errorHandler); - #ifdef USE_LIBXMLCONTEXT - /* Set up memory allocation our way, too */ + /* Set up libxml's memory allocation our way */ xml_memory_init(); #endif @@ -897,21 +905,119 @@ pg_xml_init(void) first_time = false; } - else - { - /* Reset pre-existing buffer to empty */ - Assert(xml_err_buf != NULL); - resetStringInfo(xml_err_buf); +} - /* - * We re-establish the error callback function every time. This makes - * it safe for other subsystems (PL/Perl, say) to also use libxml with - * their own callbacks ... so long as they likewise set up the - * callbacks on every use. It's cheap enough to not be worth worrying - * about, anyway. - */ - xmlSetGenericErrorFunc(NULL, xml_errorHandler); - } +/* + * pg_xml_init --- set up for use of libxml and register an error handler + * + * This should be called by each function that is about to use libxml + * facilities and requires error handling. It initializes libxml with + * pg_xml_init_library() and establishes our libxml error handler. + * + * strictness determines which errors are reported and which are ignored. + * + * Calls to this function MUST be followed by a PG_TRY block that guarantees + * that pg_xml_done() is called during either normal or error exit. + * + * This is exported for use by contrib/xml2, as well as other code that might + * wish to share use of this module's libxml error handler. + */ +PgXmlErrorContext * +pg_xml_init(PgXmlStrictness strictness) +{ + PgXmlErrorContext *errcxt; + + /* Do one-time setup if needed */ + pg_xml_init_library(); + + /* Create error handling context structure */ + errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext)); + errcxt->magic = ERRCXT_MAGIC; + errcxt->strictness = strictness; + errcxt->err_occurred = false; + initStringInfo(&errcxt->err_buf); + + /* + * Save original error handler and install ours. libxml originally didn't + * distinguish between the contexts for generic and for structured error + * handlers. If we're using an old libxml version, we must thus save + * the generic error context, even though we're using a structured + * error handler. + */ + errcxt->saved_errfunc = xmlStructuredError; + +#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT + errcxt->saved_errcxt = xmlStructuredErrorContext; +#else + errcxt->saved_errcxt = xmlGenericErrorContext; +#endif + + xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler); + + return errcxt; +} + + +/* + * pg_xml_done --- restore previous libxml error handling + * + * Resets libxml's global error-handling state to what it was before + * pg_xml_init() was called. + * + * This routine verifies that all pending errors have been dealt with + * (in assert-enabled builds, anyway). + */ +void +pg_xml_done(PgXmlErrorContext *errcxt, bool isError) +{ + void *cur_errcxt; + + /* An assert seems like enough protection here */ + Assert(errcxt->magic == ERRCXT_MAGIC); + + /* + * In a normal exit, there should be no un-handled libxml errors. But we + * shouldn't try to enforce this during error recovery, since the longjmp + * could have been thrown before xml_ereport had a chance to run. + */ + Assert(!errcxt->err_occurred || isError); + + /* + * Check that libxml's global state is correct, warn if not. This is + * a real test and not an Assert because it has a higher probability + * of happening. + */ +#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT + cur_errcxt = xmlStructuredErrorContext; +#else + cur_errcxt = xmlGenericErrorContext; +#endif + + if (cur_errcxt != (void *) errcxt) + elog(WARNING, "libxml error handling state is out of sync with xml.c"); + + /* Restore the saved handler */ + xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc); + + /* + * Mark the struct as invalid, just in case somebody somehow manages to + * call xml_errorHandler or xml_ereport with it. + */ + errcxt->magic = 0; + + /* Release memory */ + pfree(errcxt->err_buf.data); + pfree(errcxt); +} + + +/* + * pg_xml_error_occurred() --- test the error flag + */ +bool +pg_xml_error_occurred(PgXmlErrorContext *errcxt) +{ + return errcxt->err_occurred; } @@ -970,7 +1076,13 @@ parse_xml_decl(const xmlChar *str, size_t *lenp, int utf8char; int utf8len; - pg_xml_init(); + /* + * Only initialize libxml. We don't need error handling here, but we do + * need to make sure libxml is initialized before calling any of its + * functions. Note that this is safe (and a no-op) if caller has already + * done pg_xml_init(). + */ + pg_xml_init_library(); /* Initialize output arguments to "not present" */ if (version) @@ -1124,8 +1236,6 @@ static bool print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone) { - pg_xml_init(); /* why is this here? */ - if ((version && strcmp((char *) version, PG_XML_DEFAULT_VERSION) != 0) || (encoding && encoding != PG_UTF8) || standalone != -1) @@ -1176,8 +1286,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, int32 len; xmlChar *string; xmlChar *utf8string; - xmlParserCtxtPtr ctxt; - xmlDocPtr doc; + PgXmlErrorContext *xmlerrcxt; + volatile xmlParserCtxtPtr ctxt = NULL; + volatile xmlDocPtr doc = NULL; len = VARSIZE(data) - VARHDRSZ; /* will be useful later */ string = xml_text2xmlChar(data); @@ -1187,18 +1298,19 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, encoding, PG_UTF8); - /* Start up libxml and its parser (no-ops if already done) */ - pg_xml_init(); - xmlInitParser(); + /* Start up libxml and its parser */ + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED); - ctxt = xmlNewParserCtxt(); - if (ctxt == NULL) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, - "could not allocate parser context"); - - /* Use a TRY block to ensure the ctxt is released */ + /* Use a TRY block to ensure we clean up correctly */ PG_TRY(); { + xmlInitParser(); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate parser context"); + if (xmloption_arg == XMLOPTION_DOCUMENT) { /* @@ -1213,8 +1325,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, "UTF-8", XML_PARSE_NOENT | XML_PARSE_DTDATTR | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS)); - if (doc == NULL) - xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT, + if (doc == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, "invalid XML document"); } else @@ -1238,23 +1350,28 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, utf8string + count, NULL); - if (res_code != 0) - { - xmlFreeDoc(doc); - xml_ereport(ERROR, ERRCODE_INVALID_XML_CONTENT, + if (res_code != 0 || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT, "invalid XML content"); - } } } PG_CATCH(); { - xmlFreeParserCtxt(ctxt); + if (doc != NULL) + xmlFreeDoc(doc); + if (ctxt != NULL) + xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); } PG_END_TRY(); xmlFreeParserCtxt(ctxt); + pg_xml_done(xmlerrcxt, false); + return doc; } @@ -1335,70 +1452,180 @@ xml_pstrdup(const char *string) * handler. Note that pg_xml_init() *must* have been called previously. */ void -xml_ereport(int level, int sqlcode, const char *msg) +xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg) { char *detail; - /* - * It might seem that we should just pass xml_err_buf->data directly to - * errdetail. However, we want to clean out xml_err_buf before throwing - * error, in case there is another function using libxml further down the - * call stack. - */ - if (xml_err_buf->len > 0) - { - detail = pstrdup(xml_err_buf->data); - resetStringInfo(xml_err_buf); - } + /* Defend against someone passing us a bogus context struct */ + if (errcxt->magic != ERRCXT_MAGIC) + elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext"); + + /* Flag that the current libxml error has been reported */ + errcxt->err_occurred = false; + + /* Include detail only if we have some text from libxml */ + if (errcxt->err_buf.len > 0) + detail = errcxt->err_buf.data; else detail = NULL; - if (detail) - { - size_t len; - - /* libxml error messages end in '\n'; get rid of it */ - len = strlen(detail); - if (len > 0 && detail[len - 1] == '\n') - detail[len - 1] = '\0'; - - ereport(level, - (errcode(sqlcode), - errmsg_internal("%s", msg), - errdetail_internal("%s", detail))); - } - else - { - ereport(level, - (errcode(sqlcode), - errmsg_internal("%s", msg))); - } + ereport(level, + (errcode(sqlcode), + errmsg_internal("%s", msg), + detail ? errdetail_internal("%s", detail) : 0)); } /* - * Error handler for libxml error messages + * Error handler for libxml errors and warnings */ static void -xml_errorHandler(void *ctxt, const char *msg,...) +xml_errorHandler(void *data, xmlErrorPtr error) { - /* Append the formatted text to xml_err_buf */ - for (;;) + PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data; + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt; + xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL; + xmlNodePtr node = error->node; + const xmlChar *name = (node != NULL && + node->type == XML_ELEMENT_NODE) ? node->name : NULL; + int domain = error->domain; + int level = error->level; + StringInfo errorBuf; + + /* Defend against someone passing us a bogus context struct */ + if (xmlerrcxt->magic != ERRCXT_MAGIC) + elog(ERROR, "xml_errorHandler called with invalid PgXmlErrorContext"); + + /*---------- + * Older libxml versions report some errors differently. + * First, some errors were previously reported as coming from the parser + * domain but are now reported as coming from the namespace domain. + * Second, some warnings were upgraded to errors. + * We attempt to compensate for that here. + *---------- + */ + switch (error->code) { - va_list args; - bool success; - - /* Try to format the data. */ - va_start(args, msg); - success = appendStringInfoVA(xml_err_buf, msg, args); - va_end(args); - - if (success) + case XML_WAR_NS_URI: + level = XML_ERR_ERROR; + domain = XML_FROM_NAMESPACE; break; - /* Double the buffer size and try again. */ - enlargeStringInfo(xml_err_buf, xml_err_buf->maxlen); + case XML_ERR_NS_DECL_ERROR: + case XML_WAR_NS_URI_RELATIVE: + case XML_WAR_NS_COLUMN: + case XML_NS_ERR_XML_NAMESPACE: + case XML_NS_ERR_UNDEFINED_NAMESPACE: + case XML_NS_ERR_QNAME: + case XML_NS_ERR_ATTRIBUTE_REDEFINED: + case XML_NS_ERR_EMPTY: + domain = XML_FROM_NAMESPACE; + break; } + + /* Decide whether to act on the error or not */ + switch (domain) + { + case XML_FROM_PARSER: + case XML_FROM_NONE: + case XML_FROM_MEMORY: + case XML_FROM_IO: + /* Accept error regardless of the parsing purpose */ + break; + + default: + /* Ignore error if only doing well-formedness check */ + if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED) + return; + break; + } + + /* Prepare error message in errorBuf */ + errorBuf = makeStringInfo(); + + if (error->line > 0) + appendStringInfo(errorBuf, "line %d: ", error->line); + if (name != NULL) + appendStringInfo(errorBuf, "element %s: ", name); + appendStringInfoString(errorBuf, error->message); + + /* + * Append context information to errorBuf. + * + * xmlParserPrintFileContext() uses libxml's "generic" error handler to + * write the context. Since we don't want to duplicate libxml + * functionality here, we set up a generic error handler temporarily. + * + * We use appendStringInfo() directly as libxml's generic error handler. + * This should work because it has essentially the same signature as + * libxml expects, namely (void *ptr, const char *msg, ...). + */ + if (input != NULL) + { + xmlGenericErrorFunc errFuncSaved = xmlGenericError; + void *errCtxSaved = xmlGenericErrorContext; + + xmlSetGenericErrorFunc((void *) errorBuf, + (xmlGenericErrorFunc) appendStringInfo); + + /* Add context information to errorBuf */ + appendStringInfoLineSeparator(errorBuf); + + xmlParserPrintFileContext(input); + + /* Restore generic error func */ + xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved); + } + + /* Get rid of any trailing newlines in errorBuf */ + chopStringInfoNewlines(errorBuf); + + /* + * Legacy error handling mode. err_occurred is never set, we just add the + * message to err_buf. This mode exists because the xml2 contrib module + * uses our error-handling infrastructure, but we don't want to change its + * behaviour since it's deprecated anyway. This is also why we don't + * distinguish between notices, warnings and errors here --- the old-style + * generic error handler wouldn't have done that either. + */ + if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY) + { + appendStringInfoLineSeparator(&xmlerrcxt->err_buf); + appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data); + + pfree(errorBuf->data); + pfree(errorBuf); + return; + } + + /* + * We don't want to ereport() here because that'd probably leave libxml in + * an inconsistent state. Instead, we remember the error and ereport() + * from xml_ereport(). + * + * Warnings and notices can be reported immediately since they won't cause + * a longjmp() out of libxml. + */ + if (level >= XML_ERR_ERROR) + { + appendStringInfoLineSeparator(&xmlerrcxt->err_buf); + appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data); + + xmlerrcxt->err_occurred = true; + } + else if (level >= XML_ERR_WARNING) + { + ereport(WARNING, + (errmsg_internal("%s", errorBuf->data))); + } + else + { + ereport(NOTICE, + (errmsg_internal("%s", errorBuf->data))); + } + + pfree(errorBuf->data); + pfree(errorBuf); } @@ -1447,6 +1674,29 @@ xml_ereport_by_code(int level, int sqlcode, } +/* + * Remove all trailing newlines from a StringInfo string + */ +static void +chopStringInfoNewlines(StringInfo str) +{ + while (str->len > 0 && str->data[str->len - 1] == '\n') + str->data[--str->len] = '\0'; +} + + +/* + * Append a newline after removing any existing trailing newlines + */ +static void +appendStringInfoLineSeparator(StringInfo str) +{ + chopStringInfoNewlines(str); + if (str->len > 0) + appendStringInfoChar(str, '\n'); +} + + /* * Convert one char in the current server encoding to a Unicode codepoint. */ @@ -1753,21 +2003,22 @@ map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings) case BYTEAOID: { bytea *bstr = DatumGetByteaPP(value); - xmlBufferPtr buf = NULL; - xmlTextWriterPtr writer = NULL; + PgXmlErrorContext *xmlerrcxt; + volatile xmlBufferPtr buf = NULL; + volatile xmlTextWriterPtr writer = NULL; char *result; - pg_xml_init(); + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); PG_TRY(); { buf = xmlBufferCreate(); - if (!buf) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + if (buf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xmlBuffer"); writer = xmlNewTextWriterMemory(buf, 0); - if (!writer) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + if (writer == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xmlTextWriter"); if (xmlbinary == XMLBINARY_BASE64) @@ -1789,12 +2040,17 @@ map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings) xmlFreeTextWriter(writer); if (buf) xmlBufferFree(buf); + + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); } PG_END_TRY(); xmlBufferFree(buf); + pg_xml_done(xmlerrcxt, false); + return result; } #endif /* USE_LIBXML */ @@ -3312,11 +3568,12 @@ static void xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, int *res_nitems, ArrayBuildState **astate) { - xmlParserCtxtPtr ctxt = NULL; - xmlDocPtr doc = NULL; - xmlXPathContextPtr xpathctx = NULL; - xmlXPathCompExprPtr xpathcomp = NULL; - xmlXPathObjectPtr xpathobj = NULL; + PgXmlErrorContext *xmlerrcxt; + volatile xmlParserCtxtPtr ctxt = NULL; + volatile xmlDocPtr doc = NULL; + volatile xmlXPathContextPtr xpathctx = NULL; + volatile xmlXPathCompExprPtr xpathcomp = NULL; + volatile xmlXPathObjectPtr xpathobj = NULL; char *datastr; int32 len; int32 xpath_len; @@ -3382,30 +3639,31 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len); xpath_expr[xpath_len] = '\0'; - pg_xml_init(); - xmlInitParser(); + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); PG_TRY(); { + xmlInitParser(); + /* * redundant XML parsing (two parsings for the same value during one * command execution are possible) */ ctxt = xmlNewParserCtxt(); - if (ctxt == NULL) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + if (ctxt == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate parser context"); doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0); - if (doc == NULL) - xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT, + if (doc == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, "could not parse XML document"); xpathctx = xmlXPathNewContext(doc); - if (xpathctx == NULL) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + if (xpathctx == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate XPath context"); xpathctx->node = xmlDocGetRootElement(doc); - if (xpathctx->node == NULL) - xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, + if (xpathctx->node == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, "could not find root XML element"); /* register namespaces, if any */ @@ -3433,8 +3691,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, } xpathcomp = xmlXPathCompile(xpath_expr); - if (xpathcomp == NULL) /* TODO: show proper XPath error details */ - xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, + if (xpathcomp == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, "invalid XPath expression"); /* @@ -3445,8 +3703,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, * the same. */ xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx); - if (xpathobj == NULL) /* TODO: reason? */ - xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, + if (xpathobj == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, "could not create XPath object"); /* return empty array in cases when nothing is found */ @@ -3482,6 +3740,9 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, xmlFreeDoc(doc); if (ctxt) xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); } PG_END_TRY(); @@ -3491,6 +3752,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, xmlXPathFreeContext(xpathctx); xmlFreeDoc(doc); xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, false); } #endif /* USE_LIBXML */ @@ -3579,7 +3842,7 @@ static bool wellformed_xml(text *data, XmlOptionType xmloption_arg) { bool result; - xmlDocPtr doc = NULL; + volatile xmlDocPtr doc = NULL; /* We want to catch any exceptions and return false */ PG_TRY(); diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 19f38cc0fd..24b951e7bd 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -671,6 +671,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_WINLDAP_H +/* Define to 1 if your libxml has xmlStructuredErrorContext. */ +#undef HAVE_XMLSTRUCTUREDERRORCONTEXT + /* Define to the appropriate snprintf format for 64-bit ints. */ #undef INT64_FORMAT diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h index 6359cd6043..f9f6f56859 100644 --- a/src/include/utils/xml.h +++ b/src/include/utils/xml.h @@ -21,6 +21,31 @@ typedef struct varlena xmltype; +typedef enum +{ + XML_STANDALONE_YES, + XML_STANDALONE_NO, + XML_STANDALONE_NO_VALUE, + XML_STANDALONE_OMITTED +} XmlStandaloneType; + +typedef enum +{ + XMLBINARY_BASE64, + XMLBINARY_HEX +} XmlBinaryType; + +typedef enum +{ + PG_XML_STRICTNESS_LEGACY, /* ignore errors unless function result + * indicates error condition */ + PG_XML_STRICTNESS_WELLFORMED, /* ignore non-parser messages */ + PG_XML_STRICTNESS_ALL /* report all notices/warnings/errors */ +} PgXmlStrictness; + +/* struct PgXmlErrorContext is private to xml.c */ +typedef struct PgXmlErrorContext PgXmlErrorContext; + #define DatumGetXmlP(X) ((xmltype *) PG_DETOAST_DATUM(X)) #define XmlPGetDatum(X) PointerGetDatum(X) @@ -60,16 +85,13 @@ extern Datum database_to_xml(PG_FUNCTION_ARGS); extern Datum database_to_xmlschema(PG_FUNCTION_ARGS); extern Datum database_to_xml_and_xmlschema(PG_FUNCTION_ARGS); -typedef enum -{ - XML_STANDALONE_YES, - XML_STANDALONE_NO, - XML_STANDALONE_NO_VALUE, - XML_STANDALONE_OMITTED -} XmlStandaloneType; +extern void pg_xml_init_library(void); +extern PgXmlErrorContext *pg_xml_init(PgXmlStrictness strictness); +extern void pg_xml_done(PgXmlErrorContext *errcxt, bool isError); +extern bool pg_xml_error_occurred(PgXmlErrorContext *errcxt); +extern void xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, + const char *msg); -extern void pg_xml_init(void); -extern void xml_ereport(int level, int sqlcode, const char *msg); extern xmltype *xmlconcat(List *args); extern xmltype *xmlelement(XmlExprState *xmlExpr, ExprContext *econtext); extern xmltype *xmlparse(text *data, XmlOptionType xmloption, bool preserve_whitespace); @@ -83,12 +105,6 @@ extern char *map_sql_identifier_to_xml_name(char *ident, bool fully_escaped, boo extern char *map_xml_name_to_sql_identifier(char *name); extern char *map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings); -typedef enum -{ - XMLBINARY_BASE64, - XMLBINARY_HEX -} XmlBinaryType; - extern int xmlbinary; /* XmlBinaryType, but int for guc enum */ extern int xmloption; /* XmlOptionType, but int for guc enum */ diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out index eaa5a74ef0..379777aced 100644 --- a/src/test/regress/expected/xml.out +++ b/src/test/regress/expected/xml.out @@ -8,7 +8,7 @@ INSERT INTO xmltest VALUES (3, '', NULL, ''); @@ -206,9 +206,54 @@ SELECT xmlparse(content 'x'); x (1 row) +SELECT xmlparse(content '&'); +ERROR: invalid XML content +DETAIL: line 1: xmlParseEntityRef: no name +& + ^ +line 1: chunk is not well balanced +& + ^ +SELECT xmlparse(content '&idontexist;'); +ERROR: invalid XML content +DETAIL: line 1: Entity 'idontexist' not defined +&idontexist; + ^ +line 1: chunk is not well balanced +&idontexist; + ^ +SELECT xmlparse(content ''); + xmlparse +--------------------------- + +(1 row) + +SELECT xmlparse(content ''); + xmlparse +-------------------------------- + +(1 row) + +SELECT xmlparse(content '&idontexist;'); +ERROR: invalid XML content +DETAIL: line 1: Entity 'idontexist' not defined +&idontexist; + ^ +line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced +&idontexist; + ^ +line 1: chunk is not well balanced +&idontexist; + ^ +SELECT xmlparse(content ''); + xmlparse +--------------------- + +(1 row) + SELECT xmlparse(document 'abc'); ERROR: invalid XML document -DETAIL: Entity: line 1: parser error : Start tag expected, '<' not found +DETAIL: line 1: Start tag expected, '<' not found abc ^ SELECT xmlparse(document 'x'); @@ -217,6 +262,48 @@ SELECT xmlparse(document 'x'); x (1 row) +SELECT xmlparse(document '&'); +ERROR: invalid XML document +DETAIL: line 1: xmlParseEntityRef: no name +& + ^ +line 1: Opening and ending tag mismatch: invalidentity line 1 and abc +& + ^ +SELECT xmlparse(document '&idontexist;'); +ERROR: invalid XML document +DETAIL: line 1: Entity 'idontexist' not defined +&idontexist; + ^ +line 1: Opening and ending tag mismatch: undefinedentity line 1 and abc +&idontexist; + ^ +SELECT xmlparse(document ''); + xmlparse +--------------------------- + +(1 row) + +SELECT xmlparse(document ''); + xmlparse +-------------------------------- + +(1 row) + +SELECT xmlparse(document '&idontexist;'); +ERROR: invalid XML document +DETAIL: line 1: Entity 'idontexist' not defined +&idontexist; + ^ +line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced +&idontexist; + ^ +SELECT xmlparse(document ''); + xmlparse +--------------------- + +(1 row) + SELECT xmlpi(name foo); xmlpi --------- @@ -379,7 +466,7 @@ SELECT '<>' IS NOT DOCUMENT; ERROR: invalid XML content LINE 1: SELECT '<>' IS NOT DOCUMENT; ^ -DETAIL: Entity: line 1: parser error : StartTag: invalid element name +DETAIL: line 1: StartTag: invalid element name <> ^ SELECT xmlagg(data) FROM xmltest; @@ -425,7 +512,7 @@ EXECUTE foo ('bad'); ERROR: invalid XML document LINE 1: EXECUTE foo ('bad'); ^ -DETAIL: Entity: line 1: parser error : Start tag expected, '<' not found +DETAIL: line 1: Start tag expected, '<' not found bad ^ SET XML OPTION CONTENT; @@ -679,6 +766,36 @@ SELECT xml_is_well_formed('bar

&'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed('&idontexist;'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed(''); + xml_is_well_formed +-------------------- + t +(1 row) + +SELECT xml_is_well_formed(''); + xml_is_well_formed +-------------------- + t +(1 row) + +SELECT xml_is_well_formed('&idontexist;'); + xml_is_well_formed +-------------------- + f +(1 row) + SET xmloption TO CONTENT; SELECT xml_is_well_formed('abc'); xml_is_well_formed @@ -686,3 +803,36 @@ SELECT xml_is_well_formed('abc'); t (1 row) +-- Since xpath() deals with namespaces, it's a bit stricter about +-- what's well-formed and what's not. If we don't obey these rules +-- (i.e. ignore namespace-related errors from libxml), xpath() +-- fails in subtle ways. The following would for example produce +-- the xml value +-- +-- which is invalid because '<' may not appear un-escaped in +-- attribute values. +-- Since different libxml versions emit slightly different +-- error messages, we suppress the DETAIL in this test. +\set VERBOSITY terse +SELECT xpath('/*', ''); +ERROR: could not parse XML document +\set VERBOSITY default +-- Again, the XML isn't well-formed for namespace purposes +SELECT xpath('/*', ''); +ERROR: could not parse XML document +DETAIL: line 1: Namespace prefix nosuchprefix on tag is not defined + + ^ +CONTEXT: SQL function "xpath" statement 1 +-- XPath deprecates relative namespaces, but they're not supposed to +-- throw an error, only a warning. +SELECT xpath('/*', ''); +WARNING: line 1: xmlns: URI relative is not absolute + + ^ +CONTEXT: SQL function "xpath" statement 1 + xpath +-------------------------------------- + {""} +(1 row) + diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out index 711b4358a2..1f17bffc0b 100644 --- a/src/test/regress/expected/xml_1.out +++ b/src/test/regress/expected/xml_1.out @@ -172,6 +172,30 @@ SELECT xmlparse(content 'x'); ERROR: unsupported XML feature DETAIL: This functionality requires the server to be built with libxml support. HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(content '&'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(content '&idontexist;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(content ''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(content ''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(content '&idontexist;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(content ''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. SELECT xmlparse(document 'abc'); ERROR: unsupported XML feature DETAIL: This functionality requires the server to be built with libxml support. @@ -180,6 +204,30 @@ SELECT xmlparse(document 'x'); ERROR: unsupported XML feature DETAIL: This functionality requires the server to be built with libxml support. HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(document '&'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(document '&idontexist;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(document ''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(document ''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(document '&idontexist;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(document ''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. SELECT xmlpi(name foo); ERROR: unsupported XML feature DETAIL: This functionality requires the server to be built with libxml support. @@ -627,8 +675,57 @@ SELECT xml_is_well_formed('bar

&'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('&idontexist;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed(''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed(''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('&idontexist;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. SET xmloption TO CONTENT; SELECT xml_is_well_formed('abc'); ERROR: unsupported XML feature DETAIL: This functionality requires the server to be built with libxml support. HINT: You need to rebuild PostgreSQL using --with-libxml. +-- Since xpath() deals with namespaces, it's a bit stricter about +-- what's well-formed and what's not. If we don't obey these rules +-- (i.e. ignore namespace-related errors from libxml), xpath() +-- fails in subtle ways. The following would for example produce +-- the xml value +-- +-- which is invalid because '<' may not appear un-escaped in +-- attribute values. +-- Since different libxml versions emit slightly different +-- error messages, we suppress the DETAIL in this test. +\set VERBOSITY terse +SELECT xpath('/*', ''); +ERROR: unsupported XML feature at character 20 +\set VERBOSITY default +-- Again, the XML isn't well-formed for namespace purposes +SELECT xpath('/*', ''); +ERROR: unsupported XML feature +LINE 1: SELECT xpath('/*', ''); + ^ +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +-- XPath deprecates relative namespaces, but they're not supposed to +-- throw an error, only a warning. +SELECT xpath('/*', ''); +ERROR: unsupported XML feature +LINE 1: SELECT xpath('/*', ''); + ^ +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql index 717a1e7170..f4e423618e 100644 --- a/src/test/regress/sql/xml.sql +++ b/src/test/regress/sql/xml.sql @@ -62,9 +62,21 @@ SELECT xmlelement(name foo, xmlattributes('<>&"''' as funny, xml 'br' as fun SELECT xmlparse(content 'abc'); SELECT xmlparse(content 'x'); +SELECT xmlparse(content '&'); +SELECT xmlparse(content '&idontexist;'); +SELECT xmlparse(content ''); +SELECT xmlparse(content ''); +SELECT xmlparse(content '&idontexist;'); +SELECT xmlparse(content ''); SELECT xmlparse(document 'abc'); SELECT xmlparse(document 'x'); +SELECT xmlparse(document '&'); +SELECT xmlparse(document '&idontexist;'); +SELECT xmlparse(document ''); +SELECT xmlparse(document ''); +SELECT xmlparse(document '&idontexist;'); +SELECT xmlparse(document ''); SELECT xmlpi(name foo); @@ -208,6 +220,32 @@ SELECT xml_is_well_formed('baz'); SELECT xml_is_well_formed('number one'); SELECT xml_is_well_formed('bar'); SELECT xml_is_well_formed('bar'); +SELECT xml_is_well_formed('&'); +SELECT xml_is_well_formed('&idontexist;'); +SELECT xml_is_well_formed(''); +SELECT xml_is_well_formed(''); +SELECT xml_is_well_formed('&idontexist;'); SET xmloption TO CONTENT; SELECT xml_is_well_formed('abc'); + +-- Since xpath() deals with namespaces, it's a bit stricter about +-- what's well-formed and what's not. If we don't obey these rules +-- (i.e. ignore namespace-related errors from libxml), xpath() +-- fails in subtle ways. The following would for example produce +-- the xml value +-- +-- which is invalid because '<' may not appear un-escaped in +-- attribute values. +-- Since different libxml versions emit slightly different +-- error messages, we suppress the DETAIL in this test. +\set VERBOSITY terse +SELECT xpath('/*', ''); +\set VERBOSITY default + +-- Again, the XML isn't well-formed for namespace purposes +SELECT xpath('/*', ''); + +-- XPath deprecates relative namespaces, but they're not supposed to +-- throw an error, only a warning. +SELECT xpath('/*', '');