From a0b7b717a4324f573d3a7651a06037557066eb77 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 13 Aug 2010 18:36:26 +0000 Subject: [PATCH] Add xml_is_well_formed, xml_is_well_formed_document, xml_is_well_formed_content functions to the core XML code. Per discussion, the former depends on XMLOPTION while the others do not. These supersede a version previously offered by contrib/xml2. Mike Fowler, reviewed by Pavel Stehule --- contrib/xml2/pgxml.sql.in | 10 +--- contrib/xml2/uninstall_pgxml.sql | 4 +- contrib/xml2/xpath.c | 11 +++- doc/src/sgml/func.sgml | 80 +++++++++++++++++++++++++- src/backend/utils/adt/xml.c | 72 +++++++++++++++++++++++- src/include/catalog/catversion.h | 4 +- src/include/catalog/pg_proc.h | 8 ++- src/include/utils/xml.h | 5 +- src/test/regress/expected/xml.out | 87 +++++++++++++++++++++++++++++ src/test/regress/expected/xml_1.out | 59 +++++++++++++++++++ src/test/regress/sql/xml.sql | 21 +++++++ 11 files changed, 343 insertions(+), 18 deletions(-) diff --git a/contrib/xml2/pgxml.sql.in b/contrib/xml2/pgxml.sql.in index 98d8f81b57..0a52561135 100644 --- a/contrib/xml2/pgxml.sql.in +++ b/contrib/xml2/pgxml.sql.in @@ -1,18 +1,14 @@ -/* $PostgreSQL: pgsql/contrib/xml2/pgxml.sql.in,v 1.12 2010/03/01 18:07:59 tgl Exp $ */ +/* $PostgreSQL: pgsql/contrib/xml2/pgxml.sql.in,v 1.13 2010/08/13 18:36:23 tgl Exp $ */ -- Adjust this setting to control where the objects get created. SET search_path = public; --SQL for XML parser -CREATE OR REPLACE FUNCTION xml_is_well_formed(text) RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -- deprecated old name for xml_is_well_formed CREATE OR REPLACE FUNCTION xml_valid(text) RETURNS bool -AS 'MODULE_PATHNAME', 'xml_is_well_formed' -LANGUAGE C STRICT IMMUTABLE; +AS 'xml_is_well_formed' +LANGUAGE INTERNAL STRICT STABLE; CREATE OR REPLACE FUNCTION xml_encode_special_chars(text) RETURNS text AS 'MODULE_PATHNAME' diff --git a/contrib/xml2/uninstall_pgxml.sql b/contrib/xml2/uninstall_pgxml.sql index 09441ef01f..016658dc7f 100644 --- a/contrib/xml2/uninstall_pgxml.sql +++ b/contrib/xml2/uninstall_pgxml.sql @@ -1,4 +1,4 @@ -/* $PostgreSQL: pgsql/contrib/xml2/uninstall_pgxml.sql,v 1.4 2007/11/13 04:24:29 momjian Exp $ */ +/* $PostgreSQL: pgsql/contrib/xml2/uninstall_pgxml.sql,v 1.5 2010/08/13 18:36:23 tgl Exp $ */ -- Adjust this setting to control where the objects get dropped. SET search_path = public; @@ -29,5 +29,3 @@ DROP FUNCTION xml_encode_special_chars(text); -- deprecated old name for xml_is_well_formed DROP FUNCTION xml_valid(text); - -DROP FUNCTION xml_is_well_formed(text); diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c index dbf0b76f92..8ee949ce4e 100644 --- a/contrib/xml2/xpath.c +++ b/contrib/xml2/xpath.c @@ -1,5 +1,5 @@ /* - * $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.30 2010/07/06 19:18:55 momjian Exp $ + * $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.31 2010/08/13 18:36:23 tgl Exp $ * * Parser interface for DOM-based parser (libxml) rather than * stream-based SAX-type parser @@ -71,7 +71,14 @@ pgxml_parser_init(void) } -/* Returns true if document is well-formed */ +/* + * Returns true if document is well-formed + * + * Note: this has been superseded by a core function. We still have to + * have it in the contrib module so that existing SQL-level references + * to the function won't fail; but in normal usage with up-to-date SQL + * definitions for the contrib module, this won't be called. + */ PG_FUNCTION_INFO_V1(xml_is_well_formed); diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index de6ba61650..562ba485d2 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,4 +1,4 @@ - + Functions and Operators @@ -8625,6 +8625,84 @@ SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF 'Tor supports XPath, which is a subset of XQuery. + + + xml_is_well_formed + + + xml_is_well_formed + + + + xml_is_well_formed_document + + + + xml_is_well_formed_content + + + +xml_is_well_formed(text) +xml_is_well_formed_document(text) +xml_is_well_formed_content(text) + + + + These functions check whether a text string is well-formed XML, + returning a boolean result. + xml_is_well_formed_document checks for a well-formed + document, while xml_is_well_formed_content checks + for well-formed content. xml_is_well_formed does + the former if the configuration + parameter is set to DOCUMENT, or the latter if it is set to + CONTENT. This means that + xml_is_well_formed is useful for seeing whether + a simple cast to type xml will succeed, whereas the other two + functions are useful for seeing whether the corresponding variants of + XMLPARSE will succeed. + + + + Examples: + +'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed(''); + xml_is_well_formed +-------------------- + t +(1 row) + +SET xmloption TO CONTENT; +SELECT xml_is_well_formed('abc'); + xml_is_well_formed +-------------------- + t +(1 row) + +SELECT xml_is_well_formed_document('bar'); + xml_is_well_formed_document +----------------------------- + t +(1 row) + +SELECT xml_is_well_formed_document('bar'); + xml_is_well_formed_document +----------------------------- + f +(1 row) +]]> + + The last example shows that the checks include whether + namespaces are correctly matched. + + diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 520668cf40..756390530a 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.100 2010/08/08 19:15:27 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.101 2010/08/13 18:36:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3565,3 +3565,73 @@ xpath_exists(PG_FUNCTION_ARGS) return 0; #endif } + +/* + * Functions for checking well-formed-ness + */ + +#ifdef USE_LIBXML +static bool +wellformed_xml(text *data, XmlOptionType xmloption_arg) +{ + bool result; + xmlDocPtr doc = NULL; + + /* We want to catch any exceptions and return false */ + PG_TRY(); + { + doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding()); + result = true; + } + PG_CATCH(); + { + FlushErrorState(); + result = false; + } + PG_END_TRY(); + + if (doc) + xmlFreeDoc(doc); + + return result; +} +#endif + +Datum +xml_is_well_formed(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *data = PG_GETARG_TEXT_P(0); + + PG_RETURN_BOOL(wellformed_xml(data, xmloption)); +#else + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} + +Datum +xml_is_well_formed_document(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *data = PG_GETARG_TEXT_P(0); + + PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT)); +#else + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} + +Datum +xml_is_well_formed_content(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *data = PG_GETARG_TEXT_P(0); + + PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT)); +#else + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index a7739db82d..db5f3c67b3 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.594 2010/08/10 21:51:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.595 2010/08/13 18:36:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201008101 +#define CATALOG_VERSION_NO 201008131 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 0ba9435b0a..7531b7ab5e 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.578 2010/08/10 21:51:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.579 2010/08/13 18:36:25 tgl Exp $ * * NOTES * The script catalog/genbki.pl reads this file and generates .bki @@ -4423,6 +4423,12 @@ DATA(insert OID = 3049 ( xpath_exists PGNSP PGUID 12 1 0 0 f f f t f i 3 0 16 DESCR("test XML value against XPath expression, with namespace support"); DATA(insert OID = 3050 ( xpath_exists PGNSP PGUID 14 1 0 0 f f f t f i 2 0 16 "25 142" _null_ _null_ _null_ _null_ "select pg_catalog.xpath_exists($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); DESCR("test XML value against XPath expression"); +DATA(insert OID = 3051 ( xml_is_well_formed PGNSP PGUID 12 1 0 0 f f f t f s 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed _null_ _null_ _null_ )); +DESCR("determine if a string is well formed XML"); +DATA(insert OID = 3052 ( xml_is_well_formed_document PGNSP PGUID 12 1 0 0 f f f t f i 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed_document _null_ _null_ _null_ )); +DESCR("determine if a string is well formed XML document"); +DATA(insert OID = 3053 ( xml_is_well_formed_content PGNSP PGUID 12 1 0 0 f f f t f i 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed_content _null_ _null_ _null_ )); +DESCR("determine if a string is well formed XML content"); /* uuid */ DATA(insert OID = 2952 ( uuid_in PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_ _null_ uuid_in _null_ _null_ _null_ )); diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h index 807bb08485..96029c2ebd 100644 --- a/src/include/utils/xml.h +++ b/src/include/utils/xml.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/xml.h,v 1.33 2010/08/08 19:15:27 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/xml.h,v 1.34 2010/08/13 18:36:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -39,6 +39,9 @@ extern Datum xmlvalidate(PG_FUNCTION_ARGS); extern Datum xpath(PG_FUNCTION_ARGS); extern Datum xpath_exists(PG_FUNCTION_ARGS); extern Datum xmlexists(PG_FUNCTION_ARGS); +extern Datum xml_is_well_formed(PG_FUNCTION_ARGS); +extern Datum xml_is_well_formed_document(PG_FUNCTION_ARGS); +extern Datum xml_is_well_formed_content(PG_FUNCTION_ARGS); extern Datum table_to_xml(PG_FUNCTION_ARGS); extern Datum query_to_xml(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out index 435331dcc3..eaa5a74ef0 100644 --- a/src/test/regress/expected/xml.out +++ b/src/test/regress/expected/xml.out @@ -599,3 +599,90 @@ SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data); 2 (1 row) +-- Test xml_is_well_formed and variants +SELECT xml_is_well_formed_document('bar'); + xml_is_well_formed_document +----------------------------- + t +(1 row) + +SELECT xml_is_well_formed_document('abc'); + xml_is_well_formed_document +----------------------------- + f +(1 row) + +SELECT xml_is_well_formed_content('bar'); + xml_is_well_formed_content +---------------------------- + t +(1 row) + +SELECT xml_is_well_formed_content('abc'); + xml_is_well_formed_content +---------------------------- + t +(1 row) + +SET xmloption TO DOCUMENT; +SELECT xml_is_well_formed('abc'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed('<>'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed(''); + xml_is_well_formed +-------------------- + t +(1 row) + +SELECT xml_is_well_formed('bar'); + xml_is_well_formed +-------------------- + t +(1 row) + +SELECT xml_is_well_formed('barbaz'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed('number one'); + xml_is_well_formed +-------------------- + t +(1 row) + +SELECT xml_is_well_formed('bar'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed('bar'); + xml_is_well_formed +-------------------- + t +(1 row) + +SET xmloption TO CONTENT; +SELECT xml_is_well_formed('abc'); + xml_is_well_formed +-------------------- + t +(1 row) + diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out index 2ce543aeaa..711b4358a2 100644 --- a/src/test/regress/expected/xml_1.out +++ b/src/test/regress/expected/xml_1.out @@ -573,3 +573,62 @@ SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data); 0 (1 row) +-- Test xml_is_well_formed and variants +SELECT xml_is_well_formed_document('bar'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed_document('abc'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed_content('bar'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed_content('abc'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SET xmloption TO DOCUMENT; +SELECT xml_is_well_formed('abc'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('<>'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed(''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('bar'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('barbaz'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('number one'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('bar'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('bar'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SET xmloption TO CONTENT; +SELECT xml_is_well_formed('abc'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql index 0e8c0fb227..717a1e7170 100644 --- a/src/test/regress/sql/xml.sql +++ b/src/test/regress/sql/xml.sql @@ -190,3 +190,24 @@ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers/myns:nam CREATE TABLE query ( expr TEXT ); INSERT INTO query VALUES ('/menu/beers/cost[text() = ''lots'']'); SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data); + +-- Test xml_is_well_formed and variants + +SELECT xml_is_well_formed_document('bar'); +SELECT xml_is_well_formed_document('abc'); +SELECT xml_is_well_formed_content('bar'); +SELECT xml_is_well_formed_content('abc'); + +SET xmloption TO DOCUMENT; +SELECT xml_is_well_formed('abc'); +SELECT xml_is_well_formed('<>'); +SELECT xml_is_well_formed(''); +SELECT xml_is_well_formed('bar'); +SELECT xml_is_well_formed('barbaz'); +SELECT xml_is_well_formed('number one'); +SELECT xml_is_well_formed('bar'); +SELECT xml_is_well_formed('bar'); + +SET xmloption TO CONTENT; +SELECT xml_is_well_formed('abc');