From 936c4af1c6876e09e914eca91c37949e6885b02e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 28 Feb 2010 21:31:57 +0000 Subject: [PATCH] Fix up memory management problems in contrib/xml2. Get rid of the code that attempted to funnel libxml2's memory allocations into palloc. We already knew from experience with the core xml datatype that trying to do this is simply not reliable. Unlike the core code, I did not bother adding a lot of PG_TRY/PG_CATCH logic to try to ensure that everything is cleaned up on error exit. Hence, we might leak some memory if one of these functions fails partway through. Given the deprecated status of this contrib module and the fact that errors partway through the functions shouldn't be too common, it doesn't seem worth worrying about. Also fix a separate bug in xpath_table, that it did the wrong things if given a result tuple descriptor with less than 2 columns. While such a case isn't very useful in practice, we shouldn't fail or stomp memory when it occurs. Add some simple regression tests based on all the reported crash cases that I have on hand. This should be back-patched, but let's see if the buildfarm likes it first. --- contrib/xml2/Makefile | 3 +- contrib/xml2/expected/xml2.out | 147 +++++++++++++++++++++++++++++++++ contrib/xml2/sql/xml2.sql | 82 ++++++++++++++++++ contrib/xml2/xpath.c | 78 +++++------------ 4 files changed, 253 insertions(+), 57 deletions(-) create mode 100644 contrib/xml2/expected/xml2.out create mode 100644 contrib/xml2/sql/xml2.sql diff --git a/contrib/xml2/Makefile b/contrib/xml2/Makefile index b96defa5d6..3426deaa25 100644 --- a/contrib/xml2/Makefile +++ b/contrib/xml2/Makefile @@ -1,4 +1,4 @@ -# $PostgreSQL: pgsql/contrib/xml2/Makefile,v 1.12 2008/05/08 16:49:37 tgl Exp $ +# $PostgreSQL: pgsql/contrib/xml2/Makefile,v 1.13 2010/02/28 21:31:57 tgl Exp $ MODULE_big = pgxml @@ -8,6 +8,7 @@ SHLIB_LINK += $(filter -lxslt, $(LIBS)) $(filter -lxml2, $(LIBS)) DATA_built = pgxml.sql DATA = uninstall_pgxml.sql +REGRESS = xml2 ifdef USE_PGXS PG_CONFIG = pg_config diff --git a/contrib/xml2/expected/xml2.out b/contrib/xml2/expected/xml2.out new file mode 100644 index 0000000000..74896b0802 --- /dev/null +++ b/contrib/xml2/expected/xml2.out @@ -0,0 +1,147 @@ +-- +-- first, define the functions. Turn off echoing so that expected file +-- does not depend on contents of pgxml.sql. +-- +SET client_min_messages = warning; +\set ECHO none +RESET client_min_messages; +select query_to_xml('select 1 as x',true,false,''); + query_to_xml +--------------------------------------------------------------- + + + + + + + 1 + + + + + +
+ + +(1 row) + +select xslt_process( query_to_xml('select x from generate_series(1,5) as +x',true,false,'')::text, +$$ + + + + + + + + + + + +$$::text); + xslt_process +--------------------------------------------------------------- + + + + + + + + + 1 + + + + + + + + 2 + + + + + + + + 3 + + + + + + + + 4 + + + + + + + + 5 + + + + + +
+ + +(1 row) + +CREATE TABLE xpath_test (id integer NOT NULL, t xml); +INSERT INTO xpath_test VALUES (1, '1'); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4); + id +---- +(0 rows) + +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4, doc int4); + id | doc +----+----- + 1 | 1 +(1 row) + +DROP TABLE xpath_test; +CREATE TABLE xpath_test (id integer NOT NULL, t text); +INSERT INTO xpath_test VALUES (1, '1'); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4); + id +---- +(0 rows) + +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4, doc int4); + id | doc +----+----- + 1 | 1 +(1 row) + +create table articles (article_id integer, article_xml xml, date_entered date); +insert into articles (article_id, article_xml, date_entered) +values (2, '
test37
', now()); +SELECT * FROM +xpath_table('article_id', + 'article_xml', + 'articles', + '/article/author|/article/pages|/article/title', + 'date_entered > ''2003-01-01'' ') +AS t(article_id integer, author text, page_count integer, title text); + article_id | author | page_count | title +------------+--------+------------+------- + 2 | test | 37 | +(1 row) + +-- this used to fail when invoked a second time +select xslt_process('',$$ + + + + + +$$)::xml; + xslt_process +-------------- + + + +(1 row) + +select xslt_process('',$$ + + + + + +$$)::xml; + xslt_process +-------------- + + + +(1 row) + +create table t1 (id integer, xml_data xml); +insert into t1 (id, xml_data) +values +(1, 'Some +Value'); +create index idx_xpath on t1 ( xpath_string +('/attributes/attribute[@name="attr_1"]/text()', xml_data::text)); diff --git a/contrib/xml2/sql/xml2.sql b/contrib/xml2/sql/xml2.sql new file mode 100644 index 0000000000..73723b6be1 --- /dev/null +++ b/contrib/xml2/sql/xml2.sql @@ -0,0 +1,82 @@ +-- +-- first, define the functions. Turn off echoing so that expected file +-- does not depend on contents of pgxml.sql. +-- +SET client_min_messages = warning; +\set ECHO none +\i pgxml.sql +\set ECHO all +RESET client_min_messages; + +select query_to_xml('select 1 as x',true,false,''); + +select xslt_process( query_to_xml('select x from generate_series(1,5) as +x',true,false,'')::text, +$$ + + + + + + + + + + + +$$::text); + +CREATE TABLE xpath_test (id integer NOT NULL, t xml); +INSERT INTO xpath_test VALUES (1, '1'); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4, doc int4); + +DROP TABLE xpath_test; +CREATE TABLE xpath_test (id integer NOT NULL, t text); +INSERT INTO xpath_test VALUES (1, '1'); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4); +SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true') +as t(id int4, doc int4); + +create table articles (article_id integer, article_xml xml, date_entered date); +insert into articles (article_id, article_xml, date_entered) +values (2, '
test37
', now()); +SELECT * FROM +xpath_table('article_id', + 'article_xml', + 'articles', + '/article/author|/article/pages|/article/title', + 'date_entered > ''2003-01-01'' ') +AS t(article_id integer, author text, page_count integer, title text); + +-- this used to fail when invoked a second time +select xslt_process('',$$ + + + + + +$$)::xml; + +select xslt_process('',$$ + + + + + +$$)::xml; + +create table t1 (id integer, xml_data xml); +insert into t1 (id, xml_data) +values +(1, 'Some +Value'); + +create index idx_xpath on t1 ( xpath_string +('/attributes/attribute[@name="attr_1"]/text()', xml_data::text)); diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c index b2458e9b11..8e289bdc49 100644 --- a/contrib/xml2/xpath.c +++ b/contrib/xml2/xpath.c @@ -1,5 +1,5 @@ /* - * $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.26 2010/02/28 19:51:37 tgl Exp $ + * $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.27 2010/02/28 21:31:57 tgl Exp $ * * Parser interface for DOM-based parser (libxml) rather than * stream-based SAX-type parser @@ -42,10 +42,6 @@ void pgxml_parser_init(void); /* local declarations */ -static void *pgxml_palloc(size_t size); -static void *pgxml_repalloc(void *ptr, size_t size); -static void pgxml_pfree(void *ptr); -static char *pgxml_pstrdup(const char *string); static void pgxml_errorHandler(void *ctxt, const char *msg,...); static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, @@ -59,41 +55,10 @@ static xmlChar *pgxml_texttoxmlchar(text *textstring); static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar *xpath); - /* Global variables */ static char *pgxml_errorMsg = NULL; /* overall error message */ -/* memory handling passthrough functions (e.g. palloc, pstrdup are - currently macros, and the others might become so...) */ - -static void * -pgxml_palloc(size_t size) -{ -/* elog(DEBUG1,"Alloc %d in CMC %p",size,CurrentMemoryContext); */ - return palloc(size); -} - -static void * -pgxml_repalloc(void *ptr, size_t size) -{ -/* elog(DEBUG1,"ReAlloc in CMC %p",CurrentMemoryContext);*/ - return repalloc(ptr, size); -} - -static void -pgxml_pfree(void *ptr) -{ -/* elog(DEBUG1,"Free in CMC %p",CurrentMemoryContext); */ - pfree(ptr); -} - -static char * -pgxml_pstrdup(const char *string) -{ - return pstrdup(string); -} - /* * The error handling function. This formats an error message and sets * a flag - an ereport will be issued prior to return @@ -157,9 +122,6 @@ pgxml_parser_init(void) pgxml_errorMsg = NULL; xmlSetGenericErrorFunc(NULL, pgxml_errorHandler); - /* Replace libxml memory handling (DANGEROUS) */ - xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup); - /* Initialize libxml */ xmlInitParser(); @@ -627,7 +589,7 @@ xpath_table(PG_FUNCTION_ARGS) int j; int rownr; /* For issuing multiple rows from one original * document */ - int had_values; /* To determine end of nodeset results */ + bool had_values; /* To determine end of nodeset results */ StringInfoData query_buf; /* We only have a valid tuple description in table function mode */ @@ -654,7 +616,6 @@ xpath_table(PG_FUNCTION_ARGS) * The tuplestore must exist in a higher context than this function call * (per_query_ctx is used) */ - per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; oldcontext = MemoryContextSwitchTo(per_query_ctx); @@ -671,6 +632,12 @@ xpath_table(PG_FUNCTION_ARGS) /* get the requested return tuple description */ ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc); + /* must have at least one output column (for the pkey) */ + if (ret_tupdesc->natts < 1) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("xpath_table must have at least one output column"))); + /* * At the moment we assume that the returned attributes make sense for the * XPath specififed (i.e. we trust the caller). It's not fatal if they get @@ -686,26 +653,27 @@ xpath_table(PG_FUNCTION_ARGS) rsinfo->setDesc = ret_tupdesc; values = (char **) palloc(ret_tupdesc->natts * sizeof(char *)); - xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *)); - /* Split XPaths. xpathset is a writable CString. */ - - /* Note that we stop splitting once we've done all needed for tupdesc */ - + /* + * Split XPaths. xpathset is a writable CString. + * + * Note that we stop splitting once we've done all needed for tupdesc + */ numpaths = 0; pos = xpathset; - do + while (numpaths < (ret_tupdesc->natts - 1)) { - xpaths[numpaths] = (xmlChar *) pos; + xpaths[numpaths++] = (xmlChar *) pos; pos = strstr(pos, pathsep); if (pos != NULL) { *pos = '\0'; pos++; } - numpaths++; - } while ((pos != NULL) && (numpaths < (ret_tupdesc->natts - 1))); + else + break; + } /* Now build query */ initStringInfo(&query_buf); @@ -800,7 +768,7 @@ xpath_table(PG_FUNCTION_ARGS) do { /* Now evaluate the set of xpaths. */ - had_values = 0; + had_values = false; for (j = 0; j < numpaths; j++) { ctxt = xmlXPathNewContext(doctree); @@ -813,10 +781,7 @@ xpath_table(PG_FUNCTION_ARGS) { xmlCleanupParser(); xmlFreeDoc(doctree); - elog_error("XPath Syntax Error", true); - - PG_RETURN_NULL(); /* Keep compiler happy */ } /* Now evaluate the path expression. */ @@ -829,11 +794,12 @@ xpath_table(PG_FUNCTION_ARGS) { case XPATH_NODESET: /* We see if this nodeset has enough nodes */ - if ((res->nodesetval != NULL) && (rownr < res->nodesetval->nodeNr)) + if (res->nodesetval != NULL && + rownr < res->nodesetval->nodeNr) { resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]); - had_values = 1; + had_values = true; } else resstr = NULL;