mirror of https://github.com/postgres/postgres
Move new version of contrib/ xml into xml2, keep old version in /xml.
This commit is contained in:
parent
adca025c9e
commit
31f4b59a46
|
@ -217,5 +217,9 @@ vacuumlo -
|
|||
by Peter T Mount <peter@retep.org.uk>
|
||||
|
||||
xml -
|
||||
Storing XML in PostgreSQL (obsolete version)
|
||||
by John Gray <jgray@azuli.co.uk>
|
||||
|
||||
xml2 -
|
||||
Storing XML in PostgreSQL
|
||||
by John Gray <jgray@azuli.co.uk>
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
PGXML TODO List
|
||||
===============
|
||||
|
||||
Some of these items still require much more thought! Since the first
|
||||
release, the XPath support has improved (because I'm no longer using a
|
||||
homemade algorithm!).
|
||||
|
||||
1. Performance considerations
|
||||
|
||||
At present each document is parsed to produce the DOM tree on every query.
|
||||
|
||||
Pros:
|
||||
Easy
|
||||
No persistent memory or storage allocation for parsed trees
|
||||
(libxml docs suggest representation of a document might
|
||||
be 4 times the size of the text)
|
||||
|
||||
Cons:
|
||||
Slow/CPU-intensive to parse.
|
||||
Makes it difficult for PLs to apply libxml manipulations to create
|
||||
new documents or amend existing ones.
|
||||
|
||||
|
||||
2. XQuery
|
||||
|
||||
I'm not sure if the addition of XQuery would be best as a function or
|
||||
as a new front-end parser. This is one to think about, but with a
|
||||
decent implementation of XPath, one of the prerequisites is covered.
|
||||
|
||||
3. DOM Interfaces
|
||||
|
||||
Expose more aspects of the DOM to user functions/ PLs. This would
|
||||
allow a procedure in a PL to run some queries and then use exposed
|
||||
interfaces to libxml to create an XML document out of the query
|
||||
results. I accept the argument that this might be more properly
|
||||
performed on the client side.
|
||||
|
||||
4. Returning sets of documents from XPath queries.
|
||||
|
||||
Although the current implementation allows you to amalgamate the
|
||||
returned results into a single document, it's quite possible that
|
||||
you'd like to use the returned set of nodes as a source for FROM.
|
||||
|
||||
Is there a good way to optimise/index the results of certain XPath
|
||||
operations to make them faster?:
|
||||
|
||||
select docid, pgxml_xpath(document,'//site/location/text()','','') as location
|
||||
where pgxml_xpath(document,'//site/name/text()','','') = 'Church Farm';
|
||||
|
||||
and with multiple element occurrences in a document?
|
||||
|
||||
select d.docid, pgxml_xpath(d.document,'//site/location/text()','','')
|
||||
from docstore d,
|
||||
pgxml_xpaths('docstore','document','//feature/type/text()','docid') ft
|
||||
where ft.key = d.docid and ft.value ='Limekiln';
|
||||
|
||||
pgxml_xpaths params are relname, attrname, xpath, returnkey. It would
|
||||
return a set of two-element tuples (key,value) consisting of the value of
|
||||
returnkey, and the cdata value of the xpath. The XML document would be
|
||||
defined by relname and attrname.
|
||||
|
||||
The pgxml_xpaths function could be the basis of a functional index,
|
||||
which could speed up the above query very substantially, working
|
||||
through the normal query planner mechanism.
|
||||
|
||||
5. Return type support.
|
||||
|
||||
Better support for returning e.g. numeric or boolean values. I need to
|
||||
get to grips with the returned data from libxml first.
|
||||
|
||||
|
||||
John Gray <jgray@azuli.co.uk> 16 August 2001
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,352 @@
|
|||
/********************************************************
|
||||
* Interface code to parse an XML document using expat
|
||||
********************************************************/
|
||||
|
||||
#include "postgres.h"
|
||||
#include "fmgr.h"
|
||||
|
||||
#include "expat.h"
|
||||
#include "pgxml.h"
|
||||
|
||||
/* Memory management - we make expat use standard pg MM */
|
||||
|
||||
XML_Memory_Handling_Suite mhs;
|
||||
|
||||
/* passthrough functions (palloc is a macro) */
|
||||
|
||||
/*
 * pgxml_palloc - allocation callback installed into expat's memory suite.
 *
 * A real function is needed because palloc is a macro and expat wants a
 * function pointer.
 */
static void *
pgxml_palloc(size_t size)
{
	void	   *chunk = palloc(size);

	return chunk;
}
|
||||
|
||||
/*
 * pgxml_repalloc - reallocation callback installed into expat's memory
 * suite; forwards straight to repalloc.
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	void	   *resized = repalloc(ptr, size);

	return resized;
}
|
||||
|
||||
/*
 * pgxml_pfree - deallocation callback installed into expat's memory suite;
 * forwards straight to pfree.
 *
 * Note: a void function must not "return" an expression, so the call is a
 * plain statement.
 */
static void
pgxml_pfree(void *ptr)
{
	pfree(ptr);
}
|
||||
|
||||
static void
|
||||
pgxml_mhs_init()
|
||||
{
|
||||
mhs.malloc_fcn = pgxml_palloc;
|
||||
mhs.realloc_fcn = pgxml_repalloc;
|
||||
mhs.free_fcn = pgxml_pfree;
|
||||
}
|
||||
|
||||
/*
 * pgxml_handler_init - placeholder for installing parser handlers.
 *
 * The intent is to configure handlers from user-supplied settings; how
 * those settings would be provided has not been decided, so the function
 * currently does nothing.
 */
static void
pgxml_handler_init()
{
	/* intentionally empty: no user-configurable handlers yet */
}
|
||||
|
||||
/* Returns true if document is well-formed */
|
||||
|
||||
PG_FUNCTION_INFO_V1(pgxml_parse);
|
||||
|
||||
Datum
|
||||
pgxml_parse(PG_FUNCTION_ARGS)
|
||||
{
|
||||
/* called as pgxml_parse(document) */
|
||||
XML_Parser p;
|
||||
text *t = PG_GETARG_TEXT_P(0); /* document buffer */
|
||||
int32 docsize = VARSIZE(t) - VARHDRSZ;
|
||||
|
||||
pgxml_mhs_init();
|
||||
|
||||
pgxml_handler_init();
|
||||
|
||||
p = XML_ParserCreate_MM(NULL, &mhs, NULL);
|
||||
if (!p)
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
|
||||
errmsg("could not create expat parser")));
|
||||
PG_RETURN_NULL(); /* seems appropriate if we couldn't parse */
|
||||
}
|
||||
|
||||
if (!XML_Parse(p, (char *) VARDATA(t), docsize, 1))
|
||||
{
|
||||
/*
|
||||
* elog(WARNING, "Parse error at line %d:%s",
|
||||
* XML_GetCurrentLineNumber(p),
|
||||
* XML_ErrorString(XML_GetErrorCode(p)));
|
||||
*/
|
||||
XML_ParserFree(p);
|
||||
PG_RETURN_BOOL(false);
|
||||
}
|
||||
|
||||
XML_ParserFree(p);
|
||||
PG_RETURN_BOOL(true);
|
||||
}
|
||||
|
||||
/* XPath handling functions */
|
||||
|
||||
/* XPath support here is for a very skeletal kind of XPath!
|
||||
It was easy to program though... */
|
||||
|
||||
/* This first is the core function that builds a result set. The
|
||||
actual functions called by the user manipulate that result set
|
||||
in various ways.
|
||||
*/
|
||||
|
||||
static XPath_Results *
|
||||
build_xpath_results(text *doc, text *pathstr)
|
||||
{
|
||||
XPath_Results *xpr;
|
||||
char *res;
|
||||
pgxml_udata *udata;
|
||||
XML_Parser p;
|
||||
int32 docsize;
|
||||
|
||||
xpr = (XPath_Results *) palloc((sizeof(XPath_Results)));
|
||||
memset((void *) xpr, 0, sizeof(XPath_Results));
|
||||
xpr->rescount = 0;
|
||||
|
||||
docsize = VARSIZE(doc) - VARHDRSZ;
|
||||
|
||||
/* res isn't going to be the real return type, it is just a buffer */
|
||||
|
||||
res = (char *) palloc(docsize);
|
||||
memset((void *) res, 0, docsize);
|
||||
|
||||
xpr->resbuf = res;
|
||||
|
||||
udata = (pgxml_udata *) palloc((sizeof(pgxml_udata)));
|
||||
memset((void *) udata, 0, sizeof(pgxml_udata));
|
||||
|
||||
udata->currentpath[0] = '\0';
|
||||
udata->textgrab = 0;
|
||||
|
||||
udata->path = (char *) palloc(VARSIZE(pathstr));
|
||||
memcpy(udata->path, VARDATA(pathstr), VARSIZE(pathstr) - VARHDRSZ);
|
||||
|
||||
udata->path[VARSIZE(pathstr) - VARHDRSZ] = '\0';
|
||||
|
||||
udata->resptr = res;
|
||||
udata->reslen = 0;
|
||||
|
||||
udata->xpres = xpr;
|
||||
|
||||
/* Now fire up the parser */
|
||||
pgxml_mhs_init();
|
||||
|
||||
p = XML_ParserCreate_MM(NULL, &mhs, NULL);
|
||||
if (!p)
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
|
||||
errmsg("could not create expat parser")));
|
||||
pfree(xpr);
|
||||
pfree(udata->path);
|
||||
pfree(udata);
|
||||
pfree(res);
|
||||
return NULL;
|
||||
}
|
||||
XML_SetUserData(p, (void *) udata);
|
||||
|
||||
/* Set the handlers */
|
||||
|
||||
XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
|
||||
XML_SetCharacterDataHandler(p, pgxml_charhandler);
|
||||
|
||||
if (!XML_Parse(p, (char *) VARDATA(doc), docsize, 1))
|
||||
{
|
||||
/*
|
||||
* elog(WARNING, "Parse error at line %d:%s",
|
||||
* XML_GetCurrentLineNumber(p),
|
||||
* XML_ErrorString(XML_GetErrorCode(p)));
|
||||
*/
|
||||
XML_ParserFree(p);
|
||||
pfree(xpr);
|
||||
pfree(udata->path);
|
||||
pfree(udata);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pfree(udata->path);
|
||||
pfree(udata);
|
||||
XML_ParserFree(p);
|
||||
return xpr;
|
||||
}
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(pgxml_xpath);
|
||||
|
||||
Datum
|
||||
pgxml_xpath(PG_FUNCTION_ARGS)
|
||||
{
|
||||
/* called as pgxml_xpath(document,pathstr, index) for the moment */
|
||||
|
||||
XPath_Results *xpresults;
|
||||
text *restext;
|
||||
|
||||
text *t = PG_GETARG_TEXT_P(0); /* document buffer */
|
||||
text *t2 = PG_GETARG_TEXT_P(1);
|
||||
int32 ind = PG_GETARG_INT32(2) - 1;
|
||||
|
||||
xpresults = build_xpath_results(t, t2);
|
||||
|
||||
/*
|
||||
* This needs to be changed depending on the mechanism for returning
|
||||
* our set of results.
|
||||
*/
|
||||
|
||||
if (xpresults == NULL) /* parse error (not WF or parser failure) */
|
||||
PG_RETURN_NULL();
|
||||
|
||||
if (ind >= (xpresults->rescount))
|
||||
PG_RETURN_NULL();
|
||||
|
||||
restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ);
|
||||
memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]);
|
||||
|
||||
VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ;
|
||||
|
||||
pfree(xpresults->resbuf);
|
||||
pfree(xpresults);
|
||||
|
||||
PG_RETURN_TEXT_P(restext);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
pgxml_pathcompare(void *userData)
|
||||
{
|
||||
char *matchpos;
|
||||
|
||||
matchpos = strstr(UD->currentpath, UD->path);
|
||||
|
||||
if (matchpos == NULL)
|
||||
{ /* Should we have more logic here ? */
|
||||
if (UD->textgrab)
|
||||
{
|
||||
UD->textgrab = 0;
|
||||
pgxml_finalisegrabbedtext(userData);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* OK, we have a match of some sort. Now we need to check that our
|
||||
* match is anchored to the *end* of the string AND that it is
|
||||
* immediately preceded by a '/'
|
||||
*/
|
||||
|
||||
/*
|
||||
* This test wouldn't work if strlen (UD->path) overran the length of
|
||||
* the currentpath, but that's not possible because we got a match!
|
||||
*/
|
||||
|
||||
if ((matchpos + strlen(UD->path))[0] == '\0')
|
||||
{
|
||||
if ((UD->path)[0] == '/')
|
||||
{
|
||||
if (matchpos == UD->currentpath)
|
||||
UD->textgrab = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((matchpos - 1)[0] == '/')
|
||||
UD->textgrab = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgxml_starthandler(void *userData, const XML_Char * name,
|
||||
const XML_Char ** atts)
|
||||
{
|
||||
|
||||
char sepstr[] = "/";
|
||||
|
||||
if ((strlen(name) + strlen(UD->currentpath)) > MAXPATHLENGTH - 2)
|
||||
elog(WARNING, "path too long");
|
||||
else
|
||||
{
|
||||
strncat(UD->currentpath, sepstr, 1);
|
||||
strcat(UD->currentpath, name);
|
||||
}
|
||||
if (UD->textgrab)
|
||||
{
|
||||
/*
|
||||
* Depending on user preference, should we "reconstitute" the
|
||||
* element into the result text?
|
||||
*/
|
||||
}
|
||||
else
|
||||
pgxml_pathcompare(userData);
|
||||
}
|
||||
|
||||
static void
|
||||
pgxml_endhandler(void *userData, const XML_Char * name)
|
||||
{
|
||||
/*
|
||||
* Start by removing the current element off the end of the
|
||||
* currentpath
|
||||
*/
|
||||
|
||||
char *sepptr;
|
||||
|
||||
sepptr = strrchr(UD->currentpath, '/');
|
||||
if (sepptr == NULL)
|
||||
{
|
||||
/* internal error */
|
||||
elog(ERROR, "did not find '/'");
|
||||
sepptr = UD->currentpath;
|
||||
}
|
||||
if (strcmp(name, sepptr + 1) != 0)
|
||||
{
|
||||
elog(WARNING, "wanted [%s], got [%s]", sepptr, name);
|
||||
/* unmatched entry, so do nothing */
|
||||
}
|
||||
else
|
||||
{
|
||||
sepptr[0] = '\0'; /* Chop that element off the end */
|
||||
}
|
||||
|
||||
if (UD->textgrab)
|
||||
pgxml_pathcompare(userData);
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
pgxml_charhandler(void *userData, const XML_Char * s, int len)
|
||||
{
|
||||
if (UD->textgrab)
|
||||
{
|
||||
if (len > 0)
|
||||
{
|
||||
memcpy(UD->resptr, s, len);
|
||||
UD->resptr += len;
|
||||
UD->reslen += len;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Should I be using PG list types here? */
|
||||
|
||||
static void
|
||||
pgxml_finalisegrabbedtext(void *userData)
|
||||
{
|
||||
/* In res/reslen, we have a single result. */
|
||||
UD->xpres->results[UD->xpres->rescount] = UD->resptr - UD->reslen;
|
||||
UD->xpres->reslens[UD->xpres->rescount] = UD->reslen;
|
||||
UD->reslen = 0;
|
||||
UD->xpres->rescount++;
|
||||
|
||||
/*
|
||||
* This effectively concatenates all the results together but we do
|
||||
* know where one ends and the next begins
|
||||
*/
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/* Header for pg xml parser interface */
|
||||
|
||||
static void *pgxml_palloc(size_t size);
|
||||
static void *pgxml_repalloc(void *ptr, size_t size);
|
||||
static void pgxml_pfree(void *ptr);
|
||||
static void pgxml_mhs_init();
|
||||
static void pgxml_handler_init();
|
||||
Datum pgxml_parse(PG_FUNCTION_ARGS);
|
||||
Datum pgxml_xpath(PG_FUNCTION_ARGS);
|
||||
static void pgxml_starthandler(void *userData, const XML_Char * name,
|
||||
const XML_Char ** atts);
|
||||
static void pgxml_endhandler(void *userData, const XML_Char * name);
|
||||
static void pgxml_charhandler(void *userData, const XML_Char * s, int len);
|
||||
static void pgxml_pathcompare(void *userData);
|
||||
static void pgxml_finalisegrabbedtext(void *userData);
|
||||
|
||||
#define MAXPATHLENGTH 512
|
||||
#define MAXRESULTS 100
|
||||
|
||||
|
||||
/*
 * XPath_Results - set of text results collected while parsing a document.
 * Each results[i] points into resbuf and is reslens[i] bytes long; the
 * entries are not NUL-terminated individually.
 */
typedef struct
|
||||
{
|
||||
int rescount; /* number of entries filled in so far */
|
||||
char *results[MAXRESULTS]; /* start of each result inside resbuf */
|
||||
int32 reslens[MAXRESULTS]; /* length in bytes of each result */
|
||||
char *resbuf; /* pointer to the result buffer for pfree */
|
||||
} XPath_Results;
|
||||
|
||||
|
||||
|
||||
/*
 * pgxml_udata - per-parse state passed to the expat handlers as user data
 * (accessed through the UD macro).
 */
typedef struct
|
||||
{
|
||||
char currentpath[MAXPATHLENGTH]; /* "/a/b/c" path of the element being parsed */
|
||||
char *path; /* NUL-terminated path expression to match */
|
||||
int textgrab; /* nonzero while character data is being collected */
|
||||
char *resptr; /* next write position in the result buffer */
|
||||
int32 reslen; /* bytes collected for the current result */
|
||||
XPath_Results *xpres; /* where finished results are recorded */
|
||||
} pgxml_udata;
|
||||
|
||||
|
||||
#define UD ((pgxml_udata *) userData)
|
|
@ -0,0 +1,265 @@
|
|||
/* Parser interface for DOM-based parser (libxml) rather than
|
||||
stream-based SAX-type parser */
|
||||
|
||||
#include "postgres.h"
|
||||
#include "fmgr.h"
|
||||
|
||||
/* libxml includes */
|
||||
|
||||
#include <libxml/xpath.h>
|
||||
#include <libxml/tree.h>
|
||||
#include <libxml/xmlmemory.h>
|
||||
|
||||
/* declarations */
|
||||
|
||||
static void *pgxml_palloc(size_t size);
|
||||
static void *pgxml_repalloc(void *ptr, size_t size);
|
||||
static void pgxml_pfree(void *ptr);
|
||||
static char *pgxml_pstrdup(const char *string);
|
||||
|
||||
static void pgxml_parser_init();
|
||||
|
||||
static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlDocPtr doc,
|
||||
xmlChar * toptagname, xmlChar * septagname,
|
||||
int format);
|
||||
|
||||
static xmlChar *pgxml_texttoxmlchar(text *textstring);
|
||||
|
||||
|
||||
Datum pgxml_parse(PG_FUNCTION_ARGS);
|
||||
Datum pgxml_xpath(PG_FUNCTION_ARGS);
|
||||
|
||||
/* memory handling passthrough functions (e.g. palloc, pstrdup are
|
||||
currently macros, and the others might become so...) */
|
||||
|
||||
/*
 * pgxml_palloc - malloc replacement handed to libxml via xmlMemSetup.
 *
 * palloc is currently a macro, so a real function is needed to obtain a
 * function pointer.
 */
static void *
pgxml_palloc(size_t size)
{
	void	   *chunk = palloc(size);

	return chunk;
}
|
||||
|
||||
/*
 * pgxml_repalloc - realloc replacement handed to libxml via xmlMemSetup;
 * forwards straight to repalloc.
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	void	   *resized = repalloc(ptr, size);

	return resized;
}
|
||||
|
||||
/*
 * pgxml_pfree - free replacement handed to libxml via xmlMemSetup;
 * forwards straight to pfree.
 *
 * Note: a void function must not "return" an expression, so the call is a
 * plain statement.
 */
static void
pgxml_pfree(void *ptr)
{
	pfree(ptr);
}
|
||||
|
||||
/*
 * pgxml_pstrdup - strdup replacement handed to libxml via xmlMemSetup.
 *
 * pstrdup is currently a macro, so a real function is needed to obtain a
 * function pointer.
 */
static char *
pgxml_pstrdup(const char *string)
{
	char	   *copy = pstrdup(string);

	return copy;
}
|
||||
|
||||
static void
|
||||
pgxml_parser_init()
|
||||
{
|
||||
/*
|
||||
* This code should also set parser settings from user-supplied info.
|
||||
* Quite how these settings are made is another matter :)
|
||||
*/
|
||||
|
||||
xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
|
||||
xmlInitParser();
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Returns true if document is well-formed */
|
||||
|
||||
PG_FUNCTION_INFO_V1(pgxml_parse);
|
||||
|
||||
Datum
|
||||
pgxml_parse(PG_FUNCTION_ARGS)
|
||||
{
|
||||
/* called as pgxml_parse(document) */
|
||||
xmlDocPtr doctree;
|
||||
text *t = PG_GETARG_TEXT_P(0); /* document buffer */
|
||||
int32 docsize = VARSIZE(t) - VARHDRSZ;
|
||||
|
||||
pgxml_parser_init();
|
||||
|
||||
doctree = xmlParseMemory((char *) VARDATA(t), docsize);
|
||||
if (doctree == NULL)
|
||||
{
|
||||
xmlCleanupParser();
|
||||
PG_RETURN_BOOL(false); /* i.e. not well-formed */
|
||||
}
|
||||
xmlCleanupParser();
|
||||
xmlFreeDoc(doctree);
|
||||
PG_RETURN_BOOL(true);
|
||||
}
|
||||
|
||||
static xmlChar
|
||||
*
|
||||
pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
|
||||
xmlDocPtr doc,
|
||||
xmlChar * toptagname,
|
||||
xmlChar * septagname,
|
||||
int format)
|
||||
{
|
||||
/* Function translates a nodeset into a text representation */
|
||||
|
||||
/*
|
||||
* iterates over each node in the set and calls xmlNodeDump to write
|
||||
* it to an xmlBuffer -from which an xmlChar * string is returned.
|
||||
*/
|
||||
/* each representation is surrounded by <tagname> ... </tagname> */
|
||||
/* if format==0, add a newline between nodes?? */
|
||||
|
||||
xmlBufferPtr buf;
|
||||
xmlChar *result;
|
||||
int i;
|
||||
|
||||
buf = xmlBufferCreate();
|
||||
|
||||
if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
|
||||
{
|
||||
xmlBufferWriteChar(buf, "<");
|
||||
xmlBufferWriteCHAR(buf, toptagname);
|
||||
xmlBufferWriteChar(buf, ">");
|
||||
}
|
||||
if (nodeset != NULL)
|
||||
{
|
||||
for (i = 0; i < nodeset->nodeNr; i++)
|
||||
{
|
||||
if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
|
||||
{
|
||||
xmlBufferWriteChar(buf, "<");
|
||||
xmlBufferWriteCHAR(buf, septagname);
|
||||
xmlBufferWriteChar(buf, ">");
|
||||
}
|
||||
xmlNodeDump(buf, doc, nodeset->nodeTab[i], 1, (format == 2));
|
||||
|
||||
if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
|
||||
{
|
||||
xmlBufferWriteChar(buf, "</");
|
||||
xmlBufferWriteCHAR(buf, septagname);
|
||||
xmlBufferWriteChar(buf, ">");
|
||||
}
|
||||
if (format)
|
||||
xmlBufferWriteChar(buf, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
|
||||
{
|
||||
xmlBufferWriteChar(buf, "</");
|
||||
xmlBufferWriteCHAR(buf, toptagname);
|
||||
xmlBufferWriteChar(buf, ">");
|
||||
}
|
||||
result = xmlStrdup(buf->content);
|
||||
xmlBufferFree(buf);
|
||||
return result;
|
||||
}
|
||||
|
||||
static xmlChar *
|
||||
pgxml_texttoxmlchar(text *textstring)
|
||||
{
|
||||
xmlChar *res;
|
||||
int32 txsize;
|
||||
|
||||
txsize = VARSIZE(textstring) - VARHDRSZ;
|
||||
res = (xmlChar *) palloc(txsize + 1);
|
||||
memcpy((char *) res, VARDATA(textstring), txsize);
|
||||
res[txsize] = '\0';
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(pgxml_xpath);
|
||||
|
||||
Datum
|
||||
pgxml_xpath(PG_FUNCTION_ARGS)
|
||||
{
|
||||
xmlDocPtr doctree;
|
||||
xmlXPathContextPtr ctxt;
|
||||
xmlXPathObjectPtr res;
|
||||
xmlChar *xpath,
|
||||
*xpresstr,
|
||||
*toptag,
|
||||
*septag;
|
||||
xmlXPathCompExprPtr comppath;
|
||||
|
||||
int32 docsize,
|
||||
ressize;
|
||||
text *t,
|
||||
*xpres;
|
||||
|
||||
t = PG_GETARG_TEXT_P(0); /* document buffer */
|
||||
xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1)); /* XPath expression */
|
||||
toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
|
||||
septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
|
||||
|
||||
docsize = VARSIZE(t) - VARHDRSZ;
|
||||
|
||||
pgxml_parser_init();
|
||||
|
||||
doctree = xmlParseMemory((char *) VARDATA(t), docsize);
|
||||
if (doctree == NULL)
|
||||
{ /* not well-formed */
|
||||
xmlCleanupParser();
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
ctxt = xmlXPathNewContext(doctree);
|
||||
ctxt->node = xmlDocGetRootElement(doctree);
|
||||
|
||||
/* compile the path */
|
||||
comppath = xmlXPathCompile(xpath);
|
||||
if (comppath == NULL)
|
||||
{
|
||||
elog(WARNING, "XPath syntax error");
|
||||
xmlFreeDoc(doctree);
|
||||
pfree((void *) xpath);
|
||||
xmlCleanupParser();
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
/* Now evaluate the path expression. */
|
||||
res = xmlXPathCompiledEval(comppath, ctxt);
|
||||
xmlXPathFreeCompExpr(comppath);
|
||||
|
||||
if (res == NULL)
|
||||
{
|
||||
xmlFreeDoc(doctree);
|
||||
pfree((void *) xpath);
|
||||
xmlCleanupParser();
|
||||
PG_RETURN_NULL(); /* seems appropriate */
|
||||
}
|
||||
/* now we dump this node, ?surrounding by tags? */
|
||||
/* To do this, we look first at the type */
|
||||
switch (res->type)
|
||||
{
|
||||
case XPATH_NODESET:
|
||||
xpresstr = pgxmlNodeSetToText(res->nodesetval,
|
||||
doctree,
|
||||
toptag, septag, 0);
|
||||
break;
|
||||
case XPATH_STRING:
|
||||
xpresstr = xmlStrdup(res->stringval);
|
||||
break;
|
||||
default:
|
||||
elog(WARNING, "Unsupported XQuery result: %d", res->type);
|
||||
xpresstr = xmlStrdup("<unsupported/>");
|
||||
}
|
||||
|
||||
|
||||
/* Now convert this result back to text */
|
||||
ressize = strlen(xpresstr);
|
||||
xpres = (text *) palloc(ressize + VARHDRSZ);
|
||||
memcpy(VARDATA(xpres), xpresstr, ressize);
|
||||
VARATT_SIZEP(xpres) = ressize + VARHDRSZ;
|
||||
|
||||
/* Free various storage */
|
||||
xmlFreeDoc(doctree);
|
||||
pfree((void *) xpath);
|
||||
xmlFree(xpresstr);
|
||||
xmlCleanupParser();
|
||||
PG_RETURN_TEXT_P(xpres);
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
-- SQL for XML parser
|
||||
|
||||
-- Adjust this setting to control where the objects get created.
|
||||
SET search_path TO public;
|
||||
|
||||
-- pgxml_parse(document): returns true if the document is well-formed.
-- STRICT: a NULL argument yields NULL without calling the C function.
CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS boolean
|
||||
AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
|
||||
|
||||
-- pgxml_xpath(document, xpath, toptag, septag): evaluates the XPath
-- expression against the document and returns the result as text.
-- STRICT: any NULL argument yields NULL without calling the C function.
CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text) RETURNS text
|
||||
AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
|
Loading…
Reference in New Issue