XML conversion utility, requires expat library.
John Gray
This commit is contained in:
parent
d4cafeba31
commit
113bb9b5ac
@ -1,6 +1,6 @@
|
|||||||
|
|
||||||
The PostgreSQL contrib tree
|
The PostgreSQL contrib tree
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
---------------------------
|
||||||
|
|
||||||
This subtree contains tools, modules, and examples that are not
|
This subtree contains tools, modules, and examples that are not
|
||||||
maintained as part of the core PostgreSQL system, mainly because
|
maintained as part of the core PostgreSQL system, mainly because
|
||||||
@ -177,3 +177,7 @@ userlock -
|
|||||||
vacuumlo -
|
vacuumlo -
|
||||||
Remove orphaned large objects
|
Remove orphaned large objects
|
||||||
by Peter T Mount <peter@retep.org.uk>
|
by Peter T Mount <peter@retep.org.uk>
|
||||||
|
|
||||||
|
xml -
|
||||||
|
Storing XML in PostgreSQL
|
||||||
|
by John Gray <jgray@beansindustry.co.uk>
|
||||||
|
43
contrib/xml/Makefile
Normal file
43
contrib/xml/Makefile
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
#-------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# Makefile--
|
||||||
|
# Adapted from tutorial makefile
|
||||||
|
#-------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Where this directory sits in the source tree, and the relative route back
# to the top of the build tree (used to pull in the global make settings).
subdir = contrib/xml
top_builddir = ../..
include $(top_builddir)/src/Makefile.global

# Compile with the flags used for shared-library objects.
override CFLAGS += $(CFLAGS_SL)

#
# DLOBJS lists the dynamically loaded object files.  The SQL scripts in
# QUERIES contain the CREATE FUNCTION statements that load routines
# from those files.
#
DLOBJS = pgxml$(DLSUFFIX)

QUERIES = pgxml.sql

# Default target: the loadable module plus the generated SQL script.
all: $(DLOBJS) $(QUERIES)
|
||||||
|
|
||||||
|
# Link the loadable module.  Requires the expat library.
#
# The target pattern uses $(DLSUFFIX) so that it always matches the name
# listed in DLOBJS, and -lexpat follows the object file so that the
# object's references to expat are resolved even by linkers that are
# sensitive to the order of objects and libraries.
%$(DLSUFFIX): %.o
	$(CC) -shared -o $@ $< -lexpat
|
||||||
|
|
||||||
|
|
||||||
|
# Generate an SQL script from its .source template.  The recipe works out
# the invoking user's name (USER, then LOGNAME, then whoami), and then
# substitutes the _CWD_, _OBJWD_, _DLSUFFIX_ and _USER_ placeholders.
# Both _CWD_ and _OBJWD_ are replaced by the current directory.
%.sql: %.source
	[ -n "$$USER" ] || USER=$$LOGNAME; \
	[ -n "$$USER" ] || USER=`whoami`; \
	[ -n "$$USER" ] || { echo 'Cannot deduce $$USER.'; exit 1; }; \
	rm -f $@; \
	C=`pwd`; \
	sed -e "s:_CWD_:$$C:g" \
	    -e "s:_OBJWD_:$$C:g" \
	    -e "s:_DLSUFFIX_:$(DLSUFFIX):g" \
	    -e "s/_USER_/$$USER/g" < $< > $@
|
||||||
|
|
||||||
|
# Remove everything this Makefile generates.  Declared phony so that a
# stray file named "clean" in this directory cannot mask the target.
.PHONY: clean
clean:
	rm -f $(DLOBJS) $(QUERIES)
|
78
contrib/xml/README
Normal file
78
contrib/xml/README
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
This package contains a couple of simple routines for hooking the
|
||||||
|
expat XML parser up to PostgreSQL. This is a work-in-progress and all
|
||||||
|
very basic at the moment (see the file TODO for some outline of what
|
||||||
|
remains to be done).
|
||||||
|
|
||||||
|
At present, two functions are defined, one which checks
|
||||||
|
well-formedness, and the other which performs very simple XPath-type
|
||||||
|
queries.
|
||||||
|
|
||||||
|
Prerequisite:
|
||||||
|
|
||||||
|
expat parser 1.95.0 or newer (http://expat.sourceforge.net)
|
||||||
|
|
||||||
|
I used a shared library version - I'm sure you could use a static
|
||||||
|
library if you wished though. I had no problems compiling from source.
|
||||||
|
|
||||||
|
Function documentation and usage:
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
pgxml_parse(text) returns bool
|
||||||
|
parses the provided text and returns true or false if it is
|
||||||
|
well-formed or not. It returns NULL if the parser couldn't be
|
||||||
|
created for any reason.
|
||||||
|
|
||||||
|
pgxml_xpath(text doc, text xpath, int n) returns text
|
||||||
|
parses doc and returns the cdata of the nth occurrence of
|
||||||
|
the "XPath" listed. See below for details on the syntax.
|
||||||
|
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
Given a table docstore:
|
||||||
|
|
||||||
|
Attribute | Type | Modifier
|
||||||
|
-----------+---------+----------
|
||||||
|
docid | integer |
|
||||||
|
document | text |
|
||||||
|
|
||||||
|
containing documents such as (these are archaeological site
|
||||||
|
descriptions, in case anyone is wondering):
|
||||||
|
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<site provider="Foundations" sitecode="ak97" version="1">
|
||||||
|
<name>Church Farm, Ashton Keynes</name>
|
||||||
|
<invtype>watching brief</invtype>
|
||||||
|
<location scheme="osgb">SU04209424</location>
|
||||||
|
</site>
|
||||||
|
|
||||||
|
one can type:
|
||||||
|
|
||||||
|
select docid,
|
||||||
|
pgxml_xpath(document,'/site/name',1) as sitename,
|
||||||
|
pgxml_xpath(document,'/site/location',1) as location
|
||||||
|
from docstore;
|
||||||
|
|
||||||
|
and get as output:
|
||||||
|
|
||||||
|
docid | sitename | location
|
||||||
|
-------+-----------------------------+------------
|
||||||
|
1 | Church Farm, Ashton Keynes | SU04209424
|
||||||
|
2 | Glebe Farm, Long Itchington | SP41506500
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
|
||||||
|
"XPath" syntax supported
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
At present it only supports paths of the form:
|
||||||
|
'tag1/tag2' or '/tag1/tag2'
|
||||||
|
|
||||||
|
The first case will find any <tag2> within a <tag1>, the second will
|
||||||
|
find any <tag2> within a <tag1> at the top level of the document.
|
||||||
|
|
||||||
|
The real XPath is much more complex (see TODO file).
|
||||||
|
|
||||||
|
|
||||||
|
John Gray <jgray@azuli.co.uk> 26 July 2001
|
||||||
|
|
83
contrib/xml/TODO
Normal file
83
contrib/xml/TODO
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
PGXML TODO List
|
||||||
|
===============
|
||||||
|
|
||||||
|
Some of these items still require much more thought! The data model
|
||||||
|
for XML documents and the parsing model of expat don't really fit so
|
||||||
|
well with a standard SQL model.
|
||||||
|
|
||||||
|
1. Generalised XML parsing support
|
||||||
|
|
||||||
|
Allow a user to specify handlers (in any PL) to be used by the parser.
|
||||||
|
This must permit distinct sets of parser settings -user may want some
|
||||||
|
documents in a database to be parsed with one set of handlers, others
|
||||||
|
with a different set.
|
||||||
|
|
||||||
|
i.e. the pgxml_parse function would take as parameters (document,
|
||||||
|
parsername) where parsername was the identifier for a collection of
|
||||||
|
handler etc. settings.
|
||||||
|
|
||||||
|
"Stub" handlers in the pgxml code would invoke the functions through
|
||||||
|
the standard fmgr interface. The parser interface would define the
|
||||||
|
prototype for these functions. How does the handler function know
|
||||||
|
which document/context has resulted in it being called?
|
||||||
|
|
||||||
|
Mechanism for defining collection of parser settings (in a table? -but
|
||||||
|
maybe copied for efficiency into a structure when first required by a
|
||||||
|
query?)
|
||||||
|
|
||||||
|
2. Support for other parsers
|
||||||
|
|
||||||
|
Expat may not be the best choice as a parser because a new parser
|
||||||
|
instance is needed for each document i.e. all the handlers must be set
|
||||||
|
again for each document. Another parser may have a more efficient way
|
||||||
|
of parsing a set of documents identically.
|
||||||
|
|
||||||
|
3. XPath support
|
||||||
|
|
||||||
|
Proper XPath support. I really need to sit down and plough
|
||||||
|
through the specification...
|
||||||
|
|
||||||
|
The very simple text comparison system currently used is too
|
||||||
|
basic. Need to convert the path to an ordered list of nodes. Each node
|
||||||
|
is an element qualifier, and may have a list of attribute
|
||||||
|
qualifications attached. This probably requires lex/yacc combination.
|
||||||
|
(James Clark has written a yacc grammar for XPath). Not all the
|
||||||
|
features of XPath are necessarily relevant.
|
||||||
|
|
||||||
|
An option to return subdocuments (i.e. subelements AND cdata, not just
|
||||||
|
cdata). This should maybe be the default.
|
||||||
|
|
||||||
|
4. Multiple occurrences of elements.
|
||||||
|
|
||||||
|
This section is all very sketchy, and has various weaknesses.
|
||||||
|
|
||||||
|
Is there a good way to optimise/index the results of certain XPath
|
||||||
|
operations to make them faster?:
|
||||||
|
|
||||||
|
select docid, pgxml_xpath(document,'/site/location',1) as location
from docstore
|
||||||
|
where pgxml_xpath(document,'/site/name',1) = 'Church Farm';
|
||||||
|
|
||||||
|
and with multiple element occurrences in a document?
|
||||||
|
|
||||||
|
select d.docid, pgxml_xpath(d.document,'/site/location',1)
|
||||||
|
from docstore d,
|
||||||
|
pgxml_xpaths('docstore','document','feature/type','docid') ft
|
||||||
|
where ft.key = d.docid and ft.value ='Limekiln';
|
||||||
|
|
||||||
|
pgxml_xpaths params are relname, attrname, xpath, returnkey. It would
|
||||||
|
return a set of two-element tuples (key,value) consisting of the value of
|
||||||
|
returnkey, and the cdata value of the xpath. The XML document would be
|
||||||
|
defined by relname and attrname.
|
||||||
|
|
||||||
|
The pgxml_xpaths function could be the basis of a functional index,
|
||||||
|
which could speed up the above query very substantially, working
|
||||||
|
through the normal query planner mechanism. Syntax above is fragile
|
||||||
|
through using names rather than OID.
|
||||||
|
|
||||||
|
John Gray <jgray@azuli.co.uk>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
310
contrib/xml/pgxml.c
Normal file
310
contrib/xml/pgxml.c
Normal file
@ -0,0 +1,310 @@
|
|||||||
|
/********************************************************
|
||||||
|
* Interface code to parse an XML document using expat
|
||||||
|
********************************************************/
|
||||||
|
|
||||||
|
#include "postgres.h"
|
||||||
|
#include "fmgr.h"
|
||||||
|
|
||||||
|
#include "expat.h"
|
||||||
|
#include "pgxml.h"
|
||||||
|
|
||||||
|
/* Memory management - we make expat use standard pg MM */
|
||||||
|
|
||||||
|
XML_Memory_Handling_Suite mhs;
|
||||||
|
|
||||||
|
/* passthrough functions (palloc is a macro) */
|
||||||
|
|
||||||
|
/*
 * Allocation hook handed to expat.  palloc is a macro, so it cannot be
 * stored in the memory-handling suite directly; this real function simply
 * forwards the request and hands back whatever palloc returns.
 */
static void *
pgxml_palloc(size_t size)
{
	void	   *chunk;

	chunk = palloc(size);
	return chunk;
}
|
||||||
|
|
||||||
|
/*
 * Reallocation hook handed to expat.
 *
 * The hook stands in for realloc(), which callers may legitimately invoke
 * with a NULL pointer to obtain a fresh block.  repalloc is not documented
 * to accept NULL, so that case is routed to palloc; every other request is
 * forwarded unchanged.
 *
 * ptr:  block previously obtained through these hooks, or NULL
 * size: requested new size in bytes
 * Returns the (possibly moved) block.
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	if (ptr == NULL)
		return palloc(size);

	return repalloc(ptr, size);
}
|
||||||
|
|
||||||
|
/*
 * Release hook handed to expat.
 *
 * The hook stands in for free(), for which a NULL argument is a harmless
 * no-op, so NULL is filtered out here instead of being passed on to pfree.
 * The original "return pfree(ptr);" also returned an expression from a
 * void function, which ISO C does not allow.
 */
static void
pgxml_pfree(void *ptr)
{
	if (ptr != NULL)
		pfree(ptr);
}
|
||||||
|
|
||||||
|
static void pgxml_mhs_init()
|
||||||
|
{
|
||||||
|
mhs.malloc_fcn = pgxml_palloc;
|
||||||
|
mhs.realloc_fcn = pgxml_repalloc;
|
||||||
|
mhs.free_fcn = pgxml_pfree;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Placeholder: intentionally does nothing at present.
 *
 * Eventually this is where handlers chosen through user-supplied settings
 * would be installed; how those settings get supplied is still undecided.
 */
static void
pgxml_handler_init()
{
}
|
||||||
|
|
||||||
|
/* Returns true if document is well-formed */
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(pgxml_parse);
|
||||||
|
|
||||||
|
Datum
|
||||||
|
pgxml_parse(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
/* called as pgxml_parse(document) */
|
||||||
|
XML_Parser p;
|
||||||
|
text *t = PG_GETARG_TEXT_P(0); /*document buffer */
|
||||||
|
int32 docsize = VARSIZE(t) - VARHDRSZ;
|
||||||
|
|
||||||
|
pgxml_mhs_init();
|
||||||
|
|
||||||
|
pgxml_handler_init();
|
||||||
|
|
||||||
|
p = XML_ParserCreate_MM(NULL,&mhs,NULL);
|
||||||
|
if (! p) {
|
||||||
|
elog(ERROR, "pgxml: Could not create expat parser");
|
||||||
|
PG_RETURN_NULL(); /* seems appropriate if we couldn't parse */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (! XML_Parse(p, (char *)VARDATA(t) , docsize, 1)) {
|
||||||
|
/* elog(NOTICE, "Parse error at line %d:%s",
|
||||||
|
XML_GetCurrentLineNumber(p),
|
||||||
|
XML_ErrorString(XML_GetErrorCode(p))); */
|
||||||
|
XML_ParserFree(p);
|
||||||
|
PG_RETURN_BOOL(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
XML_ParserFree(p);
|
||||||
|
PG_RETURN_BOOL(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* XPath handling functions */
|
||||||
|
|
||||||
|
/* XPath support here is for a very skeletal kind of XPath!
|
||||||
|
It was easy to program though... */
|
||||||
|
|
||||||
|
/* This first is the core function that builds a result set. The
|
||||||
|
actual functions called by the user manipulate that result set
|
||||||
|
in various ways.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Core routine that builds a result set: parses doc with expat and
 * collects the character data of every element matching pathstr.
 *
 * doc:     the XML document
 * pathstr: the (simplified) path expression to match
 *
 * Returns a palloc'd XPath_Results whose results[] entries point into
 * resbuf; the caller releases resbuf and then the struct itself.  Returns
 * NULL if the document could not be parsed, in which case everything
 * allocated here - including the result buffer, which the original code
 * leaked on that path - has already been released.
 */
static XPath_Results *build_xpath_results(text *doc, text *pathstr)
{
	XPath_Results *xpr;
	char	   *res;
	pgxml_udata *udata;
	XML_Parser	p;
	int32		docsize;
	int32		pathlen;

	docsize = VARSIZE(doc) - VARHDRSZ;
	pathlen = VARSIZE(pathstr) - VARHDRSZ;

	xpr = (XPath_Results *) palloc(sizeof(XPath_Results));
	memset((void *) xpr, 0, sizeof(XPath_Results));
	xpr->rescount = 0;

	/*
	 * res isn't going to be the real return type, it is just a buffer that
	 * the character-data handler appends matched text to.  It is sized to
	 * the document length.
	 */
	res = (char *) palloc(docsize);
	memset((void *) res, 0, docsize);
	xpr->resbuf = res;

	/* Per-parse state shared with the expat callbacks. */
	udata = (pgxml_udata *) palloc(sizeof(pgxml_udata));
	memset((void *) udata, 0, sizeof(pgxml_udata));
	udata->currentpath[0] = '\0';
	udata->textgrab = 0;

	/* NUL-terminated copy of the requested path. */
	udata->path = (char *) palloc(pathlen + 1);
	memcpy(udata->path, VARDATA(pathstr), pathlen);
	udata->path[pathlen] = '\0';

	udata->resptr = res;
	udata->reslen = 0;
	udata->xpres = xpr;

	/* Now fire up the parser */
	pgxml_mhs_init();

	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
	if (!p)
	{
		elog(ERROR, "pgxml: Could not create expat parser");
		pfree(xpr);
		pfree(udata->path);
		pfree(udata);
		pfree(res);
		return NULL;
	}
	XML_SetUserData(p, (void *) udata);

	/* Set the handlers */
	XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
	XML_SetCharacterDataHandler(p, pgxml_charhandler);

	if (!XML_Parse(p, (char *) VARDATA(doc), docsize, 1))
	{
		/*
		 * Parse failure.  The line number and error text are available
		 * from the parser here but are not reported.  Release everything,
		 * including the result buffer.
		 */
		XML_ParserFree(p);
		pfree(res);
		pfree(xpr);
		pfree(udata->path);
		pfree(udata);

		return NULL;
	}

	pfree(udata->path);
	pfree(udata);
	XML_ParserFree(p);

	return xpr;
}
|
||||||
|
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(pgxml_xpath);
|
||||||
|
|
||||||
|
Datum
|
||||||
|
pgxml_xpath(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
/* called as pgxml_xpath(document,pathstr, index) for the moment*/
|
||||||
|
|
||||||
|
XPath_Results *xpresults;
|
||||||
|
text *restext;
|
||||||
|
|
||||||
|
text *t = PG_GETARG_TEXT_P(0); /*document buffer */
|
||||||
|
text *t2= PG_GETARG_TEXT_P(1);
|
||||||
|
int32 ind = PG_GETARG_INT32(2) - 1;
|
||||||
|
|
||||||
|
xpresults = build_xpath_results(t,t2);
|
||||||
|
|
||||||
|
/* This needs to be changed depending on the mechanism for returning
|
||||||
|
our set of results. */
|
||||||
|
|
||||||
|
if (xpresults==NULL) /*parse error (not WF or parser failure) */
|
||||||
|
{
|
||||||
|
PG_RETURN_NULL();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ind >= (xpresults->rescount))
|
||||||
|
{
|
||||||
|
PG_RETURN_NULL();
|
||||||
|
}
|
||||||
|
|
||||||
|
restext = (text *) palloc(xpresults->reslens[ind]+VARHDRSZ);
|
||||||
|
memcpy(VARDATA(restext),xpresults->results[ind],xpresults->reslens[ind]);
|
||||||
|
|
||||||
|
VARATT_SIZEP(restext) = xpresults->reslens[ind]+VARHDRSZ;
|
||||||
|
|
||||||
|
pfree(xpresults->resbuf);
|
||||||
|
pfree(xpresults);
|
||||||
|
|
||||||
|
PG_RETURN_TEXT_P(restext);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void pgxml_pathcompare(void *userData)
|
||||||
|
{
|
||||||
|
char *matchpos;
|
||||||
|
|
||||||
|
matchpos=strstr(UD->currentpath, UD->path);
|
||||||
|
|
||||||
|
if (matchpos == NULL) { /* Should we have more logic here ? */
|
||||||
|
if (UD->textgrab) {
|
||||||
|
UD->textgrab=0;
|
||||||
|
pgxml_finalisegrabbedtext(userData);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/* OK, we have a match of some sort. Now we need to check that
|
||||||
|
our match is anchored to the *end* of the string AND
|
||||||
|
that it is immediately preceded by a '/'*/
|
||||||
|
/* This test wouldn't work if strlen (UD->path) overran the length
|
||||||
|
of the currentpath, but that's not possible because we got a match! */
|
||||||
|
|
||||||
|
if ((matchpos + strlen(UD->path))[0]=='\0')
|
||||||
|
{
|
||||||
|
if ((UD->path)[0]=='/') {
|
||||||
|
if (matchpos == UD->currentpath) {
|
||||||
|
UD->textgrab=1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if ((matchpos-1)[0]=='/') {
|
||||||
|
UD->textgrab=1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void pgxml_starthandler(void *userData, const XML_Char *name,
|
||||||
|
const XML_Char **atts)
|
||||||
|
{
|
||||||
|
|
||||||
|
char sepstr[]="/";
|
||||||
|
|
||||||
|
if ((strlen(name)+strlen(UD->currentpath))>MAXPATHLENGTH-2) {
|
||||||
|
elog(NOTICE,"Path too long");
|
||||||
|
} else {
|
||||||
|
strncat(UD->currentpath,sepstr,1);
|
||||||
|
strcat(UD->currentpath, name);
|
||||||
|
}
|
||||||
|
if (UD->textgrab)
|
||||||
|
{
|
||||||
|
/* Depending on user preference, should we "reconstitute"
|
||||||
|
the element into the result text?
|
||||||
|
*/
|
||||||
|
} else {
|
||||||
|
pgxml_pathcompare(userData);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void pgxml_endhandler(void *userData, const XML_Char *name)
|
||||||
|
{
|
||||||
|
/* Start by removing the current element off the end of the
|
||||||
|
currentpath */
|
||||||
|
|
||||||
|
char *sepptr;
|
||||||
|
|
||||||
|
sepptr=strrchr(UD->currentpath,'/');
|
||||||
|
if (sepptr==NULL) {
|
||||||
|
elog(ERROR,"There's a problem...");
|
||||||
|
sepptr=UD->currentpath;
|
||||||
|
}
|
||||||
|
if (strcmp(name, sepptr+1) !=0) {
|
||||||
|
elog(NOTICE,"Wanted [%s], got [%s]",sepptr,name);
|
||||||
|
/* unmatched entry, so do nothing */
|
||||||
|
} else {
|
||||||
|
sepptr[0]='\0'; /* Chop that element off the end */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (UD->textgrab) {
|
||||||
|
pgxml_pathcompare(userData);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static void pgxml_charhandler(void *userData, const XML_Char *s, int len)
|
||||||
|
{
|
||||||
|
if (UD->textgrab) {
|
||||||
|
if (len>0) {
|
||||||
|
memcpy(UD->resptr,s,len);
|
||||||
|
UD->resptr += len;
|
||||||
|
UD->reslen += len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Should I be using PG list types here? */
|
||||||
|
|
||||||
|
static void pgxml_finalisegrabbedtext(void *userData)
|
||||||
|
{
|
||||||
|
/* In res/reslen, we have a single result. */
|
||||||
|
UD->xpres->results[UD->xpres->rescount]= UD->resptr - UD->reslen;
|
||||||
|
UD->xpres->reslens[UD->xpres->rescount]= UD->reslen;
|
||||||
|
UD->reslen=0;
|
||||||
|
UD->xpres->rescount++;
|
||||||
|
|
||||||
|
/* This effectively concatenates all the results together but we
|
||||||
|
do know where one ends and the next begins */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
60
contrib/xml/pgxml.h
Normal file
60
contrib/xml/pgxml.h
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
/* Header for pg xml parser interface */
|
||||||
|
|
||||||
|
static void *pgxml_palloc(size_t size);
|
||||||
|
static void *pgxml_repalloc(void *ptr, size_t size);
|
||||||
|
static void pgxml_pfree(void *ptr);
|
||||||
|
static void pgxml_mhs_init();
|
||||||
|
static void pgxml_handler_init();
|
||||||
|
Datum pgxml_parse(PG_FUNCTION_ARGS);
|
||||||
|
Datum pgxml_xpath(PG_FUNCTION_ARGS);
|
||||||
|
static void pgxml_starthandler(void *userData, const XML_Char *name,
|
||||||
|
const XML_Char **atts);
|
||||||
|
static void pgxml_endhandler(void *userData, const XML_Char *name);
|
||||||
|
static void pgxml_charhandler(void *userData, const XML_Char *s, int len);
|
||||||
|
static void pgxml_pathcompare(void *userData);
|
||||||
|
static void pgxml_finalisegrabbedtext(void *userData);
|
||||||
|
|
||||||
|
#define MAXPATHLENGTH 512
|
||||||
|
#define MAXRESULTS 100
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int rescount;
|
||||||
|
char *results[MAXRESULTS];
|
||||||
|
int32 reslens[MAXRESULTS];
|
||||||
|
char *resbuf; /* pointer to the result buffer for pfree */
|
||||||
|
} XPath_Results;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
char currentpath[MAXPATHLENGTH];
|
||||||
|
char *path;
|
||||||
|
int textgrab;
|
||||||
|
char *resptr;
|
||||||
|
int32 reslen;
|
||||||
|
XPath_Results *xpres;
|
||||||
|
} pgxml_udata;
|
||||||
|
|
||||||
|
|
||||||
|
#define UD ((pgxml_udata *) userData)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
7
contrib/xml/pgxml.source
Normal file
7
contrib/xml/pgxml.source
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
--SQL for XML parser
|
||||||
|
|
||||||
|
-- _OBJWD_ and _DLSUFFIX_ are placeholders that are filled in when this
-- template is turned into pgxml.sql.

-- Well-formedness check for an XML document held in a text value.
CREATE FUNCTION pgxml_parse(text)
    RETURNS bool
    AS '_OBJWD_/pgxml_DLSUFFIX_'
    LANGUAGE 'c'
    WITH (isStrict);

-- Simple path query: (document, path, occurrence number) -> text.
CREATE FUNCTION pgxml_xpath(text,text,int)
    RETURNS text
    AS '_OBJWD_/pgxml_DLSUFFIX_'
    LANGUAGE 'c'
    WITH (isStrict);
|
178
contrib/xml/xpath-yacc
Normal file
178
contrib/xml/xpath-yacc
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
|
||||||
|
%token QNAME
|
||||||
|
%token NAME_COLON_STAR
|
||||||
|
%token DOT
|
||||||
|
%token DOT_DOT
|
||||||
|
%token AT
|
||||||
|
%token AXIS_NAME
|
||||||
|
%token FUNCTION_NAME
|
||||||
|
%token COMMENT
|
||||||
|
%token PI
|
||||||
|
%token TEXT
|
||||||
|
%token NODE
|
||||||
|
%token STAR
|
||||||
|
%token LPAR
|
||||||
|
%token RPAR
|
||||||
|
%token LSQB
|
||||||
|
%token RSQB
|
||||||
|
%token LITERAL
|
||||||
|
%token NUMBER
|
||||||
|
%token COLON_COLON
|
||||||
|
%token DOLLAR_QNAME
|
||||||
|
%token SLASH
|
||||||
|
%token SLASH_SLASH
|
||||||
|
%token VBAR
|
||||||
|
%token COMMA
|
||||||
|
%token PLUS
|
||||||
|
%token MINUS
|
||||||
|
%token EQUALS
|
||||||
|
%token GT
|
||||||
|
%token LT
|
||||||
|
%token GTE
|
||||||
|
%token LTE
|
||||||
|
%token MULTIPLY
|
||||||
|
%token AND
|
||||||
|
%token OR
|
||||||
|
%token MOD
|
||||||
|
%token DIV
|
||||||
|
%token QUO
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
expr :
|
||||||
|
or_expr
|
||||||
|
;
|
||||||
|
|
||||||
|
or_expr :
|
||||||
|
and_expr
|
||||||
|
| or_expr OR and_expr
|
||||||
|
;
|
||||||
|
|
||||||
|
and_expr :
|
||||||
|
equality_expr
|
||||||
|
| and_expr AND equality_expr
|
||||||
|
;
|
||||||
|
|
||||||
|
equality_expr :
|
||||||
|
relational_expr
|
||||||
|
| equality_expr EQUALS relational_expr
|
||||||
|
;
|
||||||
|
|
||||||
|
relational_expr :
|
||||||
|
additive_expr
|
||||||
|
| relational_expr LT additive_expr
|
||||||
|
| relational_expr GT additive_expr
|
||||||
|
| relational_expr LTE additive_expr
|
||||||
|
| relational_expr GTE additive_expr
|
||||||
|
;
|
||||||
|
|
||||||
|
additive_expr :
|
||||||
|
multiplicative_expr
|
||||||
|
| additive_expr PLUS multiplicative_expr
|
||||||
|
| additive_expr MINUS multiplicative_expr
|
||||||
|
;
|
||||||
|
|
||||||
|
multiplicative_expr :
|
||||||
|
unary_expr
|
||||||
|
| multiplicative_expr MULTIPLY unary_expr
|
||||||
|
| multiplicative_expr DIV unary_expr
|
||||||
|
| multiplicative_expr MOD unary_expr
|
||||||
|
;
|
||||||
|
|
||||||
|
/*
 * Unary minus.  The declared MINUS token is used here, as in
 * additive_expr, rather than the character literal '-': a lexer that
 * returns MINUS for the minus sign would never produce the literal.
 */
unary_expr :
  union_expr
  | MINUS unary_expr
  ;
|
||||||
|
|
||||||
|
union_expr :
|
||||||
|
path_expr
|
||||||
|
| union_expr VBAR path_expr
|
||||||
|
;
|
||||||
|
|
||||||
|
path_expr :
|
||||||
|
location_path
|
||||||
|
| primary_expr predicates segment
|
||||||
|
;
|
||||||
|
|
||||||
|
segment :
|
||||||
|
/* empty */
|
||||||
|
| SLASH relative_location_path
|
||||||
|
| SLASH_SLASH relative_location_path
|
||||||
|
;
|
||||||
|
|
||||||
|
location_path :
|
||||||
|
relative_location_path
|
||||||
|
| absolute_location_path
|
||||||
|
;
|
||||||
|
|
||||||
|
absolute_location_path :
|
||||||
|
SLASH
|
||||||
|
| SLASH relative_location_path
|
||||||
|
| SLASH_SLASH relative_location_path
|
||||||
|
;
|
||||||
|
|
||||||
|
relative_location_path :
|
||||||
|
step
|
||||||
|
| relative_location_path SLASH step
|
||||||
|
| relative_location_path SLASH_SLASH step
|
||||||
|
;
|
||||||
|
|
||||||
|
step :
|
||||||
|
axis node_test predicates
|
||||||
|
| DOT
|
||||||
|
| DOT_DOT
|
||||||
|
;
|
||||||
|
|
||||||
|
axis:
|
||||||
|
/* empty */
|
||||||
|
| AXIS_NAME COLON_COLON
|
||||||
|
| AT
|
||||||
|
;
|
||||||
|
|
||||||
|
predicates :
|
||||||
|
/* empty */
|
||||||
|
| predicates LSQB expr RSQB
|
||||||
|
;
|
||||||
|
|
||||||
|
primary_expr :
|
||||||
|
DOLLAR_QNAME
|
||||||
|
| LPAR expr RPAR
|
||||||
|
| LITERAL
|
||||||
|
| NUMBER
|
||||||
|
| function_call
|
||||||
|
;
|
||||||
|
|
||||||
|
function_call :
|
||||||
|
FUNCTION_NAME LPAR opt_args RPAR
|
||||||
|
;
|
||||||
|
|
||||||
|
opt_args :
|
||||||
|
/* empty */
|
||||||
|
| args
|
||||||
|
;
|
||||||
|
|
||||||
|
args :
|
||||||
|
expr
|
||||||
|
| args COMMA expr
|
||||||
|
;
|
||||||
|
|
||||||
|
node_test :
|
||||||
|
QNAME
|
||||||
|
| STAR
|
||||||
|
| NAME_COLON_STAR
|
||||||
|
| PI LPAR opt_literal RPAR
|
||||||
|
| COMMENT LPAR RPAR
|
||||||
|
| TEXT LPAR RPAR
|
||||||
|
| NODE LPAR RPAR
|
||||||
|
;
|
||||||
|
|
||||||
|
opt_literal :
|
||||||
|
/* empty */
|
||||||
|
| LITERAL
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user