postgres/contrib/tsearch2/wparser.c
Tom Lane 375369acd1 Replace TupleTableSlot convention for whole-row variables and function
results with tuples as ordinary varlena Datums.  This commit does not
in itself do much for us, except eliminate the horrid memory leak
associated with evaluation of whole-row variables.  However, it lays the
groundwork for allowing composite types as table columns, and perhaps
some other useful features as well.  Per my proposal of a few days ago.
2004-04-01 21:28:47 +00:00

580 lines
13 KiB
C

/*
* interface functions to parser
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "postgres.h"
#include "fmgr.h"
#include "utils/array.h"
#include "catalog/pg_type.h"
#include "executor/spi.h"
#include "funcapi.h"
#include "wparser.h"
#include "ts_cfg.h"
#include "snmap.h"
#include "common.h"
/*********top interface**********/
static void *plan_getparser = NULL;
static Oid current_parser_id = InvalidOid;
void
init_prs(Oid id, WParserInfo * prs)
{
Oid arg[1] = {OIDOID};
bool isnull;
Datum pars[1] = {ObjectIdGetDatum(id)};
int stat;
memset(prs, 0, sizeof(WParserInfo));
SPI_connect();
if (!plan_getparser)
{
plan_getparser = SPI_saveplan(SPI_prepare("select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1", 1, arg));
if (!plan_getparser)
ts_error(ERROR, "SPI_prepare() failed");
}
stat = SPI_execp(plan_getparser, pars, " ", 1);
if (stat < 0)
ts_error(ERROR, "SPI_execp return %d", stat);
if (SPI_processed > 0)
{
Oid oid = InvalidOid;
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull));
fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
prs->lextype = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull));
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull));
fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
prs->prs_id = id;
}
else
ts_error(ERROR, "No parser with id %d", id);
SPI_finish();
}
typedef struct
{
WParserInfo *last_prs;
int len;
int reallen;
WParserInfo *list;
SNMap name2id_map;
} PrsList;
static PrsList PList = {NULL, 0, 0, NULL, {0, 0, NULL}};
void
reset_prs(void)
{
freeSNMap(&(PList.name2id_map));
if (PList.list)
free(PList.list);
memset(&PList, 0, sizeof(PrsList));
}
static int
compareprs(const void *a, const void *b)
{
return ((WParserInfo *) a)->prs_id - ((WParserInfo *) b)->prs_id;
}
WParserInfo *
findprs(Oid id)
{
/* last used prs */
if (PList.last_prs && PList.last_prs->prs_id == id)
return PList.last_prs;
/* already used prs */
if (PList.len != 0)
{
WParserInfo key;
key.prs_id = id;
PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
if (PList.last_prs != NULL)
return PList.last_prs;
}
/* last chance */
if (PList.len == PList.reallen)
{
WParserInfo *tmp;
int reallen = (PList.reallen) ? 2 * PList.reallen : 16;
tmp = (WParserInfo *) realloc(PList.list, sizeof(WParserInfo) * reallen);
if (!tmp)
ts_error(ERROR, "No memory");
PList.reallen = reallen;
PList.list = tmp;
}
PList.last_prs = &(PList.list[PList.len]);
init_prs(id, PList.last_prs);
PList.len++;
qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
return findprs(id); /* qsort changed order!! */ ;
}
static void *plan_name2id = NULL;
Oid
name2id_prs(text *name)
{
Oid arg[1] = {TEXTOID};
bool isnull;
Datum pars[1] = {PointerGetDatum(name)};
int stat;
Oid id = findSNMap_t(&(PList.name2id_map), name);
if (id)
return id;
SPI_connect();
if (!plan_name2id)
{
plan_name2id = SPI_saveplan(SPI_prepare("select oid from pg_ts_parser where prs_name = $1", 1, arg));
if (!plan_name2id)
ts_error(ERROR, "SPI_prepare() failed");
}
stat = SPI_execp(plan_name2id, pars, " ", 1);
if (stat < 0)
ts_error(ERROR, "SPI_execp return %d", stat);
if (SPI_processed > 0)
id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
else
ts_error(ERROR, "No parser '%s'", text2char(name));
SPI_finish();
addSNMap_t(&(PList.name2id_map), name, id);
return id;
}
/******sql-level interface******/
typedef struct
{
int cur;
LexDescr *list;
} TypeStorage;
static void
setup_firstcall(FuncCallContext *funcctx, Oid prsid)
{
TupleDesc tupdesc;
MemoryContext oldcontext;
TypeStorage *st;
WParserInfo *prs = findprs(prsid);
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
st = (TypeStorage *) palloc(sizeof(TypeStorage));
st->cur = 0;
st->list = (LexDescr *) DatumGetPointer(
OidFunctionCall1(prs->lextype, PointerGetDatum(prs->prs))
);
funcctx->user_fctx = (void *) st;
tupdesc = RelationNameGetTupleDesc("tokentype");
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
MemoryContextSwitchTo(oldcontext);
}
static Datum
process_call(FuncCallContext *funcctx)
{
TypeStorage *st;
st = (TypeStorage *) funcctx->user_fctx;
if (st->list && st->list[st->cur].lexid)
{
Datum result;
char *values[3];
char txtid[16];
HeapTuple tuple;
values[0] = txtid;
sprintf(txtid, "%d", st->list[st->cur].lexid);
values[1] = st->list[st->cur].alias;
values[2] = st->list[st->cur].descr;
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
result = HeapTupleGetDatum(tuple);
pfree(values[1]);
pfree(values[2]);
st->cur++;
return result;
}
else
{
if (st->list)
pfree(st->list);
pfree(st);
}
return (Datum) 0;
}
PG_FUNCTION_INFO_V1(token_type);
Datum token_type(PG_FUNCTION_ARGS);
Datum
token_type(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
funcctx = SRF_FIRSTCALL_INIT();
setup_firstcall(funcctx, PG_GETARG_OID(0));
}
funcctx = SRF_PERCALL_SETUP();
if ((result = process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
PG_FUNCTION_INFO_V1(token_type_byname);
Datum token_type_byname(PG_FUNCTION_ARGS);
Datum
token_type_byname(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *name = PG_GETARG_TEXT_P(0);
funcctx = SRF_FIRSTCALL_INIT();
setup_firstcall(funcctx, name2id_prs(name));
PG_FREE_IF_COPY(name, 0);
}
funcctx = SRF_PERCALL_SETUP();
if ((result = process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
PG_FUNCTION_INFO_V1(token_type_current);
Datum token_type_current(PG_FUNCTION_ARGS);
Datum
token_type_current(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
funcctx = SRF_FIRSTCALL_INIT();
if (current_parser_id == InvalidOid)
current_parser_id = name2id_prs(char2text("default"));
setup_firstcall(funcctx, current_parser_id);
}
funcctx = SRF_PERCALL_SETUP();
if ((result = process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
PG_FUNCTION_INFO_V1(set_curprs);
Datum set_curprs(PG_FUNCTION_ARGS);
Datum
set_curprs(PG_FUNCTION_ARGS)
{
findprs(PG_GETARG_OID(0));
current_parser_id = PG_GETARG_OID(0);
PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(set_curprs_byname);
Datum set_curprs_byname(PG_FUNCTION_ARGS);
Datum
set_curprs_byname(PG_FUNCTION_ARGS)
{
text *name = PG_GETARG_TEXT_P(0);
DirectFunctionCall1(
set_curprs,
ObjectIdGetDatum(name2id_prs(name))
);
PG_FREE_IF_COPY(name, 0);
PG_RETURN_VOID();
}
typedef struct
{
int type;
char *lexem;
} LexemEntry;
typedef struct
{
int cur;
int len;
LexemEntry *list;
} PrsStorage;
static void
prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt)
{
TupleDesc tupdesc;
MemoryContext oldcontext;
PrsStorage *st;
WParserInfo *prs = findprs(prsid);
char *lex = NULL;
int llen = 0,
type = 0;
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
st = (PrsStorage *) palloc(sizeof(PrsStorage));
st->cur = 0;
st->len = 16;
st->list = (LexemEntry *) palloc(sizeof(LexemEntry) * st->len);
prs->prs = (void *) DatumGetPointer(
FunctionCall2(
&(prs->start_info),
PointerGetDatum(VARDATA(txt)),
Int32GetDatum(VARSIZE(txt) - VARHDRSZ)
)
);
while ((type = DatumGetInt32(FunctionCall3(
&(prs->getlexeme_info),
PointerGetDatum(prs->prs),
PointerGetDatum(&lex),
PointerGetDatum(&llen)))) != 0)
{
if (st->cur >= st->len)
{
st->len = 2 * st->len;
st->list = (LexemEntry *) repalloc(st->list, sizeof(LexemEntry) * st->len);
}
st->list[st->cur].lexem = palloc(llen + 1);
memcpy(st->list[st->cur].lexem, lex, llen);
st->list[st->cur].lexem[llen] = '\0';
st->list[st->cur].type = type;
st->cur++;
}
FunctionCall1(
&(prs->end_info),
PointerGetDatum(prs->prs)
);
st->len = st->cur;
st->cur = 0;
funcctx->user_fctx = (void *) st;
tupdesc = RelationNameGetTupleDesc("tokenout");
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
MemoryContextSwitchTo(oldcontext);
}
static Datum
prs_process_call(FuncCallContext *funcctx)
{
PrsStorage *st;
st = (PrsStorage *) funcctx->user_fctx;
if (st->cur < st->len)
{
Datum result;
char *values[2];
char tid[16];
HeapTuple tuple;
values[0] = tid;
sprintf(tid, "%d", st->list[st->cur].type);
values[1] = st->list[st->cur].lexem;
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
result = HeapTupleGetDatum(tuple);
pfree(values[1]);
st->cur++;
return result;
}
else
{
if (st->list)
pfree(st->list);
pfree(st);
}
return (Datum) 0;
}
PG_FUNCTION_INFO_V1(parse);
Datum parse(PG_FUNCTION_ARGS);
Datum
parse(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *txt = PG_GETARG_TEXT_P(1);
funcctx = SRF_FIRSTCALL_INIT();
prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
PG_FREE_IF_COPY(txt, 1);
}
funcctx = SRF_PERCALL_SETUP();
if ((result = prs_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
PG_FUNCTION_INFO_V1(parse_byname);
Datum parse_byname(PG_FUNCTION_ARGS);
Datum
parse_byname(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *name = PG_GETARG_TEXT_P(0);
text *txt = PG_GETARG_TEXT_P(1);
funcctx = SRF_FIRSTCALL_INIT();
prs_setup_firstcall(funcctx, name2id_prs(name), txt);
PG_FREE_IF_COPY(name, 0);
PG_FREE_IF_COPY(txt, 1);
}
funcctx = SRF_PERCALL_SETUP();
if ((result = prs_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
PG_FUNCTION_INFO_V1(parse_current);
Datum parse_current(PG_FUNCTION_ARGS);
Datum
parse_current(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *txt = PG_GETARG_TEXT_P(0);
funcctx = SRF_FIRSTCALL_INIT();
if (current_parser_id == InvalidOid)
current_parser_id = name2id_prs(char2text("default"));
prs_setup_firstcall(funcctx, current_parser_id, txt);
PG_FREE_IF_COPY(txt, 0);
}
funcctx = SRF_PERCALL_SETUP();
if ((result = prs_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
PG_FUNCTION_INFO_V1(headline);
Datum headline(PG_FUNCTION_ARGS);
Datum
headline(PG_FUNCTION_ARGS)
{
TSCfgInfo *cfg = findcfg(PG_GETARG_OID(0));
text *in = PG_GETARG_TEXT_P(1);
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
HLPRSTEXT prs;
text *out;
WParserInfo *prsobj = findprs(cfg->prs_id);
memset(&prs, 0, sizeof(HLPRSTEXT));
prs.lenwords = 32;
prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
FunctionCall3(
&(prsobj->headline_info),
PointerGetDatum(&prs),
PointerGetDatum(opt),
PointerGetDatum(query)
);
out = genhl(&prs);
PG_FREE_IF_COPY(in, 1);
PG_FREE_IF_COPY(query, 2);
if (opt)
PG_FREE_IF_COPY(opt, 3);
pfree(prs.words);
pfree(prs.startsel);
pfree(prs.stopsel);
PG_RETURN_POINTER(out);
}
PG_FUNCTION_INFO_V1(headline_byname);
Datum headline_byname(PG_FUNCTION_ARGS);
Datum
headline_byname(PG_FUNCTION_ARGS)
{
text *cfg = PG_GETARG_TEXT_P(0);
Datum out = DirectFunctionCall4(
headline,
ObjectIdGetDatum(name2id_cfg(cfg)),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2),
(PG_NARGS() > 3) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
);
PG_FREE_IF_COPY(cfg, 0);
PG_RETURN_DATUM(out);
}
PG_FUNCTION_INFO_V1(headline_current);
Datum headline_current(PG_FUNCTION_ARGS);
Datum
headline_current(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall4(
headline,
ObjectIdGetDatum(get_currcfg()),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
(PG_NARGS() > 2) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
));
}