Add sample text search dictionary templates and parsers, to replace the
hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov.
This commit is contained in:
parent
fb631dba2a
commit
5fcb079858
@ -1,4 +1,4 @@
|
||||
# $PostgreSQL: pgsql/contrib/Makefile,v 1.80 2007/10/13 22:59:43 tgl Exp $
|
||||
# $PostgreSQL: pgsql/contrib/Makefile,v 1.81 2007/10/15 21:36:49 tgl Exp $
|
||||
|
||||
subdir = contrib
|
||||
top_builddir = ..
|
||||
@ -10,6 +10,8 @@ WANTED_DIRS = \
|
||||
chkpass \
|
||||
cube \
|
||||
dblink \
|
||||
dict_int \
|
||||
dict_xsyn \
|
||||
earthdistance \
|
||||
fuzzystrmatch \
|
||||
hstore \
|
||||
@ -31,6 +33,7 @@ WANTED_DIRS = \
|
||||
seg \
|
||||
spi \
|
||||
tablefunc \
|
||||
test_parser \
|
||||
vacuumlo
|
||||
|
||||
ifeq ($(with_openssl),yes)
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
The PostgreSQL contrib tree
|
||||
---------------------------
|
||||
|
||||
@ -29,8 +28,8 @@ adminpack -
|
||||
by Dave Page <dpage@vale-housing.co.uk>
|
||||
|
||||
btree_gist -
|
||||
Support for emulating BTREE indexing in GiST
|
||||
by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
|
||||
Support for emulating BTREE indexing in GiST
|
||||
by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
|
||||
|
||||
chkpass -
|
||||
An auto-encrypted password datatype
|
||||
@ -44,8 +43,16 @@ dblink -
|
||||
Allows remote query execution
|
||||
by Joe Conway <mail@joeconway.com>
|
||||
|
||||
dict_int -
|
||||
Text search dictionary template for integers
|
||||
by Sergey Karpov <karpov@sao.ru>
|
||||
|
||||
dict_xsyn -
|
||||
Text search dictionary template for extended synonym processing
|
||||
by Sergey Karpov <karpov@sao.ru>
|
||||
|
||||
earthdistance -
|
||||
Operator for computing earth distance for two points
|
||||
Operator for computing earth distance between two points
|
||||
by Hal Snyder <hal@vailsys.com>
|
||||
|
||||
fuzzystrmatch -
|
||||
@ -53,8 +60,8 @@ fuzzystrmatch -
|
||||
by Joe Conway <mail@joeconway.com>, Joel Burton <jburton@scw.org>
|
||||
|
||||
hstore -
|
||||
Hstore - module for storing (key,value) pairs
|
||||
by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
|
||||
Module for storing (key, value) pairs
|
||||
by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
|
||||
|
||||
intagg -
|
||||
Integer aggregator
|
||||
@ -92,6 +99,10 @@ pg_freespacemap -
|
||||
Displays the contents of the free space map (FSM)
|
||||
by Mark Kirkwood <markir@paradise.net.nz>
|
||||
|
||||
pg_standby -
|
||||
Sample archive_command for warm standby operation
|
||||
by Simon Riggs <simon@2ndquadrant.com>
|
||||
|
||||
pg_trgm -
|
||||
Functions for determining the similarity of text based on trigram
|
||||
matching.
|
||||
@ -110,7 +121,7 @@ pgrowlocks -
|
||||
by Tatsuo Ishii <ishii@sraoss.co.jp>
|
||||
|
||||
pgstattuple -
|
||||
A function to return statistics about "dead" tuples and free
|
||||
Functions to return statistics about "dead" tuples and free
|
||||
space within a table
|
||||
by Tatsuo Ishii <ishii@sraoss.co.jp>
|
||||
|
||||
@ -126,12 +137,16 @@ sslinfo -
|
||||
by Victor Wagner <vitus@cryptocom.ru>
|
||||
|
||||
start-scripts -
|
||||
Scripts for starting the server at boot time.
|
||||
Scripts for starting the server at boot time on various platforms.
|
||||
|
||||
tablefunc -
|
||||
Examples of functions returning tables
|
||||
by Joe Conway <mail@joeconway.com>
|
||||
|
||||
test_parser -
|
||||
Sample text search parser
|
||||
by Sergey Karpov <karpov@sao.ru>
|
||||
|
||||
tsearch2 -
|
||||
Full-text-index support using GiST
|
||||
by Teodor Sigaev <teodor@sigaev.ru> and Oleg Bartunov
|
||||
|
19
contrib/dict_int/Makefile
Normal file
19
contrib/dict_int/Makefile
Normal file
@ -0,0 +1,19 @@
|
||||
# $PostgreSQL: pgsql/contrib/dict_int/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
MODULE_big = dict_int
|
||||
OBJS = dict_int.o
|
||||
DATA_built = dict_int.sql
|
||||
DATA = uninstall_dict_int.sql
|
||||
DOCS = README.dict_int
|
||||
REGRESS = dict_int
|
||||
|
||||
ifdef USE_PGXS
|
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS)
|
||||
else
|
||||
subdir = contrib/dict_int
|
||||
top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global
|
||||
include $(top_srcdir)/contrib/contrib-global.mk
|
||||
endif
|
41
contrib/dict_int/README.dict_int
Normal file
41
contrib/dict_int/README.dict_int
Normal file
@ -0,0 +1,41 @@
|
||||
Dictionary for integers
|
||||
=======================
|
||||
|
||||
The motivation for this example dictionary is to control the indexing of
|
||||
integers (signed and unsigned), and, consequently, to minimize the number of
|
||||
unique words, which greatly affects the performance of searching.
|
||||
|
||||
* Configuration
|
||||
|
||||
The dictionary accepts two options:
|
||||
|
||||
- The MAXLEN parameter specifies the maximum length (number of digits)
|
||||
allowed in an integer word. The default value is 6.
|
||||
|
||||
- The REJECTLONG parameter specifies if an overlength integer should be
|
||||
truncated or ignored. If REJECTLONG=FALSE (default), the dictionary returns
|
||||
the first MAXLEN digits of the integer. If REJECTLONG=TRUE, the
|
||||
dictionary treats an overlength integer as a stop word, so that it will
|
||||
not be indexed.
|
||||
|
||||
* Usage
|
||||
|
||||
1. Compile and install
|
||||
|
||||
2. Load dictionary
|
||||
|
||||
psql mydb < dict_int.sql
|
||||
|
||||
3. Test it
|
||||
|
||||
mydb# select ts_lexize('intdict', '12345678');
|
||||
ts_lexize
|
||||
-----------
|
||||
{123456}
|
||||
|
||||
4. Change its options as you wish
|
||||
|
||||
mydb# ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 4, REJECTLONG = true);
|
||||
ALTER TEXT SEARCH DICTIONARY
|
||||
|
||||
That's all.
|
99
contrib/dict_int/dict_int.c
Normal file
99
contrib/dict_int/dict_int.c
Normal file
@ -0,0 +1,99 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* dict_int.c
|
||||
* Text search dictionary for integers
|
||||
*
|
||||
* Copyright (c) 2007, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/contrib/dict_int/dict_int.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "commands/defrem.h"
|
||||
#include "fmgr.h"
|
||||
#include "tsearch/ts_public.h"
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
|
||||
typedef struct {
|
||||
int maxlen;
|
||||
bool rejectlong;
|
||||
} DictInt;
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(dintdict_init);
|
||||
Datum dintdict_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(dintdict_lexize);
|
||||
Datum dintdict_lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
dintdict_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
List *dictoptions = (List *) PG_GETARG_POINTER(0);
|
||||
DictInt *d;
|
||||
ListCell *l;
|
||||
|
||||
d = (DictInt *) palloc0(sizeof(DictInt));
|
||||
d->maxlen = 6;
|
||||
d->rejectlong = false;
|
||||
|
||||
foreach(l, dictoptions)
|
||||
{
|
||||
DefElem *defel = (DefElem *) lfirst(l);
|
||||
|
||||
if (pg_strcasecmp(defel->defname, "MAXLEN") == 0)
|
||||
{
|
||||
d->maxlen = atoi(defGetString(defel));
|
||||
}
|
||||
else if (pg_strcasecmp(defel->defname, "REJECTLONG") == 0)
|
||||
{
|
||||
d->rejectlong = defGetBoolean(defel);
|
||||
}
|
||||
else
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("unrecognized intdict parameter: \"%s\"",
|
||||
defel->defname)));
|
||||
}
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
dintdict_lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictInt *d = (DictInt*)PG_GETARG_POINTER(0);
|
||||
char *in = (char*)PG_GETARG_POINTER(1);
|
||||
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
TSLexeme *res=palloc(sizeof(TSLexeme)*2);
|
||||
|
||||
res[1].lexeme = NULL;
|
||||
if (PG_GETARG_INT32(2) > d->maxlen)
|
||||
{
|
||||
if ( d->rejectlong )
|
||||
{
|
||||
/* reject by returning void array */
|
||||
pfree(txt);
|
||||
res[0].lexeme = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* trim integer */
|
||||
txt[d->maxlen] = '\0';
|
||||
res[0].lexeme = txt;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
res[0].lexeme = txt;
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
29
contrib/dict_int/dict_int.sql.in
Normal file
29
contrib/dict_int/dict_int.sql.in
Normal file
@ -0,0 +1,29 @@
|
||||
-- $PostgreSQL: pgsql/contrib/dict_int/dict_int.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
-- Adjust this setting to control where the objects get created.
|
||||
SET search_path = public;
|
||||
|
||||
BEGIN;
|
||||
|
||||
CREATE FUNCTION dintdict_init(internal)
|
||||
RETURNS internal
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION dintdict_lexize(internal, internal, internal, internal)
|
||||
RETURNS internal
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE TEXT SEARCH TEMPLATE intdict_template (
|
||||
LEXIZE = dintdict_lexize,
|
||||
INIT = dintdict_init
|
||||
);
|
||||
|
||||
CREATE TEXT SEARCH DICTIONARY intdict (
|
||||
TEMPLATE = intdict_template
|
||||
);
|
||||
|
||||
COMMENT ON TEXT SEARCH DICTIONARY intdict IS 'dictionary for integers';
|
||||
|
||||
END;
|
308
contrib/dict_int/expected/dict_int.out
Normal file
308
contrib/dict_int/expected/dict_int.out
Normal file
@ -0,0 +1,308 @@
|
||||
--
|
||||
-- first, define the datatype. Turn off echoing so that expected file
|
||||
-- does not depend on contents of this file.
|
||||
--
|
||||
SET client_min_messages = warning;
|
||||
\set ECHO none
|
||||
RESET client_min_messages;
|
||||
--lexize
|
||||
select ts_lexize('intdict', '511673');
|
||||
ts_lexize
|
||||
-----------
|
||||
{511673}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '129');
|
||||
ts_lexize
|
||||
-----------
|
||||
{129}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '40865854');
|
||||
ts_lexize
|
||||
-----------
|
||||
{408658}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '952');
|
||||
ts_lexize
|
||||
-----------
|
||||
{952}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '654980341');
|
||||
ts_lexize
|
||||
-----------
|
||||
{654980}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '09810106');
|
||||
ts_lexize
|
||||
-----------
|
||||
{098101}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '14262713');
|
||||
ts_lexize
|
||||
-----------
|
||||
{142627}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '6532082986');
|
||||
ts_lexize
|
||||
-----------
|
||||
{653208}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '0150061');
|
||||
ts_lexize
|
||||
-----------
|
||||
{015006}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '7778');
|
||||
ts_lexize
|
||||
-----------
|
||||
{7778}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '9547');
|
||||
ts_lexize
|
||||
-----------
|
||||
{9547}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '753395478');
|
||||
ts_lexize
|
||||
-----------
|
||||
{753395}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '647652');
|
||||
ts_lexize
|
||||
-----------
|
||||
{647652}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '6988655574');
|
||||
ts_lexize
|
||||
-----------
|
||||
{698865}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '1279');
|
||||
ts_lexize
|
||||
-----------
|
||||
{1279}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '1266645909');
|
||||
ts_lexize
|
||||
-----------
|
||||
{126664}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '7594193969');
|
||||
ts_lexize
|
||||
-----------
|
||||
{759419}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '16928207');
|
||||
ts_lexize
|
||||
-----------
|
||||
{169282}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '196850350328');
|
||||
ts_lexize
|
||||
-----------
|
||||
{196850}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '22026985592');
|
||||
ts_lexize
|
||||
-----------
|
||||
{220269}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '2063765');
|
||||
ts_lexize
|
||||
-----------
|
||||
{206376}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '242387310');
|
||||
ts_lexize
|
||||
-----------
|
||||
{242387}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '93595');
|
||||
ts_lexize
|
||||
-----------
|
||||
{93595}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '9374');
|
||||
ts_lexize
|
||||
-----------
|
||||
{9374}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '996969');
|
||||
ts_lexize
|
||||
-----------
|
||||
{996969}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '353595982');
|
||||
ts_lexize
|
||||
-----------
|
||||
{353595}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '925860');
|
||||
ts_lexize
|
||||
-----------
|
||||
{925860}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '11848378337');
|
||||
ts_lexize
|
||||
-----------
|
||||
{118483}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '333');
|
||||
ts_lexize
|
||||
-----------
|
||||
{333}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '799287416765');
|
||||
ts_lexize
|
||||
-----------
|
||||
{799287}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '745939');
|
||||
ts_lexize
|
||||
-----------
|
||||
{745939}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '67601305734');
|
||||
ts_lexize
|
||||
-----------
|
||||
{676013}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '3361113');
|
||||
ts_lexize
|
||||
-----------
|
||||
{336111}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '9033778607');
|
||||
ts_lexize
|
||||
-----------
|
||||
{903377}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '7507648');
|
||||
ts_lexize
|
||||
-----------
|
||||
{750764}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '1166');
|
||||
ts_lexize
|
||||
-----------
|
||||
{1166}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '9360498');
|
||||
ts_lexize
|
||||
-----------
|
||||
{936049}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '917795');
|
||||
ts_lexize
|
||||
-----------
|
||||
{917795}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '9387894');
|
||||
ts_lexize
|
||||
-----------
|
||||
{938789}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '42764329');
|
||||
ts_lexize
|
||||
-----------
|
||||
{427643}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '564062');
|
||||
ts_lexize
|
||||
-----------
|
||||
{564062}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '5413377');
|
||||
ts_lexize
|
||||
-----------
|
||||
{541337}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '060965');
|
||||
ts_lexize
|
||||
-----------
|
||||
{060965}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '08273593');
|
||||
ts_lexize
|
||||
-----------
|
||||
{082735}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '593556010144');
|
||||
ts_lexize
|
||||
-----------
|
||||
{593556}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '17988843352');
|
||||
ts_lexize
|
||||
-----------
|
||||
{179888}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '252281774');
|
||||
ts_lexize
|
||||
-----------
|
||||
{252281}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '313425');
|
||||
ts_lexize
|
||||
-----------
|
||||
{313425}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '641439323669');
|
||||
ts_lexize
|
||||
-----------
|
||||
{641439}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '314532610153');
|
||||
ts_lexize
|
||||
-----------
|
||||
{314532}
|
||||
(1 row)
|
||||
|
61
contrib/dict_int/sql/dict_int.sql
Normal file
61
contrib/dict_int/sql/dict_int.sql
Normal file
@ -0,0 +1,61 @@
|
||||
--
|
||||
-- first, define the datatype. Turn off echoing so that expected file
|
||||
-- does not depend on contents of this file.
|
||||
--
|
||||
SET client_min_messages = warning;
|
||||
\set ECHO none
|
||||
\i dict_int.sql
|
||||
\set ECHO all
|
||||
RESET client_min_messages;
|
||||
|
||||
--lexize
|
||||
select ts_lexize('intdict', '511673');
|
||||
select ts_lexize('intdict', '129');
|
||||
select ts_lexize('intdict', '40865854');
|
||||
select ts_lexize('intdict', '952');
|
||||
select ts_lexize('intdict', '654980341');
|
||||
select ts_lexize('intdict', '09810106');
|
||||
select ts_lexize('intdict', '14262713');
|
||||
select ts_lexize('intdict', '6532082986');
|
||||
select ts_lexize('intdict', '0150061');
|
||||
select ts_lexize('intdict', '7778');
|
||||
select ts_lexize('intdict', '9547');
|
||||
select ts_lexize('intdict', '753395478');
|
||||
select ts_lexize('intdict', '647652');
|
||||
select ts_lexize('intdict', '6988655574');
|
||||
select ts_lexize('intdict', '1279');
|
||||
select ts_lexize('intdict', '1266645909');
|
||||
select ts_lexize('intdict', '7594193969');
|
||||
select ts_lexize('intdict', '16928207');
|
||||
select ts_lexize('intdict', '196850350328');
|
||||
select ts_lexize('intdict', '22026985592');
|
||||
select ts_lexize('intdict', '2063765');
|
||||
select ts_lexize('intdict', '242387310');
|
||||
select ts_lexize('intdict', '93595');
|
||||
select ts_lexize('intdict', '9374');
|
||||
select ts_lexize('intdict', '996969');
|
||||
select ts_lexize('intdict', '353595982');
|
||||
select ts_lexize('intdict', '925860');
|
||||
select ts_lexize('intdict', '11848378337');
|
||||
select ts_lexize('intdict', '333');
|
||||
select ts_lexize('intdict', '799287416765');
|
||||
select ts_lexize('intdict', '745939');
|
||||
select ts_lexize('intdict', '67601305734');
|
||||
select ts_lexize('intdict', '3361113');
|
||||
select ts_lexize('intdict', '9033778607');
|
||||
select ts_lexize('intdict', '7507648');
|
||||
select ts_lexize('intdict', '1166');
|
||||
select ts_lexize('intdict', '9360498');
|
||||
select ts_lexize('intdict', '917795');
|
||||
select ts_lexize('intdict', '9387894');
|
||||
select ts_lexize('intdict', '42764329');
|
||||
select ts_lexize('intdict', '564062');
|
||||
select ts_lexize('intdict', '5413377');
|
||||
select ts_lexize('intdict', '060965');
|
||||
select ts_lexize('intdict', '08273593');
|
||||
select ts_lexize('intdict', '593556010144');
|
||||
select ts_lexize('intdict', '17988843352');
|
||||
select ts_lexize('intdict', '252281774');
|
||||
select ts_lexize('intdict', '313425');
|
||||
select ts_lexize('intdict', '641439323669');
|
||||
select ts_lexize('intdict', '314532610153');
|
9
contrib/dict_int/uninstall_dict_int.sql
Normal file
9
contrib/dict_int/uninstall_dict_int.sql
Normal file
@ -0,0 +1,9 @@
|
||||
SET search_path = public;
|
||||
|
||||
DROP TEXT SEARCH DICTIONARY intdict;
|
||||
|
||||
DROP TEXT SEARCH TEMPLATE intdict_template;
|
||||
|
||||
DROP FUNCTION dintdict_init(internal);
|
||||
|
||||
DROP FUNCTION dintdict_lexize(internal,internal,internal,internal);
|
38
contrib/dict_xsyn/Makefile
Normal file
38
contrib/dict_xsyn/Makefile
Normal file
@ -0,0 +1,38 @@
|
||||
# $PostgreSQL: pgsql/contrib/dict_xsyn/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
MODULE_big = dict_xsyn
|
||||
OBJS = dict_xsyn.o
|
||||
DATA_built = dict_xsyn.sql
|
||||
DATA = uninstall_dict_xsyn.sql
|
||||
DOCS = README.dict_xsyn
|
||||
REGRESS = dict_xsyn
|
||||
|
||||
DICTDIR = tsearch_data
|
||||
DICTFILES = xsyn_sample.rules
|
||||
|
||||
ifdef USE_PGXS
|
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS)
|
||||
else
|
||||
subdir = contrib/dict_xsyn
|
||||
top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global
|
||||
include $(top_srcdir)/contrib/contrib-global.mk
|
||||
endif
|
||||
|
||||
install: install-data
|
||||
|
||||
.PHONY: install-data
|
||||
install-data: $(DICTFILES)
|
||||
for i in $(DICTFILES); \
|
||||
do $(INSTALL_DATA) $(srcdir)/$$i '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i; \
|
||||
done
|
||||
|
||||
uninstall: uninstall-data
|
||||
|
||||
.PHONY: uninstall-data
|
||||
uninstall-data:
|
||||
for i in $(DICTFILES); \
|
||||
do rm -rf '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i ; \
|
||||
done
|
52
contrib/dict_xsyn/README.dict_xsyn
Normal file
52
contrib/dict_xsyn/README.dict_xsyn
Normal file
@ -0,0 +1,52 @@
|
||||
Extended Synonym dictionary
|
||||
===========================
|
||||
|
||||
This is a simple synonym dictionary. It replaces words with groups of their
|
||||
synonyms, and so makes it possible to search for a word using any of its
|
||||
synonyms.
|
||||
|
||||
* Configuration
|
||||
|
||||
It accepts the following options:
|
||||
|
||||
- KEEPORIG controls whether the original word is included, or only its
|
||||
synonyms. Default is 'true'.
|
||||
|
||||
- RULES is the base name of the file containing the list of synonyms.
|
||||
This file must be in $(prefix)/share/tsearch_data/, and its name must
|
||||
end in ".rules" (which is not included in the RULES parameter).
|
||||
|
||||
The rules file has the following format:
|
||||
|
||||
- Each line represents a group of synonyms for a single word, which is
|
||||
given first on the line. Synonyms are separated by whitespace:
|
||||
|
||||
word syn1 syn2 syn3
|
||||
|
||||
- The sharp ('#') sign is a comment delimiter. It may appear at the start of
|
||||
any word on the line. The rest of the line will be skipped.
|
||||
|
||||
Look at xsyn_sample.rules, which is installed in $(prefix)/share/tsearch_data/,
|
||||
for an example.
|
||||
|
||||
* Usage
|
||||
|
||||
1. Compile and install
|
||||
|
||||
2. Load dictionary
|
||||
|
||||
psql mydb < dict_xsyn.sql
|
||||
|
||||
3. Test it
|
||||
|
||||
mydb=# SELECT ts_lexize('xsyn','word');
|
||||
ts_lexize
|
||||
----------------
|
||||
{word,syn1,syn2,syn3}
|
||||
|
||||
4. Change the dictionary options as you wish
|
||||
|
||||
mydb=# ALTER TEXT SEARCH DICTIONARY xsyn (KEEPORIG=false);
|
||||
ALTER TEXT SEARCH DICTIONARY
|
||||
|
||||
That's all.
|
235
contrib/dict_xsyn/dict_xsyn.c
Normal file
235
contrib/dict_xsyn/dict_xsyn.c
Normal file
@ -0,0 +1,235 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* dict_xsyn.c
|
||||
* Extended synonym dictionary
|
||||
*
|
||||
* Copyright (c) 2007, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "commands/defrem.h"
|
||||
#include "fmgr.h"
|
||||
#include "storage/fd.h"
|
||||
#include "tsearch/ts_locale.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char *key; /* Word */
|
||||
char *value; /* Unparsed list of synonyms, including the word itself */
|
||||
} Syn;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int len;
|
||||
Syn *syn;
|
||||
|
||||
bool keeporig;
|
||||
} DictSyn;
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(dxsyn_init);
|
||||
Datum dxsyn_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(dxsyn_lexize);
|
||||
Datum dxsyn_lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
static char *
|
||||
find_word(char *in, char **end)
|
||||
{
|
||||
char *start;
|
||||
|
||||
*end = NULL;
|
||||
while (*in && t_isspace(in))
|
||||
in += pg_mblen(in);
|
||||
|
||||
if (!*in || *in == '#')
|
||||
return NULL;
|
||||
start = in;
|
||||
|
||||
while (*in && !t_isspace(in))
|
||||
in += pg_mblen(in);
|
||||
|
||||
*end = in;
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
static int
|
||||
compare_syn(const void *a, const void *b)
|
||||
{
|
||||
return strcmp(((Syn *) a)->key, ((Syn *) b)->key);
|
||||
}
|
||||
|
||||
static void
|
||||
read_dictionary(DictSyn *d, char *filename)
|
||||
{
|
||||
char *real_filename = get_tsearch_config_filename(filename, "rules");
|
||||
FILE *fin;
|
||||
char *line;
|
||||
int cur = 0;
|
||||
|
||||
if ((fin = AllocateFile(real_filename, "r")) == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("could not open synonym file \"%s\": %m",
|
||||
real_filename)));
|
||||
|
||||
while ((line = t_readline(fin)) != NULL)
|
||||
{
|
||||
char *value;
|
||||
char *key;
|
||||
char *end = NULL;
|
||||
|
||||
if (*line == '\0')
|
||||
continue;
|
||||
|
||||
value = lowerstr(line);
|
||||
pfree(line);
|
||||
|
||||
key = find_word(value, &end);
|
||||
if (!key)
|
||||
{
|
||||
pfree(value);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cur == d->len)
|
||||
{
|
||||
d->len = (d->len > 0) ? 2 * d->len : 16;
|
||||
if (d->syn)
|
||||
d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
|
||||
else
|
||||
d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
|
||||
}
|
||||
|
||||
d->syn[cur].key = pnstrdup(key, end - key);
|
||||
d->syn[cur].value = value;
|
||||
|
||||
cur++;
|
||||
}
|
||||
|
||||
FreeFile(fin);
|
||||
|
||||
d->len = cur;
|
||||
if (cur > 1)
|
||||
qsort(d->syn, d->len, sizeof(Syn), compare_syn);
|
||||
|
||||
pfree(real_filename);
|
||||
}
|
||||
|
||||
Datum
|
||||
dxsyn_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
List *dictoptions = (List *) PG_GETARG_POINTER(0);
|
||||
DictSyn *d;
|
||||
ListCell *l;
|
||||
|
||||
d = (DictSyn *) palloc0(sizeof(DictSyn));
|
||||
d->len = 0;
|
||||
d->syn = NULL;
|
||||
d->keeporig = true;
|
||||
|
||||
foreach(l, dictoptions)
|
||||
{
|
||||
DefElem *defel = (DefElem *) lfirst(l);
|
||||
|
||||
if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
|
||||
{
|
||||
d->keeporig = defGetBoolean(defel);
|
||||
}
|
||||
else if (pg_strcasecmp(defel->defname, "RULES") == 0)
|
||||
{
|
||||
read_dictionary(d, defGetString(defel));
|
||||
}
|
||||
else
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("unrecognized xsyn parameter: \"%s\"",
|
||||
defel->defname)));
|
||||
}
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
dxsyn_lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
|
||||
char *in = (char *) PG_GETARG_POINTER(1);
|
||||
int length = PG_GETARG_INT32(2);
|
||||
Syn word;
|
||||
Syn *found;
|
||||
TSLexeme *res = NULL;
|
||||
|
||||
if (!length || d->len == 0)
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
/* Create search pattern */
|
||||
{
|
||||
char *temp = pnstrdup(in, length);
|
||||
|
||||
word.key = lowerstr(temp);
|
||||
pfree(temp);
|
||||
word.value = NULL;
|
||||
}
|
||||
|
||||
/* Look for matching syn */
|
||||
found = (Syn *)bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
|
||||
pfree(word.key);
|
||||
|
||||
if (!found)
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
/* Parse string of synonyms and return array of words */
|
||||
{
|
||||
char *value = pstrdup(found->value);
|
||||
int value_length = strlen(value);
|
||||
char *pos = value;
|
||||
int nsyns = 0;
|
||||
bool is_first = true;
|
||||
|
||||
res = palloc(0);
|
||||
|
||||
while(pos < value + value_length)
|
||||
{
|
||||
char *end;
|
||||
char *syn = find_word(pos, &end);
|
||||
|
||||
if (!syn)
|
||||
break;
|
||||
*end = '\0';
|
||||
|
||||
res = repalloc(res, sizeof(TSLexeme)*(nsyns + 2));
|
||||
res[nsyns].lexeme = NULL;
|
||||
|
||||
/* first word is added to result only if KEEPORIG flag is set */
|
||||
if(d->keeporig || !is_first)
|
||||
{
|
||||
res[nsyns].lexeme = pstrdup(syn);
|
||||
res[nsyns + 1].lexeme = NULL;
|
||||
|
||||
nsyns++;
|
||||
}
|
||||
|
||||
is_first = false;
|
||||
|
||||
pos = end + 1;
|
||||
}
|
||||
|
||||
pfree(value);
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
29
contrib/dict_xsyn/dict_xsyn.sql.in
Normal file
29
contrib/dict_xsyn/dict_xsyn.sql.in
Normal file
@ -0,0 +1,29 @@
|
||||
-- $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
-- Adjust this setting to control where the objects get created.
|
||||
SET search_path = public;
|
||||
|
||||
BEGIN;
|
||||
|
||||
CREATE FUNCTION dxsyn_init(internal)
|
||||
RETURNS internal
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION dxsyn_lexize(internal, internal, internal, internal)
|
||||
RETURNS internal
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE TEXT SEARCH TEMPLATE xsyn_template (
|
||||
LEXIZE = dxsyn_lexize,
|
||||
INIT = dxsyn_init
|
||||
);
|
||||
|
||||
CREATE TEXT SEARCH DICTIONARY xsyn (
|
||||
TEMPLATE = xsyn_template
|
||||
);
|
||||
|
||||
COMMENT ON TEXT SEARCH DICTIONARY xsyn IS 'eXtended synonym dictionary';
|
||||
|
||||
END;
|
22
contrib/dict_xsyn/expected/dict_xsyn.out
Normal file
22
contrib/dict_xsyn/expected/dict_xsyn.out
Normal file
@ -0,0 +1,22 @@
|
||||
--
|
||||
-- first, define the datatype. Turn off echoing so that expected file
|
||||
-- does not depend on contents of this file.
|
||||
--
|
||||
SET client_min_messages = warning;
|
||||
\set ECHO none
|
||||
RESET client_min_messages;
|
||||
--configuration
|
||||
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
|
||||
--lexize
|
||||
SELECT ts_lexize('xsyn', 'supernova');
|
||||
ts_lexize
|
||||
----------------
|
||||
{sn,sne,1987a}
|
||||
(1 row)
|
||||
|
||||
SELECT ts_lexize('xsyn', 'grb');
|
||||
ts_lexize
|
||||
-----------
|
||||
|
||||
(1 row)
|
||||
|
16
contrib/dict_xsyn/sql/dict_xsyn.sql
Normal file
16
contrib/dict_xsyn/sql/dict_xsyn.sql
Normal file
@ -0,0 +1,16 @@
|
||||
--
|
||||
-- first, define the datatype. Turn off echoing so that expected file
|
||||
-- does not depend on contents of this file.
|
||||
--
|
||||
SET client_min_messages = warning;
|
||||
\set ECHO none
|
||||
\i dict_xsyn.sql
|
||||
\set ECHO all
|
||||
RESET client_min_messages;
|
||||
|
||||
--configuration
|
||||
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
|
||||
|
||||
--lexize
|
||||
SELECT ts_lexize('xsyn', 'supernova');
|
||||
SELECT ts_lexize('xsyn', 'grb');
|
9
contrib/dict_xsyn/uninstall_dict_xsyn.sql
Normal file
9
contrib/dict_xsyn/uninstall_dict_xsyn.sql
Normal file
@ -0,0 +1,9 @@
|
||||
SET search_path = public;
|
||||
|
||||
DROP TEXT SEARCH DICTIONARY xsyn;
|
||||
|
||||
DROP TEXT SEARCH TEMPLATE xsyn_template;
|
||||
|
||||
DROP FUNCTION dxsyn_init(internal);
|
||||
|
||||
DROP FUNCTION dxsyn_lexize(internal,internal,internal,internal);
|
6
contrib/dict_xsyn/xsyn_sample.rules
Normal file
6
contrib/dict_xsyn/xsyn_sample.rules
Normal file
@ -0,0 +1,6 @@
|
||||
# Sample rules file for eXtended Synonym (xsyn) dictionary
|
||||
# format is as follows:
|
||||
#
|
||||
# word synonym1 synonym2 ...
|
||||
#
|
||||
supernova sn sne 1987a
|
19
contrib/test_parser/Makefile
Normal file
19
contrib/test_parser/Makefile
Normal file
@ -0,0 +1,19 @@
|
||||
# $PostgreSQL: pgsql/contrib/test_parser/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
MODULE_big = test_parser
|
||||
OBJS = test_parser.o
|
||||
DATA_built = test_parser.sql
|
||||
DATA = uninstall_test_parser.sql
|
||||
DOCS = README.test_parser
|
||||
REGRESS = test_parser
|
||||
|
||||
ifdef USE_PGXS
|
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS)
|
||||
else
|
||||
subdir = contrib/test_parser
|
||||
top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global
|
||||
include $(top_srcdir)/contrib/contrib-global.mk
|
||||
endif
|
52
contrib/test_parser/README.test_parser
Normal file
52
contrib/test_parser/README.test_parser
Normal file
@ -0,0 +1,52 @@
|
||||
Example parser
|
||||
==============
|
||||
|
||||
This is an example of a custom parser for full text search.
|
||||
|
||||
It recognizes space-delimited words and returns only two token types:
|
||||
|
||||
- 3, word, Word
|
||||
|
||||
- 12, blank, Space symbols
|
||||
|
||||
The token numbers have been chosen to keep compatibility with the default
|
||||
ts_headline() function, since we do not want to implement our own version.
|
||||
|
||||
* Configuration
|
||||
|
||||
The parser has no user-configurable parameters.
|
||||
|
||||
* Usage
|
||||
|
||||
1. Compile and install
|
||||
|
||||
2. Load parser
|
||||
|
||||
psql mydb < test_parser.sql
|
||||
|
||||
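3. Test it

(The to_tsvector and ts_headline examples below use a configuration built on
the parser; create it first with:
CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;)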
|
||||
|
||||
mydb# SELECT * FROM ts_parse('testparser','That''s my first own parser');
|
||||
tokid | token
|
||||
-------+--------
|
||||
3 | That's
|
||||
12 |
|
||||
3 | my
|
||||
12 |
|
||||
3 | first
|
||||
12 |
|
||||
3 | own
|
||||
12 |
|
||||
3 | parser
|
||||
|
||||
mydb# SELECT to_tsvector('testcfg','That''s my first own parser');
|
||||
to_tsvector
|
||||
-------------------------------------------------
|
||||
'my':2 'own':4 'first':3 'parser':5 'that''s':1
|
||||
|
||||
mydb# SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', to_tsquery('testcfg', 'stars'));
|
||||
ts_headline
|
||||
-----------------------------------------------------------------
|
||||
Supernovae <b>stars</b> are the brightest phenomena in galaxies
|
||||
|
||||
That's all.
|
50
contrib/test_parser/expected/test_parser.out
Normal file
50
contrib/test_parser/expected/test_parser.out
Normal file
@ -0,0 +1,50 @@
|
||||
--
|
||||
-- first, define the parser. Turn off echoing so that expected file
|
||||
-- does not depend on contents of this file.
|
||||
--
|
||||
SET client_min_messages = warning;
|
||||
\set ECHO none
|
||||
RESET client_min_messages;
|
||||
-- make test configuration using parser
|
||||
CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
|
||||
ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
|
||||
-- ts_parse
|
||||
SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');
|
||||
tokid | token
|
||||
-------+-----------------------
|
||||
3 | That's
|
||||
12 |
|
||||
3 | simple
|
||||
12 |
|
||||
3 | parser
|
||||
12 |
|
||||
3 | can't
|
||||
12 |
|
||||
3 | parse
|
||||
12 |
|
||||
3 | urls
|
||||
12 |
|
||||
3 | like
|
||||
12 |
|
||||
3 | http://some.url/here/
|
||||
(15 rows)
|
||||
|
||||
SELECT to_tsvector('testcfg','That''s my first own parser');
|
||||
to_tsvector
|
||||
-------------------------------------------------
|
||||
'my':2 'own':4 'first':3 'parser':5 'that''s':1
|
||||
(1 row)
|
||||
|
||||
SELECT to_tsquery('testcfg', 'star');
|
||||
to_tsquery
|
||||
------------
|
||||
'star'
|
||||
(1 row)
|
||||
|
||||
SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies',
|
||||
to_tsquery('testcfg', 'stars'));
|
||||
ts_headline
|
||||
-----------------------------------------------------------------
|
||||
Supernovae <b>stars</b> are the brightest phenomena in galaxies
|
||||
(1 row)
|
||||
|
26
contrib/test_parser/sql/test_parser.sql
Normal file
26
contrib/test_parser/sql/test_parser.sql
Normal file
@ -0,0 +1,26 @@
|
||||
--
|
||||
-- first, define the parser. Turn off echoing so that expected file
|
||||
-- does not depend on contents of this file.
|
||||
--
|
||||
SET client_min_messages = warning;
|
||||
\set ECHO none
|
||||
\i test_parser.sql
|
||||
\set ECHO all
|
||||
RESET client_min_messages;
|
||||
|
||||
-- make test configuration using parser
|
||||
|
||||
CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
|
||||
|
||||
ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
|
||||
|
||||
-- ts_parse
|
||||
|
||||
SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');
|
||||
|
||||
SELECT to_tsvector('testcfg','That''s my first own parser');
|
||||
|
||||
SELECT to_tsquery('testcfg', 'star');
|
||||
|
||||
SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies',
|
||||
to_tsquery('testcfg', 'stars'));
|
130
contrib/test_parser/test_parser.c
Normal file
130
contrib/test_parser/test_parser.c
Normal file
@ -0,0 +1,130 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* test_parser.c
|
||||
* Simple example of a text search parser
|
||||
*
|
||||
* Copyright (c) 2007, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/contrib/test_parser/test_parser.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "fmgr.h"
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
|
||||
/*
|
||||
* types
|
||||
*/
|
||||
|
||||
/* self-defined type */
|
||||
/*
 * Per-parse working state: created by testprs_start, advanced by
 * testprs_getlexeme, released by testprs_end.
 */
typedef struct
{
	char	   *buffer;		/* start of the text being parsed */
	int			len;		/* number of bytes of text in buffer */
	int			pos;		/* offset in buffer of the next unread byte */
} ParserState;
|
||||
|
||||
/* copy-paste from wparser.h of tsearch2 */
|
||||
/*
 * One entry of the token-type table returned by testprs_lextype.
 * The table is terminated by an entry whose lexid is 0.
 */
typedef struct
{
	int			lexid;		/* numeric token type, as returned by the parser */
	char	   *alias;		/* short name of the token type, e.g. "word" */
	char	   *descr;		/* human-readable description, e.g. "Word" */
} LexDescr;
|
||||
|
||||
/*
|
||||
* prototypes
|
||||
*/
|
||||
PG_FUNCTION_INFO_V1(testprs_start);
|
||||
Datum testprs_start(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(testprs_getlexeme);
|
||||
Datum testprs_getlexeme(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(testprs_end);
|
||||
Datum testprs_end(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(testprs_lextype);
|
||||
Datum testprs_lextype(PG_FUNCTION_ARGS);
|
||||
|
||||
/*
|
||||
* functions
|
||||
*/
|
||||
Datum testprs_start(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ParserState *pst = (ParserState *) palloc0(sizeof(ParserState));
|
||||
pst->buffer = (char *) PG_GETARG_POINTER(0);
|
||||
pst->len = PG_GETARG_INT32(1);
|
||||
pst->pos = 0;
|
||||
|
||||
PG_RETURN_POINTER(pst);
|
||||
}
|
||||
|
||||
Datum testprs_getlexeme(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
|
||||
char **t = (char **) PG_GETARG_POINTER(1);
|
||||
int *tlen = (int *) PG_GETARG_POINTER(2);
|
||||
int type;
|
||||
|
||||
*tlen = pst->pos;
|
||||
*t = pst->buffer + pst->pos;
|
||||
|
||||
if ((pst->buffer)[pst->pos] == ' ')
|
||||
{
|
||||
/* blank type */
|
||||
type = 12;
|
||||
/* go to the next non-white-space character */
|
||||
while ((pst->buffer)[pst->pos] == ' ' &&
|
||||
pst->pos < pst->len)
|
||||
(pst->pos)++;
|
||||
} else {
|
||||
/* word type */
|
||||
type = 3;
|
||||
/* go to the next white-space character */
|
||||
while ((pst->buffer)[pst->pos] != ' ' &&
|
||||
pst->pos < pst->len)
|
||||
(pst->pos)++;
|
||||
}
|
||||
|
||||
*tlen = pst->pos - *tlen;
|
||||
|
||||
/* we are finished if (*tlen == 0) */
|
||||
if (*tlen == 0)
|
||||
type=0;
|
||||
|
||||
PG_RETURN_INT32(type);
|
||||
}
|
||||
|
||||
Datum testprs_end(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
|
||||
pfree(pst);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
Datum testprs_lextype(PG_FUNCTION_ARGS)
|
||||
{
|
||||
/*
|
||||
* Remarks:
|
||||
* - we have to return the blanks for headline reason
|
||||
* - we use the same lexids like Teodor in the default
|
||||
* word parser; in this way we can reuse the headline
|
||||
* function of the default word parser.
|
||||
*/
|
||||
LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2+1));
|
||||
|
||||
/* there are only two types in this parser */
|
||||
descr[0].lexid = 3;
|
||||
descr[0].alias = pstrdup("word");
|
||||
descr[0].descr = pstrdup("Word");
|
||||
descr[1].lexid = 12;
|
||||
descr[1].alias = pstrdup("blank");
|
||||
descr[1].descr = pstrdup("Space symbols");
|
||||
descr[2].lexid = 0;
|
||||
|
||||
PG_RETURN_POINTER(descr);
|
||||
}
|
36
contrib/test_parser/test_parser.sql.in
Normal file
36
contrib/test_parser/test_parser.sql.in
Normal file
@ -0,0 +1,36 @@
|
||||
-- $PostgreSQL: pgsql/contrib/test_parser/test_parser.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
-- Adjust this setting to control where the objects get created.
|
||||
SET search_path = public;
|
||||
|
||||
BEGIN;
|
||||
|
||||
CREATE FUNCTION testprs_start(internal, int4)
|
||||
RETURNS internal
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION testprs_getlexeme(internal, internal, internal)
|
||||
RETURNS internal
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION testprs_end(internal)
|
||||
RETURNS void
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION testprs_lextype(internal)
|
||||
RETURNS internal
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE TEXT SEARCH PARSER testparser (
|
||||
START = testprs_start,
|
||||
GETTOKEN = testprs_getlexeme,
|
||||
END = testprs_end,
|
||||
HEADLINE = pg_catalog.prsd_headline,
|
||||
LEXTYPES = testprs_lextype
|
||||
);
|
||||
|
||||
END;
|
11
contrib/test_parser/uninstall_test_parser.sql
Normal file
11
contrib/test_parser/uninstall_test_parser.sql
Normal file
@ -0,0 +1,11 @@
|
||||
SET search_path = public;
|
||||
|
||||
DROP TEXT SEARCH PARSER testparser;
|
||||
|
||||
DROP FUNCTION testprs_start(internal, int4);
|
||||
|
||||
DROP FUNCTION testprs_getlexeme(internal, internal, internal);
|
||||
|
||||
DROP FUNCTION testprs_end(internal);
|
||||
|
||||
DROP FUNCTION testprs_lextype(internal);
|
Loading…
x
Reference in New Issue
Block a user