Add sample text search dictionary templates and parsers, to replace the
hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov.
commit 5fcb079858
parent fb631dba2a
contrib/Makefile
@@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/contrib/Makefile,v 1.80 2007/10/13 22:59:43 tgl Exp $
+# $PostgreSQL: pgsql/contrib/Makefile,v 1.81 2007/10/15 21:36:49 tgl Exp $
 
 subdir = contrib
 top_builddir = ..
@@ -10,6 +10,8 @@ WANTED_DIRS = \
 	chkpass \
 	cube \
 	dblink \
+	dict_int \
+	dict_xsyn \
 	earthdistance \
 	fuzzystrmatch \
 	hstore \
@@ -31,6 +33,7 @@ WANTED_DIRS = \
 	seg \
 	spi \
 	tablefunc \
+	test_parser \
 	vacuumlo
 
 ifeq ($(with_openssl),yes)
contrib/README
@@ -1,4 +1,3 @@
-
 The PostgreSQL contrib tree
 ---------------------------
 
@@ -29,8 +28,8 @@ adminpack -
 	by Dave Page <dpage@vale-housing.co.uk>
 
 btree_gist -
 	Support for emulating BTREE indexing in GiST
 	by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
 
 chkpass -
 	An auto-encrypted password datatype
@@ -44,8 +43,16 @@ dblink -
 	Allows remote query execution
 	by Joe Conway <mail@joeconway.com>
 
+dict_int -
+	Text search dictionary template for integers
+	by Sergey Karpov <karpov@sao.ru>
+
+dict_xsyn -
+	Text search dictionary template for extended synonym processing
+	by Sergey Karpov <karpov@sao.ru>
+
 earthdistance -
-	Operator for computing earth distance for two points
+	Operator for computing earth distance between two points
 	by Hal Snyder <hal@vailsys.com>
 
 fuzzystrmatch -
@@ -53,8 +60,8 @@ fuzzystrmatch -
 	by Joe Conway <mail@joeconway.com>, Joel Burton <jburton@scw.org>
 
 hstore -
-	Hstore - module for storing (key,value) pairs
+	Module for storing (key, value) pairs
 	by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@sigaev.ru>
 
 intagg -
 	Integer aggregator
@@ -92,6 +99,10 @@ pg_freespacemap -
 	Displays the contents of the free space map (FSM)
 	by Mark Kirkwood <markir@paradise.net.nz>
 
+pg_standby -
+	Sample archive_command for warm standby operation
+	by Simon Riggs <simon@2ndquadrant.com>
+
 pg_trgm -
 	Functions for determining the similarity of text based on trigram
 	matching.
@@ -110,7 +121,7 @@ pgrowlocks -
 	by Tatsuo Ishii <ishii@sraoss.co.jp>
 
 pgstattuple -
-	A function to return statistics about "dead" tuples and free
+	Functions to return statistics about "dead" tuples and free
 	space within a table
 	by Tatsuo Ishii <ishii@sraoss.co.jp>
 
@@ -126,12 +137,16 @@ sslinfo -
 	by Victor Wagner <vitus@cryptocom.ru>
 
 start-scripts -
-	Scripts for starting the server at boot time.
+	Scripts for starting the server at boot time on various platforms.
 
 tablefunc -
 	Examples of functions returning tables
 	by Joe Conway <mail@joeconway.com>
 
+test_parser -
+	Sample text search parser
+	by Sergey Karpov <karpov@sao.ru>
+
 tsearch2 -
 	Full-text-index support using GiST
 	by Teodor Sigaev <teodor@sigaev.ru> and Oleg Bartunov
contrib/dict_int/Makefile (new file, 19 lines)

# $PostgreSQL: pgsql/contrib/dict_int/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $

MODULE_big = dict_int
OBJS = dict_int.o
DATA_built = dict_int.sql
DATA = uninstall_dict_int.sql
DOCS = README.dict_int
REGRESS = dict_int

ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/dict_int
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
contrib/dict_int/README.dict_int (new file, 41 lines)

Dictionary for integers
=======================

The motivation for this example dictionary is to control the indexing of
integers (signed and unsigned), and, consequently, to minimize the number of
unique words, which greatly affects the performance of searching.

* Configuration

The dictionary accepts two options:

- The MAXLEN parameter specifies the maximum length (number of digits)
  allowed in an integer word.  The default value is 6.

- The REJECTLONG parameter specifies whether an overlength integer should be
  truncated or ignored.  If REJECTLONG=FALSE (default), the dictionary returns
  the first MAXLEN digits of the integer.  If REJECTLONG=TRUE, the
  dictionary treats an overlength integer as a stop word, so that it will
  not be indexed.

* Usage

1. Compile and install.

2. Load the dictionary:

     psql mydb < dict_int.sql

3. Test it:

     mydb# select ts_lexize('intdict', '12345678');
      ts_lexize
     -----------
      {123456}

4. Change its options as you wish:

     mydb# ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 4, REJECTLONG = true);
     ALTER TEXT SEARCH DICTIONARY

That's all.
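The README stops at ts_lexize. As a purely illustrative sketch (not part of this commit), intdict could also be mapped onto the integer token types of the default parser; the configuration name mycfg and the COPY source english are assumptions:

     -- Illustrative only: route integer tokens through intdict.
     CREATE TEXT SEARCH CONFIGURATION mycfg ( COPY = english );
     ALTER TEXT SEARCH CONFIGURATION mycfg
         ALTER MAPPING FOR int, uint WITH intdict;
     -- Overlength integers are then truncated (or dropped, with REJECTLONG=true) at indexing time.
     SELECT to_tsvector('mycfg', 'error code 12345678901 in module 42');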
contrib/dict_int/dict_int.c (new file, 99 lines)

/*-------------------------------------------------------------------------
 *
 * dict_int.c
 *	  Text search dictionary for integers
 *
 * Copyright (c) 2007, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/contrib/dict_int/dict_int.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "commands/defrem.h"
#include "fmgr.h"
#include "tsearch/ts_public.h"

PG_MODULE_MAGIC;

typedef struct
{
	int			maxlen;
	bool		rejectlong;
} DictInt;

PG_FUNCTION_INFO_V1(dintdict_init);
Datum		dintdict_init(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(dintdict_lexize);
Datum		dintdict_lexize(PG_FUNCTION_ARGS);

Datum
dintdict_init(PG_FUNCTION_ARGS)
{
	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
	DictInt    *d;
	ListCell   *l;

	d = (DictInt *) palloc0(sizeof(DictInt));
	d->maxlen = 6;
	d->rejectlong = false;

	foreach(l, dictoptions)
	{
		DefElem    *defel = (DefElem *) lfirst(l);

		if (pg_strcasecmp(defel->defname, "MAXLEN") == 0)
		{
			d->maxlen = atoi(defGetString(defel));
		}
		else if (pg_strcasecmp(defel->defname, "REJECTLONG") == 0)
		{
			d->rejectlong = defGetBoolean(defel);
		}
		else
		{
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("unrecognized intdict parameter: \"%s\"",
							defel->defname)));
		}
	}

	PG_RETURN_POINTER(d);
}

Datum
dintdict_lexize(PG_FUNCTION_ARGS)
{
	DictInt    *d = (DictInt *) PG_GETARG_POINTER(0);
	char	   *in = (char *) PG_GETARG_POINTER(1);
	char	   *txt = pnstrdup(in, PG_GETARG_INT32(2));
	TSLexeme   *res = palloc(sizeof(TSLexeme) * 2);

	res[1].lexeme = NULL;
	if (PG_GETARG_INT32(2) > d->maxlen)
	{
		if (d->rejectlong)
		{
			/* reject by returning void array */
			pfree(txt);
			res[0].lexeme = NULL;
		}
		else
		{
			/* trim integer */
			txt[d->maxlen] = '\0';
			res[0].lexeme = txt;
		}
	}
	else
	{
		res[0].lexeme = txt;
	}

	PG_RETURN_POINTER(res);
}
contrib/dict_int/dict_int.sql.in (new file, 29 lines)

-- $PostgreSQL: pgsql/contrib/dict_int/dict_int.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $

-- Adjust this setting to control where the objects get created.
SET search_path = public;

BEGIN;

CREATE FUNCTION dintdict_init(internal)
        RETURNS internal
        AS 'MODULE_PATHNAME'
        LANGUAGE C STRICT;

CREATE FUNCTION dintdict_lexize(internal, internal, internal, internal)
        RETURNS internal
        AS 'MODULE_PATHNAME'
        LANGUAGE C STRICT;

CREATE TEXT SEARCH TEMPLATE intdict_template (
        LEXIZE = dintdict_lexize,
        INIT   = dintdict_init
);

CREATE TEXT SEARCH DICTIONARY intdict (
        TEMPLATE = intdict_template
);

COMMENT ON TEXT SEARCH DICTIONARY intdict IS 'dictionary for integers';

END;
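Because the script defines intdict_template separately from the stock intdict dictionary, further dictionaries with their own settings can be derived from the template. A minimal sketch; the dictionary name intdict_short is hypothetical:

     -- Hypothetical derived dictionary: keep at most 4 digits, reject anything longer.
     CREATE TEXT SEARCH DICTIONARY intdict_short (
         TEMPLATE = intdict_template,
         MAXLEN = 4,
         REJECTLONG = true
     );
     SELECT ts_lexize('intdict_short', '12345678');  -- expected {} (treated as a stop word)
     SELECT ts_lexize('intdict_short', '123');       -- expected {123}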
contrib/dict_int/expected/dict_int.out (new file, 308 lines)

--
-- first, define the datatype.  Turn off echoing so that expected file
-- does not depend on contents of this file.
--
SET client_min_messages = warning;
\set ECHO none
RESET client_min_messages;
--lexize
select ts_lexize('intdict', '511673');
 ts_lexize
-----------
 {511673}
(1 row)

select ts_lexize('intdict', '129');
 ts_lexize
-----------
 {129}
(1 row)

select ts_lexize('intdict', '40865854');
 ts_lexize
-----------
 {408658}
(1 row)

select ts_lexize('intdict', '952');
 ts_lexize
-----------
 {952}
(1 row)

select ts_lexize('intdict', '654980341');
 ts_lexize
-----------
 {654980}
(1 row)

select ts_lexize('intdict', '09810106');
 ts_lexize
-----------
 {098101}
(1 row)

select ts_lexize('intdict', '14262713');
 ts_lexize
-----------
 {142627}
(1 row)

select ts_lexize('intdict', '6532082986');
 ts_lexize
-----------
 {653208}
(1 row)

select ts_lexize('intdict', '0150061');
 ts_lexize
-----------
 {015006}
(1 row)

select ts_lexize('intdict', '7778');
 ts_lexize
-----------
 {7778}
(1 row)

select ts_lexize('intdict', '9547');
 ts_lexize
-----------
 {9547}
(1 row)

select ts_lexize('intdict', '753395478');
 ts_lexize
-----------
 {753395}
(1 row)

select ts_lexize('intdict', '647652');
 ts_lexize
-----------
 {647652}
(1 row)

select ts_lexize('intdict', '6988655574');
 ts_lexize
-----------
 {698865}
(1 row)

select ts_lexize('intdict', '1279');
 ts_lexize
-----------
 {1279}
(1 row)

select ts_lexize('intdict', '1266645909');
 ts_lexize
-----------
 {126664}
(1 row)

select ts_lexize('intdict', '7594193969');
 ts_lexize
-----------
 {759419}
(1 row)

select ts_lexize('intdict', '16928207');
 ts_lexize
-----------
 {169282}
(1 row)

select ts_lexize('intdict', '196850350328');
 ts_lexize
-----------
 {196850}
(1 row)

select ts_lexize('intdict', '22026985592');
 ts_lexize
-----------
 {220269}
(1 row)

select ts_lexize('intdict', '2063765');
 ts_lexize
-----------
 {206376}
(1 row)

select ts_lexize('intdict', '242387310');
 ts_lexize
-----------
 {242387}
(1 row)

select ts_lexize('intdict', '93595');
 ts_lexize
-----------
 {93595}
(1 row)

select ts_lexize('intdict', '9374');
 ts_lexize
-----------
 {9374}
(1 row)

select ts_lexize('intdict', '996969');
 ts_lexize
-----------
 {996969}
(1 row)

select ts_lexize('intdict', '353595982');
 ts_lexize
-----------
 {353595}
(1 row)

select ts_lexize('intdict', '925860');
 ts_lexize
-----------
 {925860}
(1 row)

select ts_lexize('intdict', '11848378337');
 ts_lexize
-----------
 {118483}
(1 row)

select ts_lexize('intdict', '333');
 ts_lexize
-----------
 {333}
(1 row)

select ts_lexize('intdict', '799287416765');
 ts_lexize
-----------
 {799287}
(1 row)

select ts_lexize('intdict', '745939');
 ts_lexize
-----------
 {745939}
(1 row)

select ts_lexize('intdict', '67601305734');
 ts_lexize
-----------
 {676013}
(1 row)

select ts_lexize('intdict', '3361113');
 ts_lexize
-----------
 {336111}
(1 row)

select ts_lexize('intdict', '9033778607');
 ts_lexize
-----------
 {903377}
(1 row)

select ts_lexize('intdict', '7507648');
 ts_lexize
-----------
 {750764}
(1 row)

select ts_lexize('intdict', '1166');
 ts_lexize
-----------
 {1166}
(1 row)

select ts_lexize('intdict', '9360498');
 ts_lexize
-----------
 {936049}
(1 row)

select ts_lexize('intdict', '917795');
 ts_lexize
-----------
 {917795}
(1 row)

select ts_lexize('intdict', '9387894');
 ts_lexize
-----------
 {938789}
(1 row)

select ts_lexize('intdict', '42764329');
 ts_lexize
-----------
 {427643}
(1 row)

select ts_lexize('intdict', '564062');
 ts_lexize
-----------
 {564062}
(1 row)

select ts_lexize('intdict', '5413377');
 ts_lexize
-----------
 {541337}
(1 row)

select ts_lexize('intdict', '060965');
 ts_lexize
-----------
 {060965}
(1 row)

select ts_lexize('intdict', '08273593');
 ts_lexize
-----------
 {082735}
(1 row)

select ts_lexize('intdict', '593556010144');
 ts_lexize
-----------
 {593556}
(1 row)

select ts_lexize('intdict', '17988843352');
 ts_lexize
-----------
 {179888}
(1 row)

select ts_lexize('intdict', '252281774');
 ts_lexize
-----------
 {252281}
(1 row)

select ts_lexize('intdict', '313425');
 ts_lexize
-----------
 {313425}
(1 row)

select ts_lexize('intdict', '641439323669');
 ts_lexize
-----------
 {641439}
(1 row)

select ts_lexize('intdict', '314532610153');
 ts_lexize
-----------
 {314532}
(1 row)

contrib/dict_int/sql/dict_int.sql (new file, 61 lines)

--
-- first, define the datatype.  Turn off echoing so that expected file
-- does not depend on contents of this file.
--
SET client_min_messages = warning;
\set ECHO none
\i dict_int.sql
\set ECHO all
RESET client_min_messages;

--lexize
select ts_lexize('intdict', '511673');
select ts_lexize('intdict', '129');
select ts_lexize('intdict', '40865854');
select ts_lexize('intdict', '952');
select ts_lexize('intdict', '654980341');
select ts_lexize('intdict', '09810106');
select ts_lexize('intdict', '14262713');
select ts_lexize('intdict', '6532082986');
select ts_lexize('intdict', '0150061');
select ts_lexize('intdict', '7778');
select ts_lexize('intdict', '9547');
select ts_lexize('intdict', '753395478');
select ts_lexize('intdict', '647652');
select ts_lexize('intdict', '6988655574');
select ts_lexize('intdict', '1279');
select ts_lexize('intdict', '1266645909');
select ts_lexize('intdict', '7594193969');
select ts_lexize('intdict', '16928207');
select ts_lexize('intdict', '196850350328');
select ts_lexize('intdict', '22026985592');
select ts_lexize('intdict', '2063765');
select ts_lexize('intdict', '242387310');
select ts_lexize('intdict', '93595');
select ts_lexize('intdict', '9374');
select ts_lexize('intdict', '996969');
select ts_lexize('intdict', '353595982');
select ts_lexize('intdict', '925860');
select ts_lexize('intdict', '11848378337');
select ts_lexize('intdict', '333');
select ts_lexize('intdict', '799287416765');
select ts_lexize('intdict', '745939');
select ts_lexize('intdict', '67601305734');
select ts_lexize('intdict', '3361113');
select ts_lexize('intdict', '9033778607');
select ts_lexize('intdict', '7507648');
select ts_lexize('intdict', '1166');
select ts_lexize('intdict', '9360498');
select ts_lexize('intdict', '917795');
select ts_lexize('intdict', '9387894');
select ts_lexize('intdict', '42764329');
select ts_lexize('intdict', '564062');
select ts_lexize('intdict', '5413377');
select ts_lexize('intdict', '060965');
select ts_lexize('intdict', '08273593');
select ts_lexize('intdict', '593556010144');
select ts_lexize('intdict', '17988843352');
select ts_lexize('intdict', '252281774');
select ts_lexize('intdict', '313425');
select ts_lexize('intdict', '641439323669');
select ts_lexize('intdict', '314532610153');
contrib/dict_int/uninstall_dict_int.sql (new file, 9 lines)

SET search_path = public;

DROP TEXT SEARCH DICTIONARY intdict;

DROP TEXT SEARCH TEMPLATE intdict_template;

DROP FUNCTION dintdict_init(internal);

DROP FUNCTION dintdict_lexize(internal,internal,internal,internal);
contrib/dict_xsyn/Makefile (new file, 38 lines)

# $PostgreSQL: pgsql/contrib/dict_xsyn/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $

MODULE_big = dict_xsyn
OBJS = dict_xsyn.o
DATA_built = dict_xsyn.sql
DATA = uninstall_dict_xsyn.sql
DOCS = README.dict_xsyn
REGRESS = dict_xsyn

DICTDIR = tsearch_data
DICTFILES = xsyn_sample.rules

ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/dict_xsyn
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

install: install-data

.PHONY: install-data
install-data: $(DICTFILES)
	for i in $(DICTFILES); \
	do $(INSTALL_DATA) $(srcdir)/$$i '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i; \
	done

uninstall: uninstall-data

.PHONY: uninstall-data
uninstall-data:
	for i in $(DICTFILES); \
	do rm -rf '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i ; \
	done
contrib/dict_xsyn/README.dict_xsyn (new file, 52 lines)

Extended Synonym dictionary
===========================

This is a simple synonym dictionary.  It replaces words with groups of their
synonyms, and so makes it possible to search for a word using any of its
synonyms.

* Configuration

It accepts the following options:

- KEEPORIG controls whether the original word is included, or only its
  synonyms.  Default is 'true'.

- RULES is the base name of the file containing the list of synonyms.
  This file must be in $(prefix)/share/tsearch_data/, and its name must
  end in ".rules" (which is not included in the RULES parameter).

The rules file has the following format:

- Each line represents a group of synonyms for a single word, which is
  given first on the line.  Synonyms are separated by whitespace:

    word syn1 syn2 syn3

- The sharp ('#') sign is a comment delimiter.  It may appear at any
  position in a line; the rest of the line is skipped.

Look at xsyn_sample.rules, which is installed in $(prefix)/share/tsearch_data/,
for an example.

* Usage

1. Compile and install.

2. Load the dictionary:

     psql mydb < dict_xsyn.sql

3. Test it:

     mydb=# SELECT ts_lexize('xsyn','word');
           ts_lexize
     ----------------------
      {word,syn1,syn2,syn3}

4. Change the dictionary options as you wish:

     mydb# ALTER TEXT SEARCH DICTIONARY xsyn (KEEPORIG=false);
     ALTER TEXT SEARCH DICTIONARY

That's all.
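A hedged sketch of wiring xsyn into a configuration once the sample rules file is installed; the configuration name syncfg, the asciiword mapping, and the english_stem fallback are assumptions, not part of this commit:

     ALTER TEXT SEARCH DICTIONARY xsyn (RULES = 'xsyn_sample', KEEPORIG = true);
     CREATE TEXT SEARCH CONFIGURATION syncfg ( COPY = english );
     ALTER TEXT SEARCH CONFIGURATION syncfg
         ALTER MAPPING FOR asciiword WITH xsyn, english_stem;
     -- Words with a rule are expanded to their synonym group; others fall through to english_stem.
     SELECT to_tsvector('syncfg', 'supernova');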
contrib/dict_xsyn/dict_xsyn.c (new file, 235 lines)

/*-------------------------------------------------------------------------
 *
 * dict_xsyn.c
 *	  Extended synonym dictionary
 *
 * Copyright (c) 2007, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <ctype.h>

#include "commands/defrem.h"
#include "fmgr.h"
#include "storage/fd.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"

PG_MODULE_MAGIC;

typedef struct
{
	char	   *key;		/* Word */
	char	   *value;		/* Unparsed list of synonyms, including the word itself */
} Syn;

typedef struct
{
	int			len;
	Syn		   *syn;

	bool		keeporig;
} DictSyn;

PG_FUNCTION_INFO_V1(dxsyn_init);
Datum		dxsyn_init(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(dxsyn_lexize);
Datum		dxsyn_lexize(PG_FUNCTION_ARGS);

static char *
find_word(char *in, char **end)
{
	char	   *start;

	*end = NULL;
	while (*in && t_isspace(in))
		in += pg_mblen(in);

	if (!*in || *in == '#')
		return NULL;
	start = in;

	while (*in && !t_isspace(in))
		in += pg_mblen(in);

	*end = in;

	return start;
}

static int
compare_syn(const void *a, const void *b)
{
	return strcmp(((Syn *) a)->key, ((Syn *) b)->key);
}

static void
read_dictionary(DictSyn *d, char *filename)
{
	char	   *real_filename = get_tsearch_config_filename(filename, "rules");
	FILE	   *fin;
	char	   *line;
	int			cur = 0;

	if ((fin = AllocateFile(real_filename, "r")) == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open synonym file \"%s\": %m",
						real_filename)));

	while ((line = t_readline(fin)) != NULL)
	{
		char	   *value;
		char	   *key;
		char	   *end = NULL;

		if (*line == '\0')
			continue;

		value = lowerstr(line);
		pfree(line);

		key = find_word(value, &end);
		if (!key)
		{
			pfree(value);
			continue;
		}

		if (cur == d->len)
		{
			d->len = (d->len > 0) ? 2 * d->len : 16;
			if (d->syn)
				d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
			else
				d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
		}

		d->syn[cur].key = pnstrdup(key, end - key);
		d->syn[cur].value = value;

		cur++;
	}

	FreeFile(fin);

	d->len = cur;
	if (cur > 1)
		qsort(d->syn, d->len, sizeof(Syn), compare_syn);

	pfree(real_filename);
}

Datum
dxsyn_init(PG_FUNCTION_ARGS)
{
	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
	DictSyn    *d;
	ListCell   *l;

	d = (DictSyn *) palloc0(sizeof(DictSyn));
	d->len = 0;
	d->syn = NULL;
	d->keeporig = true;

	foreach(l, dictoptions)
	{
		DefElem    *defel = (DefElem *) lfirst(l);

		if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
		{
			d->keeporig = defGetBoolean(defel);
		}
		else if (pg_strcasecmp(defel->defname, "RULES") == 0)
		{
			read_dictionary(d, defGetString(defel));
		}
		else
		{
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("unrecognized xsyn parameter: \"%s\"",
							defel->defname)));
		}
	}

	PG_RETURN_POINTER(d);
}

Datum
dxsyn_lexize(PG_FUNCTION_ARGS)
{
	DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
	char	   *in = (char *) PG_GETARG_POINTER(1);
	int			length = PG_GETARG_INT32(2);
	Syn			word;
	Syn		   *found;
	TSLexeme   *res = NULL;

	if (!length || d->len == 0)
		PG_RETURN_POINTER(NULL);

	/* Create search pattern */
	{
		char	   *temp = pnstrdup(in, length);

		word.key = lowerstr(temp);
		pfree(temp);
		word.value = NULL;
	}

	/* Look for matching syn */
	found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
	pfree(word.key);

	if (!found)
		PG_RETURN_POINTER(NULL);

	/* Parse string of synonyms and return array of words */
	{
		char	   *value = pstrdup(found->value);
		int			value_length = strlen(value);
		char	   *pos = value;
		int			nsyns = 0;
		bool		is_first = true;

		res = palloc(0);

		while (pos < value + value_length)
		{
			char	   *end;
			char	   *syn = find_word(pos, &end);

			if (!syn)
				break;
			*end = '\0';

			res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
			res[nsyns].lexeme = NULL;

			/* first word is added to result only if KEEPORIG flag is set */
			if (d->keeporig || !is_first)
			{
				res[nsyns].lexeme = pstrdup(syn);
				res[nsyns + 1].lexeme = NULL;

				nsyns++;
			}

			is_first = false;

			pos = end + 1;
		}

		pfree(value);
	}

	PG_RETURN_POINTER(res);
}
contrib/dict_xsyn/dict_xsyn.sql.in (new file, 29 lines)

-- $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $

-- Adjust this setting to control where the objects get created.
SET search_path = public;

BEGIN;

CREATE FUNCTION dxsyn_init(internal)
        RETURNS internal
        AS 'MODULE_PATHNAME'
        LANGUAGE C STRICT;

CREATE FUNCTION dxsyn_lexize(internal, internal, internal, internal)
        RETURNS internal
        AS 'MODULE_PATHNAME'
        LANGUAGE C STRICT;

CREATE TEXT SEARCH TEMPLATE xsyn_template (
        LEXIZE = dxsyn_lexize,
        INIT   = dxsyn_init
);

CREATE TEXT SEARCH DICTIONARY xsyn (
        TEMPLATE = xsyn_template
);

COMMENT ON TEXT SEARCH DICTIONARY xsyn IS 'eXtended synonym dictionary';

END;
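As with intdict, additional dictionaries can be created from xsyn_template with their own options; a brief sketch with a hypothetical name:

     CREATE TEXT SEARCH DICTIONARY astro_syn (
         TEMPLATE = xsyn_template,
         RULES = 'xsyn_sample',
         KEEPORIG = false
     );
     SELECT ts_lexize('astro_syn', 'supernova');  -- expected {sn,sne,1987a}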
contrib/dict_xsyn/expected/dict_xsyn.out (new file, 22 lines)

--
-- first, define the datatype.  Turn off echoing so that expected file
-- does not depend on contents of this file.
--
SET client_min_messages = warning;
\set ECHO none
RESET client_min_messages;
--configuration
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
--lexize
SELECT ts_lexize('xsyn', 'supernova');
   ts_lexize
----------------
 {sn,sne,1987a}
(1 row)

SELECT ts_lexize('xsyn', 'grb');
 ts_lexize
-----------
 
(1 row)

contrib/dict_xsyn/sql/dict_xsyn.sql (new file, 16 lines)

--
-- first, define the datatype.  Turn off echoing so that expected file
-- does not depend on contents of this file.
--
SET client_min_messages = warning;
\set ECHO none
\i dict_xsyn.sql
\set ECHO all
RESET client_min_messages;

--configuration
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);

--lexize
SELECT ts_lexize('xsyn', 'supernova');
SELECT ts_lexize('xsyn', 'grb');
contrib/dict_xsyn/uninstall_dict_xsyn.sql (new file, 9 lines)

SET search_path = public;

DROP TEXT SEARCH DICTIONARY xsyn;

DROP TEXT SEARCH TEMPLATE xsyn_template;

DROP FUNCTION dxsyn_init(internal);

DROP FUNCTION dxsyn_lexize(internal,internal,internal,internal);
contrib/dict_xsyn/xsyn_sample.rules (new file, 6 lines)

# Sample rules file for eXtended Synonym (xsyn) dictionary
# format is as follows:
#
# word synonym1 synonym2 ...
#
supernova sn sne 1987a
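For reference, a hedged sketch of what the single rule above should yield once the stock xsyn dictionary is pointed at this file (the exact result depends on KEEPORIG):

     ALTER TEXT SEARCH DICTIONARY xsyn (RULES = 'xsyn_sample');
     SELECT ts_lexize('xsyn', 'supernova');
     -- with the default KEEPORIG=true:  {supernova,sn,sne,1987a}
     -- with KEEPORIG=false (as in the regression test): {sn,sne,1987a}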
contrib/test_parser/Makefile (new file, 19 lines)

# $PostgreSQL: pgsql/contrib/test_parser/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $

MODULE_big = test_parser
OBJS = test_parser.o
DATA_built = test_parser.sql
DATA = uninstall_test_parser.sql
DOCS = README.test_parser
REGRESS = test_parser

ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/test_parser
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
contrib/test_parser/README.test_parser (new file, 52 lines)

Example parser
==============

This is an example of a custom parser for full text search.

It recognizes space-delimited words and returns only two token types:

- 3, word, Word

- 12, blank, Space symbols

The token numbers have been chosen to keep compatibility with the default
ts_headline() function, since we do not want to implement our own version.

* Configuration

The parser has no user-configurable parameters.

* Usage

1. Compile and install.

2. Load the parser:

     psql mydb < test_parser.sql

3. Test it:

     mydb# SELECT * FROM ts_parse('testparser','That''s my first own parser');
      tokid | token
     -------+--------
          3 | That's
         12 |
          3 | my
         12 |
          3 | first
         12 |
          3 | own
         12 |
          3 | parser

     mydb# SELECT to_tsvector('testcfg','That''s my first own parser');
                        to_tsvector
     -------------------------------------------------
      'my':2 'own':4 'first':3 'parser':5 'that''s':1

     mydb# SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', to_tsquery('testcfg', 'star'));
                                headline
     -----------------------------------------------------------------
      Supernovae <b>stars</b> are the brightest phenomena in galaxies

That's all.
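Two standard introspection calls that may help when experimenting with the parser; ts_token_type and ts_debug are core functions, and testcfg is the configuration from the README above:

     -- List the token types the parser advertises (word and blank only).
     SELECT * FROM ts_token_type('testparser');
     -- Trace how each token of a sample string is handled by testcfg.
     SELECT * FROM ts_debug('testcfg', 'That''s my first own parser');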
contrib/test_parser/expected/test_parser.out (new file, 50 lines)

--
-- first, define the parser.  Turn off echoing so that expected file
-- does not depend on contents of this file.
--
SET client_min_messages = warning;
\set ECHO none
RESET client_min_messages;
-- make test configuration using parser
CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
-- ts_parse
SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');
 tokid |         token
-------+-----------------------
     3 | That's
    12 |  
     3 | simple
    12 |  
     3 | parser
    12 |  
     3 | can't
    12 |  
     3 | parse
    12 |  
     3 | urls
    12 |  
     3 | like
    12 |  
     3 | http://some.url/here/
(15 rows)

SELECT to_tsvector('testcfg','That''s my first own parser');
                   to_tsvector
-------------------------------------------------
 'my':2 'own':4 'first':3 'parser':5 'that''s':1
(1 row)

SELECT to_tsquery('testcfg', 'star');
 to_tsquery
------------
 'star'
(1 row)

SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies',
                   to_tsquery('testcfg', 'stars'));
                           ts_headline
-----------------------------------------------------------------
 Supernovae <b>stars</b> are the brightest phenomena in galaxies
(1 row)

contrib/test_parser/sql/test_parser.sql (new file, 26 lines)

--
-- first, define the parser.  Turn off echoing so that expected file
-- does not depend on contents of this file.
--
SET client_min_messages = warning;
\set ECHO none
\i test_parser.sql
\set ECHO all
RESET client_min_messages;

-- make test configuration using parser

CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);

ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;

-- ts_parse

SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');

SELECT to_tsvector('testcfg','That''s my first own parser');

SELECT to_tsquery('testcfg', 'star');

SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies',
                   to_tsquery('testcfg', 'stars'));
contrib/test_parser/test_parser.c (new file, 130 lines)

/*-------------------------------------------------------------------------
 *
 * test_parser.c
 *	  Simple example of a text search parser
 *
 * Copyright (c) 2007, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/contrib/test_parser/test_parser.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "fmgr.h"

PG_MODULE_MAGIC;

/*
 * types
 */

/* self-defined type */
typedef struct
{
	char	   *buffer;			/* text to parse */
	int			len;			/* length of the text in buffer */
	int			pos;			/* position of the parser */
} ParserState;

/* copy-paste from wparser.h of tsearch2 */
typedef struct
{
	int			lexid;
	char	   *alias;
	char	   *descr;
} LexDescr;

/*
 * prototypes
 */
PG_FUNCTION_INFO_V1(testprs_start);
Datum		testprs_start(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(testprs_getlexeme);
Datum		testprs_getlexeme(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(testprs_end);
Datum		testprs_end(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(testprs_lextype);
Datum		testprs_lextype(PG_FUNCTION_ARGS);

/*
 * functions
 */
Datum
testprs_start(PG_FUNCTION_ARGS)
{
	ParserState *pst = (ParserState *) palloc0(sizeof(ParserState));

	pst->buffer = (char *) PG_GETARG_POINTER(0);
	pst->len = PG_GETARG_INT32(1);
	pst->pos = 0;

	PG_RETURN_POINTER(pst);
}

Datum
testprs_getlexeme(PG_FUNCTION_ARGS)
{
	ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
	char	  **t = (char **) PG_GETARG_POINTER(1);
	int		   *tlen = (int *) PG_GETARG_POINTER(2);
	int			type;

	*tlen = pst->pos;
	*t = pst->buffer + pst->pos;

	if ((pst->buffer)[pst->pos] == ' ')
	{
		/* blank type */
		type = 12;
		/* go to the next non-white-space character */
		while ((pst->buffer)[pst->pos] == ' ' &&
			   pst->pos < pst->len)
			(pst->pos)++;
	}
	else
	{
		/* word type */
		type = 3;
		/* go to the next white-space character */
		while ((pst->buffer)[pst->pos] != ' ' &&
			   pst->pos < pst->len)
			(pst->pos)++;
	}

	*tlen = pst->pos - *tlen;

	/* we are finished if (*tlen == 0) */
	if (*tlen == 0)
		type = 0;

	PG_RETURN_INT32(type);
}

Datum
testprs_end(PG_FUNCTION_ARGS)
{
	ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);

	pfree(pst);
	PG_RETURN_VOID();
}

Datum
testprs_lextype(PG_FUNCTION_ARGS)
{
	/*
	 * Remarks:
	 * - we have to return the blanks for headline reason
	 * - we use the same lexids like Teodor in the default
	 *   word parser; in this way we can reuse the headline
	 *   function of the default word parser.
	 */
	LexDescr   *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2 + 1));

	/* there are only two types in this parser */
	descr[0].lexid = 3;
	descr[0].alias = pstrdup("word");
	descr[0].descr = pstrdup("Word");
	descr[1].lexid = 12;
	descr[1].alias = pstrdup("blank");
	descr[1].descr = pstrdup("Space symbols");
	descr[2].lexid = 0;

	PG_RETURN_POINTER(descr);
}
contrib/test_parser/test_parser.sql.in (new file, 36 lines)

-- $PostgreSQL: pgsql/contrib/test_parser/test_parser.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $

-- Adjust this setting to control where the objects get created.
SET search_path = public;

BEGIN;

CREATE FUNCTION testprs_start(internal, int4)
        RETURNS internal
        AS 'MODULE_PATHNAME'
        LANGUAGE C STRICT;

CREATE FUNCTION testprs_getlexeme(internal, internal, internal)
        RETURNS internal
        AS 'MODULE_PATHNAME'
        LANGUAGE C STRICT;

CREATE FUNCTION testprs_end(internal)
        RETURNS void
        AS 'MODULE_PATHNAME'
        LANGUAGE C STRICT;

CREATE FUNCTION testprs_lextype(internal)
        RETURNS internal
        AS 'MODULE_PATHNAME'
        LANGUAGE C STRICT;

CREATE TEXT SEARCH PARSER testparser (
        START    = testprs_start,
        GETTOKEN = testprs_getlexeme,
        END      = testprs_end,
        HEADLINE = pg_catalog.prsd_headline,
        LEXTYPES = testprs_lextype
);

END;
contrib/test_parser/uninstall_test_parser.sql (new file, 11 lines)

SET search_path = public;

DROP TEXT SEARCH PARSER testparser;

DROP FUNCTION testprs_start(internal, int4);

DROP FUNCTION testprs_getlexeme(internal, internal, internal);

DROP FUNCTION testprs_end(internal);

DROP FUNCTION testprs_lextype(internal);