mirror of https://github.com/postgres/postgres
Replace the now-incompatible-with-core contrib/tsearch2 module with a
compatibility package. This supports importing dumps from past versions using tsearch2, and provides the old names and API for most functions that were changed. (rewrite(ARRAY[...]) is a glaring omission, though.) Pavel Stehule and Tom Lane
This commit is contained in:
parent
4ea3210a04
commit
90e3f2aca7
|
@ -1,29 +1,10 @@
|
|||
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.19 2007/06/26 22:05:03 tgl Exp $
|
||||
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.20 2007/11/13 21:02:28 tgl Exp $
|
||||
|
||||
MODULE_big = tsearch2
|
||||
OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
|
||||
dict_snowball.o dict_ispell.o dict_syn.o dict_thesaurus.o \
|
||||
wparser.o wparser_def.o \
|
||||
ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \
|
||||
tsvector_op.o rank.o ts_stat.o \
|
||||
query_util.o query_support.o query_rewrite.o query_gist.o \
|
||||
ts_locale.o ts_lexize.o ginidx.o
|
||||
|
||||
SUBDIRS = snowball ispell wordparser
|
||||
SUBDIROBJS = $(SUBDIRS:%=%/SUBSYS.o)
|
||||
|
||||
OBJS += $(SUBDIROBJS)
|
||||
|
||||
PG_CPPFLAGS = -I$(srcdir)/snowball -I$(srcdir)/ispell -I$(srcdir)/wordparser
|
||||
|
||||
DATA = stopword/english.stop stopword/russian.stop stopword/russian.stop.utf8 thesaurus
|
||||
DATA_built = tsearch2.sql uninstall_tsearch2.sql
|
||||
DOCS = README.tsearch2
|
||||
MODULES = tsearch2
|
||||
DATA_built = tsearch2.sql
|
||||
DATA = uninstall_tsearch2.sql
|
||||
REGRESS = tsearch2
|
||||
|
||||
SHLIB_LINK += $(filter -lm, $(LIBS))
|
||||
|
||||
|
||||
ifdef USE_PGXS
|
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
|
@ -34,23 +15,3 @@ top_builddir = ../..
|
|||
include $(top_builddir)/src/Makefile.global
|
||||
include $(top_srcdir)/contrib/contrib-global.mk
|
||||
endif
|
||||
|
||||
|
||||
$(SUBDIROBJS): $(SUBDIRS:%=%-recursive) ;
|
||||
|
||||
.PHONY: $(SUBDIRS:%=%-recursive)
|
||||
|
||||
$(SUBDIRS:%=%-recursive):
|
||||
$(MAKE) -C $(subst -recursive,,$@) SUBSYS.o
|
||||
|
||||
tsearch2.sql: tsearch.sql.in
|
||||
sed -e 's,MODULE_PATHNAME,$$libdir/$(MODULE_big),g' $< >$@
|
||||
|
||||
uninstall_tsearch2.sql: untsearch.sql.in
|
||||
cp $< $@
|
||||
|
||||
.PHONY: subclean
|
||||
clean: subclean
|
||||
|
||||
subclean:
|
||||
for dir in $(SUBDIRS); do $(MAKE) -C $$dir clean || exit; done
|
||||
|
|
|
@ -1,210 +0,0 @@
|
|||
Tsearch2 - full text search extension for PostgreSQL
|
||||
|
||||
[1]Online version of this document is available
|
||||
|
||||
Tsearch2 - is the full text engine, fully integrated into PostgreSQL
|
||||
RDBMS.
|
||||
|
||||
Main features
|
||||
|
||||
* Full online update
|
||||
* Supports multiple table driven configurations
|
||||
* flexible and rich linguistic support (dictionaries, stop words),
|
||||
thesaurus
|
||||
* full multibyte (UTF-8) support
|
||||
* Sophisticated ranking functions with support of proximity and
|
||||
structure information (rank, rank_cd)
|
||||
* Index support (GiST and Gin) with concurrency and recovery support
|
||||
* Rich query language with query rewriting support
|
||||
* Headline support (text fragments with highlighted search terms)
|
||||
* Ability to plug-in custom dictionaries and parsers
|
||||
* Template generator for tsearch2 dictionaries with [2]snowball
|
||||
stemmer support
|
||||
* It is mature (5 years of development)
|
||||
|
||||
Tsearch2, in a nutshell, provides FTS operator (contains) for the new
|
||||
data types, representing document (tsvector) and query (tsquery).
|
||||
Table driven configuration allows creation of custom searches using
|
||||
standard SQL commands.
|
||||
|
||||
tsvector is a searchable data type, representing document. It is a set
|
||||
of unique words along with their positional information in the
|
||||
document, organized in a special structure optimized for fast access
|
||||
and lookup. Each entry could be labelled to reflect its importance in
|
||||
document.
|
||||
|
||||
tsquery is a data type for textual queries with support of boolean
|
||||
operators. It consists of lexemes (optionally labelled) with boolean
|
||||
operators between.
|
||||
|
||||
Table driven configuration allows to specify:
|
||||
* parser, which is used to break a document into lexemes
|
||||
* what lexemes to index and the way they are processed
|
||||
* dictionaries to be used along with stop words recognition.
|
||||
|
||||
OpenFTS vs Tsearch2
|
||||
|
||||
[3]OpenFTS is a middleware between application and database. OpenFTS
|
||||
uses tsearch2 as a storage and database engine as a query executor
|
||||
(searching). Everything else, i.e. parsing of documents, query
|
||||
processing, linguistics, is carried out on the client side. That's why OpenFTS
|
||||
has its own configuration table (fts_conf) and works with its own set
|
||||
of dictionaries. OpenFTS is more flexible, because it could be used in
|
||||
multi-server architecture with separate machines for repository of
|
||||
documents (documents could be stored in filesystem), database and
|
||||
query engine.
|
||||
|
||||
See [4]Documentation Roadmap for links to documentation.
|
||||
|
||||
Authors
|
||||
|
||||
* Oleg Bartunov <oleg@sai.msu.su>, Moscow, Moscow University, Russia
|
||||
* Teodor Sigaev <teodor@sigaev.ru>, Moscow,Moscow University,Russia
|
||||
|
||||
Contributors
|
||||
|
||||
* Robert John Shepherd and Andrew J. Kopciuch submitted
|
||||
"Introduction to tsearch" (Robert - tsearch v1, Andrew - tsearch
|
||||
v2)
|
||||
* Brandon Craig Rhodes wrote "Tsearch2 Guide" and "Tsearch2
|
||||
Reference" and proposed new naming convention for tsearch V2
|
||||
|
||||
Sponsors
|
||||
|
||||
* ABC Startsiden - compound words support
|
||||
* University of Mannheim for UTF-8 support (in 8.2)
|
||||
* jfg:networks ([5]http://www.jfg-networks.com/) for Gin - Generalized
|
||||
Inverted index (in 8.2)
|
||||
* Georgia Public Library Service and LibLime, Inc. for Thesaurus
|
||||
dictionary
|
||||
* PostGIS community - GiST Concurrency and Recovery
|
||||
|
||||
The authors are grateful to the Russian Foundation for Basic Research
|
||||
and Delta-Soft Ltd., Moscow, Russia for support.
|
||||
|
||||
Limitations
|
||||
|
||||
* Length of lexeme < 2K
|
||||
* Length of tsvector (lexemes + positions) < 1Mb
|
||||
* The number of lexemes < 4^32
|
||||
* 0< Positional information < 16383
|
||||
* No more than 256 positions per lexeme
|
||||
* The number of nodes ( lexemes + operations) in tsquery < 32768
|
||||
|
||||
References
|
||||
|
||||
* GiST development site -
|
||||
[6]http://www.sai.msu.su/~megera/postgres/gist
|
||||
* GiN development - [7]http://www.sigaev.ru/gin/
|
||||
* OpenFTS home page - [8]http://openfts.sourceforge.net/
|
||||
* Mailing list -
|
||||
[9]http://sourceforge.net/mailarchive/forum.php?forum=openfts-gene
|
||||
ral
|
||||
|
||||
Documentation Roadmap
|
||||
|
||||
* Several docs are available from docs/ subdirectory
|
||||
+ "Tsearch V2 Introduction" by Andrew Kopciuch
|
||||
+ "Tsearch2 Guide" by Brandon Rhodes
|
||||
+ "Tsearch2 Reference" by Brandon Rhodes
|
||||
* Readme.gendict in gendict/ subdirectory
|
||||
+ Also, check [10]Gendict tutorial
|
||||
* Check [11]tsearch2 Wiki pages for various documentation
|
||||
|
||||
Support
|
||||
|
||||
Authors urgently recommend people to use [12]openfts-general or
|
||||
[13]pgsql-general mailing lists for questions and discussions.
|
||||
|
||||
Development History
|
||||
|
||||
Latest news
|
||||
|
||||
To the PostgreSQL 8.2 release we added:
|
||||
* multibyte (UTF-8) support
|
||||
* Thesaurus dictionary
|
||||
* Query rewriting
|
||||
* rank_cd relevance function now supports different weights of
|
||||
lexemes
|
||||
* GiN support adds scalability of tsearch2
|
||||
|
||||
Pre-tsearch era
|
||||
Development of OpenFTS began in 2000 after realizing that we
|
||||
need a search engine optimized for online updates with access
|
||||
to metadata from the database. This is essential for online
|
||||
news agencies, web portals, digital libraries, etc. Most search
|
||||
engines available utilize an inverted index which is very fast
|
||||
for searching but very slow for online updates. Incremental
|
||||
updates of an inverted index is a complex engineering task
|
||||
while we needed something light, free and with the ability to
|
||||
access metadata from the database. The last requirement was
|
||||
very important because in a real life application search engine
|
||||
should always consult metadata ( topic, permissions, date
|
||||
range, version, etc.). We extensively use PostgreSQL as a
|
||||
database backend and have no intention to move from it, so the
|
||||
problem was to find a data structure and a fast way to access
|
||||
it. PostgreSQL has rather unique data type for storing sets
|
||||
(think about words) - arrays, but lacks index access to them.
|
||||
During our research we found a paper of Joseph Hellerstein, who
|
||||
introduced an interesting data structure suitable for sets -
|
||||
RD-tree (Russian Doll tree). Further research led us to the
|
||||
idea to use GiST for implementing RD-tree, but at that time the
|
||||
GiST code was untouched for a long time and contained several
|
||||
bugs. After work on improving GiST for version 7.0.3 of
|
||||
PostgreSQL was done, we were able to implement RD-Tree and use
|
||||
it for index access to arrays of integers. This implementation
|
||||
was ideally suited for small arrays and eliminated complex
|
||||
joins, but was practically useless for indexing large arrays.
|
||||
The next improvement came from an idea to represent a document
|
||||
by a single bit-signature, a so-called superimposed signature
|
||||
(see "Index Structures for Databases Containing Data Items with
|
||||
Set-valued Attributes", 1997, Sven Helmer for details). We
|
||||
developed the contrib/intarray module and used it for full
|
||||
text indexing.
|
||||
|
||||
tsearch v1
|
||||
It was inconvenient to use integer id's instead of words, so we
|
||||
introduced a new data type called 'txtidx' - a searchable data
|
||||
type (textual) with indexed access. This was a first step of
|
||||
our work on an implementation of a built-in PostgreSQL full
|
||||
text search engine. Even though tsearch v1 had many features of
|
||||
a search engine it lacked configuration support and relevance
|
||||
ranking. People were encouraged to use OpenFTS, which provided
|
||||
relevance ranking based on positional information and flexible
|
||||
configuration. OpenFTS v.0.34 is the last version based on
|
||||
tsearch v1.
|
||||
|
||||
tsearch V2
|
||||
People recognized tsearch as a powerful tool for full text
|
||||
searching and insisted on adding ranking support, better
|
||||
configurability, etc. We already thought about moving most of
|
||||
the features of OpenFTS to tsearch, and in the early 2003 we
|
||||
decided to work on a new version of tsearch. We abandoned
|
||||
auxiliary index tables which were used by OpenFTS to store
|
||||
positional information and modified the txtidx type to store
|
||||
them internally. We added table-driven configuration, support
|
||||
of ispell dictionaries, snowball stemmers and the ability to
|
||||
specify which types of lexemes to index. Now, it's possible to
|
||||
generate headlines of documents with highlighted search terms.
|
||||
These changes make tsearch more user friendly and turn it into
|
||||
a really powerful full text search engine. Brandon Rhodes
|
||||
proposed to rename tsearch functions for consistency and we
|
||||
renamed txtidx type to tsvector and other things as well. To
|
||||
allow users of tsearch v1 smooth upgrade, we named the module
|
||||
as tsearch2. Since version 0.35 OpenFTS uses tsearch2.
|
||||
|
||||
References
|
||||
|
||||
1. http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/Tsearch_V2_Readme.html
|
||||
2. http://snowball.tartarus.org/
|
||||
3. http://openfts.sourceforge.net/
|
||||
4. file://localhost/u/megera/WWW/postgres/gist/tsearch/V2/docs/Tsearch_V2_Readme82.html#dm
|
||||
5. http://www.jfg-networks.com/
|
||||
6. http://www.sai.msu.su/~megera/postgres/gist
|
||||
7. http://www.sigaev.ru/gin/
|
||||
8. http://openfts.sourceforge.net/
|
||||
9. http://sourceforge.net/mailarchive/forum.php?forum=openfts-general
|
||||
10. http://www.sai.msu.su/~megera/wiki/Gendict
|
||||
11. http://www.sai.msu.su/~megera/wiki/Tsearch2
|
||||
12. http://sourceforge.net/mailarchive/forum.php?forum=openfts-general
|
||||
13. http://archives.postgresql.org/pgsql-general/
|
|
@ -1,188 +0,0 @@
|
|||
#include "postgres.h"
|
||||
|
||||
#include "fmgr.h"
|
||||
#include "catalog/pg_namespace.h"
|
||||
#include "catalog/pg_proc.h"
|
||||
#include "utils/syscache.h"
|
||||
#include "miscadmin.h"
|
||||
|
||||
#include "ts_cfg.h"
|
||||
#include "dict.h"
|
||||
#include "wparser.h"
|
||||
#include "snmap.h"
|
||||
#include "common.h"
|
||||
#include "tsvector.h"
|
||||
|
||||
|
||||
|
||||
#include "common.h"
|
||||
#include "wparser.h"
|
||||
#include "ts_cfg.h"
|
||||
#include "dict.h"
|
||||
|
||||
|
||||
Oid TSNSP_FunctionOid = InvalidOid;
|
||||
|
||||
|
||||
/*
 * char2text
 *		Build a text datum from a NUL-terminated C string.
 *
 * The length is measured with strlen() and the copy itself is delegated
 * to charl2text(), so the terminating NUL is not part of the result.
 */
text *
char2text(char *in)
{
	int			inlen = strlen(in);

	return charl2text(in, inlen);
}
|
||||
|
||||
text *
|
||||
charl2text(char *in, int len)
|
||||
{
|
||||
text *out = (text *) palloc(len + VARHDRSZ);
|
||||
|
||||
memcpy(VARDATA(out), in, len);
|
||||
SET_VARSIZE(out, len + VARHDRSZ);
|
||||
return out;
|
||||
}
|
||||
|
||||
char
|
||||
*
|
||||
text2char(text *in)
|
||||
{
|
||||
char *out = palloc(VARSIZE(in));
|
||||
|
||||
memcpy(out, VARDATA(in), VARSIZE(in) - VARHDRSZ);
|
||||
out[VARSIZE(in) - VARHDRSZ] = '\0';
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
 * pnstrdup
 *		Copy the first "len" bytes of "in" into a freshly palloc'd buffer
 *		and NUL-terminate the copy.
 */
char *
pnstrdup(char *in, int len)
{
	char	   *copy;

	copy = palloc(len + 1);
	copy[len] = '\0';
	memcpy(copy, in, len);

	return copy;
}
|
||||
|
||||
text
|
||||
*
|
||||
ptextdup(text *in)
|
||||
{
|
||||
text *out = (text *) palloc(VARSIZE(in));
|
||||
|
||||
memcpy(out, in, VARSIZE(in));
|
||||
return out;
|
||||
}
|
||||
|
||||
text
|
||||
*
|
||||
mtextdup(text *in)
|
||||
{
|
||||
text *out = (text *) malloc(VARSIZE(in));
|
||||
|
||||
if (!out)
|
||||
ts_error(ERROR, "No memory");
|
||||
memcpy(out, in, VARSIZE(in));
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
 * ts_error
 *		Format a printf-style message and report it through elog().
 *
 * state:	elog level the message is reported with (callers in this
 *			module pass ERROR).
 * format:	printf-style format string, followed by its arguments.
 *
 * reset_cfg(), reset_dict() and reset_prs() are called before the message
 * is built, so the module-level caches are dropped before the report.
 */
void
ts_error(int state, const char *format,...)
{
	va_list		args;
	int			tlen = 128,
				len;
	char	   *buf;

	reset_cfg();
	reset_dict();
	reset_prs();

	buf = palloc(tlen);

	/* First attempt: hand vsnprintf the whole buffer, terminator included. */
	va_start(args, format);
	len = vsnprintf(buf, tlen, format, args);
	va_end(args);

	if (len >= tlen)
	{
		/*
		 * The message did not fit.  "len" is the length that is needed, so
		 * grow the buffer to len + 1 and format again.  A va_list may not
		 * be traversed twice, hence the fresh va_start() for the retry.
		 */
		tlen = len + 1;
		buf = repalloc(buf, tlen);

		va_start(args, format);
		vsnprintf(buf, tlen, format, args);
		va_end(args);
	}

	/* ?? internal error ?? */
	elog(state, "%s", buf);
	pfree(buf);
}
|
||||
|
||||
int
|
||||
text_cmp(text *a, text *b)
|
||||
{
|
||||
if (VARSIZE(a) == VARSIZE(b))
|
||||
return strncmp(VARDATA(a), VARDATA(b), VARSIZE(a) - VARHDRSZ);
|
||||
return (int) VARSIZE(a) - (int) VARSIZE(b);
|
||||
|
||||
}
|
||||
|
||||
char *
|
||||
get_namespace(Oid funcoid)
|
||||
{
|
||||
HeapTuple tuple;
|
||||
Form_pg_proc proc;
|
||||
Form_pg_namespace nsp;
|
||||
Oid nspoid;
|
||||
char *txt;
|
||||
|
||||
tuple = SearchSysCache(PROCOID, ObjectIdGetDatum(funcoid), 0, 0, 0);
|
||||
if (!HeapTupleIsValid(tuple))
|
||||
elog(ERROR, "cache lookup failed for proc oid %u", funcoid);
|
||||
proc = (Form_pg_proc) GETSTRUCT(tuple);
|
||||
nspoid = proc->pronamespace;
|
||||
ReleaseSysCache(tuple);
|
||||
|
||||
tuple = SearchSysCache(NAMESPACEOID, ObjectIdGetDatum(nspoid), 0, 0, 0);
|
||||
if (!HeapTupleIsValid(tuple))
|
||||
elog(ERROR, "cache lookup failed for namespace oid %u", nspoid);
|
||||
nsp = (Form_pg_namespace) GETSTRUCT(tuple);
|
||||
txt = pstrdup(NameStr((nsp->nspname)));
|
||||
ReleaseSysCache(tuple);
|
||||
|
||||
return txt;
|
||||
}
|
||||
|
||||
Oid
|
||||
get_oidnamespace(Oid funcoid)
|
||||
{
|
||||
HeapTuple tuple;
|
||||
Form_pg_proc proc;
|
||||
Oid nspoid;
|
||||
|
||||
tuple = SearchSysCache(PROCOID, ObjectIdGetDatum(funcoid), 0, 0, 0);
|
||||
if (!HeapTupleIsValid(tuple))
|
||||
elog(ERROR, "cache lookup failed for proc oid %u", funcoid);
|
||||
proc = (Form_pg_proc) GETSTRUCT(tuple);
|
||||
nspoid = proc->pronamespace;
|
||||
ReleaseSysCache(tuple);
|
||||
|
||||
return nspoid;
|
||||
}
|
||||
|
||||
/* if path is relative, take it as relative to share dir */
|
||||
char *
|
||||
to_absfilename(char *filename)
|
||||
{
|
||||
if (!is_absolute_path(filename))
|
||||
{
|
||||
char sharepath[MAXPGPATH];
|
||||
char *absfn;
|
||||
|
||||
#ifdef WIN32
|
||||
char delim = '\\';
|
||||
#else
|
||||
char delim = '/';
|
||||
#endif
|
||||
get_share_path(my_exec_path, sharepath);
|
||||
absfn = palloc(strlen(sharepath) + strlen(filename) + 2);
|
||||
sprintf(absfn, "%s%c%s", sharepath, delim, filename);
|
||||
filename = absfn;
|
||||
}
|
||||
|
||||
return filename;
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
#ifndef __TS_COMMON_H__
#define __TS_COMMON_H__

#include "postgres.h"
#include "fmgr.h"
#include "utils/array.h"

/*
 * Conversions between C strings and text datums.  The results of
 * char2text, charl2text, text2char, pnstrdup and ptextdup are palloc'd;
 * mtextdup returns malloc'd memory.
 */
text	   *char2text(char *in);
text	   *charl2text(char *in, int len);
char	   *text2char(text *in);
char	   *pnstrdup(char *in, int len);
text	   *ptextdup(text *in);
text	   *mtextdup(text *in);

/* three-way comparison of two text datums (size first, then content) */
int			text_cmp(text *a, text *b);

/* make a relative file name absolute, relative to the share directory */
char	   *to_absfilename(char *filename);

/* address of the text value stored INTALIGN(VARSIZE(x)) bytes after x */
#define NEXTVAL(x) ( (text*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
/* total number of elements of array x, over all of its dimensions */
#define ARRNELEMS(x) ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x))

/* format a message and report it through elog() at level "state" */
void		ts_error(int state, const char *format,...);

extern Oid	TSNSP_FunctionOid;	/* oid of called function, needed only for
								 * determ namespace, no more */

/* namespace name / namespace OID of the function with the given OID */
char	   *get_namespace(Oid funcoid);
Oid			get_oidnamespace(Oid funcoid);

/*
 * Remember the OID of the function being executed in TSNSP_FunctionOid.
 * Must be used where "fcinfo" is in scope; it does nothing when flinfo is
 * missing or carries InvalidOid.
 */
#define SET_FUNCOID()	do {										\
	if ( fcinfo->flinfo && fcinfo->flinfo->fn_oid != InvalidOid )	\
		TSNSP_FunctionOid = fcinfo->flinfo->fn_oid;					\
} while(0)

#endif
|
|
@ -1,105 +0,0 @@
|
|||
/* Both POSIX and CRC32 checksums */
|
||||
|
||||
/* $PostgreSQL: pgsql/contrib/tsearch2/crc32.c,v 1.4 2007/07/15 22:40:28 tgl Exp $ */
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "crc32.h"
|
||||
|
||||
/*
|
||||
* This code implements the AUTODIN II polynomial
|
||||
* The variable corresponding to the macro argument "crc" should
|
||||
* be an unsigned long.
|
||||
* Original code by Spencer Garrett <srg@quick.com>
|
||||
*/
|
||||
|
||||
#define _CRC32_(crc, ch) ((crc) = ((crc) >> 8) ^ crc32tab[((crc) ^ (ch)) & 0xff])
|
||||
|
||||
/* generated using the AUTODIN II polynomial
|
||||
* x^32 + x^26 + x^23 + x^22 + x^16 +
|
||||
* x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + 1
|
||||
*/
|
||||
|
||||
static const unsigned int crc32tab[256] = {
|
||||
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
|
||||
0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
|
||||
0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
|
||||
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
|
||||
0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
|
||||
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
|
||||
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
|
||||
0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
|
||||
0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
|
||||
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
|
||||
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
|
||||
0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
|
||||
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
|
||||
0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
|
||||
0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
|
||||
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
|
||||
0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
|
||||
0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
|
||||
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
|
||||
0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
|
||||
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
|
||||
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
|
||||
0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
|
||||
0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
|
||||
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
|
||||
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
|
||||
0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
|
||||
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
|
||||
0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
|
||||
0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
|
||||
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
|
||||
0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
|
||||
0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
|
||||
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
|
||||
0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
|
||||
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
|
||||
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
|
||||
0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
|
||||
0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
|
||||
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
|
||||
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
|
||||
0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
|
||||
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
|
||||
0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
|
||||
0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
|
||||
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
|
||||
0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
|
||||
0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
|
||||
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
|
||||
0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
|
||||
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
|
||||
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
|
||||
0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
|
||||
0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
|
||||
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
|
||||
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
|
||||
0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
|
||||
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
|
||||
0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
|
||||
0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
|
||||
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
|
||||
0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
|
||||
0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
|
||||
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
|
||||
};
|
||||
|
||||
/*
 * crc32_sz
 *		Compute the CRC-32 of the first "size" bytes of "buf".
 *
 * The register starts as all ones, each byte is folded in with the
 * table-driven _CRC32_ step, and the bitwise complement of the register
 * is returned.  A size of zero or less processes no bytes at all, so a
 * negative size can no longer make the loop run past the buffer.
 */
unsigned int
crc32_sz(char *buf, int size)
{
	unsigned int crc = ~((unsigned int) 0);
	const unsigned char *p = (const unsigned char *) buf;
	int			i;

	for (i = 0; i < size; i++)
		_CRC32_(crc, p[i]);

	return ~crc;
}
|
|
@ -1,12 +0,0 @@
|
|||
#ifndef _CRC32_H
#define _CRC32_H

/* $PostgreSQL: pgsql/contrib/tsearch2/crc32.h,v 1.2 2006/03/11 04:38:30 momjian Exp $ */

/* The crc32() macro below expands to a strlen() call. */
#include <string.h>

/* Returns crc32 of data block */
extern unsigned int crc32_sz(char *buf, int size);

/* Returns crc32 of null-terminated string */
#define crc32(buf) crc32_sz((buf),strlen(buf))

#endif
|
|
@ -1,349 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.c,v 1.13 2006/10/04 00:29:46 momjian Exp $ */
|
||||
|
||||
/*
|
||||
* interface functions to dictionary
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "catalog/pg_type.h"
|
||||
#include "executor/spi.h"
|
||||
#include "fmgr.h"
|
||||
#include "utils/array.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
#include "dict.h"
|
||||
#include "common.h"
|
||||
#include "snmap.h"
|
||||
|
||||
/*********top interface**********/
|
||||
|
||||
void
|
||||
init_dict(Oid id, DictInfo * dict)
|
||||
{
|
||||
Oid arg[1];
|
||||
bool isnull;
|
||||
Datum pars[1];
|
||||
int stat;
|
||||
void *plan;
|
||||
char buf[1024];
|
||||
char *nsp = get_namespace(TSNSP_FunctionOid);
|
||||
|
||||
arg[0] = OIDOID;
|
||||
pars[0] = ObjectIdGetDatum(id);
|
||||
|
||||
memset(dict, 0, sizeof(DictInfo));
|
||||
SPI_connect();
|
||||
sprintf(buf, "select dict_init, dict_initoption, dict_lexize from %s.pg_ts_dict where oid = $1", nsp);
|
||||
pfree(nsp);
|
||||
plan = SPI_prepare(buf, 1, arg);
|
||||
if (!plan)
|
||||
ts_error(ERROR, "SPI_prepare() failed");
|
||||
|
||||
stat = SPI_execp(plan, pars, " ", 1);
|
||||
if (stat < 0)
|
||||
ts_error(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
{
|
||||
Datum opt;
|
||||
Oid oid = InvalidOid;
|
||||
|
||||
/* setup dictlexize method */
|
||||
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
|
||||
if (isnull || oid == InvalidOid)
|
||||
ts_error(ERROR, "Null dict_lexize for dictonary %d", id);
|
||||
fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext);
|
||||
|
||||
/* setup and call dictinit method, optinally */
|
||||
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
if (!(isnull || oid == InvalidOid))
|
||||
{
|
||||
opt = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull);
|
||||
dict->dictionary = (void *) DatumGetPointer(OidFunctionCall1(oid, opt));
|
||||
}
|
||||
dict->dict_id = id;
|
||||
}
|
||||
else
|
||||
ts_error(ERROR, "No dictionary with id %d", id);
|
||||
SPI_freeplan(plan);
|
||||
SPI_finish();
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
DictInfo *last_dict;
|
||||
int len;
|
||||
int reallen;
|
||||
DictInfo *list;
|
||||
SNMap name2id_map;
|
||||
} DictList;
|
||||
|
||||
static DictList DList = {NULL, 0, 0, NULL, {0, 0, NULL}};
|
||||
|
||||
void
|
||||
reset_dict(void)
|
||||
{
|
||||
freeSNMap(&(DList.name2id_map));
|
||||
/* XXX need to free DList.list[*].dictionary */
|
||||
if (DList.list)
|
||||
free(DList.list);
|
||||
memset(&DList, 0, sizeof(DictList));
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
comparedict(const void *a, const void *b)
|
||||
{
|
||||
if (((DictInfo *) a)->dict_id == ((DictInfo *) b)->dict_id)
|
||||
return 0;
|
||||
return (((DictInfo *) a)->dict_id < ((DictInfo *) b)->dict_id) ? -1 : 1;
|
||||
}
|
||||
|
||||
static void
|
||||
insertdict(Oid id)
|
||||
{
|
||||
DictInfo newdict;
|
||||
|
||||
if (DList.len == DList.reallen)
|
||||
{
|
||||
DictInfo *tmp;
|
||||
int reallen = (DList.reallen) ? 2 * DList.reallen : 16;
|
||||
|
||||
tmp = (DictInfo *) realloc(DList.list, sizeof(DictInfo) * reallen);
|
||||
if (!tmp)
|
||||
ts_error(ERROR, "No memory");
|
||||
DList.reallen = reallen;
|
||||
DList.list = tmp;
|
||||
}
|
||||
init_dict(id, &newdict);
|
||||
|
||||
DList.list[DList.len] = newdict;
|
||||
DList.len++;
|
||||
|
||||
qsort(DList.list, DList.len, sizeof(DictInfo), comparedict);
|
||||
}
|
||||
|
||||
DictInfo *
|
||||
finddict(Oid id)
|
||||
{
|
||||
/* last used dict */
|
||||
if (DList.last_dict && DList.last_dict->dict_id == id)
|
||||
return DList.last_dict;
|
||||
|
||||
|
||||
/* already used dict */
|
||||
if (DList.len != 0)
|
||||
{
|
||||
DictInfo key;
|
||||
|
||||
key.dict_id = id;
|
||||
DList.last_dict = bsearch(&key, DList.list, DList.len, sizeof(DictInfo), comparedict);
|
||||
if (DList.last_dict != NULL)
|
||||
return DList.last_dict;
|
||||
}
|
||||
|
||||
/* insert new dictionary */
|
||||
insertdict(id);
|
||||
return finddict(id); /* qsort changed order!! */ ;
|
||||
}
|
||||
|
||||
Oid
|
||||
name2id_dict(text *name)
|
||||
{
|
||||
Oid arg[1];
|
||||
bool isnull;
|
||||
Datum pars[1];
|
||||
int stat;
|
||||
Oid id = findSNMap_t(&(DList.name2id_map), name);
|
||||
void *plan;
|
||||
char buf[1024],
|
||||
*nsp;
|
||||
|
||||
arg[0] = TEXTOID;
|
||||
pars[0] = PointerGetDatum(name);
|
||||
|
||||
if (id)
|
||||
return id;
|
||||
|
||||
nsp = get_namespace(TSNSP_FunctionOid);
|
||||
SPI_connect();
|
||||
sprintf(buf, "select oid from %s.pg_ts_dict where dict_name = $1", nsp);
|
||||
pfree(nsp);
|
||||
plan = SPI_prepare(buf, 1, arg);
|
||||
if (!plan)
|
||||
ts_error(ERROR, "SPI_prepare() failed");
|
||||
|
||||
stat = SPI_execp(plan, pars, " ", 1);
|
||||
if (stat < 0)
|
||||
ts_error(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
else
|
||||
ts_error(ERROR, "No dictionary with name '%s'", text2char(name));
|
||||
SPI_freeplan(plan);
|
||||
SPI_finish();
|
||||
addSNMap_t(&(DList.name2id_map), name, id);
|
||||
return id;
|
||||
}
|
||||
|
||||
|
||||
/******sql-level interface******/
|
||||
PG_FUNCTION_INFO_V1(lexize);
|
||||
Datum lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_P(1);
|
||||
DictInfo *dict;
|
||||
TSLexeme *res,
|
||||
*ptr;
|
||||
Datum *da;
|
||||
ArrayType *a;
|
||||
DictSubState dstate = {false, false, NULL};
|
||||
|
||||
SET_FUNCOID();
|
||||
dict = finddict(PG_GETARG_OID(0));
|
||||
|
||||
ptr = res = (TSLexeme *) DatumGetPointer(
|
||||
FunctionCall4(&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(VARDATA(in)),
|
||||
Int32GetDatum(VARSIZE(in) - VARHDRSZ),
|
||||
PointerGetDatum(&dstate)
|
||||
)
|
||||
);
|
||||
|
||||
if (dstate.getnext)
|
||||
{
|
||||
dstate.isend = true;
|
||||
ptr = res = (TSLexeme *) DatumGetPointer(
|
||||
FunctionCall4(&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(VARDATA(in)),
|
||||
Int32GetDatum(VARSIZE(in) - VARHDRSZ),
|
||||
PointerGetDatum(&dstate)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(in, 1);
|
||||
if (!res)
|
||||
{
|
||||
if (PG_NARGS() > 2)
|
||||
PG_RETURN_POINTER(NULL);
|
||||
else
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
while (ptr->lexeme)
|
||||
ptr++;
|
||||
da = (Datum *) palloc(sizeof(Datum) * (ptr - res + 1));
|
||||
ptr = res;
|
||||
while (ptr->lexeme)
|
||||
{
|
||||
da[ptr - res] = PointerGetDatum(char2text(ptr->lexeme));
|
||||
ptr++;
|
||||
}
|
||||
|
||||
a = construct_array(
|
||||
da,
|
||||
ptr - res,
|
||||
TEXTOID,
|
||||
-1,
|
||||
false,
|
||||
'i'
|
||||
);
|
||||
|
||||
ptr = res;
|
||||
while (ptr->lexeme)
|
||||
{
|
||||
pfree(DatumGetPointer(da[ptr - res]));
|
||||
pfree(ptr->lexeme);
|
||||
ptr++;
|
||||
}
|
||||
pfree(res);
|
||||
pfree(da);
|
||||
|
||||
PG_RETURN_POINTER(a);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(lexize_byname);
|
||||
Datum lexize_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
lexize_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *dictname = PG_GETARG_TEXT_P(0);
|
||||
Datum res;
|
||||
|
||||
SET_FUNCOID();
|
||||
|
||||
res = DirectFunctionCall3(
|
||||
lexize,
|
||||
ObjectIdGetDatum(name2id_dict(dictname)),
|
||||
PG_GETARG_DATUM(1),
|
||||
(Datum) 0
|
||||
);
|
||||
PG_FREE_IF_COPY(dictname, 0);
|
||||
if (res)
|
||||
PG_RETURN_DATUM(res);
|
||||
else
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
static Oid currect_dictionary_id = 0;
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_curdict);
|
||||
Datum set_curdict(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
set_curdict(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SET_FUNCOID();
|
||||
finddict(PG_GETARG_OID(0));
|
||||
currect_dictionary_id = PG_GETARG_OID(0);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_curdict_byname);
|
||||
Datum set_curdict_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
set_curdict_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *dictname = PG_GETARG_TEXT_P(0);
|
||||
|
||||
SET_FUNCOID();
|
||||
DirectFunctionCall1(
|
||||
set_curdict,
|
||||
ObjectIdGetDatum(name2id_dict(dictname))
|
||||
);
|
||||
PG_FREE_IF_COPY(dictname, 0);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(lexize_bycurrent);
|
||||
Datum lexize_bycurrent(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
lexize_bycurrent(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Datum res;
|
||||
|
||||
SET_FUNCOID();
|
||||
if (currect_dictionary_id == 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("no currect dictionary"),
|
||||
errhint("Execute select set_curdict().")));
|
||||
|
||||
res = DirectFunctionCall3(
|
||||
lexize,
|
||||
ObjectIdGetDatum(currect_dictionary_id),
|
||||
PG_GETARG_DATUM(0),
|
||||
(Datum) 0
|
||||
);
|
||||
if (res)
|
||||
PG_RETURN_DATUM(res);
|
||||
else
|
||||
PG_RETURN_NULL();
|
||||
}
|
|
@ -1,114 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.8 2006/10/04 00:29:46 momjian Exp $ */
|
||||
|
||||
#ifndef __DICT_H__
|
||||
#define __DICT_H__
|
||||
#include "postgres.h"
|
||||
#include "fmgr.h"
|
||||
#include "ts_cfg.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int len;
|
||||
char **stop;
|
||||
char *(*wordop) (char *);
|
||||
} StopList;
|
||||
|
||||
void sortstoplist(StopList * s);
|
||||
void freestoplist(StopList * s);
|
||||
void readstoplist(text *in, StopList * s);
|
||||
bool searchstoplist(StopList * s, char *key);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
Oid dict_id;
|
||||
FmgrInfo lexize_info;
|
||||
void *dictionary;
|
||||
} DictInfo;
|
||||
|
||||
void init_dict(Oid id, DictInfo * dict);
|
||||
DictInfo *finddict(Oid id);
|
||||
Oid name2id_dict(text *name);
|
||||
void reset_dict(void);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
bool isend; /* in: marks for lexize_info about text end is
|
||||
* reached */
|
||||
bool getnext; /* out: dict wants next lexeme */
|
||||
void *private; /* internal dict state between calls with
|
||||
* getnext == true */
|
||||
} DictSubState;
|
||||
|
||||
/* simple parser of cfg string */
|
||||
typedef struct
|
||||
{
|
||||
char *key;
|
||||
char *value;
|
||||
} Map;
|
||||
|
||||
void parse_cfgdict(text *in, Map ** m);
|
||||
|
||||
/* return struct for any lexize function */
|
||||
typedef struct
|
||||
{
|
||||
/*
|
||||
* number of variant of split word , for example Word 'fotballklubber'
|
||||
* (norwegian) has two varian to split: ( fotball, klubb ) and ( fot,
|
||||
* ball, klubb ). So, dictionary should return: nvariant lexeme 1
|
||||
* fotball 1 klubb 2 fot 2 ball 2 klubb
|
||||
*/
|
||||
uint16 nvariant;
|
||||
|
||||
uint16 flags;
|
||||
|
||||
/* C-string */
|
||||
char *lexeme;
|
||||
} TSLexeme;
|
||||
|
||||
#define TSL_ADDPOS 0x01
|
||||
|
||||
|
||||
/*
|
||||
* Lexize subsystem
|
||||
*/
|
||||
|
||||
typedef struct ParsedLex
|
||||
{
|
||||
int type;
|
||||
char *lemm;
|
||||
int lenlemm;
|
||||
bool resfollow;
|
||||
struct ParsedLex *next;
|
||||
} ParsedLex;
|
||||
|
||||
typedef struct ListParsedLex
|
||||
{
|
||||
ParsedLex *head;
|
||||
ParsedLex *tail;
|
||||
} ListParsedLex;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
TSCfgInfo *cfg;
|
||||
Oid curDictId;
|
||||
int posDict;
|
||||
DictSubState dictState;
|
||||
ParsedLex *curSub;
|
||||
ListParsedLex towork; /* current list to work */
|
||||
ListParsedLex waste; /* list of lexemes that already lexized */
|
||||
|
||||
/*
|
||||
* fields to store last variant to lexize (basically, thesaurus or similar
|
||||
* to, which wants several lexemes
|
||||
*/
|
||||
|
||||
ParsedLex *lastRes;
|
||||
TSLexeme *tmpRes;
|
||||
} LexizeData;
|
||||
|
||||
|
||||
void LexizeInit(LexizeData * ld, TSCfgInfo * cfg);
|
||||
void LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm);
|
||||
TSLexeme *LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem);
|
||||
|
||||
#endif
|
|
@ -1,70 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/dict_ex.c,v 1.9 2006/11/20 14:03:30 teodor Exp $ */
|
||||
|
||||
/*
|
||||
* example of dictionary
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "dict.h"
|
||||
#include "common.h"
|
||||
#include "ts_locale.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
StopList stoplist;
|
||||
} DictExample;
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(dex_init);
|
||||
Datum dex_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(dex_lexize);
|
||||
Datum dex_lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
dex_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictExample *d = (DictExample *) malloc(sizeof(DictExample));
|
||||
|
||||
if (!d)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
memset(d, 0, sizeof(DictExample));
|
||||
|
||||
d->stoplist.wordop = lowerstr;
|
||||
|
||||
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_P(0);
|
||||
|
||||
readstoplist(in, &(d->stoplist));
|
||||
sortstoplist(&(d->stoplist));
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
dex_lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictExample *d = (DictExample *) PG_GETARG_POINTER(0);
|
||||
char *in = (char *) PG_GETARG_POINTER(1);
|
||||
char *utxt = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
TSLexeme *res = palloc(sizeof(TSLexeme) * 2);
|
||||
char *txt = lowerstr(utxt);
|
||||
|
||||
pfree(utxt);
|
||||
memset(res, 0, sizeof(TSLexeme) * 2);
|
||||
|
||||
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
|
||||
{
|
||||
pfree(txt);
|
||||
}
|
||||
else
|
||||
res[0].lexeme = txt;
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
|
@ -1,196 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/dict_ispell.c,v 1.10 2006/03/11 04:38:30 momjian Exp $ */
|
||||
|
||||
/*
|
||||
* ISpell interface
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "dict.h"
|
||||
#include "common.h"
|
||||
#include "ispell/spell.h"
|
||||
#include "ts_locale.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
StopList stoplist;
|
||||
IspellDict obj;
|
||||
} DictISpell;
|
||||
|
||||
PG_FUNCTION_INFO_V1(spell_init);
|
||||
Datum spell_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(spell_lexize);
|
||||
Datum spell_lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
static void
|
||||
freeDictISpell(DictISpell * d)
|
||||
{
|
||||
NIFree(&(d->obj));
|
||||
freestoplist(&(d->stoplist));
|
||||
free(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
spell_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictISpell *d;
|
||||
Map *cfg,
|
||||
*pcfg;
|
||||
text *in;
|
||||
bool affloaded = false,
|
||||
dictloaded = false,
|
||||
stoploaded = false;
|
||||
|
||||
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("ISpell confguration error")));
|
||||
|
||||
d = (DictISpell *) malloc(sizeof(DictISpell));
|
||||
if (!d)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
memset(d, 0, sizeof(DictISpell));
|
||||
d->stoplist.wordop = lowerstr;
|
||||
|
||||
in = PG_GETARG_TEXT_P(0);
|
||||
parse_cfgdict(in, &cfg);
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
pcfg = cfg;
|
||||
while (pcfg->key)
|
||||
{
|
||||
if (pg_strcasecmp("DictFile", pcfg->key) == 0)
|
||||
{
|
||||
if (dictloaded)
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("dictionary already loaded")));
|
||||
}
|
||||
if (NIImportDictionary(&(d->obj), pcfg->value))
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("could not load dictionary file \"%s\"",
|
||||
pcfg->value)));
|
||||
}
|
||||
dictloaded = true;
|
||||
}
|
||||
else if (pg_strcasecmp("AffFile", pcfg->key) == 0)
|
||||
{
|
||||
if (affloaded)
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("affixes already loaded")));
|
||||
}
|
||||
if (NIImportAffixes(&(d->obj), pcfg->value))
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("could not load affix file \"%s\"",
|
||||
pcfg->value)));
|
||||
}
|
||||
affloaded = true;
|
||||
}
|
||||
else if (pg_strcasecmp("StopFile", pcfg->key) == 0)
|
||||
{
|
||||
text *tmp = char2text(pcfg->value);
|
||||
|
||||
if (stoploaded)
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("stop words already loaded")));
|
||||
}
|
||||
readstoplist(tmp, &(d->stoplist));
|
||||
sortstoplist(&(d->stoplist));
|
||||
pfree(tmp);
|
||||
stoploaded = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("unrecognized option: %s => %s",
|
||||
pcfg->key, pcfg->value)));
|
||||
}
|
||||
pfree(pcfg->key);
|
||||
pfree(pcfg->value);
|
||||
pcfg++;
|
||||
}
|
||||
pfree(cfg);
|
||||
|
||||
if (affloaded && dictloaded)
|
||||
{
|
||||
NISortDictionary(&(d->obj));
|
||||
NISortAffixes(&(d->obj));
|
||||
}
|
||||
else if (!affloaded)
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("no affixes")));
|
||||
}
|
||||
else
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("no dictionary")));
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
spell_lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0);
|
||||
char *in = (char *) PG_GETARG_POINTER(1);
|
||||
char *txt;
|
||||
TSLexeme *res;
|
||||
TSLexeme *ptr,
|
||||
*cptr;
|
||||
|
||||
if (!PG_GETARG_INT32(2))
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
txt = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
res = NINormalizeWord(&(d->obj), txt);
|
||||
pfree(txt);
|
||||
|
||||
if (res == NULL)
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
ptr = cptr = res;
|
||||
while (ptr->lexeme)
|
||||
{
|
||||
if (searchstoplist(&(d->stoplist), ptr->lexeme))
|
||||
{
|
||||
pfree(ptr->lexeme);
|
||||
ptr->lexeme = NULL;
|
||||
ptr++;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(cptr, ptr, sizeof(TSLexeme));
|
||||
cptr++;
|
||||
ptr++;
|
||||
}
|
||||
}
|
||||
cptr->lexeme = NULL;
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
|
@ -1,169 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/dict_snowball.c,v 1.13 2006/11/20 14:03:30 teodor Exp $ */
|
||||
|
||||
/*
|
||||
* example of Snowball dictionary
|
||||
* http://snowball.tartarus.org/
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "dict.h"
|
||||
#include "common.h"
|
||||
#include "snowball/english_stem.h"
|
||||
#include "snowball/header.h"
|
||||
#include "snowball/russian_stem.h"
|
||||
#include "snowball/russian_stem_UTF8.h"
|
||||
#include "ts_locale.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
struct SN_env *z;
|
||||
StopList stoplist;
|
||||
int (*stem) (struct SN_env * z);
|
||||
} DictSnowball;
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(snb_en_init);
|
||||
Datum snb_en_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(snb_ru_init_koi8);
|
||||
Datum snb_ru_init_koi8(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(snb_ru_init_utf8);
|
||||
Datum snb_ru_init_utf8(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(snb_lexize);
|
||||
Datum snb_lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
snb_en_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
|
||||
|
||||
if (!d)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
memset(d, 0, sizeof(DictSnowball));
|
||||
d->stoplist.wordop = lowerstr;
|
||||
|
||||
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_P(0);
|
||||
|
||||
readstoplist(in, &(d->stoplist));
|
||||
sortstoplist(&(d->stoplist));
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
}
|
||||
|
||||
d->z = english_ISO_8859_1_create_env();
|
||||
if (!d->z)
|
||||
{
|
||||
freestoplist(&(d->stoplist));
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
d->stem = english_ISO_8859_1_stem;
|
||||
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
snb_ru_init_koi8(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
|
||||
|
||||
if (!d)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
memset(d, 0, sizeof(DictSnowball));
|
||||
d->stoplist.wordop = lowerstr;
|
||||
|
||||
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_P(0);
|
||||
|
||||
readstoplist(in, &(d->stoplist));
|
||||
sortstoplist(&(d->stoplist));
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
}
|
||||
|
||||
d->z = russian_KOI8_R_create_env();
|
||||
if (!d->z)
|
||||
{
|
||||
freestoplist(&(d->stoplist));
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
d->stem = russian_KOI8_R_stem;
|
||||
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
snb_ru_init_utf8(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
|
||||
|
||||
if (!d)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
memset(d, 0, sizeof(DictSnowball));
|
||||
d->stoplist.wordop = lowerstr;
|
||||
|
||||
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_P(0);
|
||||
|
||||
readstoplist(in, &(d->stoplist));
|
||||
sortstoplist(&(d->stoplist));
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
}
|
||||
|
||||
d->z = russian_UTF_8_create_env();
|
||||
if (!d->z)
|
||||
{
|
||||
freestoplist(&(d->stoplist));
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
d->stem = russian_UTF_8_stem;
|
||||
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
snb_lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
|
||||
char *in = (char *) PG_GETARG_POINTER(1);
|
||||
char *utxt = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
TSLexeme *res = palloc(sizeof(TSLexeme) * 2);
|
||||
char *txt = lowerstr(utxt);
|
||||
|
||||
pfree(utxt);
|
||||
memset(res, 0, sizeof(TSLexeme) * 2);
|
||||
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
|
||||
{
|
||||
pfree(txt);
|
||||
}
|
||||
else
|
||||
{
|
||||
SN_set_current(d->z, strlen(txt), (symbol *) txt);
|
||||
(d->stem) (d->z);
|
||||
if (d->z->p && d->z->l)
|
||||
{
|
||||
txt = repalloc(txt, d->z->l + 1);
|
||||
memcpy(txt, d->z->p, d->z->l);
|
||||
txt[d->z->l] = '\0';
|
||||
}
|
||||
res->lexeme = txt;
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
|
@ -1,185 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/dict_syn.c,v 1.14 2007/03/28 01:28:34 tgl Exp $ */
|
||||
|
||||
/*
|
||||
* Synonym dictionary interface
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "dict.h"
|
||||
#include "common.h"
|
||||
#include "ts_locale.h"
|
||||
|
||||
#define SYNBUFLEN 4096
|
||||
typedef struct
|
||||
{
|
||||
char *in;
|
||||
char *out;
|
||||
} Syn;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int len;
|
||||
Syn *syn;
|
||||
} DictSyn;
|
||||
|
||||
PG_FUNCTION_INFO_V1(syn_init);
|
||||
Datum syn_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(syn_lexize);
|
||||
Datum syn_lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
/*
 * Locate the first whitespace-delimited word in 'in'.
 *
 * Returns a pointer to the first character of the word and stores in *end a
 * pointer to the character right after it (either a whitespace character or
 * the terminating NUL).  When 'in' holds nothing but whitespace, returns
 * NULL and leaves *end set to NULL.  The string itself is not modified.
 */
static char *
findwrd(char *in, char **end)
{
	char	   *cursor;
	char	   *word;

	*end = NULL;

	/* skip leading whitespace */
	for (cursor = in; *cursor != '\0'; cursor++)
	{
		if (!isspace((unsigned char) *cursor))
			break;
	}

	if (*cursor == '\0')
		return NULL;

	/* advance to the first whitespace character or the end of the string */
	for (word = cursor; *cursor != '\0'; cursor++)
	{
		if (isspace((unsigned char) *cursor))
			break;
	}

	*end = cursor;
	return word;
}
|
||||
|
||||
static int
|
||||
compareSyn(const void *a, const void *b)
|
||||
{
|
||||
return strcmp(((Syn *) a)->in, ((Syn *) b)->in);
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
syn_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *in;
|
||||
DictSyn *d;
|
||||
int cur = 0;
|
||||
FILE *fin;
|
||||
char *filename;
|
||||
char buf[SYNBUFLEN];
|
||||
char *starti,
|
||||
*starto,
|
||||
*end = NULL;
|
||||
int slen;
|
||||
|
||||
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("NULL config")));
|
||||
|
||||
in = PG_GETARG_TEXT_P(0);
|
||||
if (VARSIZE(in) - VARHDRSZ == 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("VOID config")));
|
||||
|
||||
filename = text2char(in);
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
if ((fin = fopen(filename, "r")) == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open file \"%s\": %m",
|
||||
filename)));
|
||||
|
||||
d = (DictSyn *) malloc(sizeof(DictSyn));
|
||||
if (!d)
|
||||
{
|
||||
fclose(fin);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
memset(d, 0, sizeof(DictSyn));
|
||||
|
||||
while (fgets(buf, sizeof(buf), fin))
|
||||
{
|
||||
slen = strlen(buf);
|
||||
pg_verifymbstr(buf, slen, false);
|
||||
if (cur == d->len)
|
||||
{
|
||||
d->len = (d->len) ? 2 * d->len : 16;
|
||||
d->syn = (Syn *) realloc(d->syn, sizeof(Syn) * d->len);
|
||||
if (!d->syn)
|
||||
{
|
||||
fclose(fin);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
}
|
||||
|
||||
starti = findwrd(buf, &end);
|
||||
if (!starti)
|
||||
continue;
|
||||
*end = '\0';
|
||||
if (end >= buf + slen)
|
||||
continue;
|
||||
|
||||
starto = findwrd(end + 1, &end);
|
||||
if (!starto)
|
||||
continue;
|
||||
*end = '\0';
|
||||
|
||||
d->syn[cur].in = strdup(lowerstr(starti));
|
||||
d->syn[cur].out = strdup(lowerstr(starto));
|
||||
if (!(d->syn[cur].in && d->syn[cur].out))
|
||||
{
|
||||
fclose(fin);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
|
||||
cur++;
|
||||
}
|
||||
|
||||
fclose(fin);
|
||||
|
||||
d->len = cur;
|
||||
if (cur > 1)
|
||||
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
|
||||
|
||||
pfree(filename);
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
syn_lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
|
||||
char *in = (char *) PG_GETARG_POINTER(1);
|
||||
Syn key,
|
||||
*found;
|
||||
TSLexeme *res = NULL;
|
||||
char *wrd;
|
||||
|
||||
if (!PG_GETARG_INT32(2))
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
key.out = NULL;
|
||||
wrd = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
key.in = lowerstr(wrd);
|
||||
pfree(wrd);
|
||||
|
||||
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
|
||||
pfree(key.in);
|
||||
|
||||
if (!found)
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
res = palloc(sizeof(TSLexeme) * 2);
|
||||
memset(res, 0, sizeof(TSLexeme) * 2);
|
||||
res[0].lexeme = pstrdup(found->out);
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
|
@ -1,921 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/dict_thesaurus.c,v 1.9 2007/07/15 22:57:48 tgl Exp $ */
|
||||
|
||||
/*
|
||||
* thesaurus
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
#include "executor/spi.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "dict.h"
|
||||
#include "common.h"
|
||||
#include "ts_locale.h"
|
||||
|
||||
/*
|
||||
* Temporarily we use TSLexeme.flags for inner use...
|
||||
*/
|
||||
#define DT_USEASIS 0x1000
|
||||
|
||||
typedef struct LexemeInfo
|
||||
{
|
||||
uint16 idsubst; /* entry's number in DictThesaurus->subst */
|
||||
uint16 posinsubst; /* pos info in entry */
|
||||
uint16 tnvariant; /* total num lexemes in one variant */
|
||||
struct LexemeInfo *nextentry;
|
||||
struct LexemeInfo *nextvariant;
|
||||
} LexemeInfo;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char *lexeme;
|
||||
LexemeInfo *entries;
|
||||
} TheLexeme;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint16 lastlexeme; /* number lexemes to substitute */
|
||||
uint16 reslen;
|
||||
TSLexeme *res; /* prepared substituted result */
|
||||
} TheSubstitute;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* subdictionary to normalize lexemes */
|
||||
DictInfo subdict;
|
||||
|
||||
/* Array to search lexeme by exact match */
|
||||
TheLexeme *wrds;
|
||||
int nwrds;
|
||||
int ntwrds;
|
||||
|
||||
/*
|
||||
* Storage of substituted result, n-th element is for n-th expression
|
||||
*/
|
||||
TheSubstitute *subst;
|
||||
int nsubst;
|
||||
} DictThesaurus;
|
||||
|
||||
PG_FUNCTION_INFO_V1(thesaurus_init);
|
||||
Datum thesaurus_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(thesaurus_lexize);
|
||||
Datum thesaurus_lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
static void
|
||||
freeDictThesaurus(DictThesaurus * d)
|
||||
{
|
||||
free(d);
|
||||
}
|
||||
|
||||
static void
|
||||
newLexeme(DictThesaurus * d, char *b, char *e, uint16 idsubst, uint16 posinsubst)
|
||||
{
|
||||
TheLexeme *ptr;
|
||||
|
||||
if (d->nwrds >= d->ntwrds)
|
||||
{
|
||||
if (d->ntwrds == 0)
|
||||
{
|
||||
d->ntwrds = 16;
|
||||
d->wrds = (TheLexeme *) malloc(sizeof(TheLexeme) * d->ntwrds);
|
||||
}
|
||||
else
|
||||
{
|
||||
d->ntwrds *= 2;
|
||||
d->wrds = (TheLexeme *) realloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
|
||||
}
|
||||
if (!d->wrds)
|
||||
elog(ERROR, "Out of memory");
|
||||
}
|
||||
|
||||
ptr = d->wrds + d->nwrds;
|
||||
d->nwrds++;
|
||||
|
||||
if ((ptr->lexeme = malloc(e - b + 1)) == NULL)
|
||||
elog(ERROR, "Out of memory");
|
||||
|
||||
memcpy(ptr->lexeme, b, e - b);
|
||||
ptr->lexeme[e - b] = '\0';
|
||||
|
||||
if ((ptr->entries = (LexemeInfo *) malloc(sizeof(LexemeInfo))) == NULL)
|
||||
elog(ERROR, "Out of memory");
|
||||
|
||||
ptr->entries->nextentry = NULL;
|
||||
ptr->entries->idsubst = idsubst;
|
||||
ptr->entries->posinsubst = posinsubst;
|
||||
}
|
||||
|
||||
static void
|
||||
addWrd(DictThesaurus * d, char *b, char *e, uint16 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
|
||||
{
|
||||
static int nres = 0;
|
||||
static int ntres = 0;
|
||||
TheSubstitute *ptr;
|
||||
|
||||
if (nwrd == 0)
|
||||
{
|
||||
nres = ntres = 0;
|
||||
|
||||
if (idsubst >= d->nsubst)
|
||||
{
|
||||
if (d->nsubst == 0)
|
||||
{
|
||||
d->nsubst = 16;
|
||||
d->subst = (TheSubstitute *) malloc(sizeof(TheSubstitute) * d->nsubst);
|
||||
}
|
||||
else
|
||||
{
|
||||
d->nsubst *= 2;
|
||||
d->subst = (TheSubstitute *) realloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
|
||||
}
|
||||
if (!d->subst)
|
||||
elog(ERROR, "Out of memory");
|
||||
}
|
||||
}
|
||||
|
||||
ptr = d->subst + idsubst;
|
||||
|
||||
ptr->lastlexeme = posinsubst - 1;
|
||||
|
||||
if (nres + 1 >= ntres)
|
||||
{
|
||||
if (ntres == 0)
|
||||
{
|
||||
ntres = 2;
|
||||
ptr->res = (TSLexeme *) malloc(sizeof(TSLexeme) * ntres);
|
||||
}
|
||||
else
|
||||
{
|
||||
ntres *= 2;
|
||||
ptr->res = (TSLexeme *) realloc(ptr->res, sizeof(TSLexeme) * ntres);
|
||||
}
|
||||
|
||||
if (!ptr->res)
|
||||
elog(ERROR, "Out of memory");
|
||||
}
|
||||
|
||||
if ((ptr->res[nres].lexeme = malloc(e - b + 1)) == 0)
|
||||
elog(ERROR, "Out of memory");
|
||||
memcpy(ptr->res[nres].lexeme, b, e - b);
|
||||
ptr->res[nres].lexeme[e - b] = '\0';
|
||||
|
||||
ptr->res[nres].nvariant = nwrd;
|
||||
if (useasis)
|
||||
ptr->res[nres].flags = DT_USEASIS;
|
||||
else
|
||||
ptr->res[nres].flags = 0;
|
||||
|
||||
ptr->res[++nres].lexeme = NULL;
|
||||
}
|
||||
|
||||
#define TR_WAITLEX 1
|
||||
#define TR_INLEX 2
|
||||
#define TR_WAITSUBS 3
|
||||
#define TR_INSUBS 4
|
||||
|
||||
static void
|
||||
thesaurusRead(char *filename, DictThesaurus * d)
|
||||
{
|
||||
FILE *fh;
|
||||
char str[BUFSIZ];
|
||||
int lineno = 0;
|
||||
uint16 idsubst = 0;
|
||||
bool useasis = false;
|
||||
|
||||
fh = fopen(to_absfilename(filename), "r");
|
||||
if (!fh)
|
||||
elog(ERROR, "Thesaurus: cannot open '%s' file", filename);
|
||||
|
||||
while (fgets(str, sizeof(str), fh))
|
||||
{
|
||||
char *ptr = str;
|
||||
int state = TR_WAITLEX;
|
||||
char *beginwrd = NULL;
|
||||
uint16 posinsubst = 0;
|
||||
uint16 nwrd = 0;
|
||||
|
||||
lineno++;
|
||||
|
||||
/* is it comment ? */
|
||||
while (t_isspace(ptr))
|
||||
ptr += pg_mblen(ptr);
|
||||
if (t_iseq(str, '#') || *str == '\0' || t_iseq(str, '\n') || t_iseq(str, '\r'))
|
||||
continue;
|
||||
|
||||
pg_verifymbstr(ptr, strlen(ptr), false);
|
||||
while (*ptr)
|
||||
{
|
||||
if (state == TR_WAITLEX)
|
||||
{
|
||||
if (t_iseq(ptr, ':'))
|
||||
{
|
||||
if (posinsubst == 0)
|
||||
{
|
||||
fclose(fh);
|
||||
elog(ERROR, "Thesaurus: Unexpected delimiter at %d line", lineno);
|
||||
}
|
||||
state = TR_WAITSUBS;
|
||||
}
|
||||
else if (!t_isspace(ptr))
|
||||
{
|
||||
beginwrd = ptr;
|
||||
state = TR_INLEX;
|
||||
}
|
||||
}
|
||||
else if (state == TR_INLEX)
|
||||
{
|
||||
if (t_iseq(ptr, ':'))
|
||||
{
|
||||
newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
|
||||
state = TR_WAITSUBS;
|
||||
}
|
||||
else if (t_isspace(ptr))
|
||||
{
|
||||
newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
|
||||
state = TR_WAITLEX;
|
||||
}
|
||||
}
|
||||
else if (state == TR_WAITSUBS)
|
||||
{
|
||||
if (t_iseq(ptr, '*'))
|
||||
{
|
||||
useasis = true;
|
||||
state = TR_INSUBS;
|
||||
beginwrd = ptr + pg_mblen(ptr);
|
||||
}
|
||||
else if (t_iseq(ptr, '\\'))
|
||||
{
|
||||
useasis = false;
|
||||
state = TR_INSUBS;
|
||||
beginwrd = ptr + pg_mblen(ptr);
|
||||
}
|
||||
else if (!t_isspace(ptr))
|
||||
{
|
||||
useasis = false;
|
||||
beginwrd = ptr;
|
||||
state = TR_INSUBS;
|
||||
}
|
||||
}
|
||||
else if (state == TR_INSUBS)
|
||||
{
|
||||
if (t_isspace(ptr))
|
||||
{
|
||||
if (ptr == beginwrd)
|
||||
elog(ERROR, "Thesaurus: Unexpected end of line or lexeme at %d line", lineno);
|
||||
addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
|
||||
state = TR_WAITSUBS;
|
||||
}
|
||||
}
|
||||
else
|
||||
elog(ERROR, "Thesaurus: Unknown state: %d", state);
|
||||
|
||||
ptr += pg_mblen(ptr);
|
||||
}
|
||||
|
||||
if (state == TR_INSUBS)
|
||||
{
|
||||
if (ptr == beginwrd)
|
||||
elog(ERROR, "Thesaurus: Unexpected end of line or lexeme at %d line", lineno);
|
||||
addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
|
||||
}
|
||||
|
||||
idsubst++;
|
||||
|
||||
if (!(nwrd && posinsubst))
|
||||
{
|
||||
fclose(fh);
|
||||
elog(ERROR, "Thesaurus: Unexpected end of line at %d line", lineno);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
d->nsubst = idsubst;
|
||||
|
||||
fclose(fh);
|
||||
}
|
||||
|
||||
static TheLexeme *
|
||||
addCompiledLexeme(TheLexeme * newwrds, int *nnw, int *tnm, TSLexeme * lexeme, LexemeInfo * src, uint16 tnvariant)
|
||||
{
|
||||
|
||||
if (*nnw >= *tnm)
|
||||
{
|
||||
*tnm *= 2;
|
||||
newwrds = (TheLexeme *) realloc(newwrds, sizeof(TheLexeme) * *tnm);
|
||||
if (!newwrds)
|
||||
elog(ERROR, "Out of memory");
|
||||
}
|
||||
|
||||
newwrds[*nnw].entries = (LexemeInfo *) malloc(sizeof(LexemeInfo));
|
||||
if (!newwrds[*nnw].entries)
|
||||
elog(ERROR, "Out of memory");
|
||||
|
||||
if (lexeme && lexeme->lexeme)
|
||||
{
|
||||
newwrds[*nnw].lexeme = strdup(lexeme->lexeme);
|
||||
if (!newwrds[*nnw].lexeme)
|
||||
elog(ERROR, "Out of memory");
|
||||
|
||||
newwrds[*nnw].entries->tnvariant = tnvariant;
|
||||
}
|
||||
else
|
||||
{
|
||||
newwrds[*nnw].lexeme = NULL;
|
||||
newwrds[*nnw].entries->tnvariant = 1;
|
||||
}
|
||||
|
||||
newwrds[*nnw].entries->idsubst = src->idsubst;
|
||||
newwrds[*nnw].entries->posinsubst = src->posinsubst;
|
||||
|
||||
newwrds[*nnw].entries->nextentry = NULL;
|
||||
|
||||
(*nnw)++;
|
||||
return newwrds;
|
||||
}
|
||||
|
||||
static int
|
||||
cmpLexemeInfo(LexemeInfo * a, LexemeInfo * b)
|
||||
{
|
||||
if (a == NULL || b == NULL)
|
||||
return 0;
|
||||
|
||||
if (a->idsubst == b->idsubst)
|
||||
{
|
||||
if (a->posinsubst == b->posinsubst)
|
||||
{
|
||||
if (a->tnvariant == b->tnvariant)
|
||||
return 0;
|
||||
|
||||
return (a->tnvariant > b->tnvariant) ? 1 : -1;
|
||||
}
|
||||
|
||||
return (a->posinsubst > b->posinsubst) ? 1 : -1;
|
||||
}
|
||||
|
||||
return (a->idsubst > b->idsubst) ? 1 : -1;
|
||||
}
|
||||
|
||||
static int
|
||||
cmpLexeme(TheLexeme * a, TheLexeme * b)
|
||||
{
|
||||
if (a->lexeme == NULL)
|
||||
{
|
||||
if (b->lexeme == NULL)
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (b->lexeme == NULL)
|
||||
return -1;
|
||||
|
||||
return strcmp(a->lexeme, b->lexeme);
|
||||
}
|
||||
|
||||
static int
|
||||
cmpLexemeQ(const void *a, const void *b)
|
||||
{
|
||||
return cmpLexeme((TheLexeme *) a, (TheLexeme *) b);
|
||||
}
|
||||
|
||||
static int
|
||||
cmpTheLexeme(const void *a, const void *b)
|
||||
{
|
||||
TheLexeme *la = (TheLexeme *) a;
|
||||
TheLexeme *lb = (TheLexeme *) b;
|
||||
int res;
|
||||
|
||||
if ((res = cmpLexeme(la, lb)) != 0)
|
||||
return res;
|
||||
|
||||
return -cmpLexemeInfo(la->entries, lb->entries);
|
||||
}
|
||||
|
||||
static void
|
||||
compileTheLexeme(DictThesaurus * d)
|
||||
{
|
||||
int i,
|
||||
nnw = 0,
|
||||
tnm = 16;
|
||||
TheLexeme *newwrds = (TheLexeme *) malloc(sizeof(TheLexeme) * tnm),
|
||||
*ptrwrds;
|
||||
|
||||
if (!newwrds)
|
||||
elog(ERROR, "Out of memory");
|
||||
|
||||
for (i = 0; i < d->nwrds; i++)
|
||||
{
|
||||
TSLexeme *ptr;
|
||||
|
||||
ptr = (TSLexeme *) DatumGetPointer(
|
||||
FunctionCall4(
|
||||
&(d->subdict.lexize_info),
|
||||
PointerGetDatum(d->subdict.dictionary),
|
||||
PointerGetDatum(d->wrds[i].lexeme),
|
||||
Int32GetDatum(strlen(d->wrds[i].lexeme)),
|
||||
PointerGetDatum(NULL)
|
||||
)
|
||||
);
|
||||
|
||||
if (!(ptr && ptr->lexeme))
|
||||
{
|
||||
if (!ptr)
|
||||
elog(ERROR, "Thesaurus: word-sample '%s' isn't recognized by subdictionary (rule %d)",
|
||||
d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1);
|
||||
else
|
||||
elog(NOTICE, "Thesaurus: word-sample '%s' is recognized as stop-word, assign any stop-word (rule %d)",
|
||||
d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1);
|
||||
|
||||
newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
while (ptr->lexeme)
|
||||
{
|
||||
TSLexeme *remptr = ptr + 1;
|
||||
int tnvar = 1;
|
||||
int curvar = ptr->nvariant;
|
||||
|
||||
/* compute n words in one variant */
|
||||
while (remptr->lexeme)
|
||||
{
|
||||
if (remptr->nvariant != (remptr - 1)->nvariant)
|
||||
break;
|
||||
tnvar++;
|
||||
remptr++;
|
||||
}
|
||||
|
||||
remptr = ptr;
|
||||
while (remptr->lexeme && remptr->nvariant == curvar)
|
||||
{
|
||||
newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
|
||||
remptr++;
|
||||
}
|
||||
|
||||
ptr = remptr;
|
||||
}
|
||||
}
|
||||
|
||||
free(d->wrds[i].lexeme);
|
||||
free(d->wrds[i].entries);
|
||||
}
|
||||
|
||||
free(d->wrds);
|
||||
d->wrds = newwrds;
|
||||
d->nwrds = nnw;
|
||||
d->ntwrds = tnm;
|
||||
|
||||
if (d->nwrds > 1)
|
||||
{
|
||||
qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);
|
||||
|
||||
/* uniq */
|
||||
newwrds = d->wrds;
|
||||
ptrwrds = d->wrds + 1;
|
||||
while (ptrwrds - d->wrds < d->nwrds)
|
||||
{
|
||||
if (cmpLexeme(ptrwrds, newwrds) == 0)
|
||||
{
|
||||
if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
|
||||
{
|
||||
ptrwrds->entries->nextentry = newwrds->entries;
|
||||
newwrds->entries = ptrwrds->entries;
|
||||
}
|
||||
else
|
||||
free(ptrwrds->entries);
|
||||
|
||||
if (ptrwrds->lexeme)
|
||||
free(ptrwrds->lexeme);
|
||||
}
|
||||
else
|
||||
{
|
||||
newwrds++;
|
||||
*newwrds = *ptrwrds;
|
||||
}
|
||||
|
||||
ptrwrds++;
|
||||
}
|
||||
|
||||
d->nwrds = newwrds - d->wrds + 1;
|
||||
d->wrds = (TheLexeme *) realloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
compileTheSubstitute(DictThesaurus * d)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < d->nsubst; i++)
|
||||
{
|
||||
TSLexeme *rem = d->subst[i].res,
|
||||
*outptr,
|
||||
*inptr;
|
||||
int n = 2;
|
||||
|
||||
outptr = d->subst[i].res = (TSLexeme *) malloc(sizeof(TSLexeme) * n);
|
||||
if (d->subst[i].res == NULL)
|
||||
elog(ERROR, "Out of Memory");
|
||||
outptr->lexeme = NULL;
|
||||
inptr = rem;
|
||||
|
||||
while (inptr && inptr->lexeme)
|
||||
{
|
||||
TSLexeme *lexized,
|
||||
tmplex[2];
|
||||
|
||||
if (inptr->flags & DT_USEASIS)
|
||||
{ /* do not lexize */
|
||||
tmplex[0] = *inptr;
|
||||
tmplex[0].flags = 0;
|
||||
tmplex[1].lexeme = NULL;
|
||||
lexized = tmplex;
|
||||
}
|
||||
else
|
||||
{
|
||||
lexized = (TSLexeme *) DatumGetPointer(
|
||||
FunctionCall4(
|
||||
&(d->subdict.lexize_info),
|
||||
PointerGetDatum(d->subdict.dictionary),
|
||||
PointerGetDatum(inptr->lexeme),
|
||||
Int32GetDatum(strlen(inptr->lexeme)),
|
||||
PointerGetDatum(NULL)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
if (lexized && lexized->lexeme)
|
||||
{
|
||||
int toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;
|
||||
|
||||
while (lexized->lexeme)
|
||||
{
|
||||
if (outptr - d->subst[i].res + 1 >= n)
|
||||
{
|
||||
int diff = outptr - d->subst[i].res;
|
||||
|
||||
n *= 2;
|
||||
d->subst[i].res = (TSLexeme *) realloc(d->subst[i].res, sizeof(TSLexeme) * n);
|
||||
if (d->subst[i].res == NULL)
|
||||
elog(ERROR, "Out of Memory");
|
||||
outptr = d->subst[i].res + diff;
|
||||
}
|
||||
|
||||
*outptr = *lexized;
|
||||
if ((outptr->lexeme = strdup(lexized->lexeme)) == NULL)
|
||||
elog(ERROR, "Out of Memory");
|
||||
|
||||
outptr++;
|
||||
lexized++;
|
||||
}
|
||||
|
||||
if (toset > 0)
|
||||
d->subst[i].res[toset].flags |= TSL_ADDPOS;
|
||||
}
|
||||
else if (lexized)
|
||||
{
|
||||
elog(NOTICE, "Thesaurus: word '%s' in substition is a stop-word, ignored (rule %d)", inptr->lexeme, i + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
elog(ERROR, "Thesaurus: word '%s' in substition isn't recognized (rule %d)", inptr->lexeme, i + 1);
|
||||
}
|
||||
|
||||
if (inptr->lexeme)
|
||||
free(inptr->lexeme);
|
||||
inptr++;
|
||||
}
|
||||
|
||||
if (outptr == d->subst[i].res)
|
||||
elog(ERROR, "Thesaurus: all words in subsitution are stop word (rule %d)", i + 1);
|
||||
|
||||
d->subst[i].reslen = outptr - d->subst[i].res;
|
||||
|
||||
free(rem);
|
||||
}
|
||||
}
|
||||
|
||||
Datum
|
||||
thesaurus_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictThesaurus *d;
|
||||
Map *cfg,
|
||||
*pcfg;
|
||||
text *in,
|
||||
*subdictname = NULL;
|
||||
bool fileloaded = false;
|
||||
|
||||
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("Thesaurus confguration error")));
|
||||
|
||||
d = (DictThesaurus *) malloc(sizeof(DictThesaurus));
|
||||
if (!d)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
memset(d, 0, sizeof(DictThesaurus));
|
||||
|
||||
in = PG_GETARG_TEXT_P(0);
|
||||
parse_cfgdict(in, &cfg);
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
pcfg = cfg;
|
||||
while (pcfg->key)
|
||||
{
|
||||
if (pg_strcasecmp("DictFile", pcfg->key) == 0)
|
||||
{
|
||||
if (fileloaded)
|
||||
{
|
||||
freeDictThesaurus(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("Thesaurus file is already loaded")));
|
||||
}
|
||||
fileloaded = true;
|
||||
thesaurusRead(pcfg->value, d);
|
||||
}
|
||||
else if (pg_strcasecmp("Dictionary", pcfg->key) == 0)
|
||||
{
|
||||
if (subdictname)
|
||||
{
|
||||
freeDictThesaurus(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("Thesaurus: SubDictionary is already defined")));
|
||||
}
|
||||
subdictname = char2text(pcfg->value);
|
||||
}
|
||||
else
|
||||
{
|
||||
freeDictThesaurus(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("unrecognized option: %s => %s",
|
||||
pcfg->key, pcfg->value)));
|
||||
}
|
||||
pfree(pcfg->key);
|
||||
pfree(pcfg->value);
|
||||
pcfg++;
|
||||
}
|
||||
pfree(cfg);
|
||||
|
||||
if (!fileloaded)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("Thesaurus file isn't defined")));
|
||||
|
||||
if (subdictname)
|
||||
{
|
||||
DictInfo *subdictptr;
|
||||
|
||||
/*
|
||||
* we already in SPI, but name2id_dict()/finddict() invoke
|
||||
* SPI_connect()
|
||||
*/
|
||||
SPI_push();
|
||||
|
||||
subdictptr = finddict(name2id_dict(subdictname));
|
||||
|
||||
SPI_pop();
|
||||
|
||||
d->subdict = *subdictptr;
|
||||
}
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("Thesaurus: SubDictionary isn't defined")));
|
||||
|
||||
compileTheLexeme(d);
|
||||
compileTheSubstitute(d);
|
||||
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
static LexemeInfo *
|
||||
findTheLexeme(DictThesaurus * d, char *lexeme)
|
||||
{
|
||||
TheLexeme key, *res;
|
||||
|
||||
if (d->nwrds == 0)
|
||||
return NULL;
|
||||
|
||||
key.lexeme = lexeme;
|
||||
key.entries = NULL;
|
||||
|
||||
res = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);
|
||||
|
||||
if (res == NULL)
|
||||
return NULL;
|
||||
return res->entries;
|
||||
}
|
||||
|
||||
static bool
|
||||
matchIdSubst(LexemeInfo * stored, uint16 idsubst)
|
||||
{
|
||||
bool res = true;
|
||||
|
||||
if (stored)
|
||||
{
|
||||
res = false;
|
||||
|
||||
for (; stored; stored = stored->nextvariant)
|
||||
if (stored->idsubst == idsubst)
|
||||
{
|
||||
res = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static LexemeInfo *
|
||||
findVariant(LexemeInfo * in, LexemeInfo * stored, uint16 curpos, LexemeInfo ** newin, int newn)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
int i;
|
||||
LexemeInfo *ptr = newin[0];
|
||||
|
||||
for (i = 0; i < newn; i++)
|
||||
{
|
||||
while (newin[i] && newin[i]->idsubst < ptr->idsubst)
|
||||
newin[i] = newin[i]->nextentry;
|
||||
|
||||
if (newin[i] == NULL)
|
||||
return in;
|
||||
|
||||
if (newin[i]->idsubst > ptr->idsubst)
|
||||
{
|
||||
ptr = newin[i];
|
||||
i = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
while (newin[i]->idsubst == ptr->idsubst)
|
||||
{
|
||||
if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
|
||||
{
|
||||
ptr = newin[i];
|
||||
break;
|
||||
}
|
||||
|
||||
newin[i] = newin[i]->nextentry;
|
||||
if (newin[i] == NULL)
|
||||
return in;
|
||||
}
|
||||
|
||||
if (newin[i]->idsubst != ptr->idsubst)
|
||||
{
|
||||
ptr = newin[i];
|
||||
i = -1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
|
||||
{ /* found */
|
||||
|
||||
ptr->nextvariant = in;
|
||||
in = ptr;
|
||||
}
|
||||
|
||||
/* step forward */
|
||||
for (i = 0; i < newn; i++)
|
||||
newin[i] = newin[i]->nextentry;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static TSLexeme *
|
||||
copyTSLexeme(TheSubstitute * ts)
|
||||
{
|
||||
TSLexeme *res;
|
||||
uint16 i;
|
||||
|
||||
res = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1));
|
||||
for (i = 0; i < ts->reslen; i++)
|
||||
{
|
||||
res[i] = ts->res[i];
|
||||
res[i].lexeme = pstrdup(ts->res[i].lexeme);
|
||||
}
|
||||
|
||||
res[ts->reslen].lexeme = NULL;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static TSLexeme *
|
||||
checkMatch(DictThesaurus * d, LexemeInfo * info, uint16 curpos, bool *moreres)
|
||||
{
|
||||
*moreres = false;
|
||||
while (info)
|
||||
{
|
||||
Assert(info->idsubst < d->nsubst);
|
||||
if (info->nextvariant)
|
||||
*moreres = true;
|
||||
if (d->subst[info->idsubst].lastlexeme == curpos)
|
||||
return copyTSLexeme(d->subst + info->idsubst);
|
||||
info = info->nextvariant;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Datum
|
||||
thesaurus_lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0);
|
||||
DictSubState *dstate = (DictSubState *) PG_GETARG_POINTER(3);
|
||||
TSLexeme *res = NULL;
|
||||
LexemeInfo *stored,
|
||||
*info = NULL;
|
||||
uint16 curpos = 0;
|
||||
bool moreres = false;
|
||||
|
||||
if (dstate == NULL || PG_NARGS() < 4)
|
||||
elog(ERROR, "Forbidden call of thesaurus or nested call");
|
||||
|
||||
if (dstate->isend)
|
||||
PG_RETURN_POINTER(NULL);
|
||||
stored = (LexemeInfo *) dstate->private;
|
||||
|
||||
if (stored)
|
||||
curpos = stored->posinsubst + 1;
|
||||
|
||||
res = (TSLexeme *) DatumGetPointer(
|
||||
FunctionCall4(
|
||||
&(d->subdict.lexize_info),
|
||||
PointerGetDatum(d->subdict.dictionary),
|
||||
PG_GETARG_DATUM(1),
|
||||
PG_GETARG_INT32(2),
|
||||
PointerGetDatum(NULL)
|
||||
)
|
||||
);
|
||||
|
||||
if (res && res->lexeme)
|
||||
{
|
||||
TSLexeme *ptr = res,
|
||||
*basevar;
|
||||
|
||||
while (ptr->lexeme)
|
||||
{
|
||||
uint16 nv = ptr->nvariant;
|
||||
uint16 i,
|
||||
nlex = 0;
|
||||
LexemeInfo **infos;
|
||||
|
||||
basevar = ptr;
|
||||
while (ptr->lexeme && nv == ptr->nvariant)
|
||||
{
|
||||
nlex++;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex);
|
||||
for (i = 0; i < nlex; i++)
|
||||
if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
|
||||
break;
|
||||
|
||||
if (i < nlex)
|
||||
{
|
||||
/* no chance to find */
|
||||
pfree(infos);
|
||||
continue;
|
||||
}
|
||||
|
||||
info = findVariant(info, stored, curpos, infos, nlex);
|
||||
}
|
||||
}
|
||||
else if (res)
|
||||
{ /* stop-word */
|
||||
LexemeInfo *infos = findTheLexeme(d, NULL);
|
||||
|
||||
info = findVariant(NULL, stored, curpos, &infos, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
info = NULL; /* word isn't recognized */
|
||||
}
|
||||
|
||||
dstate->private = (void *) info;
|
||||
|
||||
if (!info)
|
||||
{
|
||||
dstate->getnext = false;
|
||||
PG_RETURN_POINTER(NULL);
|
||||
}
|
||||
|
||||
if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
|
||||
{
|
||||
dstate->getnext = moreres;
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
||||
|
||||
dstate->getnext = true;
|
||||
|
||||
PG_RETURN_POINTER(NULL);
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,842 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html><head>
|
||||
|
||||
<title>tsearch2 reference</title></head>
|
||||
|
||||
<body>
|
||||
<h1 align="center">The tsearch2 Reference</h1>
|
||||
|
||||
<p align="center">
|
||||
Brandon Craig Rhodes<br>30 June 2003 (edited by Oleg Bartunov, 2 Aug 2003).
|
||||
<br>Massive update for 8.2 release by Oleg Bartunov, October 2006
|
||||
</p>
|
||||
<p>
|
||||
This Reference documents the user types and functions
|
||||
of the tsearch2 module for PostgreSQL.
|
||||
An introduction to the module is provided
|
||||
by the <a href="http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/tsearch2-guide.html">tsearch2 Guide</a>,
|
||||
a companion document to this one.
|
||||
</p>
|
||||
|
||||
<h2>Table of Contents</h2>
|
||||
<blockquote>
|
||||
<a href="#vq">Vectors and Queries</a><br>
|
||||
<a href="#vqo">Vector Operations</a><br>
|
||||
<a href="#qo">Query Operations</a><br>
|
||||
<a href="#fts">Full Text Search Operator</a><br>
|
||||
<a href="#configurations">Configurations</a><br>
|
||||
<a href="#testing">Testing</a><br>
|
||||
<a href="#parsers">Parsers</a><br>
|
||||
<a href="#dictionaries">Dictionaries</a><br>
|
||||
<a href="#ranking">Ranking</a><br>
|
||||
<a href="#headlines">Headlines</a><br>
|
||||
<a href="#indexes">Indexes</a><br>
|
||||
<a href="#tz">Thesaurus dictionary</a><br>
|
||||
</blockquote>
|
||||
|
||||
|
||||
|
||||
|
||||
<h2><a name="vq">Vectors and Queries</a></h2>
|
||||
|
||||
Vectors and queries both store lexemes,
|
||||
but for different purposes.
|
||||
A <tt>tsvector</tt> stores the lexemes
|
||||
of the words that are parsed out of a document,
|
||||
and can also remember the position of each word.
|
||||
A <tt>tsquery</tt> specifies a boolean condition among lexemes.
|
||||
<p>
|
||||
Any of the following functions with a <tt><i>configuration</i></tt> argument
|
||||
can use either an integer <tt>id</tt> or textual <tt>ts_name</tt>
|
||||
to select a configuration;
|
||||
if the option is omitted, then the current configuration is used.
|
||||
For more information on the current configuration,
|
||||
read the next section on Configurations.
|
||||
</p>
|
||||
|
||||
<h3><a name="vqo">Vector Operations</a></h3>
|
||||
|
||||
<dl><dt>
|
||||
<tt>to_tsvector( <em>[</em><i>configuration</i>,<em>]</em>
|
||||
<i>document</i> TEXT) RETURNS TSVECTOR</tt>
|
||||
</dt><dd>
|
||||
Parses a document into tokens,
|
||||
reduces the tokens to lexemes,
|
||||
and returns a <tt>tsvector</tt> which lists the lexemes
|
||||
together with their positions in the document.
|
||||
For the best description of this process,
|
||||
see the section on <a href="http://www.sai.msu.su/%7Emegera/postgres/gist/tsearch/V2/docs/tsearch2-guide.html#ps">Parsing and Stemming</a>
|
||||
in the accompanying tsearch2 Guide.
|
||||
</dd><dt>
|
||||
<tt>strip(<i>vector</i> TSVECTOR) RETURNS TSVECTOR</tt>
|
||||
</dt><dd>
|
||||
Return a vector which lists the same lexemes
|
||||
as the given <tt><i>vector</i></tt>,
|
||||
but which lacks any information
|
||||
about where in the document each lexeme appeared.
|
||||
While the returned vector is thus useless for relevance ranking,
|
||||
it will usually be much smaller.
|
||||
</dd><dt>
|
||||
<tt>setweight(<i>vector</i> TSVECTOR, <i>letter</i>) RETURNS TSVECTOR</tt>
|
||||
</dt><dd>
|
||||
This function returns a copy of the input vector
|
||||
in which every location has been labeled
|
||||
with either the <tt><i>letter</i></tt>
|
||||
<tt>'A'</tt>, <tt>'B'</tt>, or <tt>'C'</tt>,
|
||||
or the default label <tt>'D'</tt>
|
||||
(which is the default with which new vectors are created,
|
||||
and as such is usually not displayed).
|
||||
These labels are retained when vectors are concatenated,
|
||||
allowing words from different parts of a document
|
||||
to be weighted differently by ranking functions.
|
||||
</dd>
|
||||
<dt>
|
||||
<tt><i>vector1</i> || <i>vector2</i></tt><BR>
|
||||
<tt>concat(<i>vector1</i> TSVECTOR, <i>vector2</i> TSVECTOR)
|
||||
RETURNS TSVECTOR</tt>
|
||||
</dt><dd>
|
||||
Returns a vector which combines the lexemes and position information
|
||||
in the two vectors given as arguments.
|
||||
Position weight labels (described in the previous paragraph)
|
||||
are retained intact during the concatenation.
|
||||
This has at least two uses.
|
||||
First,
|
||||
if some sections of your document
|
||||
need to be parsed with different configurations than others,
|
||||
you can parse them separately
|
||||
and concatenate the resulting vectors into one.
|
||||
Second,
|
||||
you can weight words from some sections of your document
|
||||
more heavily than those from others by:
|
||||
parsing the sections into separate vectors;
|
||||
assigning the vectors different position labels
|
||||
with the <tt>setweight()</tt> function;
|
||||
concatenating them into a single vector;
|
||||
and then providing a <tt><i>weights</i></tt> argument
|
||||
to the <tt>rank()</tt> function
|
||||
that assigns different weights to positions with different labels.
|
||||
</dd><dt>
|
||||
<tt>length(<i>vector</i> TSVECTOR) RETURNS INT4</tt>
|
||||
</dt><dd>
|
||||
Returns the number of lexemes stored in the vector.
|
||||
</dd><dt>
|
||||
<tt><i>text</i>::TSVECTOR RETURNS TSVECTOR</tt>
|
||||
</dt><dd>
|
||||
Directly casting text to a <tt>tsvector</tt>
|
||||
allows you to directly inject lexemes into a vector,
|
||||
with whatever positions and position weights you choose to specify.
|
||||
The <tt><i>text</i></tt> should be formatted
|
||||
like the vector would be printed by the output of a <tt>SELECT</tt>.
|
||||
See the <a href="http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/tsearch2-guide.html#casting">Casting</a>
|
||||
section in the Guide for details.
|
||||
</dd><dt>
|
||||
<tt>tsearch2(<i>vector_column_name</i>[, (<i>my_filter_name</i> | <i>text_column_name1</i>) [...] ], <i>text_column_nameN</i>)</tt>
|
||||
</dt><dd>
|
||||
<tt>tsearch2()</tt> trigger used to automatically update <i>vector_column_name</i>, <i>my_filter_name</i>
|
||||
is the function name to preprocess <i>text_column_name</i>. There can be many
|
||||
functions and text columns specified in <tt>tsearch2()</tt> trigger.
|
||||
The following rule is used:
|
||||
a function is applied to all subsequent text columns until the next function occurs.
|
||||
Example, function <tt>dropatsymbol</tt> replaces all entries of <tt>@</tt>
|
||||
sign by space.
|
||||
<pre>
|
||||
CREATE FUNCTION dropatsymbol(text) RETURNS text
|
||||
AS 'select replace($1, ''@'', '' '');'
|
||||
LANGUAGE SQL;
|
||||
|
||||
CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT
|
||||
ON tblMessages FOR EACH ROW EXECUTE PROCEDURE
|
||||
tsearch2(tsvector_column,dropatsymbol, strMessage);
|
||||
</pre>
|
||||
</dd>
|
||||
|
||||
<dt>
|
||||
<tt>stat(<i>sqlquery</i> text [, <i>weight</i> text]) RETURNS SETOF statinfo</tt>
|
||||
</dt><dd>
|
||||
Here <tt>statinfo</tt> is a type, defined as
|
||||
<tt>
|
||||
CREATE TYPE statinfo as (<i>word</i> text, <i>ndoc</i> int4, <i>nentry</i> int4)
|
||||
</tt> and <i>sqlquery</i> is a query, which returns column <tt>tsvector</tt>.
|
||||
<P>
|
||||
This returns statistics (the number of documents <i>ndoc</i> and total number <i>nentry</i> of <i>word</i>
|
||||
in the collection) about column <i>vector</i> <tt>tsvector</tt>.
|
||||
Useful to check how good your configuration is and
|
||||
to find stop-word candidates. For example, find the top 10 most frequent words:
|
||||
<pre>
|
||||
=# select * from stat('select vector from apod') order by ndoc desc, nentry desc,word limit 10;
|
||||
</pre>
|
||||
Optionally, one can specify <i>weight</i> to obtain statistics about words with specific weight.
|
||||
<pre>
|
||||
=# select * from stat('select vector from apod','a') order by ndoc desc, nentry desc,word limit 10;
|
||||
</pre>
|
||||
|
||||
</dd>
|
||||
<dt>
|
||||
<tt>TSVECTOR < TSVECTOR</tt><BR>
|
||||
<tt>TSVECTOR <= TSVECTOR</tt><BR>
|
||||
<tt>TSVECTOR = TSVECTOR</tt><BR>
|
||||
<tt>TSVECTOR >= TSVECTOR</tt><BR>
|
||||
<tt>TSVECTOR > TSVECTOR</tt>
|
||||
</dt><dd>
|
||||
All btree operations are defined for the <tt>tsvector</tt> type. <tt>tsvectors</tt> are compared
|
||||
with each other using lexicographical order.
|
||||
</dd>
|
||||
</dl>
|
||||
|
||||
<h3><a name="qo">Query Operations</a></h3>
|
||||
|
||||
<dl>
|
||||
<dt>
|
||||
<tt>to_tsquery( <em>[</em><i>configuration</i>,<em>]</em>
|
||||
<i>querytext</i> text) RETURNS TSQUERY</tt>
|
||||
</dt>
|
||||
<dd>
|
||||
Parses a query,
|
||||
which should be single words separated by the boolean operators
|
||||
"<tt>&</tt>" and,
|
||||
"<tt>|</tt>" or,
|
||||
and "<tt>!</tt>" not,
|
||||
which can be grouped using parenthesis.
|
||||
Each word is reduced to a lexeme using the current
|
||||
or specified configuration.
|
||||
Weight class can be assigned to each lexeme entry
|
||||
to restrict search region
|
||||
(see <tt>setweight</tt> for explanation), for example
|
||||
"<tt>fat:a & rats</tt>".
|
||||
</dd>
|
||||
<dt>
|
||||
<tt>plainto_tsquery( <em>[</em><i>configuration</i>,<em>]</em>
|
||||
<i>querytext</i> text) RETURNS TSQUERY</tt>
|
||||
</dt>
|
||||
<dd>
|
||||
Transforms unformatted text to tsquery. It is the same as to_tsquery,
|
||||
but assumes "<tt>&</tt>" boolean operator between words and doesn't
|
||||
recognize weight classes.
|
||||
</dd><dt>
|
||||
|
||||
<tt>querytree(<i>query</i> TSQUERY) RETURNS text</tt>
|
||||
</dt><dd>
|
||||
This returns the query that is actually used when searching a GiST index.
|
||||
</dd><dt>
|
||||
<tt><i>text</i>::TSQUERY RETURNS TSQUERY</tt>
|
||||
</dt><dd>
|
||||
Directly casting text to a <tt>tsquery</tt>
|
||||
allows you to directly inject lexemes into a query,
|
||||
with whatever positions and position weight flags you choose to specify.
|
||||
The <tt><i>text</i></tt> should be formatted
|
||||
like the query would be printed by the output of a <tt>SELECT</tt>.
|
||||
See the <a href="http://www.sai.msu.su/%7Emegera/postgres/gist/tsearch/V2/docs/tsearch2-guide.html#casting">Casting</a>
|
||||
section in the Guide for details.
|
||||
</dd>
|
||||
<dt>
|
||||
<tt>numnode(<i>query</i> TSQUERY) RETURNS INTEGER</tt>
|
||||
</dt><dd>
|
||||
This returns the number of nodes in the query tree.
|
||||
</dd><dt>
|
||||
<tt>TSQUERY && TSQUERY RETURNS TSQUERY</tt>
|
||||
</dt><dd>
|
||||
AND-ed TSQUERY
|
||||
</dd><dt>
|
||||
<tt>TSQUERY || TSQUERY RETURNS TSQUERY</tt>
|
||||
</dt> <dd>
|
||||
OR-ed TSQUERY
|
||||
</dd><dt>
|
||||
<tt>!! TSQUERY RETURNS TSQUERY</tt>
|
||||
</dt> <dd>
|
||||
negation of TSQUERY
|
||||
</dd>
|
||||
<dt>
|
||||
<tt>TSQUERY < TSQUERY</tt><BR>
|
||||
<tt>TSQUERY <= TSQUERY</tt><BR>
|
||||
<tt>TSQUERY = TSQUERY</tt><BR>
|
||||
<tt>TSQUERY >= TSQUERY</tt><BR>
|
||||
<tt>TSQUERY > TSQUERY</tt>
|
||||
</dt><dd>
|
||||
All btree operations are defined for the <tt>tsquery</tt> type. <tt>tsqueries</tt> are compared
|
||||
with each other using lexicographical order.
|
||||
</dd>
|
||||
</dl>
|
||||
|
||||
<h3>Query rewriting</h3>
|
||||
Query rewriting is a set of functions and operators for tsquery type.
|
||||
It allows you to control searching at query time without reindexing (unlike the thesaurus), for example,
|
||||
expand search using synonyms (new york, big apple, nyc, gotham).
|
||||
<P>
|
||||
<tt><b>rewrite()</b></tt> function changes original <i>query</i> by replacing <i>target</i> by <i>sample</i>.
|
||||
There are three ways to use the <tt>rewrite()</tt> function. Note that the arguments of the <tt>rewrite()</tt>
|
||||
function can be column names of type <tt>tsquery</tt>.
|
||||
<pre>
|
||||
create table rw (q TSQUERY, t TSQUERY, s TSQUERY);
|
||||
insert into rw values('a & b','a', 'c');
|
||||
</pre>
|
||||
<dl>
|
||||
<dt> <tt>rewrite (<i>query</i> TSQUERY, <i>target</i> TSQUERY, <i>sample</i> TSQUERY) RETURNS TSQUERY</tt>
|
||||
</dt>
|
||||
<dd>
|
||||
<pre>
|
||||
=# select rewrite('a & b'::TSQUERY, 'a'::TSQUERY, 'c'::TSQUERY);
|
||||
rewrite
|
||||
-----------
|
||||
'c' & 'b'
|
||||
</pre>
|
||||
</dd>
|
||||
<dt> <tt>rewrite (ARRAY[<i>query</i> TSQUERY, <i>target</i> TSQUERY, <i>sample</i> TSQUERY]) RETURNS TSQUERY</tt>
|
||||
</dt>
|
||||
<dd>
|
||||
<pre>
|
||||
=# select rewrite(ARRAY['a & b'::TSQUERY, t,s]) from rw;
|
||||
rewrite
|
||||
-----------
|
||||
'c' & 'b'
|
||||
</pre>
|
||||
</dd>
|
||||
<dt> <tt>rewrite (<i>query</i> TSQUERY,'select <i>target</i> ,<i>sample</i> from test'::text) RETURNS TSQUERY</tt>
|
||||
</dt>
|
||||
<dd>
|
||||
<pre>
|
||||
=# select rewrite('a & b'::TSQUERY, 'select t,s from rw'::text);
|
||||
rewrite
|
||||
-----------
|
||||
'c' & 'b'
|
||||
</pre>
|
||||
</dd>
|
||||
</dl>
|
||||
Two operators are defined for the <tt>tsquery</tt> type:
|
||||
<dl>
|
||||
<dt><tt>TSQUERY @ TSQUERY</tt></dt>
|
||||
<dd>
|
||||
Returns <tt>TRUE</tt> if the right argument might be contained in the left argument.
|
||||
</dd>
|
||||
<dt><tt>TSQUERY ~ TSQUERY</tt></dt>
|
||||
<dd>
|
||||
Returns <tt>TRUE</tt> if the left argument might be contained in the right argument.
|
||||
</dd>
|
||||
</dl>
|
||||
To speed up these operators one can use a GiST index with the <tt>gist_tp_tsquery_ops</tt> opclass.
|
||||
<pre>
|
||||
create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops);
|
||||
</pre>
|
||||
|
||||
<h2><a name="fts">Full Text Search operator</a></h2>
|
||||
|
||||
<dl><dt>
|
||||
<tt>TSQUERY @@ TSVECTOR</tt><br>
|
||||
<tt>TSVECTOR @@ TSQUERY</tt>
|
||||
</dt>
|
||||
<dd>
|
||||
Returns <tt>TRUE</tt> if <tt>TSQUERY</tt> is contained in <tt>TSVECTOR</tt> and
|
||||
<tt>FALSE</tt> otherwise.
|
||||
<pre>
|
||||
=# select 'cat & rat':: tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::tsvector;
|
||||
?column?
|
||||
----------
|
||||
t
|
||||
=# select 'fat & cow':: tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::tsvector;
|
||||
?column?
|
||||
----------
|
||||
f
|
||||
</pre>
|
||||
</dd>
|
||||
</dl>
|
||||
|
||||
<h2><a name="configurations">Configurations</a></h2>
|
||||
|
||||
A configuration specifies all of the equipment necessary
|
||||
to transform a document into a <tt>tsvector</tt>:
|
||||
the parser that breaks its text into tokens,
|
||||
and the dictionaries which then transform each token into a lexeme.
|
||||
Every call to <tt>to_tsvector(), to_tsquery()</tt> (described above)
|
||||
uses a configuration to perform its processing.
|
||||
Four configurations come with tsearch2:
|
||||
|
||||
<ul>
|
||||
<li><b>default</b> -- Indexes words and numbers,
|
||||
using the <i>en_stem</i> English Snowball stemmer for Latin-alphabet words
|
||||
and the <i>simple</i> dictionary for all others.
|
||||
</li><li><b>default_russian</b> -- Indexes words and numbers,
|
||||
using the <i>en_stem</i> English Snowball stemmer for Latin-alphabet words
|
||||
and the <i>ru_stem</i> Russian Snowball dictionary for all others. It's default
|
||||
for <tt>ru_RU.KOI8-R</tt> locale.
|
||||
</li><li><b>utf8_russian</b> -- the same as <b>default_russian</b> but
|
||||
for <tt>ru_RU.UTF-8</tt> locale.
|
||||
</li><li><b>simple</b> -- Processes both words and numbers
|
||||
with the <i>simple</i> dictionary,
|
||||
which neither discards any stop words nor alters them.
|
||||
</li></ul>
|
||||
|
||||
The tsearch2 module initially chooses your current configuration
|
||||
by looking for your current locale in the <tt>locale</tt> field
|
||||
of the <tt>pg_ts_cfg</tt> table described below.
|
||||
You can manipulate the current configuration yourself with these functions:
|
||||
|
||||
<dl><dt>
|
||||
<tt>set_curcfg( <i>id</i> INT <em>|</em> <i>ts_name</i> TEXT
|
||||
) RETURNS VOID</tt>
|
||||
</dt><dd>
|
||||
Set the current configuration used by <tt>to_tsvector</tt>
|
||||
and <tt>to_tsquery</tt>.
|
||||
</dd><dt>
|
||||
<tt>show_curcfg() RETURNS INT4</tt>
|
||||
</dt><dd>
|
||||
Returns the integer <tt>id</tt> of the current configuration.
|
||||
</dd></dl>
|
||||
|
||||
<p>
|
||||
Each configuration is defined by a record in the <tt>pg_ts_cfg</tt> table:
|
||||
|
||||
</p><pre>create table pg_ts_cfg (
|
||||
id int not null primary key,
|
||||
ts_name text not null,
|
||||
prs_name text not null,
|
||||
locale text
|
||||
);</pre>
|
||||
|
||||
The <tt>id</tt> and <tt>ts_name</tt> are unique values
|
||||
which identify the configuration;
|
||||
the <tt>prs_name</tt> specifies which parser the configuration uses.
|
||||
Once this parser has split document text into tokens,
|
||||
the type of each resulting token --
|
||||
or, more specifically, the type's <tt>tok_alias</tt>
|
||||
as specified in the parser's <tt>lexem_type()</tt> table --
|
||||
is searched for together with the configuration's <tt>ts_name</tt>
|
||||
in the <tt>pg_ts_cfgmap</tt> table:
|
||||
|
||||
<pre>create table pg_ts_cfgmap (
|
||||
ts_name text not null,
|
||||
tok_alias text not null,
|
||||
dict_name text[],
|
||||
primary key (ts_name,tok_alias)
|
||||
);</pre>
|
||||
|
||||
Those tokens whose types are not listed are discarded.
|
||||
The remaining tokens are assigned integer positions,
|
||||
starting with 1 for the first token in the document,
|
||||
and turned into lexemes with the help of the dictionaries
|
||||
whose names are given in the <tt>dict_name</tt> array for their type.
|
||||
These dictionaries are tried in order,
|
||||
stopping either with the first one to return a lexeme for the token,
|
||||
or discarding the token if no dictionary returns a lexeme for it.
|
||||
|
||||
<h2><a name="testing">Testing</a></h2>
|
||||
|
||||
Function <tt>ts_debug</tt> allows easy testing of your <b>current</b> configuration.
|
||||
You may always test another configuration using <tt>set_curcfg</tt> function.
|
||||
<p>
|
||||
Example:
|
||||
</p><pre>apod=# select * from ts_debug('Tsearch module for PostgreSQL 7.3.3');
|
||||
ts_name | tok_type | description | token | dict_name | tsvector
|
||||
---------+----------+-------------+------------+-----------+--------------
|
||||
default | lword | Latin word | Tsearch | {en_stem} | 'tsearch'
|
||||
default | lword | Latin word | module | {en_stem} | 'modul'
|
||||
default | lword | Latin word | for | {en_stem} |
|
||||
default | lword | Latin word | PostgreSQL | {en_stem} | 'postgresql'
|
||||
default | version | VERSION | 7.3.3 | {simple} | '7.3.3'
|
||||
</pre>
|
||||
Here:
|
||||
<br>
|
||||
<ul>
|
||||
<li>ts_name - configuration name
|
||||
</li><li>tok_type - token type
|
||||
</li><li>description - human readable name of tok_type
|
||||
</li><li>token - parser's token
|
||||
</li><li>dict_name - dictionary used for the token
|
||||
</li><li>tsvector - final result</li>
|
||||
</ul>
|
||||
|
||||
|
||||
<h2><a name="parsers">Parsers</a></h2>
|
||||
|
||||
Each parser is defined by a record in the <tt>pg_ts_parser</tt> table:
|
||||
|
||||
<pre>create table pg_ts_parser (
|
||||
prs_name text not null,
|
||||
prs_start regprocedure not null,
|
||||
prs_nexttoken regprocedure not null,
|
||||
prs_end regprocedure not null,
|
||||
prs_headline regprocedure not null,
|
||||
prs_lextype regprocedure not null,
|
||||
prs_comment text
|
||||
);</pre>
|
||||
|
||||
The <tt>prs_name</tt> uniquely identifies the parser,
|
||||
while <tt>prs_comment</tt> usually describes its name and version
|
||||
for the reference of users.
|
||||
The other items identify the low-level functions
|
||||
which make the parser operate,
|
||||
and are only of interest to someone writing a parser of their own.
|
||||
<p>
|
||||
The tsearch2 module comes with one parser named <tt>default</tt>
|
||||
which is suitable for parsing most plain text and HTML documents.
|
||||
</p><p>
|
||||
Each <tt><i>parser</i></tt> argument below
|
||||
must designate a parser with <tt><i>prs_name</i></tt>;
|
||||
the current parser is used when this argument is omitted.
|
||||
|
||||
</p><dl><dt>
|
||||
<tt>CREATE FUNCTION set_curprs(<i>parser</i>) RETURNS VOID</tt>
|
||||
</dt><dd>
|
||||
Selects a current parser
|
||||
which will be used when any of the following functions
|
||||
are called without a parser as an argument.
|
||||
</dd><dt>
|
||||
<tt>CREATE FUNCTION token_type(
|
||||
<em>[</em> <i>parser</i> <em>]</em>
|
||||
) RETURNS SETOF tokentype</tt>
|
||||
</dt><dd>
|
||||
Returns a table which defines and describes
|
||||
each kind of token the parser may produce as output.
|
||||
For each token type the table gives the <tt>tokid</tt>
|
||||
which the parser will label each token of that type,
|
||||
the <tt>alias</tt> which names the token type,
|
||||
and a short description <tt>descr</tt> for the user to read.
|
||||
</dd><dt>
|
||||
<tt>CREATE FUNCTION parse(
|
||||
<em>[</em> <i>parser</i>, <em>]</em> <i>document</i> TEXT
|
||||
) RETURNS SETOF tokenout</tt>
|
||||
</dt><dd>
|
||||
Parses the given document and returns a series of records,
|
||||
one for each token produced by parsing.
|
||||
Each token includes a <tt>tokid</tt> giving its type
|
||||
and a <tt>lexem</tt> which gives its content.
|
||||
</dd></dl>
|
||||
|
||||
<h2><a name="dictionaries">Dictionaries</a></h2>
|
||||
|
||||
Dictionary is a program, which accepts lexeme(s), usually those produced by a parser,
|
||||
on input and returns:
|
||||
<ul>
|
||||
<li>array of lexeme(s) if input lexeme is known to the dictionary
|
||||
<li>empty array - the dictionary knows the lexeme, but it is a stop word.
|
||||
<li> NULL - the dictionary doesn't recognize the input lexeme
|
||||
</ul>
|
||||
Usually, dictionaries are used for normalization of words (ispell, stemmer dictionaries),
|
||||
but see, for example, <tt>intdict</tt> dictionary (available from
|
||||
<a href="http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">Tsearch2</a> home page),
|
||||
which controls indexing of integers.
|
||||
|
||||
<P>
|
||||
Among the dictionaries which come installed with tsearch2 are:
|
||||
|
||||
<ul>
|
||||
<li><b>simple</b> simply folds uppercase letters to lowercase
|
||||
before returning the word.
|
||||
</li>
|
||||
<li><b>ispell_template</b> - template for ispell dictionaries.
|
||||
</li>
|
||||
<li><b>en_stem</b> runs an English Snowball stemmer on each word
|
||||
that attempts to reduce the various forms of a verb or noun
|
||||
to a single recognizable form.
|
||||
</li><li><b>ru_stem_koi8</b>, <b>ru_stem_utf8</b> runs a Russian Snowball stemmer on each word.
|
||||
</li>
|
||||
<li><b>synonym</b> - simple lexeme-to-lexeme replacement
|
||||
</li>
|
||||
<li><b>thesaurus_template</b> - template for <a href="#tz">thesaurus dictionary</a>. It's
|
||||
phrase-to-phrase replacement
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<P>
|
||||
Each dictionary is defined by an entry in the <tt>pg_ts_dict</tt> table:
|
||||
|
||||
<pre>CREATE TABLE pg_ts_dict (
|
||||
dict_name text not null,
|
||||
dict_init regprocedure,
|
||||
dict_initoption text,
|
||||
dict_lexize regprocedure not null,
|
||||
dict_comment text
|
||||
);</pre>
|
||||
|
||||
The <tt>dict_name</tt>
|
||||
serves as a unique identifier for the dictionary.
|
||||
The meaning of the <tt>dict_initoption</tt> varies among dictionaries,
|
||||
but for the built-in Snowball dictionaries
|
||||
it specifies a file from which stop words should be read.
|
||||
The <tt>dict_comment</tt> is a human-readable description of the dictionary.
|
||||
The other fields are internal function identifiers
|
||||
useful only to developers trying to implement their own dictionaries.
|
||||
|
||||
<blockquote>
|
||||
<b>WARNING:</b> Data files, used by dictionaries, should be in <tt>server_encoding</tt> to
|
||||
avoid possible problems !
|
||||
</blockquote>
|
||||
|
||||
<p>
|
||||
The argument named <tt><i>dictionary</i></tt>
|
||||
in each of the following functions
|
||||
should be <tt>dict_name</tt>
|
||||
identifying which dictionary should be used for the operation;
|
||||
if omitted then the current dictionary is used.
|
||||
|
||||
</p><dl><dt>
|
||||
<tt>CREATE FUNCTION set_curdict(<i>dictionary</i>) RETURNS VOID</tt>
|
||||
</dt><dd>
|
||||
Selects a current dictionary for use by functions
|
||||
that do not select a dictionary explicitly.
|
||||
</dd><dt>
|
||||
<tt>CREATE FUNCTION lexize(
|
||||
<em>[</em> <i>dictionary</i>, <em>]</em> <i>word</i> text)
|
||||
RETURNS TEXT[]</tt>
|
||||
</dt><dd>
|
||||
Reduces a single word to a lexeme.
|
||||
Note that lexemes are arrays of zero or more strings,
|
||||
since in some languages there might be several base words
|
||||
from which an inflected form could arise.
|
||||
</dd></dl>
|
||||
|
||||
<h3>Using dictionary templates</h3>
|
||||
Templates are used to define new dictionaries, for example,
|
||||
<pre>
|
||||
INSERT INTO pg_ts_dict
|
||||
(SELECT 'en_ispell', dict_init,
|
||||
'DictFile="/usr/local/share/dicts/ispell/english.dict",'
|
||||
'AffFile="/usr/local/share/dicts/ispell/english.aff",'
|
||||
'StopFile="/usr/local/share/dicts/english.stop"',
|
||||
dict_lexize
|
||||
FROM pg_ts_dict
|
||||
WHERE dict_name = 'ispell_template');
|
||||
</pre>
|
||||
|
||||
<h3>Working with stop words</h3>
|
||||
Ispell and snowball stemmers treat stop words differently:
|
||||
<ul>
|
||||
<li>ispell - normalizes the word and then looks up the normalized form in the stop-word file
|
||||
<li>snowball stemmer - first, it looks up the word in the stop-word file and then does its job.
|
||||
The reason - to minimize possible 'noise'.
|
||||
</ul>
|
||||
|
||||
<h2><a name="ranking">Ranking</a></h2>
|
||||
|
||||
Ranking attempts to measure how relevant documents are to particular queries
|
||||
by inspecting the number of times each search word appears in the document,
|
||||
and whether different search terms occur near each other.
|
||||
Note that this information is only available in unstripped vectors --
|
||||
ranking functions will only return a useful result
|
||||
for a <tt>tsvector</tt> which still has position information!
|
||||
<p>
|
||||
Note that the ranking functions supplied are just examples and
|
||||
do not belong to the tsearch2 core; you can
|
||||
write your very own ranking function and/or combine additional
|
||||
factors to fit your specific interest.
|
||||
</p>
|
||||
|
||||
The two ranking functions currently available are:
|
||||
|
||||
<dl><dt>
|
||||
<tt>CREATE FUNCTION rank(<br>
|
||||
<em>[</em> <i>weights</i> float4[], <em>]</em>
|
||||
<i>vector</i> TSVECTOR, <i>query</i> TSQUERY,
|
||||
<em>[</em> <i>normalization</i> int4 <em>]</em><br>
|
||||
) RETURNS float4</tt>
|
||||
</dt><dd>
|
||||
This is the ranking function from the old version of OpenFTS,
|
||||
and offers the ability to weight word instances more heavily
|
||||
depending on how you have classified them.
|
||||
The <i>weights</i> specify how heavily to weight each category of word:
|
||||
<pre>{<i>D-weight</i>, <i>C-weight</i>, <i>B-weight</i>, <i>A-weight</i>}</pre>
|
||||
If no weights are provided, then these defaults are used:
|
||||
<pre>{0.1, 0.2, 0.4, 1.0}</pre>
|
||||
Often weights are used to mark words from special areas of the document,
|
||||
like the title or an initial abstract,
|
||||
and make them more or less important than words in the document body.
|
||||
</dd><dt>
|
||||
<tt>CREATE FUNCTION rank_cd(<br>
|
||||
<em>[</em> <i>weights</i> float4[], <em>]</em>
|
||||
<i>vector</i> TSVECTOR, <i>query</i> TSQUERY,
|
||||
<em>[</em> <i>normalization</i> int4 <em>]</em><br>
|
||||
) RETURNS float4</tt>
|
||||
</dt><dd>
|
||||
This function computes the cover density ranking
|
||||
for the given document <i>vector</i> and <i>query</i>,
|
||||
as described in Clarke, Cormack, and Tudhope's
|
||||
"<a href="http://citeseer.nj.nec.com/clarke00relevance.html">Relevance Ranking for One to Three Term Queries</a>"
|
||||
in the 1999 <i>Information Processing and Management</i>.
|
||||
</dd>
|
||||
<dt>
|
||||
<tt>CREATE FUNCTION get_covers(vector TSVECTOR, query TSQUERY) RETURNS text</tt>
|
||||
</dt>
|
||||
<dd>
|
||||
Returns <tt>extents</tt>, which are the shortest non-nested sequences of words that satisfy a query.
|
||||
Extents (covers) are used in the <tt>rank_cd</tt> algorithm for fast calculation of proximity ranking.
|
||||
In the example below there are two extents - <tt><b>{1</b>...<b>}1</b> and <b>{2</b> ...<b>}2</b></tt>.
|
||||
<pre>
|
||||
=# select get_covers('1:1,2,10 2:4'::tsvector,'1& 2');
|
||||
get_covers
|
||||
----------------------
|
||||
1 {1 1 {2 2 }1 1 }2
|
||||
</pre>
|
||||
</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
<p>
|
||||
Both of these (<tt>rank(), rank_cd()</tt>) ranking functions
|
||||
take an integer <i>normalization</i> option
|
||||
that specifies whether a document's length should impact its rank.
|
||||
This is often desirable,
|
||||
since a hundred-word document with five instances of a search word
|
||||
is probably more relevant than a thousand-word document with five instances.
|
||||
The option can have the following values, which can be combined using "|" (e.g. 2|4) to
|
||||
take into account several factors:
|
||||
|
||||
</p>
|
||||
<ul>
|
||||
<li><tt>0</tt> (the default) ignores document length.</li>
|
||||
<li><tt>1</tt> divides the rank by the 1 + logarithm of the length </li>
|
||||
<li><tt>2</tt> divides the rank by the length itself.</li>
|
||||
<li><tt>4</tt> divides the rank by the mean harmonic distance between extents</li>
|
||||
<li><tt>8</tt> divides the rank by the number of unique words in document</li>
|
||||
<li><tt>16</tt> divides the rank by 1 + logarithm of the number of unique words in document
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h2><a name="headlines">Headlines</a></h2>
|
||||
|
||||
<dl><dt>
|
||||
<tt>CREATE FUNCTION headline(<br>
|
||||
<em>[</em> <i>id</i> int4, <em>|</em> <i>ts_name</i> text, <em>]</em>
|
||||
<i>document</i> text, <i>query</i> TSQUERY,
|
||||
<em>[</em> <i>options</i> text <em>]</em><br>
|
||||
) RETURNS text</tt>
|
||||
</dt><dd>
|
||||
Every form of the <tt>headline()</tt> function
|
||||
accepts a <tt>document</tt> along with a <tt>query</tt>,
|
||||
and returns one or more ellipsis-separated excerpts from the document
|
||||
in which terms from the query are highlighted.
|
||||
The configuration with which to parse the document
|
||||
can be specified by either its <i>id</i> or <i>ts_name</i>;
|
||||
if none is specified then the current configuration is used instead.
|
||||
<p>
|
||||
An <i>options</i> string if provided should be a comma-separated list
|
||||
of one or more '<i>option</i><tt>=</tt><i>value</i>' pairs.
|
||||
The available options are:
|
||||
</p><ul>
|
||||
<li><tt>StartSel</tt>, <tt>StopSel</tt> --
|
||||
the strings with which query words appearing in the document
|
||||
should be delimited to distinguish them from other excerpted words.
|
||||
</li><li><tt>MaxWords</tt>, <tt>MinWords</tt> --
|
||||
limits on the shortest and longest headlines you will accept.
|
||||
</li><li><tt>ShortWord</tt> --
|
||||
this prevents your headline from beginning or ending
|
||||
with a word which has this many characters or less.
|
||||
The default value of <tt>3</tt> should eliminate most English
|
||||
conjunctions and articles.
|
||||
</li><li><tt>HighlightAll</tt> --
|
||||
boolean flag; if TRUE, then the whole document will be highlighted.
|
||||
</li></ul>
|
||||
Any unspecified options receive these defaults:
|
||||
<pre>StartSel=&lt;b&gt;, StopSel=&lt;/b&gt;, MaxWords=35, MinWords=15, ShortWord=3, HighlightAll=FALSE
|
||||
</pre>
|
||||
</dd></dl>
|
||||
|
||||
|
||||
<h2><a name="indexes">Indexes</a></h2>
|
||||
Tsearch2 supports indexed access to tsvector in order to further speed up FTS. Note that indexes are not mandatory for FTS!
|
||||
<ul>
|
||||
<li> RD-Tree (Russian Doll Tree, matryoshka), based on GiST (Generalized Search Tree)
|
||||
<pre>
|
||||
=# create index fts_idx on apod using gist(fts);
|
||||
</pre>
|
||||
<li>GIN - Generalized Inverted Index
|
||||
<pre>
|
||||
=# create index fts_idx on apod using gin(fts);
|
||||
</pre>
|
||||
</ul>
|
||||
<b>GiST</b> index is very good for online update, but is not as scalable as <b>GIN</b> index,
|
||||
which, in turn, isn't good for updates. Both indexes support concurrency and recovery.
|
||||
|
||||
<h2><a name="tz">Thesaurus dictionary</a></h2>
|
||||
|
||||
<P>
|
||||
A thesaurus is a collection of words that includes information about the relationships of words and phrases,
|
||||
i.e., broader terms (BT), narrower terms (NT), preferred terms, non-preferred terms, related terms, etc.</p>
|
||||
<p>Basically, the thesaurus dictionary replaces all non-preferred terms with one preferred term and, optionally,
|
||||
preserves them for indexing. The thesaurus is used when indexing, so any changes to the thesaurus require reindexing.
|
||||
Tsearch2's <tt>thesaurus</tt> dictionary (TZ) is an extension of <tt>synonym</tt> dictionary
|
||||
with <b>phrase</b> support. Thesaurus is a plain file of the following format:
|
||||
<pre>
|
||||
# this is a comment
|
||||
sample word(s) : indexed word(s)
|
||||
...............................
|
||||
</pre>
|
||||
<ul>
|
||||
<li>The <strong>colon</strong> (:) symbol is used as a delimiter.</li>
|
||||
<li>Use asterisk (<b>*</b>) at the beginning of <tt>indexed word</tt> to skip subdictionary.
|
||||
It's still required, that <tt>sample words</tt> should be known.</li>
|
||||
<li>The thesaurus dictionary looks for the longest match.</li></ul>
|
||||
<P>
|
||||
TZ uses <strong>subdictionary</strong> (should be defined in tsearch2 configuration)
|
||||
to normalize thesaurus text. It's possible to define only <strong>one dictionary</strong>.
|
||||
Note that the subdictionary produces an error if it cannot recognize a word.
|
||||
In that case, you should remove definition line with this word or teach subdictionary to know it.
|
||||
</p>
|
||||
<p>Stop-words recognized by the subdictionary are replaced by a 'stop-word placeholder', i.e.,
|
||||
only their position is important.
|
||||
To break possible ties thesaurus applies the last definition. For example, consider
|
||||
thesaurus (with simple subdictionary) rules with pattern 'swsw'
|
||||
('s' designates stop-word and 'w' - known word): </p>
|
||||
<pre>
|
||||
a one the two : swsw
|
||||
the one a two : swsw2
|
||||
</pre>
|
||||
<p>Words 'a' and 'the' are stop-words defined in the configuration of a subdictionary.
|
||||
Thesaurus considers texts 'the one the two' and 'that one then two' as equal and will use definition
|
||||
'swsw2'.</p>
|
||||
<p>As a normal dictionary, it should be assigned to the specific lexeme types.
|
||||
Since TZ has a capability to recognize phrases it must remember its state and interact with parser.
|
||||
TZ uses these assignments to check if it should handle the next word or stop accumulation.
|
||||
Compiler of TZ should take care about proper configuration to avoid confusion.
|
||||
For example, if TZ is assigned to handle only <tt>lword</tt> lexeme, then TZ definition like
|
||||
' one 1:11' will not work, since lexeme type <tt>digit</tt> isn't assigned to the TZ.</p>
|
||||
|
||||
<h3>Configuration</h3>
|
||||
|
||||
<dl><dt>tsearch2</dt><dd></dd></dl><p>tsearch2 comes with thesaurus template, which could be used to define new dictionary: </p>
|
||||
<pre class="real">INSERT INTO pg_ts_dict
|
||||
(SELECT 'tz_simple', dict_init,
|
||||
'DictFile="/path/to/tz_simple.txt",'
|
||||
'Dictionary="en_stem"',
|
||||
dict_lexize
|
||||
FROM pg_ts_dict
|
||||
WHERE dict_name = 'thesaurus_template');
|
||||
|
||||
</pre>
|
||||
<p>Here: </p>
|
||||
<ul>
|
||||
<li><tt>tz_simple</tt> - is the dictionary name</li>
|
||||
<li><tt>DictFile="/path/to/tz_simple.txt"</tt> - is the location of thesaurus file</li>
|
||||
<li><tt>Dictionary="en_stem"</tt> defines the dictionary (snowball english stemmer) to use for thesaurus normalization. Note that the <em>en_stem</em> dictionary has its own configuration (stop-words, for example).</li>
|
||||
</ul>
|
||||
<p>Now, it's possible to use <tt>tz_simple</tt> in pg_ts_cfgmap, for example: </p>
|
||||
<pre>
|
||||
update pg_ts_cfgmap set dict_name='{tz_simple,en_stem}' where ts_name = 'default_russian' and
|
||||
tok_alias in ('lhword', 'lword', 'lpart_hword');
|
||||
</pre>
|
||||
<h3>Examples</h3>
|
||||
<p>tz_simple: </p>
|
||||
<pre>
|
||||
one : 1
|
||||
two : 2
|
||||
one two : 12
|
||||
the one : 1
|
||||
one 1 : 11
|
||||
</pre>
|
||||
<p>To see how the thesaurus works, one could use the <tt>to_tsvector</tt>, <tt>to_tsquery</tt> or <tt>plainto_tsquery</tt> functions: </p><pre class="real">=# select plainto_tsquery('default_russian',' one day is oneday');
|
||||
plainto_tsquery
|
||||
------------------------
|
||||
'1' & 'day' & 'oneday'
|
||||
|
||||
=# select plainto_tsquery('default_russian','one two day is oneday');
|
||||
plainto_tsquery
|
||||
-------------------------
|
||||
'12' & 'day' & 'oneday'
|
||||
|
||||
=# select plainto_tsquery('default_russian','the one');
|
||||
NOTICE: Thesaurus: word 'the' is recognized as stop-word, assign any stop-word (rule 3)
|
||||
plainto_tsquery
|
||||
-----------------
|
||||
'1'
|
||||
</pre>
|
||||
|
||||
Additional information about thesaurus dictionary is available from
|
||||
<a href="http://www.sai.msu.su/~megera/wiki/Thesaurus_dictionary">Wiki</a> page.
|
||||
</body></html>
|
|
@ -382,9 +382,9 @@ select numnode( 'new & york | qwery'::tsquery );
|
|||
create table test_tsquery (txtkeyword text, txtsample text);
|
||||
\set ECHO none
|
||||
alter table test_tsquery add column keyword tsquery;
|
||||
update test_tsquery set keyword = to_tsquery('default', txtkeyword);
|
||||
update test_tsquery set keyword = to_tsquery('english', txtkeyword);
|
||||
alter table test_tsquery add column sample tsquery;
|
||||
update test_tsquery set sample = to_tsquery('default', txtsample::text);
|
||||
update test_tsquery set sample = to_tsquery('english', txtsample::text);
|
||||
create unique index bt_tsq on test_tsquery (keyword);
|
||||
select count(*) from test_tsquery where keyword < 'new & york';
|
||||
count
|
||||
|
@ -451,7 +451,7 @@ set enable_seqscan=on;
|
|||
select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city');
|
||||
rewrite
|
||||
----------------------------------------------------------------------------------
|
||||
'qq' & 'foo' & 'bar' & ( 'city' & 'york' & 'new' | ( 'nyc' | 'apple' & 'big' ) )
|
||||
'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | ( 'nyc' | 'big' & 'apple' ) )
|
||||
(1 row)
|
||||
|
||||
select rewrite('moscow', 'select keyword, sample from test_tsquery'::text );
|
||||
|
@ -463,31 +463,13 @@ select rewrite('moscow', 'select keyword, sample from test_tsquery'::text );
|
|||
select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::text );
|
||||
rewrite
|
||||
-----------------------------------
|
||||
( 'moskva' | 'moscow' ) & 'hotel'
|
||||
'hotel' & ( 'moskva' | 'moscow' )
|
||||
(1 row)
|
||||
|
||||
select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text );
|
||||
rewrite
|
||||
-------------------------------------------------------------------------------------
|
||||
'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) )
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery;
|
||||
rewrite
|
||||
---------------------
|
||||
'moskva' | 'moscow'
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery;
|
||||
rewrite
|
||||
-----------------------------------
|
||||
( 'moskva' | 'moscow' ) & 'hotel'
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery;
|
||||
rewrite
|
||||
-------------------------------------------------------------------------------------
|
||||
'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) )
|
||||
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
|
||||
(1 row)
|
||||
|
||||
select keyword from test_tsquery where keyword @> 'new';
|
||||
|
@ -513,42 +495,6 @@ select keyword from test_tsquery where keyword <@ 'moscow';
|
|||
'moscow'
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword <@ query;
|
||||
rewrite
|
||||
---------------------
|
||||
'moskva' | 'moscow'
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword <@ query;
|
||||
rewrite
|
||||
-----------------------------------
|
||||
( 'moskva' | 'moscow' ) & 'hotel'
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword <@ query;
|
||||
rewrite
|
||||
-------------------------------------------------------------------------------------
|
||||
'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) )
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @> keyword;
|
||||
rewrite
|
||||
---------------------
|
||||
'moskva' | 'moscow'
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @> keyword;
|
||||
rewrite
|
||||
-----------------------------------
|
||||
( 'moskva' | 'moscow' ) & 'hotel'
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @> keyword;
|
||||
rewrite
|
||||
-------------------------------------------------------------------------------------
|
||||
'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) )
|
||||
(1 row)
|
||||
|
||||
create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops);
|
||||
set enable_seqscan='off';
|
||||
select keyword from test_tsquery where keyword @> 'new';
|
||||
|
@ -574,42 +520,6 @@ select keyword from test_tsquery where keyword <@ 'moscow';
|
|||
'moscow'
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword <@ query;
|
||||
rewrite
|
||||
---------------------
|
||||
'moskva' | 'moscow'
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword <@ query;
|
||||
rewrite
|
||||
-----------------------------------
|
||||
( 'moskva' | 'moscow' ) & 'hotel'
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword <@ query;
|
||||
rewrite
|
||||
-------------------------------------------------------------------------------------
|
||||
'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) )
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @> keyword;
|
||||
rewrite
|
||||
---------------------
|
||||
'moskva' | 'moscow'
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @> keyword;
|
||||
rewrite
|
||||
-----------------------------------
|
||||
( 'moskva' | 'moscow' ) & 'hotel'
|
||||
(1 row)
|
||||
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @> keyword;
|
||||
rewrite
|
||||
-------------------------------------------------------------------------------------
|
||||
'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) )
|
||||
(1 row)
|
||||
|
||||
set enable_seqscan='on';
|
||||
select lexize('simple', 'ASD56 hsdkf');
|
||||
lexize
|
||||
|
@ -617,38 +527,38 @@ select lexize('simple', 'ASD56 hsdkf');
|
|||
{"asd56 hsdkf"}
|
||||
(1 row)
|
||||
|
||||
select lexize('en_stem', 'SKIES Problems identity');
|
||||
select lexize('english_stem', 'SKIES Problems identity');
|
||||
lexize
|
||||
--------------------------
|
||||
{"skies problems ident"}
|
||||
(1 row)
|
||||
|
||||
select * from token_type('default');
|
||||
tokid | alias | descr
|
||||
-------+--------------+-----------------------------------
|
||||
1 | lword | Latin word
|
||||
2 | nlword | Non-latin word
|
||||
3 | word | Word
|
||||
4 | email | Email
|
||||
5 | url | URL
|
||||
6 | host | Host
|
||||
7 | sfloat | Scientific notation
|
||||
8 | version | VERSION
|
||||
9 | part_hword | Part of hyphenated word
|
||||
10 | nlpart_hword | Non-latin part of hyphenated word
|
||||
11 | lpart_hword | Latin part of hyphenated word
|
||||
12 | blank | Space symbols
|
||||
13 | tag | HTML Tag
|
||||
14 | protocol | Protocol head
|
||||
15 | hword | Hyphenated word
|
||||
16 | lhword | Latin hyphenated word
|
||||
17 | nlhword | Non-latin hyphenated word
|
||||
18 | uri | URI
|
||||
19 | file | File or path name
|
||||
20 | float | Decimal notation
|
||||
21 | int | Signed integer
|
||||
22 | uint | Unsigned integer
|
||||
23 | entity | HTML Entity
|
||||
tokid | alias | descr
|
||||
-------+-----------------+------------------------------------------
|
||||
1 | asciiword | Word, all ASCII
|
||||
2 | word | Word, all letters
|
||||
3 | numword | Word, letters and digits
|
||||
4 | email | Email address
|
||||
5 | url | URL
|
||||
6 | host | Host
|
||||
7 | sfloat | Scientific notation
|
||||
8 | version | Version number
|
||||
9 | hword_numpart | Hyphenated word part, letters and digits
|
||||
10 | hword_part | Hyphenated word part, all letters
|
||||
11 | hword_asciipart | Hyphenated word part, all ASCII
|
||||
12 | blank | Space symbols
|
||||
13 | tag | HTML tag
|
||||
14 | protocol | Protocol head
|
||||
15 | numhword | Hyphenated word, letters and digits
|
||||
16 | asciihword | Hyphenated word, all ASCII
|
||||
17 | hword | Hyphenated word, all letters
|
||||
18 | url_path | URL path
|
||||
19 | file | File or path name
|
||||
20 | float | Decimal notation
|
||||
21 | int | Signed integer
|
||||
22 | uint | Unsigned integer
|
||||
23 | entity | HTML entity
|
||||
(23 rows)
|
||||
|
||||
select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
||||
|
@ -768,15 +678,11 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
|
|||
12 | .
|
||||
20 | 4.2
|
||||
12 | ,
|
||||
15 | readline-4.2
|
||||
11 | readline
|
||||
12 | -
|
||||
20 | 4.2
|
||||
1 | readline
|
||||
20 | -4.2
|
||||
12 |
|
||||
15 | readline-4.2
|
||||
11 | readline
|
||||
12 | -
|
||||
20 | 4.2
|
||||
1 | readline
|
||||
20 | -4.2
|
||||
12 | .
|
||||
22 | 234
|
||||
12 |
|
||||
|
@ -793,23 +699,23 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
|
|||
12 |
|
||||
12 | <>
|
||||
1 | qwerty
|
||||
(135 rows)
|
||||
(131 rows)
|
||||
|
||||
SELECT to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
||||
SELECT to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
||||
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
||||
<i <b> wow < jqw <> qwerty');
|
||||
to_tsvector
|
||||
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
'ad':17 'dw':19 'jf':39 '234':63 '345':1 '4.2':54,55,56,59,62 '455':31 'jqw':66 'qwe':2,18,27,28,35 'wer':36 'wow':65 'asdf':37 'ewr1':43 'qwer':38 'sdjk':40 '5.005':32 'efd.r':3 'ewri2':44 'hjwer':42 'qwqwe':29 'wefjn':48 'gist.c':52 'gist.h':50 'qwerti':67 '234.435':30 'qwe-wer':34 'readlin':53,58,61 'www.com':4 '+4.0e-10':26 'gist.h.c':51 'rewt/ewr':47 '/?ad=qwe&dw':7,10,14,22 '/wqe-324/ewr':49 'aew.werc.ewr':6 'readline-4.2':57,60 '1aew.werc.ewr':9 '2aew.werc.ewr':11 '3aew.werc.ewr':13 '4aew.werc.ewr':15 '/usr/local/fff':45 '/awdf/dwqe/4325':46 'teodor@stack.net':33 '/?ad=qwe&dw=%20%32':25 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '7aew.werc.ewr:8100':24 'aew.werc.ewr/?ad=qwe&dw':5 '1aew.werc.ewr/?ad=qwe&dw':8 '3aew.werc.ewr/?ad=qwe&dw':12 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23
|
||||
to_tsvector
|
||||
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
'ad':17 'dw':19 'jf':39 '234':61 '345':1 '4.2':54,55,56 '455':31 'jqw':64 'qwe':2,18,27,28,35 'wer':36 'wow':63 '-4.2':58,60 'asdf':37 'ewr1':43 'qwer':38 'sdjk':40 '5.005':32 'efd.r':3 'ewri2':44 'hjwer':42 'qwqwe':29 'wefjn':48 'gist.c':52 'gist.h':50 'qwerti':65 '234.435':30 'qwe-wer':34 'readlin':53,57,59 'www.com':4 '+4.0e-10':26 'gist.h.c':51 'rewt/ewr':47 '/?ad=qwe&dw':7,10,14,22 '/wqe-324/ewr':49 'aew.werc.ewr':6 '1aew.werc.ewr':9 '2aew.werc.ewr':11 '3aew.werc.ewr':13 '4aew.werc.ewr':15 '/usr/local/fff':45 '/awdf/dwqe/4325':46 'teodor@stack.net':33 '/?ad=qwe&dw=%20%32':25 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '7aew.werc.ewr:8100':24 'aew.werc.ewr/?ad=qwe&dw':5 '1aew.werc.ewr/?ad=qwe&dw':8 '3aew.werc.ewr/?ad=qwe&dw':12 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23
|
||||
(1 row)
|
||||
|
||||
SELECT length(to_tsvector('default', '345 qw'));
|
||||
SELECT length(to_tsvector('english', '345 qw'));
|
||||
length
|
||||
--------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT length(to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
||||
SELECT length(to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
||||
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
||||
<i <b> wow < jqw <> qwerty'));
|
||||
length
|
||||
|
@ -817,7 +723,7 @@ SELECT length(to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://ae
|
|||
51
|
||||
(1 row)
|
||||
|
||||
select to_tsquery('default', 'qwe & sKies ');
|
||||
select to_tsquery('english', 'qwe & sKies ');
|
||||
to_tsquery
|
||||
---------------
|
||||
'qwe' & 'sky'
|
||||
|
@ -829,61 +735,61 @@ select to_tsquery('simple', 'qwe & sKies ');
|
|||
'qwe' & 'skies'
|
||||
(1 row)
|
||||
|
||||
select to_tsquery('default', '''the wether'':dc & '' sKies '':BC ');
|
||||
select to_tsquery('english', '''the wether'':dc & '' sKies '':BC ');
|
||||
to_tsquery
|
||||
------------------------
|
||||
'wether':CD & 'sky':BC
|
||||
(1 row)
|
||||
|
||||
select to_tsquery('default', 'asd&(and|fghj)');
|
||||
select to_tsquery('english', 'asd&(and|fghj)');
|
||||
to_tsquery
|
||||
----------------
|
||||
'asd' & 'fghj'
|
||||
(1 row)
|
||||
|
||||
select to_tsquery('default', '(asd&and)|fghj');
|
||||
select to_tsquery('english', '(asd&and)|fghj');
|
||||
to_tsquery
|
||||
----------------
|
||||
'asd' | 'fghj'
|
||||
(1 row)
|
||||
|
||||
select to_tsquery('default', '(asd&!and)|fghj');
|
||||
select to_tsquery('english', '(asd&!and)|fghj');
|
||||
to_tsquery
|
||||
----------------
|
||||
'asd' | 'fghj'
|
||||
(1 row)
|
||||
|
||||
select to_tsquery('default', '(the|and&(i&1))&fghj');
|
||||
select to_tsquery('english', '(the|and&(i&1))&fghj');
|
||||
to_tsquery
|
||||
--------------
|
||||
'1' & 'fghj'
|
||||
(1 row)
|
||||
|
||||
select plainto_tsquery('default', 'the and z 1))& fghj');
|
||||
select plainto_tsquery('english', 'the and z 1))& fghj');
|
||||
plainto_tsquery
|
||||
--------------------
|
||||
'z' & '1' & 'fghj'
|
||||
(1 row)
|
||||
|
||||
select plainto_tsquery('default', 'foo bar') && plainto_tsquery('default', 'asd');
|
||||
select plainto_tsquery('english', 'foo bar') && plainto_tsquery('english', 'asd');
|
||||
?column?
|
||||
-----------------------
|
||||
'foo' & 'bar' & 'asd'
|
||||
(1 row)
|
||||
|
||||
select plainto_tsquery('default', 'foo bar') || plainto_tsquery('default', 'asd fg');
|
||||
select plainto_tsquery('english', 'foo bar') || plainto_tsquery('english', 'asd fg');
|
||||
?column?
|
||||
------------------------------
|
||||
'foo' & 'bar' | 'asd' & 'fg'
|
||||
(1 row)
|
||||
|
||||
select plainto_tsquery('default', 'foo bar') || !!plainto_tsquery('default', 'asd fg');
|
||||
select plainto_tsquery('english', 'foo bar') || !!plainto_tsquery('english', 'asd fg');
|
||||
?column?
|
||||
-----------------------------------
|
||||
'foo' & 'bar' | !( 'asd' & 'fg' )
|
||||
(1 row)
|
||||
|
||||
select plainto_tsquery('default', 'foo bar') && 'asd | fg';
|
||||
select plainto_tsquery('english', 'foo bar') && 'asd | fg';
|
||||
?column?
|
||||
----------------------------------
|
||||
'foo' & 'bar' & ( 'asd' | 'fg' )
|
||||
|
@ -995,7 +901,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|||
39
|
||||
(1 row)
|
||||
|
||||
select set_curcfg('default');
|
||||
select set_curcfg('english');
|
||||
set_curcfg
|
||||
------------
|
||||
|
||||
|
@ -1024,11 +930,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
|||
0
|
||||
(1 row)
|
||||
|
||||
drop trigger tsvectorupdate on test_tsvector;
|
||||
create function wow(text) returns text as 'select $1 || '' copyright''; ' language sql;
|
||||
create trigger tsvectorupdate before update or insert on test_tsvector
|
||||
for each row execute procedure tsearch2(a, wow, t);
|
||||
insert into test_tsvector (t) values ('345 qwerty');
|
||||
insert into test_tsvector (t) values ('345 qwerty copyright');
|
||||
select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
||||
count
|
||||
-------
|
||||
|
@ -2135,7 +2037,6 @@ select * from stat('select a from test_tsvector') order by ndoc desc, nentry des
|
|||
8w | 2 | 2
|
||||
9f | 2 | 2
|
||||
9y | 2 | 2
|
||||
copyright | 2 | 2
|
||||
foo | 1 | 3
|
||||
bar | 1 | 2
|
||||
0e | 1 | 1
|
||||
|
@ -2227,6 +2128,7 @@ select * from stat('select a from test_tsvector') order by ndoc desc, nentry des
|
|||
9h | 1 | 1
|
||||
9r | 1 | 1
|
||||
9w | 1 | 1
|
||||
copyright | 1 | 1
|
||||
qwerti | 1 | 1
|
||||
(1146 rows)
|
||||
|
||||
|
@ -2257,11 +2159,11 @@ select * from stat('select a from test_tsvector','d') order by ndoc desc, nentry
|
|||
word | ndoc | nentry
|
||||
-----------+------+--------
|
||||
a | 2 | 2
|
||||
copyright | 2 | 2
|
||||
foo | 1 | 3
|
||||
bar | 1 | 2
|
||||
345 | 1 | 1
|
||||
b | 1 | 1
|
||||
copyright | 1 | 1
|
||||
qq | 1 | 1
|
||||
qwerti | 1 | 1
|
||||
(8 rows)
|
||||
|
@ -2271,22 +2173,15 @@ select * from stat('select a from test_tsvector','ad') order by ndoc desc, nentr
|
|||
-----------+------+--------
|
||||
a | 2 | 4
|
||||
b | 2 | 4
|
||||
copyright | 2 | 2
|
||||
foo | 1 | 3
|
||||
bar | 1 | 2
|
||||
345 | 1 | 1
|
||||
copyright | 1 | 1
|
||||
qq | 1 | 1
|
||||
qwerti | 1 | 1
|
||||
(8 rows)
|
||||
|
||||
select reset_tsearch();
|
||||
NOTICE: TSearch cache cleaned
|
||||
reset_tsearch
|
||||
---------------
|
||||
|
||||
(1 row)
|
||||
|
||||
select to_tsquery('default', 'skies & books');
|
||||
select to_tsquery('english', 'skies & books');
|
||||
to_tsquery
|
||||
----------------
|
||||
'sky' & 'book'
|
||||
|
@ -2334,48 +2229,6 @@ Upon a woman s face. E. J. Pratt (1882 1964)
|
|||
0.2
|
||||
(1 row)
|
||||
|
||||
select get_covers(to_tsvector('Erosion It took the sea a thousand years,
|
||||
A thousand years to trace
|
||||
The granite features of this cliff
|
||||
In crag and scarp and base.
|
||||
It took the sea an hour one night
|
||||
An hour of storm to place
|
||||
The sculpture of these granite seams,
|
||||
Upon a woman s face. E. J. Pratt (1882 1964)
|
||||
'), to_tsquery('sea&thousand&years'));
|
||||
get_covers
|
||||
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964
|
||||
(1 row)
|
||||
|
||||
select get_covers(to_tsvector('Erosion It took the sea a thousand years,
|
||||
A thousand years to trace
|
||||
The granite features of this cliff
|
||||
In crag and scarp and base.
|
||||
It took the sea an hour one night
|
||||
An hour of storm to place
|
||||
The sculpture of these granite seams,
|
||||
Upon a woman s face. E. J. Pratt (1882 1964)
|
||||
'), to_tsquery('granite&sea'));
|
||||
get_covers
|
||||
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964
|
||||
(1 row)
|
||||
|
||||
select get_covers(to_tsvector('Erosion It took the sea a thousand years,
|
||||
A thousand years to trace
|
||||
The granite features of this cliff
|
||||
In crag and scarp and base.
|
||||
It took the sea an hour one night
|
||||
An hour of storm to place
|
||||
The sculpture of these granite seams,
|
||||
Upon a woman s face. E. J. Pratt (1882 1964)
|
||||
'), to_tsquery('sea'));
|
||||
get_covers
|
||||
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964
|
||||
(1 row)
|
||||
|
||||
select headline('Erosion It took the sea a thousand years,
|
||||
A thousand years to trace
|
||||
The granite features of this cliff
|
||||
|
@ -2456,15 +2309,19 @@ to_tsquery('sea&foo'), 'HighlightAll=true');
|
|||
(1 row)
|
||||
|
||||
--check debug
|
||||
select * from ts_debug('Tsearch module for PostgreSQL 7.3.3');
|
||||
ts_name | tok_type | description | token | dict_name | tsvector
|
||||
---------+----------+-------------+------------+-----------+--------------
|
||||
default | lword | Latin word | Tsearch | {en_stem} | 'tsearch'
|
||||
default | lword | Latin word | module | {en_stem} | 'modul'
|
||||
default | lword | Latin word | for | {en_stem} |
|
||||
default | lword | Latin word | PostgreSQL | {en_stem} | 'postgresql'
|
||||
default | version | VERSION | 7.3.3 | {simple} | '7.3.3'
|
||||
(5 rows)
|
||||
select * from public.ts_debug('Tsearch module for PostgreSQL 7.3.3');
|
||||
ts_name | tok_type | description | token | dict_name | tsvector
|
||||
---------+-----------+-----------------+------------+----------------+--------------
|
||||
english | asciiword | Word, all ASCII | Tsearch | {english_stem} | 'tsearch'
|
||||
english | blank | Space symbols | | {} |
|
||||
english | asciiword | Word, all ASCII | module | {english_stem} | 'modul'
|
||||
english | blank | Space symbols | | {} |
|
||||
english | asciiword | Word, all ASCII | for | {english_stem} |
|
||||
english | blank | Space symbols | | {} |
|
||||
english | asciiword | Word, all ASCII | PostgreSQL | {english_stem} | 'postgresql'
|
||||
english | blank | Space symbols | | {} |
|
||||
english | version | Version number | 7.3.3 | {simple} | '7.3.3'
|
||||
(9 rows)
|
||||
|
||||
--check ordering
|
||||
insert into test_tsvector values (null, null);
|
||||
|
@ -2480,10 +2337,10 @@ select a is null, a from test_tsvector order by a;
|
|||
f |
|
||||
f |
|
||||
f |
|
||||
f | '345':1 'qwerti':2 'copyright':3
|
||||
f | 'qq':7 'bar':2,8 'foo':1,3,6 'copyright':9
|
||||
f | 'a':1A,2,3B 'b':5A,6A,7C,8
|
||||
f | 'a':1A,2,3C 'b':5A,6B,7C,8B
|
||||
f | 'qq':7 'bar':2,8 'foo':1,3,6
|
||||
f | '345':1 'qwerti':2 'copyright':3
|
||||
f | '7w' 'ch' 'd7' 'eo' 'gw' 'i4' 'lq' 'o6' 'qt' 'y0'
|
||||
f | 'ar' 'ei' 'kq' 'ma' 'qa' 'qh' 'qq' 'qz' 'rx' 'st'
|
||||
f | 'gs' 'i6' 'i9' 'j2' 'l0' 'oq' 'qx' 'sc' 'xe' 'yu'
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
# Template Makefile for a generated tsearch2 dictionary module.
# config.sh substitutes the CFG_* placeholders (first occurrence on each
# line) and rewrites the line that begins with PG_CPPFLAGS, so keep that
# assignment at the start of its own line.

top_builddir = ../..
subdir = contrib/CFG_DIR
include $(top_builddir)/src/Makefile.global

# Module name, generated SQL script, documentation and object files.
MODULE_big = dict_CFG_MODNAME
DATA_built = dict_CFG_MODNAME.sql
DOCS = README.CFG_MODNAME
OBJS = CFG_OFILE

# Link against the tsearch2 static library built in the sibling directory.
SHLIB_LINK = ../tsearch2/libtsearch2.a
PG_CPPFLAGS =

include $(top_srcdir)/contrib/contrib-global.mk
|
|
@ -1,131 +0,0 @@
|
|||
Gendict - generate dictionary templates for contrib/tsearch2 module.
|
||||
|
||||
This utility helps people create dictionaries for the contrib/tsearch v2
|
||||
module. Particularly, it has built-in support for snowball stemmers.
|
||||
|
||||
Programming API to tsearch2 dictionaries is described in tsearch v2
|
||||
documentation.
|
||||
|
||||
|
||||
Prerequisites:
|
||||
|
||||
* PostgreSQL 7.3 and above.
|
||||
|
||||
* You need tsearch2 module sources already compiled
|
||||
|
||||
* Rights to install contrib modules
|
||||
|
||||
Usage:
|
||||
|
||||
run config.sh without parameters to see options and arguments
|
||||
|
||||
Usage:
|
||||
./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
|
||||
-v - be verbose
|
||||
-d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
|
||||
-C COMMENT - dictionary comment
|
||||
Generate Snowball stemmer:
|
||||
./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
|
||||
-s - generate Snowball wrapper
|
||||
-p - prefix of Snowball's function, (default DICTNAME)
|
||||
Generate template dictionary:
|
||||
./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
|
||||
-c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
|
||||
These files will be used in Makefile.
|
||||
-h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
|
||||
These files will be used in Makefile and subinclude.h
|
||||
-i - dictionary has init method
|
||||
|
||||
|
||||
Example 1:
|
||||
|
||||
Create Portuguese stemmer
|
||||
|
||||
0. cd PGSQL_SRC/contrib/tsearch2/gendict
|
||||
|
||||
1. Obtain stem.{c,h} files for Portuguese
|
||||
|
||||
wget http://snowball.tartarus.org/portuguese/stem.c
|
||||
wget http://snowball.tartarus.org/portuguese/stem.h
|
||||
|
||||
2. Create template files for Portuguese
|
||||
|
||||
./config.sh -n pt -s -p portuguese_ISO_8859_1 -v -C'Snowball stemmer for Portuguese'
|
||||
|
||||
Note that the argument for the -p option should be *the same* as the name of the stemming
|
||||
function in stem.c (without _stem)
|
||||
|
||||
A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
|
||||
directory.
|
||||
|
||||
3. Compile and install dictionary
|
||||
|
||||
cd PGSQL_SRC/contrib/dict_pt
|
||||
make
|
||||
make install
|
||||
|
||||
4. Test it
|
||||
|
||||
Sample portuguese words with the stemmed forms are available
|
||||
from http://snowball.tartarus.org/portuguese/stemmer.html
|
||||
|
||||
createdb testdict
|
||||
psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
|
||||
psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
|
||||
psql -d testdict -c "select lexize('pt','bobagem');"
|
||||
lexize
|
||||
---------
|
||||
{bobag}
|
||||
(1 row)
|
||||
|
||||
Here is what I have in pg_ts_dict table
|
||||
|
||||
psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
|
||||
dict_name | dict_init | dict_initoption | dict_lexize | dict_comment
|
||||
-----------+--------------------+-----------------+---------------------------------------+---------------------------------
|
||||
pt | dinit_pt(internal) | | snb_lexize(internal,internal,integer) | Snowball stemmer for Portuguese
|
||||
|
||||
(1 row)
|
||||
|
||||
|
||||
Note that you have now installed the dictionary and the corresponding
|
||||
entry in tsearch configuration and you may modify it using
|
||||
plain SQL commands, for example, specify stop words.
|
||||
|
||||
Example 2:
|
||||
|
||||
a) Simple template dictionary with init method
|
||||
|
||||
./config.sh -n wow -v -i -C WOW
|
||||
|
||||
b) Create simple template dict (without init method):
|
||||
./config.sh -n wow -v -C WOW
|
||||
|
||||
The same as above, but the dictionary will not have an init method
|
||||
|
||||
Dictionaries obtained in a) and b) are fully working and ready
|
||||
for use:
|
||||
a) lowercase input word and remove it if it is a stop word
|
||||
b) recognizes any word
|
||||
|
||||
c) Simple template dictionary with source files (with init method):
|
||||
|
||||
./config.sh -n wow -v -i -c a.c -h a.h -C WOW
|
||||
|
||||
Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
|
||||
These files will be used in Makefile.
|
||||
|
||||
Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
|
||||
These files will be used in Makefile and subinclude.h
|
||||
|
||||
d) Simple template dictionary with source files (without init method):
|
||||
|
||||
./config.sh -n wow -v -c a.c -h a.h -C WOW
|
||||
|
||||
The same as above, but the dictionary will not have an init method
|
||||
|
||||
After that you have sources in PGSQL_SRC/contrib/dict_wow and
|
||||
you may edit them to create actual dictionary.
|
||||
|
||||
Please check the Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
|
||||
for additional information about "Gendict tutorial" and dictionaries.
|
|
@ -1,190 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Print the synopsis for all invocation forms on stdout and terminate the
# script with exit status 1.
usage () {
cat <<EOF
Usage:
$0 -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
 -v - be verbose
 -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
 -C COMMENT - dictionary comment
Generate Snowball stemmer:
$0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
 -s - generate Snowball wrapper
 -p - prefix of Snowball's function, (default DICTNAME)
Generate template dictionary:
$0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
 -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
 These files will be used in Makefile.
 -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
 These files will be used in Makefile and subinclude.h
 -i - dictionary has init method
EOF
	exit 1
}
|
||||
|
||||
# Option defaults; each can be overridden on the command line.
dictname=
stemmode=no
verbose=no
cfile=
hfile=
dir=
hasinit=no
comment=
prefix=

while getopts n:c:C:h:d:p:vis opt
do
	case "$opt" in
		n) dictname="$OPTARG";;
		s) stemmode=yes;;
		p) prefix="$OPTARG";;
		c) cfile="$OPTARG";;
		h) hfile="$OPTARG";;
		i) hasinit=yes;;
		v) verbose=yes;;
		d) dir="$OPTARG";;
		C) comment="$OPTARG";;
		\?) usage;;
	esac
done

# The dictionary name is mandatory.
[ -z "$dictname" ] && usage

# The name is used in lower case everywhere below.
dictname=$(printf '%s\n' "$dictname" | tr '[:upper:]' '[:lower:]')

# Snowball mode forces an init method and the fixed stem.c/stem.h sources;
# the function prefix defaults to the dictionary name.
if [ "$stemmode" = "yes" ]; then
	[ -z "$prefix" ] && prefix=$dictname
	hasinit=yes
	cfile="stem.c"
	hfile="stem.h"
fi

[ -z "$dir" ] && dir="dict_$dictname"

# Turn the comment into an SQL literal, or the SQL keyword null when no
# comment was given.  Embedded single quotes are doubled so the literal
# stays well-formed.
if [ -z "$comment" ]; then
	comment=null
else
	comment=$(printf '%s\n' "$comment" | sed "s/'/''/g")
	comment="'$comment'"
fi

# Object list: every .c source becomes a .o (only a real ".c" suffix is
# rewritten), followed by the object of the generated wrapper.
ofile=
for f in $cfile
do
	f=$(printf '%s\n' "$f" | sed 's#\.c$#.o#')
	ofile="$ofile $f"
done

if [ "$stemmode" = "yes" ]; then
	ofile="$ofile dict_snowball.o"
else
	ofile="$ofile dict_tmpl.o"
fi
|
||||
|
||||
# In verbose mode, report the effective settings before generating anything.
if [ "$verbose" = "yes" ]; then
	echo Dictname: "'"$dictname"'"
	echo Snowball stemmer: $stemmode
	echo Has init method: $hasinit
	[ "$stemmode" = "yes" ] && echo Function prefix: $prefix
	echo Source files: $cfile
	echo Header files: $hfile
	echo Object files: $ofile
	echo Comment: $comment
	echo Directory: ../../$dir
fi


# Create the target directory unless it already exists.
# printf is used for the progress prefix because "echo -n" is not portable.
[ "$verbose" = "yes" ] && printf '%s' 'Build directory... '
if [ ! -d "../../$dir" ]; then
	if ! mkdir "../../$dir"; then
		echo "Can't create directory ../../$dir"
		exit 1
	fi
fi
[ "$verbose" = "yes" ] && echo ok


# Generate the Makefile from Makefile.IN: fill in the placeholders and
# replace the PG_CPPFLAGS line with the include paths for this mode.
[ "$verbose" = "yes" ] && printf '%s' 'Build Makefile... '
if [ "$stemmode" = "yes" ]; then
	cppflags="-I../tsearch2/snowball -I../tsearch2"
else
	cppflags="-I../tsearch2"
fi
sed -e "s#CFG_DIR#$dir#" \
	-e "s#CFG_MODNAME#$dictname#" \
	-e "s#CFG_OFILE#$ofile#" \
	-e "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = $cppflags#" \
	< Makefile.IN > "../../$dir/Makefile"
[ "$verbose" = "yes" ] && echo ok
|
||||
|
||||
|
||||
# Generate dict_<name>.sql.in from sql.IN.
# Template lines carry a marker prefix: lines for the selected variant have
# the marker stripped, lines for the other variant are blanked.
[ "$verbose" = "yes" ] && printf '%s' "Build dict_$dictname.sql.in... "

# Escape the characters that are special in a sed replacement (backslash,
# & and the # delimiter) so the comment text is inserted literally.
comment_repl=$(printf '%s\n' "$comment" | sed 's/[\\&#]/\\&/g')

if [ "$hasinit" = "yes" ]; then
	keep_init=HASINIT
	drop_init=NOINIT
else
	keep_init=NOINIT
	drop_init=HASINIT
fi

if [ "$stemmode" = "yes" ]; then
	keep_snb=ISSNOWBALL
	drop_snb=NOSNOWBALL
else
	keep_snb=NOSNOWBALL
	drop_snb=ISSNOWBALL
fi

sed -e "s#CFG_MODNAME#$dictname#" \
	-e "s#CFG_COMMENT#$comment_repl#" \
	-e "s#^${keep_init}##" \
	-e "s#^${drop_init}.*\$##" \
	-e "s#^${keep_snb}##" \
	-e "s#^${drop_snb}.*\$##" \
	< sql.IN > "../../$dir/dict_$dictname.sql.in"
[ "$verbose" = "yes" ] && echo ok
|
||||
|
||||
|
||||
|
||||
# Copy the user-supplied source and header files into the target directory.
# $cfile and $hfile are deliberately left unquoted: each may hold a
# whitespace-separated list of file names.
if [ -n "$cfile" ] || [ -n "$hfile" ]; then
	[ "$verbose" = "yes" ] && printf '%s' 'Copy source and header files... '
	if [ -n "$cfile" ]; then
		if [ "$stemmode" = "yes" ]; then
			# Stemmer sources are copied through sed, which removes the
			# "../runtime/" path prefix (presumably from #include lines).
			for cfn in $cfile
			do
				if ! sed 's#\.\./runtime/##' < "$cfn" > "../../$dir/$cfn"; then
					echo "Can't cp all or one of files: $cfile"
					exit 1
				fi
			done
		else
			if ! cp $cfile "../../$dir"; then
				echo "Can't cp all or one of files: $cfile"
				exit 1
			fi
		fi
	fi
	if [ -n "$hfile" ]; then
		if ! cp $hfile "../../$dir"; then
			echo "Can't cp all or one of files: $hfile"
			exit 1
		fi
	fi
	[ "$verbose" = "yes" ] && echo ok
fi
|
||||
|
||||
|
||||
# Write subinclude.h: one #include line per header file.
[ "$verbose" = "yes" ] && printf '%s' 'Build sub-include header... '
# Truncate with ":" rather than "echo -n", which some /bin/sh
# implementations print literally and would put "-n" into the header.
: > "../../$dir/subinclude.h"
for i in $hfile
do
	echo "#include \"$i\"" >> "../../$dir/subinclude.h"
done
[ "$verbose" = "yes" ] && echo ok
|
||||
|
||||
|
||||
# Generate the dictionary C source from the matching template.
if [ "$stemmode" = "yes" ]; then
	[ "$verbose" = "yes" ] && printf '%s' 'Build Snowball stemmer... '
	sed -e "s#CFG_MODNAME#$dictname#g" \
		-e "s#CFG_PREFIX#$prefix#g" \
		< dict_snowball.c.IN > "../../$dir/dict_snowball.c"
else
	[ "$verbose" = "yes" ] && printf '%s' 'Build dictionary... '
	# Strip the marker of the selected variant; blank the other variant's lines.
	if [ "$hasinit" = "yes" ]; then
		keep_marker=HASINIT
		drop_marker=NOINIT
	else
		keep_marker=NOINIT
		drop_marker=HASINIT
	fi
	sed -e "s#CFG_MODNAME#$dictname#g" \
		-e "s#^${keep_marker}##" \
		-e "s#^${drop_marker}.*\$##" \
		< dict_tmpl.c.IN > "../../$dir/dict_tmpl.c"
fi
[ "$verbose" = "yes" ] && echo ok


# Write a one-line README describing what was generated.
[ "$verbose" = "yes" ] && printf '%s' "Build README.$dictname... "
if [ "$stemmode" = "yes" ]; then
	echo "Autogenerated Snowball's wrapper for $prefix" > "../../$dir/README.$dictname"
else
	echo "Autogenerated template for $dictname" > "../../$dir/README.$dictname"
fi
[ "$verbose" = "yes" ] && echo ok

echo All is done
|
||||
|
|
@ -1,56 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/gendict/dict_snowball.c.IN,v 1.5 2006/07/14 05:28:27 tgl Exp $ */
|
||||
|
||||
/*
|
||||
* example of Snowball dictionary
|
||||
* http://snowball.tartarus.org/
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "dict.h"
|
||||
#include "common.h"
|
||||
#include "snowball/header.h"
|
||||
#include "subinclude.h"
|
||||
#include "ts_locale.h"
|
||||
|
||||
typedef struct {
|
||||
struct SN_env *z;
|
||||
StopList stoplist;
|
||||
int (*stem)(struct SN_env * z);
|
||||
} DictSnowball;
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
|
||||
Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
|
||||
DictSnowball *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
|
||||
|
||||
if ( !d )
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
memset(d,0,sizeof(DictSnowball));
|
||||
d->stoplist.wordop=lowerstr;
|
||||
|
||||
if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
|
||||
text *in = PG_GETARG_TEXT_P(0);
|
||||
readstoplist(in, &(d->stoplist));
|
||||
sortstoplist(&(d->stoplist));
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
}
|
||||
|
||||
d->z = CFG_PREFIX_create_env();
|
||||
if (!d->z) {
|
||||
freestoplist(&(d->stoplist));
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
d->stem=CFG_PREFIX_stem;
|
||||
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/gendict/dict_tmpl.c.IN,v 1.6 2006/07/14 05:28:27 tgl Exp $ */
|
||||
|
||||
/*
|
||||
* example of dictionary
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "dict.h"
|
||||
#include "common.h"
|
||||
|
||||
#include "subinclude.h"
|
||||
#include "ts_locale.h"
|
||||
|
||||
HASINIT typedef struct {
|
||||
HASINIT StopList stoplist;
|
||||
HASINIT } DictExample;
|
||||
|
||||
|
||||
HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
|
||||
HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
|
||||
|
||||
HASINIT Datum
|
||||
HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
|
||||
HASINIT DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
|
||||
HASINIT
|
||||
HASINIT if ( !d )
|
||||
HASINIT ereport(ERROR,
|
||||
HASINIT (errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
HASINIT errmsg("out of memory")));
|
||||
HASINIT memset(d,0,sizeof(DictExample));
|
||||
HASINIT
|
||||
HASINIT d->stoplist.wordop=lowerstr;
|
||||
HASINIT
|
||||
HASINIT /* Your INIT code */
|
||||
HASINIT
|
||||
HASINIT if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
|
||||
HASINIT text *in = PG_GETARG_TEXT_P(0);
|
||||
HASINIT readstoplist(in, &(d->stoplist));
|
||||
HASINIT sortstoplist(&(d->stoplist));
|
||||
HASINIT PG_FREE_IF_COPY(in, 0);
|
||||
HASINIT }
|
||||
HASINIT
|
||||
HASINIT PG_RETURN_POINTER(d);
|
||||
HASINIT }
|
||||
|
||||
PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
|
||||
Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
|
||||
HASINIT DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
|
||||
char *in = (char*)PG_GETARG_POINTER(1);
|
||||
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
TSLexeme *res=palloc(sizeof(TSLexeme*)*2);
|
||||
|
||||
/* Your LEXIZE dictionary code */
|
||||
HASINIT if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
|
||||
HASINIT pfree(txt);
|
||||
HASINIT res[0].lexeme=NULL;
|
||||
HASINIT } else
|
||||
res[0].lexeme=txt;
|
||||
res[1].lexeme=NULL;
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
-- Template for the installation script of a generated dictionary.
-- Lines that start with a marker word are kept (marker removed) or blanked
-- by config.sh, depending on the selected dictionary variant.
SET search_path = public;
BEGIN;

HASINIT CREATE FUNCTION dinit_CFG_MODNAME(internal)
HASINIT 	RETURNS internal
HASINIT 	AS 'MODULE_PATHNAME'
HASINIT 	LANGUAGE C;

NOSNOWBALL CREATE FUNCTION dlexize_CFG_MODNAME(internal,internal,int4)
NOSNOWBALL 	RETURNS internal
NOSNOWBALL 	AS 'MODULE_PATHNAME'
NOSNOWBALL 	LANGUAGE C
NOSNOWBALL 	STRICT;

-- Register the dictionary: name, init function, init option,
-- lexize function, comment.
INSERT INTO pg_ts_dict SELECT
	'CFG_MODNAME',
HASINIT 	(SELECT oid FROM pg_proc WHERE proname='dinit_CFG_MODNAME'),
NOINIT 	NULL,
	NULL,
ISSNOWBALL 	(SELECT oid FROM pg_proc WHERE proname='snb_lexize'),
NOSNOWBALL 	(SELECT oid FROM pg_proc WHERE proname='dlexize_CFG_MODNAME'),
	CFG_COMMENT
;

COMMIT;
|
|
@ -1,159 +0,0 @@
|
|||
#include "postgres.h"
|
||||
|
||||
#include <float.h>
|
||||
|
||||
#include "access/gist.h"
|
||||
#include "access/itup.h"
|
||||
#include "access/skey.h"
|
||||
#include "access/tuptoaster.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "utils/array.h"
|
||||
#include "utils/builtins.h"
|
||||
|
||||
#include "tsvector.h"
|
||||
#include "query.h"
|
||||
#include "query_cleanup.h"
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_extract_tsvector);
|
||||
Datum gin_extract_tsvector(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
gin_extract_tsvector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *vector = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
||||
Datum *entries = NULL;
|
||||
|
||||
*nentries = 0;
|
||||
if (vector->size > 0)
|
||||
{
|
||||
int i;
|
||||
WordEntry *we = ARRPTR(vector);
|
||||
|
||||
*nentries = (int32) vector->size;
|
||||
entries = (Datum *) palloc(sizeof(Datum) * vector->size);
|
||||
|
||||
for (i = 0; i < vector->size; i++)
|
||||
{
|
||||
text *txt = (text *) palloc(VARHDRSZ + we->len);
|
||||
|
||||
SET_VARSIZE(txt, VARHDRSZ + we->len);
|
||||
memcpy(VARDATA(txt), STRPTR(vector) + we->pos, we->len);
|
||||
|
||||
entries[i] = PointerGetDatum(txt);
|
||||
|
||||
we++;
|
||||
}
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(vector, 0);
|
||||
PG_RETURN_POINTER(entries);
|
||||
}
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_extract_tsquery);
|
||||
Datum gin_extract_tsquery(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
gin_extract_tsquery(PG_FUNCTION_ARGS)
|
||||
{
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
||||
StrategyNumber strategy = DatumGetUInt16(PG_GETARG_DATUM(2));
|
||||
Datum *entries = NULL;
|
||||
|
||||
*nentries = 0;
|
||||
if (query->size > 0)
|
||||
{
|
||||
int4 i,
|
||||
j = 0,
|
||||
len;
|
||||
ITEM *item;
|
||||
|
||||
item = clean_NOT_v2(GETQUERY(query), &len);
|
||||
if (!item)
|
||||
elog(ERROR, "Query requires full scan, GIN doesn't support it");
|
||||
|
||||
item = GETQUERY(query);
|
||||
|
||||
for (i = 0; i < query->size; i++)
|
||||
if (item[i].type == VAL)
|
||||
(*nentries)++;
|
||||
|
||||
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
|
||||
|
||||
for (i = 0; i < query->size; i++)
|
||||
if (item[i].type == VAL)
|
||||
{
|
||||
text *txt;
|
||||
|
||||
txt = (text *) palloc(VARHDRSZ + item[i].length);
|
||||
|
||||
SET_VARSIZE(txt, VARHDRSZ + item[i].length);
|
||||
memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length);
|
||||
|
||||
entries[j++] = PointerGetDatum(txt);
|
||||
|
||||
if (strategy == 1 && item[i].weight != 0)
|
||||
elog(ERROR, "With class of lexeme restrictions use @@@ operation");
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
*nentries = -1; /* nothing can be found */
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_RETURN_POINTER(entries);
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ITEM *frst;
|
||||
bool *mapped_check;
|
||||
} GinChkVal;
|
||||
|
||||
static bool
|
||||
checkcondition_gin(void *checkval, ITEM * val)
|
||||
{
|
||||
GinChkVal *gcv = (GinChkVal *) checkval;
|
||||
|
||||
return gcv->mapped_check[val - gcv->frst];
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_ts_consistent);
|
||||
Datum gin_ts_consistent(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
gin_ts_consistent(PG_FUNCTION_ARGS)
|
||||
{
|
||||
bool *check = (bool *) PG_GETARG_POINTER(0);
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
|
||||
bool res = FALSE;
|
||||
|
||||
if (query->size > 0)
|
||||
{
|
||||
int4 i,
|
||||
j = 0;
|
||||
ITEM *item;
|
||||
GinChkVal gcv;
|
||||
|
||||
gcv.frst = item = GETQUERY(query);
|
||||
gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
|
||||
|
||||
for (i = 0; i < query->size; i++)
|
||||
if (item[i].type == VAL)
|
||||
gcv.mapped_check[i] = check[j++];
|
||||
|
||||
|
||||
res = TS_execute(
|
||||
GETQUERY(query),
|
||||
&gcv,
|
||||
true,
|
||||
checkcondition_gin
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
PG_RETURN_BOOL(res);
|
||||
}
|
|
@ -1,751 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/gistidx.c,v 1.16 2007/02/28 22:44:38 tgl Exp $ */
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include <float.h>
|
||||
|
||||
#include "access/gist.h"
|
||||
#include "access/itup.h"
|
||||
#include "access/tuptoaster.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "utils/array.h"
|
||||
#include "utils/builtins.h"
|
||||
|
||||
#include "tsvector.h"
|
||||
#include "query.h"
|
||||
#include "gistidx.h"
|
||||
#include "crc32.h"
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsvector_in);
|
||||
Datum gtsvector_in(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsvector_out);
|
||||
Datum gtsvector_out(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsvector_compress);
|
||||
Datum gtsvector_compress(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsvector_decompress);
|
||||
Datum gtsvector_decompress(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsvector_consistent);
|
||||
Datum gtsvector_consistent(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsvector_union);
|
||||
Datum gtsvector_union(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsvector_same);
|
||||
Datum gtsvector_same(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsvector_penalty);
|
||||
Datum gtsvector_penalty(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsvector_picksplit);
|
||||
Datum gtsvector_picksplit(PG_FUNCTION_ARGS);
|
||||
|
||||
#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
|
||||
|
||||
/* Number of one-bits in an unsigned byte */
|
||||
static const uint8 number_of_ones[256] = {
|
||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
|
||||
};
|
||||
|
||||
static int4 sizebitvec(BITVECP sign);
|
||||
|
||||
Datum
|
||||
gtsvector_in(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("gtsvector_in not implemented")));
|
||||
PG_RETURN_DATUM(0);
|
||||
}
|
||||
|
||||
#define SINGOUTSTR "%d true bits, %d false bits"
|
||||
#define ARROUTSTR "%d unique words"
|
||||
#define EXTRALEN ( 2*13 )
|
||||
|
||||
static int outbuf_maxlen = 0;
|
||||
|
||||
Datum
|
||||
gtsvector_out(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTTYPE *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_POINTER(0)));
|
||||
char *outbuf;
|
||||
|
||||
if (outbuf_maxlen == 0)
|
||||
outbuf_maxlen = 2 * EXTRALEN + Max(strlen(SINGOUTSTR), strlen(ARROUTSTR)) + 1;
|
||||
outbuf = palloc(outbuf_maxlen);
|
||||
|
||||
if (ISARRKEY(key))
|
||||
sprintf(outbuf, ARROUTSTR, (int) ARRNELEM(key));
|
||||
else
|
||||
{
|
||||
int cnttrue = (ISALLTRUE(key)) ? SIGLENBIT : sizebitvec(GETSIGN(key));
|
||||
|
||||
sprintf(outbuf, SINGOUTSTR, cnttrue, (int) SIGLENBIT - cnttrue);
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(key, 0);
|
||||
PG_RETURN_POINTER(outbuf);
|
||||
}
|
||||
|
||||
static int
|
||||
compareint(const void *a, const void *b)
|
||||
{
|
||||
if (*((int4 *) a) == *((int4 *) b))
|
||||
return 0;
|
||||
return (*((int4 *) a) > *((int4 *) b)) ? 1 : -1;
|
||||
}
|
||||
|
||||
static int
|
||||
uniqueint(int4 *a, int4 l)
|
||||
{
|
||||
int4 *ptr,
|
||||
*res;
|
||||
|
||||
if (l == 1)
|
||||
return l;
|
||||
|
||||
ptr = res = a;
|
||||
|
||||
qsort((void *) a, l, sizeof(int4), compareint);
|
||||
|
||||
while (ptr - a < l)
|
||||
if (*ptr != *res)
|
||||
*(++res) = *ptr++;
|
||||
else
|
||||
ptr++;
|
||||
return res + 1 - a;
|
||||
}
|
||||
|
||||
static void
|
||||
makesign(BITVECP sign, GISTTYPE * a)
|
||||
{
|
||||
int4 k,
|
||||
len = ARRNELEM(a);
|
||||
int4 *ptr = GETARR(a);
|
||||
|
||||
MemSet((void *) sign, 0, sizeof(BITVEC));
|
||||
for (k = 0; k < len; k++)
|
||||
HASH(sign, ptr[k]);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsvector_compress(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
||||
GISTENTRY *retval = entry;
|
||||
|
||||
if (entry->leafkey)
|
||||
{ /* tsvector */
|
||||
GISTTYPE *res;
|
||||
tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
|
||||
int4 len;
|
||||
int4 *arr;
|
||||
WordEntry *ptr = ARRPTR(val);
|
||||
char *words = STRPTR(val);
|
||||
|
||||
len = CALCGTSIZE(ARRKEY, val->size);
|
||||
res = (GISTTYPE *) palloc(len);
|
||||
SET_VARSIZE(res, len);
|
||||
res->flag = ARRKEY;
|
||||
arr = GETARR(res);
|
||||
len = val->size;
|
||||
while (len--)
|
||||
{
|
||||
*arr = crc32_sz(&words[ptr->pos], ptr->len);
|
||||
arr++;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
len = uniqueint(GETARR(res), val->size);
|
||||
if (len != val->size)
|
||||
{
|
||||
/*
|
||||
* there is a collision of hash-function; len is always less than
|
||||
* val->size
|
||||
*/
|
||||
len = CALCGTSIZE(ARRKEY, len);
|
||||
res = (GISTTYPE *) repalloc((void *) res, len);
|
||||
SET_VARSIZE(res, len);
|
||||
}
|
||||
|
||||
/* make signature, if array is too long */
|
||||
if (VARSIZE(res) > TOAST_INDEX_TARGET)
|
||||
{
|
||||
GISTTYPE *ressign;
|
||||
|
||||
len = CALCGTSIZE(SIGNKEY, 0);
|
||||
ressign = (GISTTYPE *) palloc(len);
|
||||
SET_VARSIZE(ressign, len);
|
||||
ressign->flag = SIGNKEY;
|
||||
makesign(GETSIGN(ressign), res);
|
||||
res = ressign;
|
||||
}
|
||||
|
||||
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
|
||||
gistentryinit(*retval, PointerGetDatum(res),
|
||||
entry->rel, entry->page,
|
||||
entry->offset, FALSE);
|
||||
}
|
||||
else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
|
||||
!ISALLTRUE(DatumGetPointer(entry->key)))
|
||||
{
|
||||
int4 i,
|
||||
len;
|
||||
GISTTYPE *res;
|
||||
BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
|
||||
|
||||
LOOPBYTE(
|
||||
if ((sign[i] & 0xff) != 0xff)
|
||||
PG_RETURN_POINTER(retval);
|
||||
);
|
||||
|
||||
len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
|
||||
res = (GISTTYPE *) palloc(len);
|
||||
SET_VARSIZE(res, len);
|
||||
res->flag = SIGNKEY | ALLISTRUE;
|
||||
|
||||
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
|
||||
gistentryinit(*retval, PointerGetDatum(res),
|
||||
entry->rel, entry->page,
|
||||
entry->offset, FALSE);
|
||||
}
|
||||
PG_RETURN_POINTER(retval);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsvector_decompress(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
||||
GISTTYPE *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
|
||||
|
||||
if (key != (GISTTYPE *) DatumGetPointer(entry->key))
|
||||
{
|
||||
GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
|
||||
|
||||
gistentryinit(*retval, PointerGetDatum(key),
|
||||
entry->rel, entry->page,
|
||||
entry->offset, FALSE);
|
||||
|
||||
PG_RETURN_POINTER(retval);
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(entry);
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int4 *arrb;
|
||||
int4 *arre;
|
||||
} CHKVAL;
|
||||
|
||||
/*
|
||||
* is there value 'val' in array or not ?
|
||||
*/
|
||||
static bool
|
||||
checkcondition_arr(void *checkval, ITEM * val)
|
||||
{
|
||||
int4 *StopLow = ((CHKVAL *) checkval)->arrb;
|
||||
int4 *StopHigh = ((CHKVAL *) checkval)->arre;
|
||||
int4 *StopMiddle;
|
||||
|
||||
/* Loop invariant: StopLow <= val < StopHigh */
|
||||
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||
if (*StopMiddle == val->val)
|
||||
return (true);
|
||||
else if (*StopMiddle < val->val)
|
||||
StopLow = StopMiddle + 1;
|
||||
else
|
||||
StopHigh = StopMiddle;
|
||||
}
|
||||
|
||||
return (false);
|
||||
}
|
||||
|
||||
static bool
|
||||
checkcondition_bit(void *checkval, ITEM * val)
|
||||
{
|
||||
return GETBIT(checkval, HASHVAL(val->val));
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsvector_consistent(PG_FUNCTION_ARGS)
|
||||
{
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
|
||||
GISTTYPE *key = (GISTTYPE *) DatumGetPointer(
|
||||
((GISTENTRY *) PG_GETARG_POINTER(0))->key
|
||||
);
|
||||
|
||||
if (!query->size)
|
||||
PG_RETURN_BOOL(false);
|
||||
|
||||
if (ISSIGNKEY(key))
|
||||
{
|
||||
if (ISALLTRUE(key))
|
||||
PG_RETURN_BOOL(true);
|
||||
|
||||
PG_RETURN_BOOL(TS_execute(
|
||||
GETQUERY(query),
|
||||
(void *) GETSIGN(key), false,
|
||||
checkcondition_bit
|
||||
));
|
||||
}
|
||||
else
|
||||
{ /* only leaf pages */
|
||||
CHKVAL chkval;
|
||||
|
||||
chkval.arrb = GETARR(key);
|
||||
chkval.arre = chkval.arrb + ARRNELEM(key);
|
||||
PG_RETURN_BOOL(TS_execute(
|
||||
GETQUERY(query),
|
||||
(void *) &chkval, true,
|
||||
checkcondition_arr
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
static int4
|
||||
unionkey(BITVECP sbase, GISTTYPE * add)
|
||||
{
|
||||
int4 i;
|
||||
|
||||
if (ISSIGNKEY(add))
|
||||
{
|
||||
BITVECP sadd = GETSIGN(add);
|
||||
|
||||
if (ISALLTRUE(add))
|
||||
return 1;
|
||||
|
||||
LOOPBYTE(
|
||||
sbase[i] |= sadd[i];
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
int4 *ptr = GETARR(add);
|
||||
|
||||
for (i = 0; i < ARRNELEM(add); i++)
|
||||
HASH(sbase, ptr[i]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
gtsvector_union(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
|
||||
int *size = (int *) PG_GETARG_POINTER(1);
|
||||
BITVEC base;
|
||||
int4 i,
|
||||
len;
|
||||
int4 flag = 0;
|
||||
GISTTYPE *result;
|
||||
|
||||
MemSet((void *) base, 0, sizeof(BITVEC));
|
||||
for (i = 0; i < entryvec->n; i++)
|
||||
{
|
||||
if (unionkey(base, GETENTRY(entryvec, i)))
|
||||
{
|
||||
flag = ALLISTRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
flag |= SIGNKEY;
|
||||
len = CALCGTSIZE(flag, 0);
|
||||
result = (GISTTYPE *) palloc(len);
|
||||
SET_VARSIZE(result, len);
|
||||
result->flag = flag;
|
||||
if (!ISALLTRUE(result))
|
||||
memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
|
||||
*size = len;
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsvector_same(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTTYPE *a = (GISTTYPE *) PG_GETARG_POINTER(0);
|
||||
GISTTYPE *b = (GISTTYPE *) PG_GETARG_POINTER(1);
|
||||
bool *result = (bool *) PG_GETARG_POINTER(2);
|
||||
|
||||
if (ISSIGNKEY(a))
|
||||
{ /* then b also ISSIGNKEY */
|
||||
if (ISALLTRUE(a) && ISALLTRUE(b))
|
||||
*result = true;
|
||||
else if (ISALLTRUE(a))
|
||||
*result = false;
|
||||
else if (ISALLTRUE(b))
|
||||
*result = false;
|
||||
else
|
||||
{
|
||||
int4 i;
|
||||
BITVECP sa = GETSIGN(a),
|
||||
sb = GETSIGN(b);
|
||||
|
||||
*result = true;
|
||||
LOOPBYTE(
|
||||
if (sa[i] != sb[i])
|
||||
{
|
||||
*result = false;
|
||||
break;
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* a and b ISARRKEY */
|
||||
int4 lena = ARRNELEM(a),
|
||||
lenb = ARRNELEM(b);
|
||||
|
||||
if (lena != lenb)
|
||||
*result = false;
|
||||
else
|
||||
{
|
||||
int4 *ptra = GETARR(a),
|
||||
*ptrb = GETARR(b);
|
||||
int4 i;
|
||||
|
||||
*result = true;
|
||||
for (i = 0; i < lena; i++)
|
||||
if (ptra[i] != ptrb[i])
|
||||
{
|
||||
*result = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
static int4
|
||||
sizebitvec(BITVECP sign)
|
||||
{
|
||||
int4 size = 0,
|
||||
i;
|
||||
|
||||
LOOPBYTE(
|
||||
size += number_of_ones[(unsigned char) sign[i]];
|
||||
);
|
||||
return size;
|
||||
}
|
||||
|
||||
static int
|
||||
hemdistsign(BITVECP a, BITVECP b)
|
||||
{
|
||||
int i,
|
||||
diff,
|
||||
dist = 0;
|
||||
|
||||
LOOPBYTE(
|
||||
diff = (unsigned char) (a[i] ^ b[i]);
|
||||
dist += number_of_ones[diff];
|
||||
);
|
||||
return dist;
|
||||
}
|
||||
|
||||
static int
|
||||
hemdist(GISTTYPE * a, GISTTYPE * b)
|
||||
{
|
||||
if (ISALLTRUE(a))
|
||||
{
|
||||
if (ISALLTRUE(b))
|
||||
return 0;
|
||||
else
|
||||
return SIGLENBIT - sizebitvec(GETSIGN(b));
|
||||
}
|
||||
else if (ISALLTRUE(b))
|
||||
return SIGLENBIT - sizebitvec(GETSIGN(a));
|
||||
|
||||
return hemdistsign(GETSIGN(a), GETSIGN(b));
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsvector_penalty(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
|
||||
GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
|
||||
float *penalty = (float *) PG_GETARG_POINTER(2);
|
||||
GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
|
||||
GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
|
||||
BITVECP orig = GETSIGN(origval);
|
||||
|
||||
*penalty = 0.0;
|
||||
|
||||
if (ISARRKEY(newval))
|
||||
{
|
||||
BITVEC sign;
|
||||
|
||||
makesign(sign, newval);
|
||||
|
||||
if (ISALLTRUE(origval))
|
||||
*penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
|
||||
else
|
||||
*penalty = hemdistsign(sign, orig);
|
||||
}
|
||||
else
|
||||
*penalty = hemdist(origval, newval);
|
||||
PG_RETURN_POINTER(penalty);
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
bool allistrue;
|
||||
BITVEC sign;
|
||||
} CACHESIGN;
|
||||
|
||||
static void
|
||||
fillcache(CACHESIGN * item, GISTTYPE * key)
|
||||
{
|
||||
item->allistrue = false;
|
||||
if (ISARRKEY(key))
|
||||
makesign(item->sign, key);
|
||||
else if (ISALLTRUE(key))
|
||||
item->allistrue = true;
|
||||
else
|
||||
memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
|
||||
}
|
||||
|
||||
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
|
||||
typedef struct
|
||||
{
|
||||
OffsetNumber pos;
|
||||
int4 cost;
|
||||
} SPLITCOST;
|
||||
|
||||
static int
|
||||
comparecost(const void *a, const void *b)
|
||||
{
|
||||
if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
|
||||
return 0;
|
||||
else
|
||||
return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
hemdistcache(CACHESIGN * a, CACHESIGN * b)
|
||||
{
|
||||
if (a->allistrue)
|
||||
{
|
||||
if (b->allistrue)
|
||||
return 0;
|
||||
else
|
||||
return SIGLENBIT - sizebitvec(b->sign);
|
||||
}
|
||||
else if (b->allistrue)
|
||||
return SIGLENBIT - sizebitvec(a->sign);
|
||||
|
||||
return hemdistsign(a->sign, b->sign);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsvector_picksplit(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
|
||||
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
|
||||
OffsetNumber k,
|
||||
j;
|
||||
GISTTYPE *datum_l,
|
||||
*datum_r;
|
||||
BITVECP union_l,
|
||||
union_r;
|
||||
int4 size_alpha,
|
||||
size_beta;
|
||||
int4 size_waste,
|
||||
waste = -1;
|
||||
int4 nbytes;
|
||||
OffsetNumber seed_1 = 0,
|
||||
seed_2 = 0;
|
||||
OffsetNumber *left,
|
||||
*right;
|
||||
OffsetNumber maxoff;
|
||||
BITVECP ptr;
|
||||
int i;
|
||||
CACHESIGN *cache;
|
||||
SPLITCOST *costvector;
|
||||
|
||||
maxoff = entryvec->n - 2;
|
||||
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
|
||||
v->spl_left = (OffsetNumber *) palloc(nbytes);
|
||||
v->spl_right = (OffsetNumber *) palloc(nbytes);
|
||||
|
||||
cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
|
||||
fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
|
||||
|
||||
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
|
||||
{
|
||||
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
|
||||
{
|
||||
if (k == FirstOffsetNumber)
|
||||
fillcache(&cache[j], GETENTRY(entryvec, j));
|
||||
|
||||
size_waste = hemdistcache(&(cache[j]), &(cache[k]));
|
||||
if (size_waste > waste)
|
||||
{
|
||||
waste = size_waste;
|
||||
seed_1 = k;
|
||||
seed_2 = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
left = v->spl_left;
|
||||
v->spl_nleft = 0;
|
||||
right = v->spl_right;
|
||||
v->spl_nright = 0;
|
||||
|
||||
if (seed_1 == 0 || seed_2 == 0)
|
||||
{
|
||||
seed_1 = 1;
|
||||
seed_2 = 2;
|
||||
}
|
||||
|
||||
/* form initial .. */
|
||||
if (cache[seed_1].allistrue)
|
||||
{
|
||||
datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
|
||||
SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
|
||||
datum_l->flag = SIGNKEY | ALLISTRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
|
||||
SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
|
||||
datum_l->flag = SIGNKEY;
|
||||
memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
|
||||
}
|
||||
if (cache[seed_2].allistrue)
|
||||
{
|
||||
datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
|
||||
SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
|
||||
datum_r->flag = SIGNKEY | ALLISTRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
|
||||
SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
|
||||
datum_r->flag = SIGNKEY;
|
||||
memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
|
||||
}
|
||||
|
||||
union_l = GETSIGN(datum_l);
|
||||
union_r = GETSIGN(datum_r);
|
||||
maxoff = OffsetNumberNext(maxoff);
|
||||
fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
|
||||
/* sort before ... */
|
||||
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
|
||||
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
|
||||
{
|
||||
costvector[j - 1].pos = j;
|
||||
size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
|
||||
size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
|
||||
costvector[j - 1].cost = Abs(size_alpha - size_beta);
|
||||
}
|
||||
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
|
||||
|
||||
for (k = 0; k < maxoff; k++)
|
||||
{
|
||||
j = costvector[k].pos;
|
||||
if (j == seed_1)
|
||||
{
|
||||
*left++ = j;
|
||||
v->spl_nleft++;
|
||||
continue;
|
||||
}
|
||||
else if (j == seed_2)
|
||||
{
|
||||
*right++ = j;
|
||||
v->spl_nright++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ISALLTRUE(datum_l) || cache[j].allistrue)
|
||||
{
|
||||
if (ISALLTRUE(datum_l) && cache[j].allistrue)
|
||||
size_alpha = 0;
|
||||
else
|
||||
size_alpha = SIGLENBIT - sizebitvec(
|
||||
(cache[j].allistrue) ? GETSIGN(datum_l) : GETSIGN(cache[j].sign)
|
||||
);
|
||||
}
|
||||
else
|
||||
size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
|
||||
|
||||
if (ISALLTRUE(datum_r) || cache[j].allistrue)
|
||||
{
|
||||
if (ISALLTRUE(datum_r) && cache[j].allistrue)
|
||||
size_beta = 0;
|
||||
else
|
||||
size_beta = SIGLENBIT - sizebitvec(
|
||||
(cache[j].allistrue) ? GETSIGN(datum_r) : GETSIGN(cache[j].sign)
|
||||
);
|
||||
}
|
||||
else
|
||||
size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
|
||||
|
||||
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
|
||||
{
|
||||
if (ISALLTRUE(datum_l) || cache[j].allistrue)
|
||||
{
|
||||
if (!ISALLTRUE(datum_l))
|
||||
MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = cache[j].sign;
|
||||
LOOPBYTE(
|
||||
union_l[i] |= ptr[i];
|
||||
);
|
||||
}
|
||||
*left++ = j;
|
||||
v->spl_nleft++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ISALLTRUE(datum_r) || cache[j].allistrue)
|
||||
{
|
||||
if (!ISALLTRUE(datum_r))
|
||||
MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = cache[j].sign;
|
||||
LOOPBYTE(
|
||||
union_r[i] |= ptr[i];
|
||||
);
|
||||
}
|
||||
*right++ = j;
|
||||
v->spl_nright++;
|
||||
}
|
||||
}
|
||||
|
||||
*right = *left = FirstOffsetNumber;
|
||||
v->spl_ldatum = PointerGetDatum(datum_l);
|
||||
v->spl_rdatum = PointerGetDatum(datum_r);
|
||||
|
||||
PG_RETURN_POINTER(v);
|
||||
}
|
|
@ -1,62 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/gistidx.h,v 1.8 2007/02/28 22:44:38 tgl Exp $ */
|
||||
|
||||
#ifndef __GISTIDX_H__
|
||||
#define __GISTIDX_H__
|
||||
|
||||
/*
|
||||
#define GISTIDX_DEBUG
|
||||
*/
|
||||
|
||||
/*
|
||||
* signature defines
|
||||
*/
|
||||
|
||||
#define SIGLENINT 63 /* >121 => key will toast, so it will not work
|
||||
* !!! */
|
||||
#define SIGLEN ( sizeof(int4) * SIGLENINT )
|
||||
#define SIGLENBIT (SIGLEN * BITS_PER_BYTE)
|
||||
|
||||
typedef char BITVEC[SIGLEN];
|
||||
typedef char *BITVECP;
|
||||
|
||||
#define LOOPBYTE(a) \
|
||||
for(i=0;i<SIGLEN;i++) {\
|
||||
a;\
|
||||
}
|
||||
|
||||
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) )
|
||||
#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 )
|
||||
#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITS_PER_BYTE ) )
|
||||
#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITS_PER_BYTE ) )
|
||||
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 )
|
||||
|
||||
#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
|
||||
#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
|
||||
|
||||
|
||||
/*
|
||||
* type of index key
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
int32 vl_len_; /* varlena header (do not touch directly!) */
|
||||
int4 flag;
|
||||
char data[1];
|
||||
} GISTTYPE;
|
||||
|
||||
#define ARRKEY 0x01
|
||||
#define SIGNKEY 0x02
|
||||
#define ALLISTRUE 0x04
|
||||
|
||||
#define ISARRKEY(x) ( ((GISTTYPE*)(x))->flag & ARRKEY )
|
||||
#define ISSIGNKEY(x) ( ((GISTTYPE*)(x))->flag & SIGNKEY )
|
||||
#define ISALLTRUE(x) ( ((GISTTYPE*)(x))->flag & ALLISTRUE )
|
||||
|
||||
#define GTHDRSIZE ( VARHDRSZ + sizeof(int4) )
|
||||
#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
|
||||
|
||||
#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
|
||||
#define GETARR(x) ( (int4*)( (char*)(x)+GTHDRSIZE ) )
|
||||
#define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int4) )
|
||||
|
||||
#endif
|
|
@ -1,27 +0,0 @@
|
|||
# $PostgreSQL: pgsql/contrib/tsearch2/ispell/Makefile,v 1.10 2007/06/26 22:05:03 tgl Exp $

# Objects that are linked together into SUBSYS.o.
SUBOBJS =  spell.o regis.o

EXTRA_CLEAN = SUBSYS.o $(SUBOBJS)

# Headers are found in the parent directory.
PG_CPPFLAGS = -I$(srcdir)/..

ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/tsearch2/ispell
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

override CFLAGS += $(CFLAGS_SL)

all: SUBSYS.o

# Combine the objects into one relocatable object file.
SUBSYS.o: $(SUBOBJS)
	$(LD) $(LDREL) $(LDOUT) $@ $^
|
||||
|
||||
|
|
@ -1,215 +0,0 @@
|
|||
#include "regis.h"
|
||||
#include "ts_locale.h"
|
||||
#include "common.h"
|
||||
|
||||
bool
|
||||
RS_isRegis(const char *str)
|
||||
{
|
||||
while (str && *str)
|
||||
{
|
||||
if (t_isalpha(str) ||
|
||||
t_iseq(str, '[') ||
|
||||
t_iseq(str, ']') ||
|
||||
t_iseq(str, '^'))
|
||||
str += pg_mblen(str);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* parser states of RS_compile() */
#define RS_IN_ONEOF		1		/* just after '[' */
#define RS_IN_ONEOF_IN	2		/* inside [...] after its first character */
#define RS_IN_NONEOF	3		/* inside [^...] */
#define RS_IN_WAIT		4		/* outside of any bracket */
|
||||
|
||||
/*
 * Allocate a zero-filled RegisNode with room for 'len' data bytes plus a
 * terminator, and link it after 'prev' when prev is not NULL.
 */
static RegisNode *
newRegisNode(RegisNode * prev, int len)
{
	size_t		nbytes = RNHDRSZ + len + 1;
	RegisNode  *node = (RegisNode *) malloc(nbytes);

	if (!node)
		ts_error(ERROR, "No memory");

	memset(node, 0, nbytes);

	if (prev)
		prev->next = node;

	return node;
}
|
||||
|
||||
void
|
||||
RS_compile(Regis * r, bool issuffix, char *str)
|
||||
{
|
||||
int len = strlen(str);
|
||||
int state = RS_IN_WAIT;
|
||||
char *c = (char *) str;
|
||||
RegisNode *ptr = NULL;
|
||||
|
||||
memset(r, 0, sizeof(Regis));
|
||||
r->issuffix = (issuffix) ? 1 : 0;
|
||||
|
||||
while (*c)
|
||||
{
|
||||
if (state == RS_IN_WAIT)
|
||||
{
|
||||
if (t_isalpha(c))
|
||||
{
|
||||
if (ptr)
|
||||
ptr = newRegisNode(ptr, len);
|
||||
else
|
||||
ptr = r->node = newRegisNode(NULL, len);
|
||||
COPYCHAR(ptr->data, c);
|
||||
ptr->type = RSF_ONEOF;
|
||||
ptr->len = pg_mblen(c);
|
||||
}
|
||||
else if (t_iseq(c, '['))
|
||||
{
|
||||
if (ptr)
|
||||
ptr = newRegisNode(ptr, len);
|
||||
else
|
||||
ptr = r->node = newRegisNode(NULL, len);
|
||||
ptr->type = RSF_ONEOF;
|
||||
state = RS_IN_ONEOF;
|
||||
}
|
||||
else
|
||||
ts_error(ERROR, "Error in regis: %s", str);
|
||||
}
|
||||
else if (state == RS_IN_ONEOF)
|
||||
{
|
||||
if (t_iseq(c, '^'))
|
||||
{
|
||||
ptr->type = RSF_NONEOF;
|
||||
state = RS_IN_NONEOF;
|
||||
}
|
||||
else if (t_isalpha(c))
|
||||
{
|
||||
COPYCHAR(ptr->data, c);
|
||||
ptr->len = pg_mblen(c);
|
||||
state = RS_IN_ONEOF_IN;
|
||||
}
|
||||
else
|
||||
ts_error(ERROR, "Error in regis: %s", str);
|
||||
}
|
||||
else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
|
||||
{
|
||||
if (t_isalpha(c))
|
||||
{
|
||||
COPYCHAR(ptr->data + ptr->len, c);
|
||||
ptr->len += pg_mblen(c);
|
||||
}
|
||||
else if (t_iseq(c, ']'))
|
||||
state = RS_IN_WAIT;
|
||||
else
|
||||
ts_error(ERROR, "Error in regis: %s", str);
|
||||
}
|
||||
else
|
||||
ts_error(ERROR, "Internal error in RS_compile: %d", state);
|
||||
c += pg_mblen(c);
|
||||
}
|
||||
|
||||
ptr = r->node;
|
||||
while (ptr)
|
||||
{
|
||||
r->nchar++;
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
RS_free(Regis * r)
|
||||
{
|
||||
RegisNode *ptr = r->node,
|
||||
*tmp;
|
||||
|
||||
while (ptr)
|
||||
{
|
||||
tmp = ptr->next;
|
||||
free(ptr);
|
||||
ptr = tmp;
|
||||
}
|
||||
|
||||
r->node = NULL;
|
||||
}
|
||||
|
||||
#ifdef TS_USE_WIDE
/*
 * Multibyte-aware strchr: return true if the character starting at 'c'
 * occurs in 'str'.  Both are stepped through with pg_mblen(); a character
 * matches when it has the same byte length and the same bytes.
 */
static bool
mb_strchr(char *str, char *c)
{
	int			clen = pg_mblen(c);
	char	   *ptr = str;

	while (*ptr)
	{
		int			plen = pg_mblen(ptr);

		if (plen == clen && memcmp(ptr, c, clen) == 0)
			return true;

		ptr += plen;
	}

	return false;
}
#else
/* single-byte build: compare on the first byte of 'c' only */
#define mb_strchr(s,c)	( (strchr((s),*(c)) == NULL) ? false : true )
#endif
|
||||
|
||||
|
||||
bool
|
||||
RS_execute(Regis * r, char *str)
|
||||
{
|
||||
RegisNode *ptr = r->node;
|
||||
char *c = str;
|
||||
int len = 0;
|
||||
|
||||
while (*c)
|
||||
{
|
||||
len++;
|
||||
c += pg_mblen(c);
|
||||
}
|
||||
|
||||
if (len < r->nchar)
|
||||
return false;
|
||||
|
||||
c = str;
|
||||
if (r->issuffix)
|
||||
{
|
||||
len -= r->nchar;
|
||||
while (len-- > 0)
|
||||
c += pg_mblen(c);
|
||||
}
|
||||
|
||||
|
||||
while (ptr)
|
||||
{
|
||||
switch (ptr->type)
|
||||
{
|
||||
case RSF_ONEOF:
|
||||
if (mb_strchr((char *) ptr->data, c) != true)
|
||||
return false;
|
||||
break;
|
||||
case RSF_NONEOF:
|
||||
if (mb_strchr((char *) ptr->data, c) == true)
|
||||
return false;
|
||||
break;
|
||||
default:
|
||||
ts_error(ERROR, "RS_execute: Unknown type node: %d\n", ptr->type);
|
||||
}
|
||||
ptr = ptr->next;
|
||||
c += pg_mblen(c);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
|
@ -1,38 +0,0 @@
|
|||
#ifndef __REGIS_H__
#define __REGIS_H__

#include "postgres.h"

/* one element of a compiled pattern: a set of characters */
typedef struct RegisNode
{
	uint32
				type:2,			/* RSF_ONEOF or RSF_NONEOF */
				len:16,			/* bytes used in data[] */
				unused:14;
	struct RegisNode *next;
	unsigned char data[1];		/* variable length; see RNHDRSZ */
} RegisNode;

/* size of a RegisNode up to, but not including, data[] */
#define  RNHDRSZ	(offsetof(RegisNode,data))

#define RSF_ONEOF	1
#define RSF_NONEOF	2

/* a compiled pattern: list of nodes plus flags */
typedef struct Regis
{
	RegisNode  *node;
	uint32
				issuffix:1,
				nchar:16,		/* number of nodes in the list */
				unused:15;
} Regis;

bool		RS_isRegis(const char *str);

void		RS_compile(Regis * r, bool issuffix, char *str);
void		RS_free(Regis * r);

/* returns true if matches */
bool		RS_execute(Regis * r, char *str);

#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -1,135 +0,0 @@
|
|||
#ifndef __SPELL_H__
#define __SPELL_H__

#include "c.h"

#include <sys/types.h>

#include "regex/regex.h"

#include "regis.h"
#include "dict.h"

struct SPNode;

/* one entry of an SPNode */
typedef struct
{
	uint32
				val:8,
				isword:1,
				compoundallow:1,
				affix:22;
	struct SPNode *node;
} SPNodeData;

/* variable-length node; 'length' presumably counts data[] -- confirm in spell.c */
typedef struct SPNode
{
	uint32		length;
	SPNodeData	data[1];
} SPNode;

/* size of the SPNode header in front of data[] */
#define SPNHRDSZ	(sizeof(uint32))


/* dictionary word; 'word' is variable length (see SPELLHDRSZ) */
typedef struct spell_struct
{
	union
	{
		char		flag[16];
		struct
		{
			int			affix;
			int			len;
		}			d;
	}			p;
	char		word[1];
} SPELL;

/* size of a SPELL up to, but not including, word[] */
#define SPELLHDRSZ	(offsetof(SPELL, word))

/* one affix rule; 'reg' holds either a regex or a Regis */
typedef struct aff_struct
{
	uint32
				flag:8,
				type:2,
				compile:1,
				flagflags:3,
				issimple:1,
				isregis:1,
				unused:1,
				replen:16;
	char	   *mask;
	char	   *find;
	char	   *repl;
	union
	{
		regex_t		regex;
		Regis		regis;
	}			reg;
} AFFIX;

#define FF_CROSSPRODUCT		0x01
#define FF_COMPOUNDWORD		0x02
#define FF_COMPOUNDONLYAFX		0x04
#define FF_SUFFIX				2
#define FF_PREFIX				1

struct AffixNode;

/* one entry of an AffixNode */
typedef struct
{
	uint32
				val:8,
				naff:24;
	AFFIX	  **aff;
	struct AffixNode *node;
} AffixNodeData;

/* variable-length node of AffixNodeData entries */
typedef struct AffixNode
{
	uint32		isvoid:1,
				length:31;
	AffixNodeData data[1];
} AffixNode;

/* size of the AffixNode header in front of data[] */
#define ANHRDSZ		   (sizeof(uint32))

typedef struct
{
	char	   *affix;
	int			len;
} CMPDAffix;

/* all data of one ispell dictionary */
typedef struct
{
	int			maffixes;
	int			naffixes;
	AFFIX	   *Affix;
	char		compoundcontrol;

	int			nspell;
	int			mspell;
	SPELL	  **Spell;

	AffixNode  *Suffix;
	AffixNode  *Prefix;

	SPNode	   *Dictionary;
	char	  **AffixData;
	CMPDAffix  *CompoundAffix;

} IspellDict;

TSLexeme   *NINormalizeWord(IspellDict * Conf, char *word);
int			NIImportAffixes(IspellDict * Conf, const char *filename);
int			NIImportOOAffixes(IspellDict * Conf, const char *filename);
int			NIImportDictionary(IspellDict * Conf, const char *filename);

int			NIAddSpell(IspellDict * Conf, const char *word, const char *flag);
int			NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type);
void		NISortDictionary(IspellDict * Conf);
void		NISortAffixes(IspellDict * Conf);
void		NIFree(IspellDict * Conf);

#endif
|
|
@ -1,47 +0,0 @@
|
|||
# Convert a MySpell dictionary archive into ispell-format files.
# Override on the command line, e.g.:  gmake ZIPFILE=nn_NO LANGUAGE=nynorsk
ZIPFILE=nb_NO
LANGUAGE=norsk

UNZIP=unzip -o

# The targets are written through "> $@"; remove them if a recipe fails so
# that a truncated file is never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

.PHONY: all clean

all: $(LANGUAGE).dict $(LANGUAGE).aff

# Extract the MySpell affix file; touch it so that it is newer than the zip.
$(ZIPFILE).aff: $(ZIPFILE).zip
	$(UNZIP) $< $@
	touch $@


# 1 Cleanup dictionary
# 2 remove " symbol
# 3 add compoundwords controlled flag to word which hasn't it, but
#   has compound only suffixes

$(LANGUAGE).dict: $(ZIPFILE).zip
	$(UNZIP) $< $(ZIPFILE).dic
	grep -v -E '^[[:digit:]]+$$' < $(ZIPFILE).dic \
	 | grep -v '\.' \
	 | sed -e 's/"//g' \
	 | perl -p -e 's|/(\S+)| $$q=$$1; ( $$q=~/[\\_`]/ && $$q!~/z/ ) ? "/$${q}z" : "/$${q}"|e' \
	 | sort \
	> $@

#just convert affix file

$(LANGUAGE).aff: $(ZIPFILE).aff
	grep -v -i zyzyzy $< \
	 | perl -p \
		-e 's/^COMPOUNDFLAG\s+(\S+)/compoundwords controlled $$1/;' \
		-e 's/^COMPOUNDMIN\s+(\d+)/compoundmin $$1/;' \
		-e 's/^PFX\s+(\S+)\s+([YN])\s+\d+.*$$/ if ( !$$wasprf ) { $$wasprf=1; "prefixes\n\nflag $$1:" } else { "flag $$1:" } /e;' \
		-e 's/^PFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc(" $$3 > $$2")/e;' \
		-e 's/^(.*)SFX\s+(\S+)\s+([YN])\s+\d+.*$$/ $$flg=($$3 eq "Y") ? "*" : ""; $$flg="~$$flg" if length $$1; $$q=$$2; $$q="\\$$q" if $$q!~m#[a-zA-Z]#; if ( !$$wassfx ) { $$wassfx=1; "suffixes\n\nflag $$flg$$q:" } else { "flag $$flg$$q:" } /e;' \
		-e 's/^.*SFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc(" $$3 > ".( ($$1 eq "0") ? "" : "-$$1,").( ($$2 eq "0") ? "" : "$$2") )/e;' \
		-e 's/^(SET|TRY)/#$$1/' \
	> $@

clean:
	rm -f $(ZIPFILE).aff $(ZIPFILE).dic $(LANGUAGE).dict $(LANGUAGE).aff
|
||||
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
Utility for converting MySpell dictionary and affix files from
|
||||
myspell to ispell format.
|
||||
The utility was tested on nb_NO.zip and nn_NO.zip from
|
||||
OpenOffice (http://lingucomponent.openoffice.org/download_dictionary.html)
|
||||
|
||||
Usage:
|
||||
For example, to build the Norwegian dictionary and affix files:
|
||||
% cp nb_NO.zip my2ispell
|
||||
% cd my2ispell
|
||||
% gmake ZIPFILE=nb_NO LANGUAGE=norsk
|
||||
|
||||
Author: Teodor Sigaev <teodor@sigaev.ru>
|
|
@ -1,178 +0,0 @@
|
|||
/*
|
||||
* Simple config parser
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "dict.h"
|
||||
#include "common.h"
|
||||
#include "ts_locale.h"
|
||||
|
||||
#define CS_WAITKEY 0
|
||||
#define CS_INKEY 1
|
||||
#define CS_WAITEQ 2
|
||||
#define CS_WAITVALUE 3
|
||||
#define CS_INVALUE 4
|
||||
#define CS_IN2VALUE 5
|
||||
#define CS_WAITDELIM 6
|
||||
#define CS_INESC 7
|
||||
#define CS_IN2ESC 8
|
||||
|
||||
/*
 * Return a palloc'd, NUL-terminated copy of the first 'len' bytes of
 * 'ptr' with backslash escapes removed: a backslash is dropped and the
 * character after it is copied literally.
 *
 * A backslash that is the very last character has nothing to escape and
 * is dropped.  Previously the loop copied the terminator and then stepped
 * past it, so the scan continued beyond the end of the buffer.
 */
static char *
nstrdup(char *ptr, int len)
{
	char	   *res = palloc(len + 1),
			   *cptr;

	memcpy(res, ptr, len);
	res[len] = '\0';

	/* unescape in place: cptr (write position) never overtakes ptr (read) */
	cptr = ptr = res;
	while (*ptr)
	{
		if (t_iseq(ptr, '\\'))
		{
			ptr++;
			if (*ptr == '\0')
				break;			/* dangling backslash at end of input */
		}
		COPYCHAR(cptr, ptr);
		cptr += pg_mblen(ptr);
		ptr += pg_mblen(ptr);
	}
	*cptr = '\0';

	return res;
}
|
||||
|
||||
void
|
||||
parse_cfgdict(text *in, Map ** m)
|
||||
{
|
||||
Map *mptr;
|
||||
char *ptr = VARDATA(in),
|
||||
*begin = NULL;
|
||||
char num = 0;
|
||||
int state = CS_WAITKEY;
|
||||
|
||||
while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
|
||||
{
|
||||
if (t_iseq(ptr, ','))
|
||||
num++;
|
||||
ptr += pg_mblen(ptr);
|
||||
}
|
||||
|
||||
*m = mptr = (Map *) palloc(sizeof(Map) * (num + 2));
|
||||
memset(mptr, 0, sizeof(Map) * (num + 2));
|
||||
ptr = VARDATA(in);
|
||||
while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
|
||||
{
|
||||
if (state == CS_WAITKEY)
|
||||
{
|
||||
if (t_isalpha(ptr))
|
||||
{
|
||||
begin = ptr;
|
||||
state = CS_INKEY;
|
||||
}
|
||||
else if (!t_isspace(ptr))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error"),
|
||||
errdetail("Syntax error in position %d.",
|
||||
(int) (ptr - VARDATA(in)))));
|
||||
}
|
||||
else if (state == CS_INKEY)
|
||||
{
|
||||
if (t_isspace(ptr))
|
||||
{
|
||||
mptr->key = nstrdup(begin, ptr - begin);
|
||||
state = CS_WAITEQ;
|
||||
}
|
||||
else if (t_iseq(ptr, '='))
|
||||
{
|
||||
mptr->key = nstrdup(begin, ptr - begin);
|
||||
state = CS_WAITVALUE;
|
||||
}
|
||||
else if (!t_isalpha(ptr))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error"),
|
||||
errdetail("Syntax error in position %d.",
|
||||
(int) (ptr - VARDATA(in)))));
|
||||
}
|
||||
else if (state == CS_WAITEQ)
|
||||
{
|
||||
if (t_iseq(ptr, '='))
|
||||
state = CS_WAITVALUE;
|
||||
else if (!t_isspace(ptr))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error"),
|
||||
errdetail("Syntax error in position %d.",
|
||||
(int) (ptr - VARDATA(in)))));
|
||||
}
|
||||
else if (state == CS_WAITVALUE)
|
||||
{
|
||||
if (t_iseq(ptr, '"'))
|
||||
{
|
||||
begin = ptr + 1;
|
||||
state = CS_INVALUE;
|
||||
}
|
||||
else if (!t_isspace(ptr))
|
||||
{
|
||||
begin = ptr;
|
||||
state = CS_IN2VALUE;
|
||||
}
|
||||
}
|
||||
else if (state == CS_INVALUE)
|
||||
{
|
||||
if (t_iseq(ptr, '"'))
|
||||
{
|
||||
mptr->value = nstrdup(begin, ptr - begin);
|
||||
mptr++;
|
||||
state = CS_WAITDELIM;
|
||||
}
|
||||
else if (t_iseq(ptr, '\\'))
|
||||
state = CS_INESC;
|
||||
}
|
||||
else if (state == CS_IN2VALUE)
|
||||
{
|
||||
if (t_isspace(ptr) || t_iseq(ptr, ','))
|
||||
{
|
||||
mptr->value = nstrdup(begin, ptr - begin);
|
||||
mptr++;
|
||||
state = (t_iseq(ptr, ',')) ? CS_WAITKEY : CS_WAITDELIM;
|
||||
}
|
||||
else if (t_iseq(ptr, '\\'))
|
||||
state = CS_INESC;
|
||||
}
|
||||
else if (state == CS_WAITDELIM)
|
||||
{
|
||||
if (t_iseq(ptr, ','))
|
||||
state = CS_WAITKEY;
|
||||
else if (!t_isspace(ptr))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error"),
|
||||
errdetail("Syntax error in position %d.",
|
||||
(int) (ptr - VARDATA(in)))));
|
||||
}
|
||||
else if (state == CS_INESC)
|
||||
state = CS_INVALUE;
|
||||
else if (state == CS_IN2ESC)
|
||||
state = CS_IN2VALUE;
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("bad parser state"),
|
||||
errdetail("%d at position %d.",
|
||||
state, (int) (ptr - VARDATA(in)))));
|
||||
ptr += pg_mblen(ptr);
|
||||
}
|
||||
|
||||
if (state == CS_IN2VALUE)
|
||||
{
|
||||
mptr->value = nstrdup(begin, ptr - begin);
|
||||
mptr++;
|
||||
}
|
||||
else if (!(state == CS_WAITDELIM || state == CS_WAITKEY))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("unexpected end of line")));
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,54 +0,0 @@
|
|||
#ifndef __QUERY_H__
#define __QUERY_H__
/*
#define BS_DEBUG
*/

#include "ts_locale.h"

/*
 * item in polish notation with back link
 * to left operand
 */
typedef struct ITEM
{
	int8		type;			/* one of the item type codes defined below */
	int8		weight;
	int2		left;			/* offset, in items, from an operator to its
								 * left operand */
	int4		val;			/* for operators, the operator character */
	/* user-friendly value, must correlate with WordEntry */
	uint32
				istrue:1,		/* use for ranking in Cover */
				length:11,
				distance:20;
} ITEM;

/*
 *Storage:
 *	(len)(size)(array of ITEM)(array of operand in user-friendly form)
 */
typedef struct
{
	int32		vl_len_;		/* varlena header (do not touch directly!) */
	int4		size;			/* number of ITEMs that follow the header */
	char		data[1];
} QUERYTYPE;

/* size of the fixed header: varlena length word plus the item count */
#define HDRSIZEQT	( VARHDRSZ + sizeof(int4) )
/* total size of a query with "size" items and lenofoperand operand bytes */
#define COMPUTESIZE(size,lenofoperand)	( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
/*
 * Start of the ITEM array.  The whole expansion is parenthesized so that
 * postfix use such as GETQUERY(q)[i] or GETQUERY(q)->type applies to the
 * ITEM pointer rather than to the inner char pointer.
 */
#define GETQUERY(x)  ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
/* start of the operand strings, located right after the ITEM array */
#define GETOPERAND(x)	( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )

/* true if x points at a single-byte character that is one of ! & | ( ) */
#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )

/* item type codes */
#define END				0
#define ERR				1
#define VAL				2
#define OPR				3
#define OPEN			4
#define CLOSE			5
#define VALSTOP			6		/* for stop words */

bool TS_execute(ITEM * curitem, void *checkval,
		   bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));

#endif
|
|
@ -1,258 +0,0 @@
|
|||
/*
|
||||
* Rewrite routines of query tree
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include <float.h>
|
||||
|
||||
#include "utils/builtins.h"
|
||||
|
||||
#include "query.h"
|
||||
#include "query_cleanup.h"
|
||||
|
||||
typedef struct NODE
|
||||
{
|
||||
struct NODE *left;
|
||||
struct NODE *right;
|
||||
ITEM *valnode;
|
||||
} NODE;
|
||||
|
||||
/*
|
||||
* make query tree from plain view of query
|
||||
*/
|
||||
static NODE *
|
||||
maketree(ITEM * in)
|
||||
{
|
||||
NODE *node = (NODE *) palloc(sizeof(NODE));
|
||||
|
||||
node->valnode = in;
|
||||
node->right = node->left = NULL;
|
||||
if (in->type == OPR)
|
||||
{
|
||||
node->right = maketree(in + 1);
|
||||
if (in->val != (int4) '!')
|
||||
node->left = maketree(in + in->left);
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ITEM *ptr;
|
||||
int4 len;
|
||||
int4 cur;
|
||||
} PLAINTREE;
|
||||
|
||||
static void
|
||||
plainnode(PLAINTREE * state, NODE * node)
|
||||
{
|
||||
if (state->cur == state->len)
|
||||
{
|
||||
state->len *= 2;
|
||||
state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
|
||||
}
|
||||
memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM));
|
||||
if (node->valnode->type == VAL)
|
||||
state->cur++;
|
||||
else if (node->valnode->val == (int4) '!')
|
||||
{
|
||||
state->ptr[state->cur].left = 1;
|
||||
state->cur++;
|
||||
plainnode(state, node->right);
|
||||
}
|
||||
else
|
||||
{
|
||||
int4 cur = state->cur;
|
||||
|
||||
state->cur++;
|
||||
plainnode(state, node->right);
|
||||
state->ptr[cur].left = state->cur - cur;
|
||||
plainnode(state, node->left);
|
||||
}
|
||||
pfree(node);
|
||||
}
|
||||
|
||||
/*
|
||||
* make plain view of tree from 'normal' view of tree
|
||||
*/
|
||||
static ITEM *
|
||||
plaintree(NODE * root, int4 *len)
|
||||
{
|
||||
PLAINTREE pl;
|
||||
|
||||
pl.cur = 0;
|
||||
pl.len = 16;
|
||||
if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
|
||||
{
|
||||
pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
|
||||
plainnode(&pl, root);
|
||||
}
|
||||
else
|
||||
pl.ptr = NULL;
|
||||
*len = pl.cur;
|
||||
return pl.ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
freetree(NODE * node)
|
||||
{
|
||||
if (!node)
|
||||
return;
|
||||
if (node->left)
|
||||
freetree(node->left);
|
||||
if (node->right)
|
||||
freetree(node->right);
|
||||
pfree(node);
|
||||
}
|
||||
|
||||
/*
|
||||
* clean tree for ! operator.
|
||||
* It's usefull for debug, but in
|
||||
* other case, such view is used with search in index.
|
||||
* Operator ! always return TRUE
|
||||
*/
|
||||
static NODE *
|
||||
clean_NOT_intree(NODE * node)
|
||||
{
|
||||
if (node->valnode->type == VAL)
|
||||
return node;
|
||||
|
||||
if (node->valnode->val == (int4) '!')
|
||||
{
|
||||
freetree(node);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* operator & or | */
|
||||
if (node->valnode->val == (int4) '|')
|
||||
{
|
||||
if ((node->left = clean_NOT_intree(node->left)) == NULL ||
|
||||
(node->right = clean_NOT_intree(node->right)) == NULL)
|
||||
{
|
||||
freetree(node);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
NODE *res = node;
|
||||
|
||||
node->left = clean_NOT_intree(node->left);
|
||||
node->right = clean_NOT_intree(node->right);
|
||||
if (node->left == NULL && node->right == NULL)
|
||||
{
|
||||
pfree(node);
|
||||
res = NULL;
|
||||
}
|
||||
else if (node->left == NULL)
|
||||
{
|
||||
res = node->right;
|
||||
pfree(node);
|
||||
}
|
||||
else if (node->right == NULL)
|
||||
{
|
||||
res = node->left;
|
||||
pfree(node);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
ITEM *
|
||||
clean_NOT_v2(ITEM * ptr, int4 *len)
|
||||
{
|
||||
NODE *root = maketree(ptr);
|
||||
|
||||
return plaintree(clean_NOT_intree(root), len);
|
||||
}
|
||||
|
||||
|
||||
#ifdef V_UNKNOWN /* exists in Windows headers */
|
||||
#undef V_UNKNOWN
|
||||
#endif
|
||||
#ifdef V_FALSE /* exists in Solaris headers */
|
||||
#undef V_FALSE
|
||||
#endif
|
||||
|
||||
#define V_UNKNOWN 0
|
||||
#define V_TRUE 1
|
||||
#define V_FALSE 2
|
||||
#define V_STOP 3
|
||||
|
||||
/*
|
||||
* Clean query tree from values which is always in
|
||||
* text (stopword)
|
||||
*/
|
||||
static NODE *
|
||||
clean_fakeval_intree(NODE * node, char *result)
|
||||
{
|
||||
char lresult = V_UNKNOWN,
|
||||
rresult = V_UNKNOWN;
|
||||
|
||||
if (node->valnode->type == VAL)
|
||||
return node;
|
||||
else if (node->valnode->type == VALSTOP)
|
||||
{
|
||||
pfree(node);
|
||||
*result = V_STOP;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
if (node->valnode->val == (int4) '!')
|
||||
{
|
||||
node->right = clean_fakeval_intree(node->right, &rresult);
|
||||
if (!node->right)
|
||||
{
|
||||
*result = V_STOP;
|
||||
freetree(node);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
NODE *res = node;
|
||||
|
||||
node->left = clean_fakeval_intree(node->left, &lresult);
|
||||
node->right = clean_fakeval_intree(node->right, &rresult);
|
||||
if (lresult == V_STOP && rresult == V_STOP)
|
||||
{
|
||||
freetree(node);
|
||||
*result = V_STOP;
|
||||
return NULL;
|
||||
}
|
||||
else if (lresult == V_STOP)
|
||||
{
|
||||
res = node->right;
|
||||
pfree(node);
|
||||
}
|
||||
else if (rresult == V_STOP)
|
||||
{
|
||||
res = node->left;
|
||||
pfree(node);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
ITEM *
|
||||
clean_fakeval_v2(ITEM * ptr, int4 *len)
|
||||
{
|
||||
NODE *root = maketree(ptr);
|
||||
char result = V_UNKNOWN;
|
||||
NODE *resroot;
|
||||
|
||||
resroot = clean_fakeval_intree(root, &result);
|
||||
if (result != V_UNKNOWN)
|
||||
{
|
||||
elog(NOTICE, "query contains only stopword(s) or doesn't contain lexeme(s), ignored");
|
||||
*len = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return plaintree(resroot, len);
|
||||
}
|
|
@ -1,9 +0,0 @@
|
|||
#ifndef __REWRITE_H__
#define __REWRITE_H__

#include "query.h"

/*
 * Query cleanup entry points.  Both take a flat ITEM array and return a
 * newly built one, storing its item count in *len.
 */

/* drop all '!' subtrees */
ITEM	   *clean_NOT_v2(ITEM * ptr, int4 *len);

/* drop stop-word (VALSTOP) items */
ITEM	   *clean_fakeval_v2(ITEM * ptr, int4 *len);

#endif
|
|
@ -1,389 +0,0 @@
|
|||
#include "postgres.h"
|
||||
|
||||
#include "access/skey.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "access/gist.h"
|
||||
|
||||
#include "query.h"
|
||||
|
||||
typedef uint64 TPQTGist;
|
||||
|
||||
#define SIGLEN (sizeof(TPQTGist)*BITS_PER_BYTE)
|
||||
|
||||
|
||||
#define GETENTRY(vec,pos) ((TPQTGist *) DatumGetPointer((vec)->vector[(pos)].key))
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsq_mcontains);
|
||||
Datum tsq_mcontains(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsq_mcontained);
|
||||
Datum tsq_mcontained(PG_FUNCTION_ARGS);
|
||||
|
||||
static TPQTGist
|
||||
makesign(QUERYTYPE * a)
|
||||
{
|
||||
int i;
|
||||
ITEM *ptr = GETQUERY(a);
|
||||
TPQTGist sign = 0;
|
||||
|
||||
for (i = 0; i < a->size; i++)
|
||||
{
|
||||
if (ptr->type == VAL)
|
||||
sign |= ((TPQTGist) 1) << (ptr->val % SIGLEN);
|
||||
ptr++;
|
||||
}
|
||||
|
||||
return sign;
|
||||
}
|
||||
|
||||
Datum
|
||||
tsq_mcontains(PG_FUNCTION_ARGS)
|
||||
{
|
||||
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
|
||||
QUERYTYPE *ex = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
|
||||
TPQTGist sq,
|
||||
se;
|
||||
int i,
|
||||
j;
|
||||
ITEM *iq,
|
||||
*ie;
|
||||
|
||||
if (query->size < ex->size)
|
||||
{
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
|
||||
PG_RETURN_BOOL(false);
|
||||
}
|
||||
|
||||
sq = makesign(query);
|
||||
se = makesign(ex);
|
||||
|
||||
if ((sq & se) != se)
|
||||
{
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
|
||||
PG_RETURN_BOOL(false);
|
||||
}
|
||||
|
||||
ie = GETQUERY(ex);
|
||||
|
||||
for (i = 0; i < ex->size; i++)
|
||||
{
|
||||
iq = GETQUERY(query);
|
||||
if (ie[i].type != VAL)
|
||||
continue;
|
||||
for (j = 0; j < query->size; j++)
|
||||
if (iq[j].type == VAL && ie[i].val == iq[j].val)
|
||||
{
|
||||
j = query->size + 1;
|
||||
break;
|
||||
}
|
||||
if (j == query->size)
|
||||
{
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
|
||||
PG_RETURN_BOOL(false);
|
||||
}
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
|
||||
PG_RETURN_BOOL(true);
|
||||
}
|
||||
|
||||
Datum
|
||||
tsq_mcontained(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(
|
||||
DirectFunctionCall2(
|
||||
tsq_mcontains,
|
||||
PG_GETARG_DATUM(1),
|
||||
PG_GETARG_DATUM(0)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsq_in);
|
||||
Datum gtsq_in(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsq_out);
|
||||
Datum gtsq_out(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsq_compress);
|
||||
Datum gtsq_compress(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsq_decompress);
|
||||
Datum gtsq_decompress(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsq_consistent);
|
||||
Datum gtsq_consistent(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsq_union);
|
||||
Datum gtsq_union(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsq_same);
|
||||
Datum gtsq_same(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsq_penalty);
|
||||
Datum gtsq_penalty(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsq_picksplit);
|
||||
Datum gtsq_picksplit(PG_FUNCTION_ARGS);
|
||||
|
||||
|
||||
Datum
|
||||
gtsq_in(PG_FUNCTION_ARGS)
|
||||
{
|
||||
elog(ERROR, "not implemented");
|
||||
PG_RETURN_DATUM(0);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsq_out(PG_FUNCTION_ARGS)
|
||||
{
|
||||
elog(ERROR, "not implemented");
|
||||
PG_RETURN_DATUM(0);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsq_compress(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
||||
GISTENTRY *retval = entry;
|
||||
|
||||
if (entry->leafkey)
|
||||
{
|
||||
TPQTGist *sign = (TPQTGist *) palloc(sizeof(TPQTGist));
|
||||
|
||||
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
|
||||
*sign = makesign((QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key)));
|
||||
|
||||
gistentryinit(*retval, PointerGetDatum(sign),
|
||||
entry->rel, entry->page,
|
||||
entry->offset, FALSE);
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(retval);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsq_decompress(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(PG_GETARG_DATUM(0));
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsq_consistent(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
||||
TPQTGist *key = (TPQTGist *) DatumGetPointer(entry->key);
|
||||
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
|
||||
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
|
||||
TPQTGist sq = makesign(query);
|
||||
bool retval;
|
||||
|
||||
switch (strategy)
|
||||
{
|
||||
case RTContainsStrategyNumber:
|
||||
case RTOldContainsStrategyNumber:
|
||||
if (GIST_LEAF(entry))
|
||||
retval = (*key & sq) == sq;
|
||||
else
|
||||
retval = (*key & sq) != 0;
|
||||
break;
|
||||
case RTContainedByStrategyNumber:
|
||||
case RTOldContainedByStrategyNumber:
|
||||
if (GIST_LEAF(entry))
|
||||
retval = (*key & sq) == *key;
|
||||
else
|
||||
retval = (*key & sq) != 0;
|
||||
break;
|
||||
default:
|
||||
retval = FALSE;
|
||||
}
|
||||
PG_RETURN_BOOL(retval);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsq_union(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
|
||||
TPQTGist *sign = (TPQTGist *) palloc(sizeof(TPQTGist));
|
||||
int i;
|
||||
int *size = (int *) PG_GETARG_POINTER(1);
|
||||
|
||||
memset(sign, 0, sizeof(TPQTGist));
|
||||
|
||||
for (i = 0; i < entryvec->n; i++)
|
||||
*sign |= *GETENTRY(entryvec, i);
|
||||
|
||||
*size = sizeof(TPQTGist);
|
||||
|
||||
PG_RETURN_POINTER(sign);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsq_same(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TPQTGist *a = (TPQTGist *) PG_GETARG_POINTER(0);
|
||||
TPQTGist *b = (TPQTGist *) PG_GETARG_POINTER(1);
|
||||
|
||||
PG_RETURN_POINTER(*a == *b);
|
||||
}
|
||||
|
||||
static int
|
||||
sizebitvec(TPQTGist sign)
|
||||
{
|
||||
int size = 0,
|
||||
i;
|
||||
|
||||
for (i = 0; i < SIGLEN; i++)
|
||||
size += 0x01 & (sign >> i);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
static int
|
||||
hemdist(TPQTGist a, TPQTGist b)
|
||||
{
|
||||
TPQTGist res = a ^ b;
|
||||
|
||||
return sizebitvec(res);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsq_penalty(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TPQTGist *origval = (TPQTGist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
|
||||
TPQTGist *newval = (TPQTGist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
|
||||
float *penalty = (float *) PG_GETARG_POINTER(2);
|
||||
|
||||
*penalty = hemdist(*origval, *newval);
|
||||
|
||||
PG_RETURN_POINTER(penalty);
|
||||
}
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
OffsetNumber pos;
|
||||
int4 cost;
|
||||
} SPLITCOST;
|
||||
|
||||
static int
|
||||
comparecost(const void *a, const void *b)
|
||||
{
|
||||
if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
|
||||
return 0;
|
||||
else
|
||||
return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
|
||||
}
|
||||
|
||||
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
|
||||
|
||||
Datum
|
||||
gtsq_picksplit(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
|
||||
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
|
||||
OffsetNumber maxoff = entryvec->n - 2;
|
||||
OffsetNumber k,
|
||||
j;
|
||||
|
||||
TPQTGist *datum_l,
|
||||
*datum_r;
|
||||
int4 size_alpha,
|
||||
size_beta;
|
||||
int4 size_waste,
|
||||
waste = -1;
|
||||
int4 nbytes;
|
||||
OffsetNumber seed_1 = 0,
|
||||
seed_2 = 0;
|
||||
OffsetNumber *left,
|
||||
*right;
|
||||
|
||||
SPLITCOST *costvector;
|
||||
|
||||
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
|
||||
left = v->spl_left = (OffsetNumber *) palloc(nbytes);
|
||||
right = v->spl_right = (OffsetNumber *) palloc(nbytes);
|
||||
v->spl_nleft = v->spl_nright = 0;
|
||||
|
||||
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
|
||||
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
|
||||
{
|
||||
size_waste = hemdist(*GETENTRY(entryvec, j), *GETENTRY(entryvec, k));
|
||||
if (size_waste > waste)
|
||||
{
|
||||
waste = size_waste;
|
||||
seed_1 = k;
|
||||
seed_2 = j;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (seed_1 == 0 || seed_2 == 0)
|
||||
{
|
||||
seed_1 = 1;
|
||||
seed_2 = 2;
|
||||
}
|
||||
|
||||
datum_l = (TPQTGist *) palloc(sizeof(TPQTGist));
|
||||
*datum_l = *GETENTRY(entryvec, seed_1);
|
||||
datum_r = (TPQTGist *) palloc(sizeof(TPQTGist));
|
||||
*datum_r = *GETENTRY(entryvec, seed_2);
|
||||
|
||||
|
||||
maxoff = OffsetNumberNext(maxoff);
|
||||
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
|
||||
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
|
||||
{
|
||||
costvector[j - 1].pos = j;
|
||||
size_alpha = hemdist(*GETENTRY(entryvec, seed_1), *GETENTRY(entryvec, j));
|
||||
size_beta = hemdist(*GETENTRY(entryvec, seed_2), *GETENTRY(entryvec, j));
|
||||
costvector[j - 1].cost = abs(size_alpha - size_beta);
|
||||
}
|
||||
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
|
||||
|
||||
for (k = 0; k < maxoff; k++)
|
||||
{
|
||||
j = costvector[k].pos;
|
||||
if (j == seed_1)
|
||||
{
|
||||
*left++ = j;
|
||||
v->spl_nleft++;
|
||||
continue;
|
||||
}
|
||||
else if (j == seed_2)
|
||||
{
|
||||
*right++ = j;
|
||||
v->spl_nright++;
|
||||
continue;
|
||||
}
|
||||
size_alpha = hemdist(*datum_l, *GETENTRY(entryvec, j));
|
||||
size_beta = hemdist(*datum_r, *GETENTRY(entryvec, j));
|
||||
|
||||
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05))
|
||||
{
|
||||
*datum_l |= *GETENTRY(entryvec, j);
|
||||
*left++ = j;
|
||||
v->spl_nleft++;
|
||||
}
|
||||
else
|
||||
{
|
||||
*datum_r |= *GETENTRY(entryvec, j);
|
||||
*right++ = j;
|
||||
v->spl_nright++;
|
||||
}
|
||||
}
|
||||
|
||||
*right = *left = FirstOffsetNumber;
|
||||
v->spl_ldatum = PointerGetDatum(datum_l);
|
||||
v->spl_rdatum = PointerGetDatum(datum_r);
|
||||
|
||||
PG_RETURN_POINTER(v);
|
||||
}
|
|
@ -1,549 +0,0 @@
|
|||
#include "postgres.h"
|
||||
#include "executor/spi.h"
|
||||
|
||||
#include "query_util.h"
|
||||
|
||||
MemoryContext AggregateContext = NULL;
|
||||
|
||||
/*
 * Advance "counters" to the next combination.
 *
 * Position "last" is incremented; when it reaches "total" the position
 * before it is advanced (with a limit one smaller) and this position is
 * reset to one more than that predecessor.
 *
 * Returns 1 if a new combination was produced, 0 when the positions are
 * exhausted.
 */
static int
addone(int *counters, int last, int total)
{
	if (++counters[last] < total)
		return 1;

	/* overflowed: carry into the previous position, if there is one */
	if (last == 0)
		return 0;
	if (addone(counters, last - 1, total - 1) == 0)
		return 0;

	counters[last] = counters[last - 1] + 1;
	return 1;
}
|
||||
|
||||
static QTNode *
|
||||
findeq(QTNode * node, QTNode * ex, MemoryType memtype, QTNode * subs, bool *isfind)
|
||||
{
|
||||
|
||||
if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val)
|
||||
return node;
|
||||
|
||||
if (node->flags & QTN_NOCHANGE)
|
||||
return node;
|
||||
|
||||
if (node->valnode->type == OPR)
|
||||
{
|
||||
if (node->nchild == ex->nchild)
|
||||
{
|
||||
if (QTNEq(node, ex))
|
||||
{
|
||||
QTNFree(node);
|
||||
if (subs)
|
||||
{
|
||||
node = QTNCopy(subs, memtype);
|
||||
node->flags |= QTN_NOCHANGE;
|
||||
}
|
||||
else
|
||||
node = NULL;
|
||||
*isfind = true;
|
||||
}
|
||||
}
|
||||
else if (node->nchild > ex->nchild)
|
||||
{
|
||||
int *counters = (int *) palloc(sizeof(int) * node->nchild);
|
||||
int i;
|
||||
QTNode *tnode = (QTNode *) MEMALLOC(memtype, sizeof(QTNode));
|
||||
|
||||
memset(tnode, 0, sizeof(QTNode));
|
||||
tnode->child = (QTNode **) MEMALLOC(memtype, sizeof(QTNode *) * ex->nchild);
|
||||
tnode->nchild = ex->nchild;
|
||||
tnode->valnode = (ITEM *) MEMALLOC(memtype, sizeof(ITEM));
|
||||
*(tnode->valnode) = *(ex->valnode);
|
||||
|
||||
for (i = 0; i < ex->nchild; i++)
|
||||
counters[i] = i;
|
||||
|
||||
do
|
||||
{
|
||||
tnode->sign = 0;
|
||||
for (i = 0; i < ex->nchild; i++)
|
||||
{
|
||||
tnode->child[i] = node->child[counters[i]];
|
||||
tnode->sign |= tnode->child[i]->sign;
|
||||
}
|
||||
|
||||
if (QTNEq(tnode, ex))
|
||||
{
|
||||
int j = 0;
|
||||
|
||||
MEMFREE(memtype, tnode->valnode);
|
||||
MEMFREE(memtype, tnode->child);
|
||||
MEMFREE(memtype, tnode);
|
||||
if (subs)
|
||||
{
|
||||
tnode = QTNCopy(subs, memtype);
|
||||
tnode->flags = QTN_NOCHANGE | QTN_NEEDFREE;
|
||||
}
|
||||
else
|
||||
tnode = NULL;
|
||||
|
||||
node->child[counters[0]] = tnode;
|
||||
|
||||
for (i = 1; i < ex->nchild; i++)
|
||||
node->child[counters[i]] = NULL;
|
||||
for (i = 0; i < node->nchild; i++)
|
||||
{
|
||||
if (node->child[i])
|
||||
{
|
||||
node->child[j] = node->child[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
node->nchild = j;
|
||||
|
||||
*isfind = true;
|
||||
|
||||
break;
|
||||
}
|
||||
} while (addone(counters, ex->nchild - 1, node->nchild));
|
||||
if (tnode && (tnode->flags & QTN_NOCHANGE) == 0)
|
||||
{
|
||||
MEMFREE(memtype, tnode->valnode);
|
||||
MEMFREE(memtype, tnode->child);
|
||||
MEMFREE(memtype, tnode);
|
||||
}
|
||||
else
|
||||
QTNSort(node);
|
||||
pfree(counters);
|
||||
}
|
||||
}
|
||||
else if (QTNEq(node, ex))
|
||||
{
|
||||
QTNFree(node);
|
||||
if (subs)
|
||||
{
|
||||
node = QTNCopy(subs, memtype);
|
||||
node->flags |= QTN_NOCHANGE;
|
||||
}
|
||||
else
|
||||
{
|
||||
node = NULL;
|
||||
}
|
||||
*isfind = true;
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
static QTNode *
|
||||
dofindsubquery(QTNode * root, QTNode * ex, MemoryType memtype, QTNode * subs, bool *isfind)
|
||||
{
|
||||
root = findeq(root, ex, memtype, subs, isfind);
|
||||
|
||||
if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == OPR)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < root->nchild; i++)
|
||||
root->child[i] = dofindsubquery(root->child[i], ex, memtype, subs, isfind);
|
||||
}
|
||||
|
||||
return root;
|
||||
}
|
||||
|
||||
/*
 * Tidy up an operator node after some of its children were removed
 * (set to NULL) by a rewrite without substitution.
 *
 * The surviving children are packed to the front of the child array.
 * Then:
 *	- an operator left with no children at all is freed and NULL is
 *	  returned, whatever the operator is;
 *	- a binary operator left with a single child is replaced by that child;
 *	- a '!' operator with its one child is left alone, since replacing it
 *	  by the child would silently drop the negation.
 *
 * Returns the node to use in place of root, possibly NULL.
 */
static QTNode *
dropvoidsubtree(QTNode * root)
{

	if (!root)
		return NULL;

	if (root->valnode->type == OPR)
	{
		int			i,
					j = 0;

		/* squeeze out the NULL children */
		for (i = 0; i < root->nchild; i++)
		{
			if (root->child[i])
			{
				root->child[j] = root->child[i];
				j++;
			}
		}

		root->nchild = j;

		if (root->nchild == 0)
		{
			/* nothing left under this operator: the subtree is void */
			QTNFree(root);
			root = NULL;
		}
		else if (root->nchild == 1 && root->valnode->val != (int4) '!')
		{
			/* a binary operator with one operand is just that operand */
			QTNode	   *nroot = root->child[0];

			/*
			 * NOTE(review): only the node itself is released here, as in
			 * the original; whether its child array and valnode need
			 * freeing depends on the node's flags — confirm.
			 */
			pfree(root);
			root = nroot;
		}
	}

	return root;
}
|
||||
|
||||
static QTNode *
|
||||
findsubquery(QTNode * root, QTNode * ex, MemoryType memtype, QTNode * subs, bool *isfind)
|
||||
{
|
||||
bool DidFind = false;
|
||||
|
||||
root = dofindsubquery(root, ex, memtype, subs, &DidFind);
|
||||
|
||||
if (!subs && DidFind)
|
||||
root = dropvoidsubtree(root);
|
||||
|
||||
if (isfind)
|
||||
*isfind = DidFind;
|
||||
|
||||
return root;
|
||||
}
|
||||
|
||||
static Oid tsqOid = InvalidOid;
|
||||
static void
|
||||
get_tsq_Oid(void)
|
||||
{
|
||||
int ret;
|
||||
bool isnull;
|
||||
|
||||
if ((ret = SPI_exec("select oid from pg_type where typname='tsquery'", 1)) < 0)
|
||||
/* internal error */
|
||||
elog(ERROR, "SPI_exec to get tsquery oid returns %d", ret);
|
||||
|
||||
if (SPI_processed < 1)
|
||||
/* internal error */
|
||||
elog(ERROR, "there is no tsvector type");
|
||||
tsqOid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
if (tsqOid == InvalidOid)
|
||||
/* internal error */
|
||||
elog(ERROR, "tsquery type has InvalidOid");
|
||||
}
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsquery_rewrite);
|
||||
PG_FUNCTION_INFO_V1(rewrite_accum);
|
||||
Datum rewrite_accum(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
rewrite_accum(PG_FUNCTION_ARGS)
|
||||
{
|
||||
QUERYTYPE *acc = (QUERYTYPE *) PG_GETARG_POINTER(0);
|
||||
ArrayType *qa = (ArrayType *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1)));
|
||||
QUERYTYPE *q;
|
||||
QTNode *qex,
|
||||
*subs = NULL,
|
||||
*acctree;
|
||||
bool isfind = false;
|
||||
Datum *elemsp;
|
||||
int nelemsp;
|
||||
|
||||
AggregateContext = ((AggState *) fcinfo->context)->aggcontext;
|
||||
|
||||
if (acc == NULL || PG_ARGISNULL(0))
|
||||
{
|
||||
acc = (QUERYTYPE *) MEMALLOC(AggMemory, sizeof(QUERYTYPE));
|
||||
SET_VARSIZE(acc, HDRSIZEQT);
|
||||
acc->size = 0;
|
||||
}
|
||||
|
||||
if (qa == NULL || PG_ARGISNULL(1))
|
||||
{
|
||||
PG_FREE_IF_COPY(qa, 1);
|
||||
PG_RETURN_POINTER(acc);
|
||||
}
|
||||
|
||||
if (ARR_NDIM(qa) != 1)
|
||||
elog(ERROR, "array must be one-dimensional, not %d dimension", ARR_NDIM(qa));
|
||||
|
||||
if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3)
|
||||
elog(ERROR, "array should have only three elements");
|
||||
|
||||
if (tsqOid == InvalidOid)
|
||||
{
|
||||
SPI_connect();
|
||||
get_tsq_Oid();
|
||||
SPI_finish();
|
||||
}
|
||||
|
||||
if (ARR_ELEMTYPE(qa) != tsqOid)
|
||||
elog(ERROR, "array should contain tsquery type");
|
||||
|
||||
deconstruct_array(qa, tsqOid, -1, false, 'i', &elemsp, NULL, &nelemsp);
|
||||
|
||||
q = (QUERYTYPE *) DatumGetPointer(elemsp[0]);
|
||||
if (q->size == 0)
|
||||
{
|
||||
pfree(elemsp);
|
||||
PG_RETURN_POINTER(acc);
|
||||
}
|
||||
|
||||
if (!acc->size)
|
||||
{
|
||||
if (VARSIZE(acc) > HDRSIZEQT)
|
||||
{
|
||||
pfree(elemsp);
|
||||
PG_RETURN_POINTER(acc);
|
||||
}
|
||||
else
|
||||
acctree = QT2QTN(GETQUERY(q), GETOPERAND(q));
|
||||
}
|
||||
else
|
||||
acctree = QT2QTN(GETQUERY(acc), GETOPERAND(acc));
|
||||
|
||||
QTNTernary(acctree);
|
||||
QTNSort(acctree);
|
||||
|
||||
q = (QUERYTYPE *) DatumGetPointer(elemsp[1]);
|
||||
if (q->size == 0)
|
||||
{
|
||||
pfree(elemsp);
|
||||
PG_RETURN_POINTER(acc);
|
||||
}
|
||||
qex = QT2QTN(GETQUERY(q), GETOPERAND(q));
|
||||
QTNTernary(qex);
|
||||
QTNSort(qex);
|
||||
|
||||
q = (QUERYTYPE *) DatumGetPointer(elemsp[2]);
|
||||
if (q->size)
|
||||
subs = QT2QTN(GETQUERY(q), GETOPERAND(q));
|
||||
|
||||
acctree = findsubquery(acctree, qex, PlainMemory, subs, &isfind);
|
||||
|
||||
if (isfind || !acc->size)
|
||||
{
|
||||
/* pfree( acc ); do not pfree(p), because nodeAgg.c will */
|
||||
if (acctree)
|
||||
{
|
||||
QTNBinary(acctree);
|
||||
acc = QTN2QT(acctree, AggMemory);
|
||||
}
|
||||
else
|
||||
{
|
||||
acc = (QUERYTYPE *) MEMALLOC(AggMemory, HDRSIZEQT * 2);
|
||||
SET_VARSIZE(acc, HDRSIZEQT * 2);
|
||||
acc->size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
pfree(elemsp);
|
||||
QTNFree(qex);
|
||||
QTNFree(subs);
|
||||
QTNFree(acctree);
|
||||
|
||||
PG_RETURN_POINTER(acc);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(rewrite_finish);
|
||||
Datum rewrite_finish(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
rewrite_finish(PG_FUNCTION_ARGS)
|
||||
{
|
||||
QUERYTYPE *acc = (QUERYTYPE *) PG_GETARG_POINTER(0);
|
||||
QUERYTYPE *rewrited;
|
||||
|
||||
if (acc == NULL || PG_ARGISNULL(0) || acc->size == 0)
|
||||
{
|
||||
acc = (QUERYTYPE *) palloc(sizeof(QUERYTYPE));
|
||||
SET_VARSIZE(acc, HDRSIZEQT);
|
||||
acc->size = 0;
|
||||
}
|
||||
|
||||
rewrited = (QUERYTYPE *) palloc(VARSIZE(acc));
|
||||
memcpy(rewrited, acc, VARSIZE(acc));
|
||||
pfree(acc);
|
||||
|
||||
PG_RETURN_POINTER(rewrited);
|
||||
}
|
||||
|
||||
Datum tsquery_rewrite(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
tsquery_rewrite(PG_FUNCTION_ARGS)
|
||||
{
|
||||
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
|
||||
text *in = PG_GETARG_TEXT_P(1);
|
||||
QUERYTYPE *rewrited = query;
|
||||
QTNode *tree;
|
||||
char *buf;
|
||||
void *plan;
|
||||
Portal portal;
|
||||
bool isnull;
|
||||
int i;
|
||||
|
||||
if (query->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(in, 1);
|
||||
PG_RETURN_POINTER(rewrited);
|
||||
}
|
||||
|
||||
tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
|
||||
QTNTernary(tree);
|
||||
QTNSort(tree);
|
||||
|
||||
buf = (char *) palloc(VARSIZE(in));
|
||||
memcpy(buf, VARDATA(in), VARSIZE(in) - VARHDRSZ);
|
||||
buf[VARSIZE(in) - VARHDRSZ] = '\0';
|
||||
|
||||
SPI_connect();
|
||||
|
||||
if (tsqOid == InvalidOid)
|
||||
get_tsq_Oid();
|
||||
|
||||
if ((plan = SPI_prepare(buf, 0, NULL)) == NULL)
|
||||
elog(ERROR, "SPI_prepare('%s') returns NULL", buf);
|
||||
|
||||
if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL)
|
||||
elog(ERROR, "SPI_cursor_open('%s') returns NULL", buf);
|
||||
|
||||
SPI_cursor_fetch(portal, true, 100);
|
||||
|
||||
if (SPI_tuptable->tupdesc->natts != 2)
|
||||
elog(ERROR, "number of fields doesn't equal to 2");
|
||||
|
||||
if (SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tsqOid)
|
||||
elog(ERROR, "column #1 isn't of tsquery type");
|
||||
|
||||
if (SPI_gettypeid(SPI_tuptable->tupdesc, 2) != tsqOid)
|
||||
elog(ERROR, "column #2 isn't of tsquery type");
|
||||
|
||||
while (SPI_processed > 0 && tree)
|
||||
{
|
||||
for (i = 0; i < SPI_processed && tree; i++)
|
||||
{
|
||||
Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
|
||||
Datum sdata;
|
||||
|
||||
if (isnull)
|
||||
continue;
|
||||
|
||||
sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull);
|
||||
|
||||
if (!isnull)
|
||||
{
|
||||
QUERYTYPE *qtex = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(qdata));
|
||||
QUERYTYPE *qtsubs = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(sdata));
|
||||
QTNode *qex,
|
||||
*qsubs = NULL;
|
||||
|
||||
if (qtex->size == 0)
|
||||
{
|
||||
if (qtex != (QUERYTYPE *) DatumGetPointer(qdata))
|
||||
pfree(qtex);
|
||||
if (qtsubs != (QUERYTYPE *) DatumGetPointer(sdata))
|
||||
pfree(qtsubs);
|
||||
continue;
|
||||
}
|
||||
|
||||
qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex));
|
||||
|
||||
QTNTernary(qex);
|
||||
QTNSort(qex);
|
||||
|
||||
if (qtsubs->size)
|
||||
qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs));
|
||||
|
||||
tree = findsubquery(tree, qex, SPIMemory, qsubs, NULL);
|
||||
|
||||
QTNFree(qex);
|
||||
if (qtex != (QUERYTYPE *) DatumGetPointer(qdata))
|
||||
pfree(qtex);
|
||||
QTNFree(qsubs);
|
||||
if (qtsubs != (QUERYTYPE *) DatumGetPointer(sdata))
|
||||
pfree(qtsubs);
|
||||
}
|
||||
}
|
||||
|
||||
SPI_freetuptable(SPI_tuptable);
|
||||
SPI_cursor_fetch(portal, true, 100);
|
||||
}
|
||||
|
||||
SPI_freetuptable(SPI_tuptable);
|
||||
SPI_cursor_close(portal);
|
||||
SPI_freeplan(plan);
|
||||
SPI_finish();
|
||||
|
||||
|
||||
if (tree)
|
||||
{
|
||||
QTNBinary(tree);
|
||||
rewrited = QTN2QT(tree, PlainMemory);
|
||||
QTNFree(tree);
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
SET_VARSIZE(rewrited, HDRSIZEQT);
|
||||
rewrited->size = 0;
|
||||
}
|
||||
|
||||
pfree(buf);
|
||||
PG_FREE_IF_COPY(in, 1);
|
||||
PG_RETURN_POINTER(rewrited);
|
||||
}
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsquery_rewrite_query);
|
||||
Datum tsquery_rewrite_query(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
tsquery_rewrite_query(PG_FUNCTION_ARGS)
|
||||
{
|
||||
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
|
||||
QUERYTYPE *ex = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
|
||||
QUERYTYPE *subst = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
|
||||
QUERYTYPE *rewrited = query;
|
||||
QTNode *tree,
|
||||
*qex,
|
||||
*subs = NULL;
|
||||
|
||||
if (query->size == 0 || ex->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
PG_FREE_IF_COPY(subst, 2);
|
||||
PG_RETURN_POINTER(rewrited);
|
||||
}
|
||||
|
||||
tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
|
||||
QTNTernary(tree);
|
||||
QTNSort(tree);
|
||||
|
||||
qex = QT2QTN(GETQUERY(ex), GETOPERAND(ex));
|
||||
QTNTernary(qex);
|
||||
QTNSort(qex);
|
||||
|
||||
if (subst->size)
|
||||
subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
|
||||
|
||||
tree = findsubquery(tree, qex, PlainMemory, subs, NULL);
|
||||
QTNFree(qex);
|
||||
QTNFree(subs);
|
||||
|
||||
if (!tree)
|
||||
{
|
||||
SET_VARSIZE(rewrited, HDRSIZEQT);
|
||||
rewrited->size = 0;
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
PG_FREE_IF_COPY(subst, 2);
|
||||
PG_RETURN_POINTER(rewrited);
|
||||
}
|
||||
else
|
||||
{
|
||||
QTNBinary(tree);
|
||||
rewrited = QTN2QT(tree, PlainMemory);
|
||||
QTNFree(tree);
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
PG_FREE_IF_COPY(subst, 2);
|
||||
PG_RETURN_POINTER(rewrited);
|
||||
}
|
|
@ -1,205 +0,0 @@
|
|||
#include "postgres.h"
|
||||
#include "fmgr.h"
|
||||
|
||||
#include "query_util.h"
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsquery_numnode);
|
||||
Datum tsquery_numnode(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
tsquery_numnode(PG_FUNCTION_ARGS)
|
||||
{
|
||||
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
|
||||
int nnode = query->size;
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_RETURN_INT32(nnode);
|
||||
}
|
||||
|
||||
static QTNode *
|
||||
join_tsqueries(QUERYTYPE * a, QUERYTYPE * b)
|
||||
{
|
||||
QTNode *res = (QTNode *) palloc0(sizeof(QTNode));
|
||||
|
||||
res->flags |= QTN_NEEDFREE;
|
||||
|
||||
res->valnode = (ITEM *) palloc0(sizeof(ITEM));
|
||||
res->valnode->type = OPR;
|
||||
|
||||
res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
|
||||
res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
|
||||
res->child[1] = QT2QTN(GETQUERY(a), GETOPERAND(a));
|
||||
res->nchild = 2;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsquery_and);
|
||||
Datum tsquery_and(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
tsquery_and(PG_FUNCTION_ARGS)
|
||||
{
|
||||
QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
|
||||
QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1)));
|
||||
QTNode *res;
|
||||
QUERYTYPE *query;
|
||||
|
||||
if (a->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(a, 1);
|
||||
PG_RETURN_POINTER(b);
|
||||
}
|
||||
else if (b->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
PG_RETURN_POINTER(a);
|
||||
}
|
||||
|
||||
res = join_tsqueries(a, b);
|
||||
|
||||
res->valnode->val = '&';
|
||||
|
||||
query = QTN2QT(res, PlainMemory);
|
||||
|
||||
QTNFree(res);
|
||||
PG_FREE_IF_COPY(a, 0);
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
|
||||
PG_RETURN_POINTER(query);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsquery_or);
|
||||
Datum tsquery_or(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
tsquery_or(PG_FUNCTION_ARGS)
|
||||
{
|
||||
QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
|
||||
QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1)));
|
||||
QTNode *res;
|
||||
QUERYTYPE *query;
|
||||
|
||||
if (a->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(a, 1);
|
||||
PG_RETURN_POINTER(b);
|
||||
}
|
||||
else if (b->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
PG_RETURN_POINTER(a);
|
||||
}
|
||||
|
||||
res = join_tsqueries(a, b);
|
||||
|
||||
res->valnode->val = '|';
|
||||
|
||||
query = QTN2QT(res, PlainMemory);
|
||||
|
||||
QTNFree(res);
|
||||
PG_FREE_IF_COPY(a, 0);
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
|
||||
PG_RETURN_POINTER(query);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsquery_not);
|
||||
Datum tsquery_not(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
tsquery_not(PG_FUNCTION_ARGS)
|
||||
{
|
||||
QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
|
||||
QTNode *res;
|
||||
QUERYTYPE *query;
|
||||
|
||||
if (a->size == 0)
|
||||
PG_RETURN_POINTER(a);
|
||||
|
||||
res = (QTNode *) palloc0(sizeof(QTNode));
|
||||
|
||||
res->flags |= QTN_NEEDFREE;
|
||||
|
||||
res->valnode = (ITEM *) palloc0(sizeof(ITEM));
|
||||
res->valnode->type = OPR;
|
||||
res->valnode->val = '!';
|
||||
|
||||
res->child = (QTNode **) palloc0(sizeof(QTNode *));
|
||||
res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));
|
||||
res->nchild = 1;
|
||||
|
||||
query = QTN2QT(res, PlainMemory);
|
||||
|
||||
QTNFree(res);
|
||||
PG_FREE_IF_COPY(a, 0);
|
||||
|
||||
PG_RETURN_POINTER(query);
|
||||
}
|
||||
|
||||
static int
|
||||
CompareTSQ(QUERYTYPE * a, QUERYTYPE * b)
|
||||
{
|
||||
if (a->size != b->size)
|
||||
{
|
||||
return (a->size < b->size) ? -1 : 1;
|
||||
}
|
||||
else if (VARSIZE(a) != VARSIZE(b))
|
||||
{
|
||||
return (VARSIZE(a) < VARSIZE(b)) ? -1 : 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
QTNode *an = QT2QTN(GETQUERY(a), GETOPERAND(a));
|
||||
QTNode *bn = QT2QTN(GETQUERY(b), GETOPERAND(b));
|
||||
int res = QTNodeCompare(an, bn);
|
||||
|
||||
QTNFree(an);
|
||||
QTNFree(bn);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsquery_cmp);
|
||||
\
|
||||
Datum tsquery_cmp(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
tsquery_cmp(PG_FUNCTION_ARGS)
|
||||
{
|
||||
QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
|
||||
QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1)));
|
||||
int res = CompareTSQ(a, b);
|
||||
|
||||
PG_FREE_IF_COPY(a, 0);
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
|
||||
PG_RETURN_INT32(res);
|
||||
}
|
||||
|
||||
#define CMPFUNC( NAME, ACTION ) \
|
||||
Datum NAME(PG_FUNCTION_ARGS); \
|
||||
\
|
||||
Datum \
|
||||
NAME(PG_FUNCTION_ARGS) { \
|
||||
QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); \
|
||||
QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); \
|
||||
int res = CompareTSQ(a,b); \
|
||||
\
|
||||
PG_FREE_IF_COPY(a,0); \
|
||||
PG_FREE_IF_COPY(b,1); \
|
||||
\
|
||||
PG_RETURN_BOOL( ACTION ); \
|
||||
} \
|
||||
\
|
||||
PG_FUNCTION_INFO_V1(NAME)
|
||||
|
||||
CMPFUNC(tsquery_lt, res < 0);
|
||||
CMPFUNC(tsquery_le, res <= 0);
|
||||
CMPFUNC(tsquery_eq, res == 0);
|
||||
CMPFUNC(tsquery_ge, res >= 0);
|
||||
CMPFUNC(tsquery_gt, res > 0);
|
||||
CMPFUNC(tsquery_ne, res != 0);
|
|
@ -1,301 +0,0 @@
|
|||
#include "postgres.h"
|
||||
#include "executor/spi.h"
|
||||
#include "query_util.h"
|
||||
|
||||
QTNode *
|
||||
QT2QTN(ITEM * in, char *operand)
|
||||
{
|
||||
QTNode *node = (QTNode *) palloc0(sizeof(QTNode));
|
||||
|
||||
node->valnode = in;
|
||||
|
||||
if (in->type == OPR)
|
||||
{
|
||||
node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
|
||||
node->child[0] = QT2QTN(in + 1, operand);
|
||||
node->sign = node->child[0]->sign;
|
||||
if (in->val == (int4) '!')
|
||||
node->nchild = 1;
|
||||
else
|
||||
{
|
||||
node->nchild = 2;
|
||||
node->child[1] = QT2QTN(in + in->left, operand);
|
||||
node->sign |= node->child[1]->sign;
|
||||
}
|
||||
}
|
||||
else if (operand)
|
||||
{
|
||||
node->word = operand + in->distance;
|
||||
node->sign = 1 << (in->val % 32);
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
void
|
||||
QTNFree(QTNode * in)
|
||||
{
|
||||
if (!in)
|
||||
return;
|
||||
|
||||
if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
|
||||
pfree(in->word);
|
||||
|
||||
if (in->child)
|
||||
{
|
||||
if (in->valnode)
|
||||
{
|
||||
if (in->valnode->type == OPR && in->nchild > 0)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
QTNFree(in->child[i]);
|
||||
}
|
||||
if (in->flags & QTN_NEEDFREE)
|
||||
pfree(in->valnode);
|
||||
}
|
||||
pfree(in->child);
|
||||
}
|
||||
|
||||
pfree(in);
|
||||
}
|
||||
|
||||
int
|
||||
QTNodeCompare(QTNode * an, QTNode * bn)
|
||||
{
|
||||
if (an->valnode->type != bn->valnode->type)
|
||||
return (an->valnode->type > bn->valnode->type) ? -1 : 1;
|
||||
else if (an->valnode->val != bn->valnode->val)
|
||||
return (an->valnode->val > bn->valnode->val) ? -1 : 1;
|
||||
else if (an->valnode->type == VAL)
|
||||
{
|
||||
if (an->valnode->length == bn->valnode->length)
|
||||
return strncmp(an->word, bn->word, an->valnode->length);
|
||||
else
|
||||
return (an->valnode->length > bn->valnode->length) ? -1 : 1;
|
||||
}
|
||||
else if (an->nchild != bn->nchild)
|
||||
{
|
||||
return (an->nchild > bn->nchild) ? -1 : 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i,
|
||||
res;
|
||||
|
||||
for (i = 0; i < an->nchild; i++)
|
||||
if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
|
||||
return res;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
cmpQTN(const void *a, const void *b)
|
||||
{
|
||||
return QTNodeCompare(*(QTNode **) a, *(QTNode **) b);
|
||||
}
|
||||
|
||||
void
|
||||
QTNSort(QTNode * in)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (in->valnode->type != OPR)
|
||||
return;
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
QTNSort(in->child[i]);
|
||||
if (in->nchild > 1)
|
||||
qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN);
|
||||
}
|
||||
|
||||
bool
|
||||
QTNEq(QTNode * a, QTNode * b)
|
||||
{
|
||||
uint32 sign = a->sign & b->sign;
|
||||
|
||||
if (!(sign == a->sign && sign == b->sign))
|
||||
return 0;
|
||||
|
||||
return (QTNodeCompare(a, b) == 0) ? true : false;
|
||||
}
|
||||
|
||||
void
|
||||
QTNTernary(QTNode * in)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (in->valnode->type != OPR)
|
||||
return;
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
QTNTernary(in->child[i]);
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
{
|
||||
if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val)
|
||||
{
|
||||
QTNode *cc = in->child[i];
|
||||
int oldnchild = in->nchild;
|
||||
|
||||
in->nchild += cc->nchild - 1;
|
||||
in->child = (QTNode **) repalloc(in->child, in->nchild * sizeof(QTNode *));
|
||||
|
||||
if (i + 1 != oldnchild)
|
||||
memmove(in->child + i + cc->nchild, in->child + i + 1,
|
||||
(oldnchild - i - 1) * sizeof(QTNode *));
|
||||
|
||||
memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
|
||||
i += cc->nchild - 1;
|
||||
|
||||
pfree(cc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
QTNBinary(QTNode * in)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (in->valnode->type != OPR)
|
||||
return;
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
QTNBinary(in->child[i]);
|
||||
|
||||
if (in->nchild <= 2)
|
||||
return;
|
||||
|
||||
while (in->nchild > 2)
|
||||
{
|
||||
QTNode *nn = (QTNode *) palloc0(sizeof(QTNode));
|
||||
|
||||
nn->valnode = (ITEM *) palloc0(sizeof(ITEM));
|
||||
nn->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
|
||||
|
||||
nn->nchild = 2;
|
||||
nn->flags = QTN_NEEDFREE;
|
||||
|
||||
nn->child[0] = in->child[0];
|
||||
nn->child[1] = in->child[1];
|
||||
nn->sign = nn->child[0]->sign | nn->child[1]->sign;
|
||||
|
||||
nn->valnode->type = in->valnode->type;
|
||||
nn->valnode->val = in->valnode->val;
|
||||
|
||||
in->child[0] = nn;
|
||||
in->child[1] = in->child[in->nchild - 1];
|
||||
in->nchild--;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
|
||||
{
|
||||
*nnode += 1;
|
||||
if (in->valnode->type == OPR)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
cntsize(in->child[i], sumlen, nnode);
|
||||
}
|
||||
else
|
||||
{
|
||||
*sumlen += in->valnode->length + 1;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ITEM *curitem;
|
||||
char *operand;
|
||||
char *curoperand;
|
||||
} QTN2QTState;
|
||||
|
||||
static void
|
||||
fillQT(QTN2QTState * state, QTNode * in)
|
||||
{
|
||||
*(state->curitem) = *(in->valnode);
|
||||
|
||||
if (in->valnode->type == VAL)
|
||||
{
|
||||
memcpy(state->curoperand, in->word, in->valnode->length);
|
||||
state->curitem->distance = state->curoperand - state->operand;
|
||||
state->curoperand[in->valnode->length] = '\0';
|
||||
state->curoperand += in->valnode->length + 1;
|
||||
state->curitem++;
|
||||
}
|
||||
else
|
||||
{
|
||||
ITEM *curitem = state->curitem;
|
||||
|
||||
Assert(in->nchild <= 2);
|
||||
state->curitem++;
|
||||
|
||||
fillQT(state, in->child[0]);
|
||||
|
||||
if (in->nchild == 2)
|
||||
{
|
||||
curitem->left = state->curitem - curitem;
|
||||
fillQT(state, in->child[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
QUERYTYPE *
|
||||
QTN2QT(QTNode * in, MemoryType memtype)
|
||||
{
|
||||
QUERYTYPE *out;
|
||||
int len;
|
||||
int sumlen = 0,
|
||||
nnode = 0;
|
||||
QTN2QTState state;
|
||||
|
||||
cntsize(in, &sumlen, &nnode);
|
||||
len = COMPUTESIZE(nnode, sumlen);
|
||||
|
||||
out = (QUERYTYPE *) MEMALLOC(memtype, len);
|
||||
SET_VARSIZE(out, len);
|
||||
out->size = nnode;
|
||||
|
||||
state.curitem = GETQUERY(out);
|
||||
state.operand = state.curoperand = GETOPERAND(out);
|
||||
|
||||
fillQT(&state, in);
|
||||
return out;
|
||||
}
|
||||
|
||||
QTNode *
|
||||
QTNCopy(QTNode * in, MemoryType memtype)
|
||||
{
|
||||
QTNode *out = (QTNode *) MEMALLOC(memtype, sizeof(QTNode));
|
||||
|
||||
*out = *in;
|
||||
out->valnode = (ITEM *) MEMALLOC(memtype, sizeof(ITEM));
|
||||
*(out->valnode) = *(in->valnode);
|
||||
out->flags |= QTN_NEEDFREE;
|
||||
|
||||
if (in->valnode->type == VAL)
|
||||
{
|
||||
out->word = MEMALLOC(memtype, in->valnode->length + 1);
|
||||
memcpy(out->word, in->word, in->valnode->length);
|
||||
out->word[in->valnode->length] = '\0';
|
||||
out->flags |= QTN_WORDFREE;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
|
||||
out->child = (QTNode **) MEMALLOC(memtype, sizeof(QTNode *) * in->nchild);
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
out->child[i] = QTNCopy(in->child[i], memtype);
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
|
@ -1,47 +0,0 @@
|
|||
#ifndef __QUERY_UTIL_H__
#define __QUERY_UTIL_H__

#include "postgres.h"
#include "utils/memutils.h"

#include "query.h"
#include "executor/spi.h"

/*
 * Node of the tree representation of a tsquery.  Unlike the flat item
 * array of QUERYTYPE, an operator node may carry any number of children.
 */
typedef struct QTNode
{
	ITEM	   *valnode;		/* query item this node stands for */
	uint32		flags;			/* bitmask of the QTN_* flags below */
	int4		nchild;			/* number of entries used in child[] */
	char	   *word;			/* operand text, used by VAL nodes */
	uint32		sign;			/* bit signature of the subtree */
	struct QTNode **child;		/* operands of an operator node */
} QTNode;

/* valnode was allocated for this node; QTNFree() pfree's it */
#define QTN_NEEDFREE	0x01
/* NOTE(review): presumably marks a node that must not be rewritten again */
#define QTN_NOCHANGE	0x02
/* word was allocated for this node; QTNFree() pfree's it */
#define QTN_WORDFREE	0x04

/* Selects the allocator used by MEMALLOC() and MEMFREE() */
typedef enum
{
	PlainMemory,				/* palloc / pfree */
	SPIMemory,					/* SPI_palloc / SPI_pfree */
	AggMemory					/* MemoryContextAlloc(AggregateContext) / pfree */
} MemoryType;

QTNode	   *QT2QTN(ITEM * in, char *operand);
QUERYTYPE  *QTN2QT(QTNode * in, MemoryType memtype);
void		QTNFree(QTNode * in);
void		QTNSort(QTNode * in);
void		QTNTernary(QTNode * in);
void		QTNBinary(QTNode * in);
int			QTNodeCompare(QTNode * an, QTNode * bn);
QTNode	   *QTNCopy(QTNode * in, MemoryType memtype);
bool		QTNEq(QTNode * a, QTNode * b);


extern MemoryContext AggregateContext;

/* Allocate s bytes with the allocator selected by the MemoryType us */
#define MEMALLOC(us, s)			( ((us)==SPIMemory) ? SPI_palloc(s) : ( ( (us)==PlainMemory ) ? palloc(s) : MemoryContextAlloc(AggregateContext, (s)) ) )
/* Release p with the function matching the MemoryType us */
#define MEMFREE(us, p)			( ((us)==SPIMemory) ? SPI_pfree(p) : pfree(p) )

#endif
|
|
@ -1,924 +0,0 @@
|
|||
/*
|
||||
* Relevance ranking
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "access/gist.h"
|
||||
#include "access/itup.h"
|
||||
#include "catalog/namespace.h"
|
||||
#include "commands/trigger.h"
|
||||
#include "executor/spi.h"
|
||||
#include "fmgr.h"
|
||||
#include "funcapi.h"
|
||||
#include "nodes/pg_list.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "utils/array.h"
|
||||
#include "utils/builtins.h"
|
||||
|
||||
#include "tsvector.h"
|
||||
#include "query.h"
|
||||
#include "common.h"
|
||||
|
||||
PG_FUNCTION_INFO_V1(rank);
|
||||
Datum rank(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(rank_def);
|
||||
Datum rank_def(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(rank_cd);
|
||||
Datum rank_cd(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(rank_cd_def);
|
||||
Datum rank_cd_def(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(get_covers);
|
||||
Datum get_covers(PG_FUNCTION_ARGS);
|
||||
|
||||
static float weights[] = {0.1f, 0.2f, 0.4f, 1.0f};
|
||||
|
||||
#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] )
|
||||
|
||||
#define RANK_NO_NORM 0x00
|
||||
#define RANK_NORM_LOGLENGTH 0x01
|
||||
#define RANK_NORM_LENGTH 0x02
|
||||
#define RANK_NORM_EXTDIST 0x04
|
||||
#define RANK_NORM_UNIQ 0x08
|
||||
#define RANK_NORM_LOGUNIQ 0x10
|
||||
#define DEF_NORM_METHOD RANK_NO_NORM
|
||||
|
||||
static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q);
|
||||
static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q);
|
||||
|
||||
/*
|
||||
* Returns a weight of a word collocation
|
||||
*/
|
||||
static float4
|
||||
word_distance(int4 w)
|
||||
{
|
||||
if (w > 100)
|
||||
return (float4)1e-30;
|
||||
|
||||
return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
|
||||
}
|
||||
|
||||
static int
|
||||
cnt_length(tsvector * t)
|
||||
{
|
||||
WordEntry *ptr = ARRPTR(t),
|
||||
*end = (WordEntry *) STRPTR(t);
|
||||
int len = 0,
|
||||
clen;
|
||||
|
||||
while (ptr < end)
|
||||
{
|
||||
if ((clen = POSDATALEN(t, ptr)) == 0)
|
||||
len += 1;
|
||||
else
|
||||
len += clen;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static int4
|
||||
WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item)
|
||||
{
|
||||
if (ptr->len == item->length)
|
||||
return strncmp(
|
||||
eval + ptr->pos,
|
||||
qval + item->distance,
|
||||
item->length);
|
||||
|
||||
return (ptr->len > item->length) ? 1 : -1;
|
||||
}
|
||||
|
||||
static WordEntry *
|
||||
find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
|
||||
{
|
||||
WordEntry *StopLow = ARRPTR(t);
|
||||
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
|
||||
WordEntry *StopMiddle;
|
||||
int difference;
|
||||
|
||||
/* Loop invariant: StopLow <= item < StopHigh */
|
||||
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||
difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
|
||||
if (difference == 0)
|
||||
return StopMiddle;
|
||||
else if (difference < 0)
|
||||
StopLow = StopMiddle + 1;
|
||||
else
|
||||
StopHigh = StopMiddle;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
compareITEM(const void *a, const void *b, void *arg)
|
||||
{
|
||||
char *operand = (char *) arg;
|
||||
|
||||
if ((*(ITEM **) a)->length == (*(ITEM **) b)->length)
|
||||
return strncmp(operand + (*(ITEM **) a)->distance,
|
||||
operand + (*(ITEM **) b)->distance,
|
||||
(*(ITEM **) b)->length);
|
||||
|
||||
return ((*(ITEM **) a)->length > (*(ITEM **) b)->length) ? 1 : -1;
|
||||
}
|
||||
|
||||
/*
 * SortAndUniqItems
 *
 * Collects pointers to the VAL items among the first *size items starting
 * at "item", sorts them with compareITEM() and drops duplicates.  Returns
 * the palloc'd pointer array and stores the number of pointers left in
 * *size.
 */
static ITEM **
SortAndUniqItems(char *operand, ITEM * item, int *size)
{
	int			total = *size;
	int			n = 0;
	int			last;
	int			i;
	ITEM	  **res;

	res = (ITEM **) palloc(sizeof(ITEM *) * total);

	/* keep only the operands, skipping operator items */
	for (i = 0; i < total; i++)
	{
		if (item[i].type == VAL)
			res[n++] = &item[i];
	}

	*size = n;
	if (n < 2)
		return res;

	qsort_arg(res, n, sizeof(ITEM **), compareITEM, (void *) operand);

	/* compact in place: res[0..last] holds the distinct items so far */
	last = 0;
	for (i = 1; i < n; i++)
	{
		if (compareITEM((void *) &res[i], (void *) &res[last], (void *) operand) != 0)
		{
			last++;
			res[last] = res[i];
		}
	}

	*size = last + 1;
	return res;
}
|
||||
|
||||
/*
 * Stand-in position data for entries that have no positions.  The ranking
 * code writes a count into the first element through a uint16 pointer and
 * treats the elements after it as the positions.
 */
static WordEntryPos POSNULL[] = {0, 0};
|
||||
|
||||
static float
|
||||
calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
|
||||
{
|
||||
uint16 **pos;
|
||||
int i,
|
||||
k,
|
||||
l,
|
||||
p;
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post,
|
||||
*ct;
|
||||
int4 dimt,
|
||||
lenct,
|
||||
dist;
|
||||
float res = -1.0;
|
||||
ITEM **item;
|
||||
int size = q->size;
|
||||
|
||||
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
|
||||
if (size < 2)
|
||||
{
|
||||
pfree(item);
|
||||
return calc_rank_or(w, t, q);
|
||||
}
|
||||
pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
|
||||
memset(pos, 0, sizeof(uint16 *) * q->size);
|
||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
||||
WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
entry = find_wordentry(t, q, item[i]);
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if (entry->haspos)
|
||||
pos[i] = (uint16 *) _POSDATAPTR(t, entry);
|
||||
else
|
||||
pos[i] = (uint16 *) POSNULL;
|
||||
|
||||
|
||||
dimt = *(uint16 *) (pos[i]);
|
||||
post = (WordEntryPos *) (pos[i] + 1);
|
||||
for (k = 0; k < i; k++)
|
||||
{
|
||||
if (!pos[k])
|
||||
continue;
|
||||
lenct = *(uint16 *) (pos[k]);
|
||||
ct = (WordEntryPos *) (pos[k] + 1);
|
||||
for (l = 0; l < dimt; l++)
|
||||
{
|
||||
for (p = 0; p < lenct; p++)
|
||||
{
|
||||
dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
|
||||
if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
|
||||
{
|
||||
float curw;
|
||||
|
||||
if (!dist)
|
||||
dist = MAXENTRYPOS;
|
||||
curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
|
||||
res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pfree(pos);
|
||||
pfree(item);
|
||||
return res;
|
||||
}
|
||||
|
||||
static float
|
||||
calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
|
||||
{
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,
|
||||
j,
|
||||
i;
|
||||
float res = 0.0;
|
||||
ITEM **item;
|
||||
int size = q->size;
|
||||
|
||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
||||
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
float resj,
|
||||
wjm;
|
||||
int4 jm;
|
||||
|
||||
entry = find_wordentry(t, q, item[i]);
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if (entry->haspos)
|
||||
{
|
||||
dimt = POSDATALEN(t, entry);
|
||||
post = POSDATAPTR(t, entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
dimt = *(uint16 *) POSNULL;
|
||||
post = POSNULL + 1;
|
||||
}
|
||||
|
||||
resj = 0.0;
|
||||
wjm = -1.0;
|
||||
jm = 0;
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
|
||||
if (wpos(post[j]) > wjm)
|
||||
{
|
||||
wjm = wpos(post[j]);
|
||||
jm = j;
|
||||
}
|
||||
}
|
||||
/*
|
||||
limit (sum(i/i^2),i->inf) = pi^2/6
|
||||
resj = sum(wi/i^2),i=1,noccurence,
|
||||
wi - should be sorted desc,
|
||||
don't sort for now, just choose maximum weight. This should be corrected
|
||||
Oleg Bartunov
|
||||
*/
|
||||
res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
|
||||
}
|
||||
if (size > 0)
|
||||
res = res / size;
|
||||
pfree(item);
|
||||
return res;
|
||||
}
|
||||
|
||||
static float
|
||||
calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
|
||||
{
|
||||
ITEM *item = GETQUERY(q);
|
||||
float res = 0.0;
|
||||
int len;
|
||||
|
||||
if (!t->size || !q->size)
|
||||
return 0.0;
|
||||
|
||||
res = (item->type != VAL && item->val == (int4) '&') ?
|
||||
calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
|
||||
|
||||
if (res < 0)
|
||||
res = (float)1e-20;
|
||||
|
||||
if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
|
||||
res /= log((double) (cnt_length(t) + 1)) / log(2.0);
|
||||
|
||||
if (method & RANK_NORM_LENGTH)
|
||||
{
|
||||
len = cnt_length(t);
|
||||
if (len > 0)
|
||||
res /= (float) len;
|
||||
}
|
||||
|
||||
if ((method & RANK_NORM_UNIQ) && t->size > 0)
|
||||
res /= (float) (t->size);
|
||||
|
||||
if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
|
||||
res /= log((double) (t->size + 1)) / log(2.0);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
Datum
|
||||
rank(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
|
||||
int method = DEF_NORM_METHOD;
|
||||
float res = 0.0;
|
||||
float ws[lengthof(weights)];
|
||||
float4 *arrdata;
|
||||
int i;
|
||||
|
||||
if (ARR_NDIM(win) != 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
||||
errmsg("array of weight must be one-dimensional")));
|
||||
|
||||
if (ARRNELEMS(win) < lengthof(weights))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
||||
errmsg("array of weight is too short")));
|
||||
|
||||
if (ARR_HASNULL(win))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
||||
errmsg("array of weight must not contain nulls")));
|
||||
|
||||
arrdata = (float4 *) ARR_DATA_PTR(win);
|
||||
for (i = 0; i < lengthof(weights); i++)
|
||||
{
|
||||
ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
|
||||
if (ws[i] > 1.0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("weight out of range")));
|
||||
}
|
||||
|
||||
if (PG_NARGS() == 4)
|
||||
method = PG_GETARG_INT32(3);
|
||||
|
||||
res = calc_rank(ws, txt, query, method);
|
||||
|
||||
PG_FREE_IF_COPY(win, 0);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
|
||||
Datum
|
||||
rank_def(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
float res = 0.0;
|
||||
int method = DEF_NORM_METHOD;
|
||||
|
||||
if (PG_NARGS() == 3)
|
||||
method = PG_GETARG_INT32(2);
|
||||
|
||||
res = calc_rank(weights, txt, query, method);
|
||||
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ITEM **item;
|
||||
int16 nitem;
|
||||
bool needfree;
|
||||
uint8 wclass;
|
||||
int32 pos;
|
||||
} DocRepresentation;
|
||||
|
||||
static int
|
||||
compareDocR(const void *a, const void *b)
|
||||
{
|
||||
if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
|
||||
return 0;
|
||||
return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
|
||||
}
|
||||
|
||||
static bool
|
||||
checkcondition_ITEM(void *checkval, ITEM * val)
|
||||
{
|
||||
return (bool) (val->istrue);
|
||||
}
|
||||
|
||||
static void
|
||||
reset_istrue_flag(QUERYTYPE * query)
|
||||
{
|
||||
ITEM *item = GETQUERY(query);
|
||||
int i;
|
||||
|
||||
/* reset istrue flag */
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
if (item->type == VAL)
|
||||
item->istrue = 0;
|
||||
item++;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int pos;
|
||||
int p;
|
||||
int q;
|
||||
DocRepresentation *begin;
|
||||
DocRepresentation *end;
|
||||
} Extention;
|
||||
|
||||
|
||||
static bool
|
||||
Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention * ext)
|
||||
{
|
||||
DocRepresentation *ptr;
|
||||
int lastpos = ext->pos;
|
||||
int i;
|
||||
bool found = false;
|
||||
|
||||
reset_istrue_flag(query);
|
||||
|
||||
ext->p = 0x7fffffff;
|
||||
ext->q = 0;
|
||||
ptr = doc + ext->pos;
|
||||
|
||||
/* find upper bound of cover from current position, move up */
|
||||
while (ptr - doc < len)
|
||||
{
|
||||
for (i = 0; i < ptr->nitem; i++)
|
||||
ptr->item[i]->istrue = 1;
|
||||
if (TS_execute(GETQUERY(query), NULL, false, checkcondition_ITEM))
|
||||
{
|
||||
if (ptr->pos > ext->q)
|
||||
{
|
||||
ext->q = ptr->pos;
|
||||
ext->end = ptr;
|
||||
lastpos = ptr - doc;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
|
||||
if (!found)
|
||||
return false;
|
||||
|
||||
reset_istrue_flag(query);
|
||||
|
||||
ptr = doc + lastpos;
|
||||
|
||||
/* find lower bound of cover from founded upper bound, move down */
|
||||
while (ptr >= doc)
|
||||
{
|
||||
for (i = 0; i < ptr->nitem; i++)
|
||||
ptr->item[i]->istrue = 1;
|
||||
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_ITEM))
|
||||
{
|
||||
if (ptr->pos < ext->p)
|
||||
{
|
||||
ext->begin = ptr;
|
||||
ext->p = ptr->pos;
|
||||
}
|
||||
break;
|
||||
}
|
||||
ptr--;
|
||||
}
|
||||
|
||||
if (ext->p <= ext->q)
|
||||
{
|
||||
/*
|
||||
* set position for next try to next lexeme after begining of founded
|
||||
* cover
|
||||
*/
|
||||
ext->pos = (ptr - doc) + 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
ext->pos++;
|
||||
return Cover(doc, len, query, ext);
|
||||
}
|
||||
|
||||
static DocRepresentation *
|
||||
get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
|
||||
{
|
||||
ITEM *item = GETQUERY(query);
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,
|
||||
j,
|
||||
i;
|
||||
int len = query->size * 4,
|
||||
cur = 0;
|
||||
DocRepresentation *doc;
|
||||
char *operand;
|
||||
|
||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
||||
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
|
||||
operand = GETOPERAND(query);
|
||||
reset_istrue_flag(query);
|
||||
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
if (item[i].type != VAL || item[i].istrue)
|
||||
continue;
|
||||
|
||||
entry = find_wordentry(txt, query, &(item[i]));
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if (entry->haspos)
|
||||
{
|
||||
dimt = POSDATALEN(txt, entry);
|
||||
post = POSDATAPTR(txt, entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
dimt = *(uint16 *) POSNULL;
|
||||
post = POSNULL + 1;
|
||||
}
|
||||
|
||||
while (cur + dimt >= len)
|
||||
{
|
||||
len *= 2;
|
||||
doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
|
||||
}
|
||||
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
if (j == 0)
|
||||
{
|
||||
ITEM *kptr,
|
||||
*iptr = item + i;
|
||||
int k;
|
||||
|
||||
doc[cur].needfree = false;
|
||||
doc[cur].nitem = 0;
|
||||
doc[cur].item = (ITEM **) palloc(sizeof(ITEM *) * query->size);
|
||||
|
||||
for (k = 0; k < query->size; k++)
|
||||
{
|
||||
kptr = item + k;
|
||||
if (k == i ||
|
||||
(item[k].type == VAL &&
|
||||
compareITEM(&kptr, &iptr, operand) == 0))
|
||||
{
|
||||
doc[cur].item[doc[cur].nitem] = item + k;
|
||||
doc[cur].nitem++;
|
||||
kptr->istrue = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
doc[cur].needfree = false;
|
||||
doc[cur].nitem = doc[cur - 1].nitem;
|
||||
doc[cur].item = doc[cur - 1].item;
|
||||
}
|
||||
doc[cur].pos = WEP_GETPOS(post[j]);
|
||||
doc[cur].wclass = WEP_GETWEIGHT(post[j]);
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
|
||||
*doclen = cur;
|
||||
|
||||
if (cur > 0)
|
||||
{
|
||||
if (cur > 1)
|
||||
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
|
||||
return doc;
|
||||
}
|
||||
|
||||
pfree(doc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static float4
|
||||
calc_rank_cd(float4 *arrdata, tsvector * txt, QUERYTYPE * query, int method)
|
||||
{
|
||||
DocRepresentation *doc;
|
||||
int len,
|
||||
i,
|
||||
doclen = 0;
|
||||
Extention ext;
|
||||
double Wdoc = 0.0;
|
||||
double invws[lengthof(weights)];
|
||||
double SumDist = 0.0,
|
||||
PrevExtPos = 0.0,
|
||||
CurExtPos = 0.0;
|
||||
int NExtent = 0;
|
||||
|
||||
for (i = 0; i < lengthof(weights); i++)
|
||||
{
|
||||
invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
|
||||
if (invws[i] > 1.0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("weight out of range")));
|
||||
invws[i] = 1.0 / invws[i];
|
||||
}
|
||||
|
||||
doc = get_docrep(txt, query, &doclen);
|
||||
if (!doc)
|
||||
return 0.0;
|
||||
|
||||
MemSet(&ext, 0, sizeof(Extention));
|
||||
while (Cover(doc, doclen, query, &ext))
|
||||
{
|
||||
double Cpos = 0.0;
|
||||
double InvSum = 0.0;
|
||||
int nNoise;
|
||||
DocRepresentation *ptr = ext.begin;
|
||||
|
||||
while (ptr <= ext.end)
|
||||
{
|
||||
InvSum += invws[ptr->wclass];
|
||||
ptr++;
|
||||
}
|
||||
|
||||
Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
|
||||
/*
|
||||
* if doc are big enough then ext.q may be equal to ext.p
|
||||
* due to limit of posional information. In this case we
|
||||
* approximate number of noise word as half cover's
|
||||
* length
|
||||
*/
|
||||
nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
|
||||
if ( nNoise < 0 )
|
||||
nNoise = (ext.end - ext.begin) / 2;
|
||||
Wdoc += Cpos / ((double) (1 + nNoise));
|
||||
|
||||
CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
|
||||
if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent devision by
|
||||
* zero in a case of
|
||||
multiple lexize */ )
|
||||
SumDist += 1.0 / (CurExtPos - PrevExtPos);
|
||||
|
||||
PrevExtPos = CurExtPos;
|
||||
NExtent++;
|
||||
}
|
||||
|
||||
if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
|
||||
Wdoc /= log((double) (cnt_length(txt) + 1));
|
||||
|
||||
if (method & RANK_NORM_LENGTH)
|
||||
{
|
||||
len = cnt_length(txt);
|
||||
if (len > 0)
|
||||
Wdoc /= (double) len;
|
||||
}
|
||||
|
||||
if ((method & RANK_NORM_EXTDIST) && SumDist > 0)
|
||||
Wdoc /= ((double) NExtent) / SumDist;
|
||||
|
||||
if ((method & RANK_NORM_UNIQ) && txt->size > 0)
|
||||
Wdoc /= (double) (txt->size);
|
||||
|
||||
if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
|
||||
Wdoc /= log((double) (txt->size + 1)) / log(2.0);
|
||||
|
||||
for (i = 0; i < doclen; i++)
|
||||
if (doc[i].needfree)
|
||||
pfree(doc[i].item);
|
||||
pfree(doc);
|
||||
|
||||
return (float4) Wdoc;
|
||||
}
|
||||
|
||||
Datum
|
||||
rank_cd(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayType *win;
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(2));
|
||||
int method = DEF_NORM_METHOD;
|
||||
float4 res;
|
||||
|
||||
/*
|
||||
* Pre-8.2, rank_cd took just a plain int as its first argument.
|
||||
* It was a mistake to keep the same C function name while changing the
|
||||
* signature, but it's too late to fix that. Instead, do a runtime test
|
||||
* to make sure the expected datatype has been passed. This is needed
|
||||
* to prevent core dumps if tsearch2 function definitions from an old
|
||||
* database are loaded into an 8.2 server.
|
||||
*/
|
||||
if (get_fn_expr_argtype(fcinfo->flinfo, 0) != FLOAT4ARRAYOID)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
|
||||
errmsg("rank_cd() now takes real[] as its first argument, not integer")));
|
||||
|
||||
/* now safe to dereference the first arg */
|
||||
win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
|
||||
if (ARR_NDIM(win) != 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
||||
errmsg("array of weight must be one-dimensional")));
|
||||
|
||||
if (ARRNELEMS(win) < lengthof(weights))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
||||
errmsg("array of weight is too short")));
|
||||
|
||||
if (ARR_HASNULL(win))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
||||
errmsg("array of weight must not contain nulls")));
|
||||
|
||||
if (PG_NARGS() == 4)
|
||||
method = PG_GETARG_INT32(3);
|
||||
|
||||
res = calc_rank_cd((float4 *) ARR_DATA_PTR(win), txt, query, method);
|
||||
|
||||
PG_FREE_IF_COPY(win, 0);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
rank_cd_def(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
|
||||
float4 res;
|
||||
|
||||
res = calc_rank_cd(weights, txt, query, (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : DEF_NORM_METHOD);
|
||||
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
|
||||
/**************debug*************/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char *w;
|
||||
int2 len;
|
||||
int2 pos;
|
||||
int2 start;
|
||||
int2 finish;
|
||||
} DocWord;
|
||||
|
||||
static int
|
||||
compareDocWord(const void *a, const void *b)
|
||||
{
|
||||
if (((DocWord *) a)->pos == ((DocWord *) b)->pos)
|
||||
return 0;
|
||||
return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1;
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
get_covers(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
|
||||
WordEntry *pptr = ARRPTR(txt);
|
||||
int i,
|
||||
dlen = 0,
|
||||
j,
|
||||
cur = 0,
|
||||
len = 0,
|
||||
rlen;
|
||||
DocWord *dw,
|
||||
*dwptr;
|
||||
text *out;
|
||||
char *cptr;
|
||||
DocRepresentation *doc;
|
||||
int olddwpos = 0;
|
||||
int ncover = 1;
|
||||
Extention ext;
|
||||
|
||||
doc = get_docrep(txt, query, &rlen);
|
||||
|
||||
if (!doc)
|
||||
{
|
||||
out = palloc(VARHDRSZ);
|
||||
SET_VARSIZE(out, VARHDRSZ);
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
||||
|
||||
for (i = 0; i < txt->size; i++)
|
||||
{
|
||||
if (!pptr[i].haspos)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("no pos info")));
|
||||
dlen += POSDATALEN(txt, &(pptr[i]));
|
||||
}
|
||||
|
||||
dwptr = dw = palloc(sizeof(DocWord) * dlen);
|
||||
memset(dw, 0, sizeof(DocWord) * dlen);
|
||||
|
||||
for (i = 0; i < txt->size; i++)
|
||||
{
|
||||
WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i]));
|
||||
|
||||
for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++)
|
||||
{
|
||||
dw[cur].w = STRPTR(txt) + pptr[i].pos;
|
||||
dw[cur].len = pptr[i].len;
|
||||
dw[cur].pos = WEP_GETPOS(posdata[j]);
|
||||
cur++;
|
||||
}
|
||||
len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i]));
|
||||
}
|
||||
qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
|
||||
|
||||
MemSet(&ext, 0, sizeof(Extention));
|
||||
while (Cover(doc, rlen, query, &ext))
|
||||
{
|
||||
dwptr = dw + olddwpos;
|
||||
while (dwptr->pos < ext.p && dwptr - dw < dlen)
|
||||
dwptr++;
|
||||
olddwpos = dwptr - dw;
|
||||
dwptr->start = ncover;
|
||||
while (dwptr->pos < ext.q + 1 && dwptr - dw < dlen)
|
||||
dwptr++;
|
||||
(dwptr - 1)->finish = ncover;
|
||||
len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ;
|
||||
ncover++;
|
||||
}
|
||||
|
||||
out = palloc(VARHDRSZ + len);
|
||||
cptr = ((char *) out) + VARHDRSZ;
|
||||
dwptr = dw;
|
||||
|
||||
while (dwptr - dw < dlen)
|
||||
{
|
||||
if (dwptr->start)
|
||||
{
|
||||
sprintf(cptr, "{%d ", dwptr->start);
|
||||
cptr = strchr(cptr, '\0');
|
||||
}
|
||||
memcpy(cptr, dwptr->w, dwptr->len);
|
||||
cptr += dwptr->len;
|
||||
*cptr = ' ';
|
||||
cptr++;
|
||||
if (dwptr->finish)
|
||||
{
|
||||
sprintf(cptr, "}%d ", dwptr->finish);
|
||||
cptr = strchr(cptr, '\0');
|
||||
}
|
||||
dwptr++;
|
||||
}
|
||||
|
||||
SET_VARSIZE(out, cptr - ((char *) out));
|
||||
|
||||
pfree(dw);
|
||||
for (i = 0; i < rlen; i++)
|
||||
if (doc[i].needfree)
|
||||
pfree(doc[i].item);
|
||||
pfree(doc);
|
||||
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
|
@ -1,102 +0,0 @@
|
|||
/*
|
||||
* simple but fast map from str to Oid
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "snmap.h"
|
||||
#include "common.h"
|
||||
|
||||
static int
|
||||
compareSNMapEntry(const void *a, const void *b)
|
||||
{
|
||||
if (((SNMapEntry *) a)->nsp < ((SNMapEntry *) b)->nsp)
|
||||
return -1;
|
||||
else if (((SNMapEntry *) a)->nsp > ((SNMapEntry *) b)->nsp)
|
||||
return 1;
|
||||
else
|
||||
return strcmp(((SNMapEntry *) a)->key, ((SNMapEntry *) b)->key);
|
||||
}
|
||||
|
||||
void
|
||||
addSNMap(SNMap * map, char *key, Oid value)
|
||||
{
|
||||
if (map->len >= map->reallen)
|
||||
{
|
||||
SNMapEntry *tmp;
|
||||
int len = (map->reallen) ? 2 * map->reallen : 16;
|
||||
|
||||
tmp = (SNMapEntry *) realloc(map->list, sizeof(SNMapEntry) * len);
|
||||
if (!tmp)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
map->reallen = len;
|
||||
map->list = tmp;
|
||||
}
|
||||
map->list[map->len].key = strdup(key);
|
||||
if (!map->list[map->len].key)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
map->list[map->len].nsp = get_oidnamespace(TSNSP_FunctionOid);
|
||||
map->list[map->len].value = value;
|
||||
map->len++;
|
||||
if (map->len > 1)
|
||||
qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
|
||||
}
|
||||
|
||||
void
|
||||
addSNMap_t(SNMap * map, text *key, Oid value)
|
||||
{
|
||||
char *k = text2char(key);
|
||||
|
||||
addSNMap(map, k, value);
|
||||
pfree(k);
|
||||
}
|
||||
|
||||
Oid
|
||||
findSNMap(SNMap * map, char *key)
|
||||
{
|
||||
SNMapEntry *ptr;
|
||||
SNMapEntry ks;
|
||||
|
||||
ks.key = key;
|
||||
ks.nsp = get_oidnamespace(TSNSP_FunctionOid);
|
||||
ks.value = 0;
|
||||
|
||||
if (map->len == 0 || !map->list)
|
||||
return 0;
|
||||
ptr = (SNMapEntry *) bsearch(&ks, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
|
||||
return (ptr) ? ptr->value : 0;
|
||||
}
|
||||
|
||||
Oid
|
||||
findSNMap_t(SNMap * map, text *key)
|
||||
{
|
||||
char *k = text2char(key);
|
||||
int res;
|
||||
|
||||
res = findSNMap(map, k);
|
||||
pfree(k);
|
||||
return res;
|
||||
}
|
||||
|
||||
void
|
||||
freeSNMap(SNMap * map)
|
||||
{
|
||||
SNMapEntry *entry = map->list;
|
||||
|
||||
if (map->list)
|
||||
{
|
||||
while (map->len)
|
||||
{
|
||||
if (entry->key)
|
||||
free(entry->key);
|
||||
entry++;
|
||||
map->len--;
|
||||
}
|
||||
free(map->list);
|
||||
}
|
||||
memset(map, 0, sizeof(SNMap));
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
#ifndef __SNMAP_H__
|
||||
#define __SNMAP_H__
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char *key;
|
||||
Oid value;
|
||||
Oid nsp;
|
||||
} SNMapEntry;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int len;
|
||||
int reallen;
|
||||
SNMapEntry *list;
|
||||
} SNMap;
|
||||
|
||||
void addSNMap(SNMap * map, char *key, Oid value);
|
||||
void addSNMap_t(SNMap * map, text *key, Oid value);
|
||||
Oid findSNMap(SNMap * map, char *key);
|
||||
Oid findSNMap_t(SNMap * map, text *key);
|
||||
void freeSNMap(SNMap * map);
|
||||
|
||||
#endif
|
|
@ -1,27 +0,0 @@
|
|||
# $PostgreSQL: pgsql/contrib/tsearch2/snowball/Makefile,v 1.10 2007/06/26 22:05:03 tgl Exp $
|
||||
|
||||
# Objects that are partially linked into this directory's SUBSYS.o
SUBOBJS = english_stem.o api.o russian_stem.o russian_stem_UTF8.o utilities.o

# Neither SUBSYS.o nor its member objects are known to the shared clean rules
EXTRA_CLEAN = SUBSYS.o $(SUBOBJS)

# Headers shared with the rest of tsearch2 live one directory up
PG_CPPFLAGS = -I$(srcdir)/..

# Build either against an installed server (PGXS) or inside the source tree
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/tsearch2/snowball
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

# Add the shared-library compile flags even when CFLAGS is set on the command line
override CFLAGS += $(CFLAGS_SL)

all: SUBSYS.o

# Combine the member objects into a single relocatable object
SUBSYS.o: $(SUBOBJS)
	$(LD) $(LDREL) $(LDOUT) $@ $^
|
||||
|
||||
|
|
@ -1,85 +0,0 @@
|
|||
|
||||
#include <stdlib.h> /* for calloc, free */
|
||||
#include "header.h"
|
||||
|
||||
extern struct SN_env *
|
||||
SN_create_env(int S_size, int I_size, int B_size)
|
||||
{
|
||||
struct SN_env *z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
|
||||
|
||||
if (z == NULL)
|
||||
return NULL;
|
||||
z->p = create_s();
|
||||
if (z->p == NULL)
|
||||
goto error;
|
||||
if (S_size)
|
||||
{
|
||||
int i;
|
||||
|
||||
z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
|
||||
if (z->S == NULL)
|
||||
goto error;
|
||||
|
||||
for (i = 0; i < S_size; i++)
|
||||
{
|
||||
z->S[i] = create_s();
|
||||
if (z->S[i] == NULL)
|
||||
goto error;
|
||||
}
|
||||
z->S_size = S_size;
|
||||
}
|
||||
|
||||
if (I_size)
|
||||
{
|
||||
z->I = (int *) calloc(I_size, sizeof(int));
|
||||
if (z->I == NULL)
|
||||
goto error;
|
||||
z->I_size = I_size;
|
||||
}
|
||||
|
||||
if (B_size)
|
||||
{
|
||||
z->B = (symbol *) calloc(B_size, sizeof(symbol));
|
||||
if (z->B == NULL)
|
||||
goto error;
|
||||
z->B_size = B_size;
|
||||
}
|
||||
|
||||
return z;
|
||||
error:
|
||||
SN_close_env(z);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
extern void
|
||||
SN_close_env(struct SN_env * z)
|
||||
{
|
||||
if (z == NULL)
|
||||
return;
|
||||
if (z->S_size)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < z->S_size; i++)
|
||||
{
|
||||
lose_s(z->S[i]);
|
||||
}
|
||||
free(z->S);
|
||||
}
|
||||
if (z->I_size)
|
||||
free(z->I);
|
||||
if (z->B_size)
|
||||
free(z->B);
|
||||
if (z->p)
|
||||
lose_s(z->p);
|
||||
free(z);
|
||||
}
|
||||
|
||||
extern int
|
||||
SN_set_current(struct SN_env * z, int size, const symbol * s)
|
||||
{
|
||||
int err = replace_s(z, 0, z->l, size, s, NULL);
|
||||
|
||||
z->c = 0;
|
||||
return err;
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
|
||||
/* The character unit the stemmers operate on. */
typedef unsigned char symbol;

/*
 * Or replace 'char' above with 'short' for 16 bit characters.
 *
 * More precisely, replace 'char' with whatever type guarantees the
 * character width you need. Note however that sizeof(symbol) should divide
 * HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
 * there is an alignment problem. In the unlikely event of a problem here,
 * consult Martin Porter.
 */
|
||||
|
||||
struct SN_env
|
||||
{
|
||||
symbol *p;
|
||||
int c;
|
||||
int a;
|
||||
int l;
|
||||
int lb;
|
||||
int bra;
|
||||
int ket;
|
||||
int S_size;
|
||||
int I_size;
|
||||
int B_size;
|
||||
symbol **S;
|
||||
int *I;
|
||||
symbol *B;
|
||||
};
|
||||
|
||||
extern struct SN_env *SN_create_env(int S_size, int I_size, int B_size);
|
||||
extern void SN_close_env(struct SN_env * z);
|
||||
|
||||
extern int SN_set_current(struct SN_env * z, int size, const symbol * s);
|
File diff suppressed because it is too large
Load Diff
|
@ -1,18 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/snowball/english_stem.h,v 1.6 2006/03/11 04:38:30 momjian Exp $ */
|
||||
|
||||
/* This file was generated automatically by the Snowball to ANSI C compiler */
|
||||
|
||||
/* Give the declarations C linkage when included from C++. */
#ifdef __cplusplus
extern		"C"
{
#endif

/* English (ISO-8859-1) stemmer: create/destroy an environment, run the stemmer */
extern struct SN_env *english_ISO_8859_1_create_env(void);
extern void english_ISO_8859_1_close_env(struct SN_env * z);

extern int	english_ISO_8859_1_stem(struct SN_env * z);

#ifdef __cplusplus
}
#endif
|
|
@ -1,56 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/snowball/header.h,v 1.8 2006/07/10 22:06:11 momjian Exp $ */
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include "api.h"
|
||||
|
||||
/*
 * A Snowball string keeps two ints directly in front of its first symbol:
 * the capacity at index -2 and the current size at index -1.  HEAD is the
 * byte size of that prefix.
 *
 * Each macro is fully parenthesized so that it stays a single operand when
 * used inside a larger expression.  SIZE() and CAPACITY() remain lvalues.
 */
#define HEAD (2 * sizeof(int))

#define SIZE(p)		   (((int *)(p))[-1])
#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
#define CAPACITY(p)    (((int *)(p))[-2])
|
||||
|
||||
struct among
|
||||
{
|
||||
int s_size; /* number of chars in string */
|
||||
symbol *s; /* search string */
|
||||
int substring_i; /* index to longest matching substring */
|
||||
int result; /* result of the lookup */
|
||||
int (*function) (struct SN_env *);
|
||||
};
|
||||
|
||||
/* string allocation */
extern symbol *create_s(void);
extern void lose_s(symbol * p);

/* UTF-8 cursor movement */
extern int	skip_utf8(const symbol * p, int c, int lb, int l, int n);

/* character-class tests, UTF-8 variants */
extern int	in_grouping_U(struct SN_env * z, unsigned char *s, int min, int max);
extern int	in_grouping_b_U(struct SN_env * z, unsigned char *s, int min, int max);
extern int	out_grouping_U(struct SN_env * z, unsigned char *s, int min, int max);
extern int	out_grouping_b_U(struct SN_env * z, unsigned char *s, int min, int max);

/* character-class tests, single-byte variants */
extern int	in_grouping(struct SN_env * z, unsigned char *s, int min, int max);
extern int	in_grouping_b(struct SN_env * z, unsigned char *s, int min, int max);
extern int	out_grouping(struct SN_env * z, unsigned char *s, int min, int max);
extern int	out_grouping_b(struct SN_env * z, unsigned char *s, int min, int max);

/* string comparison at the cursor */
extern int	eq_s(struct SN_env * z, int s_size, symbol * s);
extern int	eq_s_b(struct SN_env * z, int s_size, symbol * s);
extern int	eq_v(struct SN_env * z, symbol * p);
extern int	eq_v_b(struct SN_env * z, symbol * p);

/* table lookup */
extern int	find_among(struct SN_env * z, struct among * v, int v_size);
extern int	find_among_b(struct SN_env * z, struct among * v, int v_size);

/* replacement and deletion */
extern int	replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int *adjustment);
extern int	slice_from_s(struct SN_env * z, int s_size, symbol * s);
extern int	slice_from_v(struct SN_env * z, symbol * p);
extern int	slice_del(struct SN_env * z);

/* insertion */
extern int	insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
extern int	insert_v(struct SN_env * z, int bra, int ket, symbol * p);

/* copying out of the current string */
extern symbol *slice_to(struct SN_env * z, symbol * p);
extern symbol *assign_to(struct SN_env * z, symbol * p);

extern void debug(struct SN_env * z, int number, int line_count);
|
|
@ -1,928 +0,0 @@
|
|||
|
||||
/* This file was generated automatically by the Snowball to ANSI C compiler */
|
||||
|
||||
#include "header.h"
|
||||
|
||||
/* public entry points of the Russian (KOI8-R) stemmer */
extern int	russian_KOI8_R_stem(struct SN_env * z);
extern struct SN_env *russian_KOI8_R_create_env(void);
extern void russian_KOI8_R_close_env(struct SN_env * z);

/* file-local routines of the stemmer */
static int	r_tidy_up(struct SN_env * z);
static int	r_derivational(struct SN_env * z);
static int	r_noun(struct SN_env * z);
static int	r_verb(struct SN_env * z);
static int	r_reflexive(struct SN_env * z);
static int	r_adjectival(struct SN_env * z);
static int	r_adjective(struct SN_env * z);
static int	r_perfective_gerund(struct SN_env * z);
static int	r_R2(struct SN_env * z);
static int	r_mark_regions(struct SN_env * z);
|
||||
|
||||
static symbol s_0_0[3] = {0xD7, 0xDB, 0xC9};
|
||||
static symbol s_0_1[4] = {0xC9, 0xD7, 0xDB, 0xC9};
|
||||
static symbol s_0_2[4] = {0xD9, 0xD7, 0xDB, 0xC9};
|
||||
static symbol s_0_3[1] = {0xD7};
|
||||
static symbol s_0_4[2] = {0xC9, 0xD7};
|
||||
static symbol s_0_5[2] = {0xD9, 0xD7};
|
||||
static symbol s_0_6[5] = {0xD7, 0xDB, 0xC9, 0xD3, 0xD8};
|
||||
static symbol s_0_7[6] = {0xC9, 0xD7, 0xDB, 0xC9, 0xD3, 0xD8};
|
||||
static symbol s_0_8[6] = {0xD9, 0xD7, 0xDB, 0xC9, 0xD3, 0xD8};
|
||||
|
||||
static struct among a_0[9] =
|
||||
{
|
||||
/* 0 */ {3, s_0_0, -1, 1, 0},
|
||||
/* 1 */ {4, s_0_1, 0, 2, 0},
|
||||
/* 2 */ {4, s_0_2, 0, 2, 0},
|
||||
/* 3 */ {1, s_0_3, -1, 1, 0},
|
||||
/* 4 */ {2, s_0_4, 3, 2, 0},
|
||||
/* 5 */ {2, s_0_5, 3, 2, 0},
|
||||
/* 6 */ {5, s_0_6, -1, 1, 0},
|
||||
/* 7 */ {6, s_0_7, 6, 2, 0},
|
||||
/* 8 */ {6, s_0_8, 6, 2, 0}
|
||||
};
|
||||
|
||||
static symbol s_1_0[2] = {0xC0, 0xC0};
|
||||
static symbol s_1_1[2] = {0xC5, 0xC0};
|
||||
static symbol s_1_2[2] = {0xCF, 0xC0};
|
||||
static symbol s_1_3[2] = {0xD5, 0xC0};
|
||||
static symbol s_1_4[2] = {0xC5, 0xC5};
|
||||
static symbol s_1_5[2] = {0xC9, 0xC5};
|
||||
static symbol s_1_6[2] = {0xCF, 0xC5};
|
||||
static symbol s_1_7[2] = {0xD9, 0xC5};
|
||||
static symbol s_1_8[2] = {0xC9, 0xC8};
|
||||
static symbol s_1_9[2] = {0xD9, 0xC8};
|
||||
static symbol s_1_10[3] = {0xC9, 0xCD, 0xC9};
|
||||
static symbol s_1_11[3] = {0xD9, 0xCD, 0xC9};
|
||||
static symbol s_1_12[2] = {0xC5, 0xCA};
|
||||
static symbol s_1_13[2] = {0xC9, 0xCA};
|
||||
static symbol s_1_14[2] = {0xCF, 0xCA};
|
||||
static symbol s_1_15[2] = {0xD9, 0xCA};
|
||||
static symbol s_1_16[2] = {0xC5, 0xCD};
|
||||
static symbol s_1_17[2] = {0xC9, 0xCD};
|
||||
static symbol s_1_18[2] = {0xCF, 0xCD};
|
||||
static symbol s_1_19[2] = {0xD9, 0xCD};
|
||||
static symbol s_1_20[3] = {0xC5, 0xC7, 0xCF};
|
||||
static symbol s_1_21[3] = {0xCF, 0xC7, 0xCF};
|
||||
static symbol s_1_22[2] = {0xC1, 0xD1};
|
||||
static symbol s_1_23[2] = {0xD1, 0xD1};
|
||||
static symbol s_1_24[3] = {0xC5, 0xCD, 0xD5};
|
||||
static symbol s_1_25[3] = {0xCF, 0xCD, 0xD5};
|
||||
|
||||
static struct among a_1[26] =
|
||||
{
|
||||
/* 0 */ {2, s_1_0, -1, 1, 0},
|
||||
/* 1 */ {2, s_1_1, -1, 1, 0},
|
||||
/* 2 */ {2, s_1_2, -1, 1, 0},
|
||||
/* 3 */ {2, s_1_3, -1, 1, 0},
|
||||
/* 4 */ {2, s_1_4, -1, 1, 0},
|
||||
/* 5 */ {2, s_1_5, -1, 1, 0},
|
||||
/* 6 */ {2, s_1_6, -1, 1, 0},
|
||||
/* 7 */ {2, s_1_7, -1, 1, 0},
|
||||
/* 8 */ {2, s_1_8, -1, 1, 0},
|
||||
/* 9 */ {2, s_1_9, -1, 1, 0},
|
||||
/* 10 */ {3, s_1_10, -1, 1, 0},
|
||||
/* 11 */ {3, s_1_11, -1, 1, 0},
|
||||
/* 12 */ {2, s_1_12, -1, 1, 0},
|
||||
/* 13 */ {2, s_1_13, -1, 1, 0},
|
||||
/* 14 */ {2, s_1_14, -1, 1, 0},
|
||||
/* 15 */ {2, s_1_15, -1, 1, 0},
|
||||
/* 16 */ {2, s_1_16, -1, 1, 0},
|
||||
/* 17 */ {2, s_1_17, -1, 1, 0},
|
||||
/* 18 */ {2, s_1_18, -1, 1, 0},
|
||||
/* 19 */ {2, s_1_19, -1, 1, 0},
|
||||
/* 20 */ {3, s_1_20, -1, 1, 0},
|
||||
/* 21 */ {3, s_1_21, -1, 1, 0},
|
||||
/* 22 */ {2, s_1_22, -1, 1, 0},
|
||||
/* 23 */ {2, s_1_23, -1, 1, 0},
|
||||
/* 24 */ {3, s_1_24, -1, 1, 0},
|
||||
/* 25 */ {3, s_1_25, -1, 1, 0}
|
||||
};
|
||||
|
||||
static symbol s_2_0[2] = {0xC5, 0xCD};
|
||||
static symbol s_2_1[2] = {0xCE, 0xCE};
|
||||
static symbol s_2_2[2] = {0xD7, 0xDB};
|
||||
static symbol s_2_3[3] = {0xC9, 0xD7, 0xDB};
|
||||
static symbol s_2_4[3] = {0xD9, 0xD7, 0xDB};
|
||||
static symbol s_2_5[1] = {0xDD};
|
||||
static symbol s_2_6[2] = {0xC0, 0xDD};
|
||||
static symbol s_2_7[3] = {0xD5, 0xC0, 0xDD};
|
||||
|
||||
static struct among a_2[8] =
|
||||
{
|
||||
/* 0 */ {2, s_2_0, -1, 1, 0},
|
||||
/* 1 */ {2, s_2_1, -1, 1, 0},
|
||||
/* 2 */ {2, s_2_2, -1, 1, 0},
|
||||
/* 3 */ {3, s_2_3, 2, 2, 0},
|
||||
/* 4 */ {3, s_2_4, 2, 2, 0},
|
||||
/* 5 */ {1, s_2_5, -1, 1, 0},
|
||||
/* 6 */ {2, s_2_6, 5, 1, 0},
|
||||
/* 7 */ {3, s_2_7, 6, 2, 0}
|
||||
};
|
||||
|
||||
static symbol s_3_0[2] = {0xD3, 0xD1};
|
||||
static symbol s_3_1[2] = {0xD3, 0xD8};
|
||||
|
||||
static struct among a_3[2] =
|
||||
{
|
||||
/* 0 */ {2, s_3_0, -1, 1, 0},
|
||||
/* 1 */ {2, s_3_1, -1, 1, 0}
|
||||
};
|
||||
|
||||
static symbol s_4_0[1] = {0xC0};
|
||||
static symbol s_4_1[2] = {0xD5, 0xC0};
|
||||
static symbol s_4_2[2] = {0xCC, 0xC1};
|
||||
static symbol s_4_3[3] = {0xC9, 0xCC, 0xC1};
|
||||
static symbol s_4_4[3] = {0xD9, 0xCC, 0xC1};
|
||||
static symbol s_4_5[2] = {0xCE, 0xC1};
|
||||
static symbol s_4_6[3] = {0xC5, 0xCE, 0xC1};
|
||||
static symbol s_4_7[3] = {0xC5, 0xD4, 0xC5};
|
||||
static symbol s_4_8[3] = {0xC9, 0xD4, 0xC5};
|
||||
static symbol s_4_9[3] = {0xCA, 0xD4, 0xC5};
|
||||
static symbol s_4_10[4] = {0xC5, 0xCA, 0xD4, 0xC5};
|
||||
static symbol s_4_11[4] = {0xD5, 0xCA, 0xD4, 0xC5};
|
||||
static symbol s_4_12[2] = {0xCC, 0xC9};
|
||||
static symbol s_4_13[3] = {0xC9, 0xCC, 0xC9};
|
||||
static symbol s_4_14[3] = {0xD9, 0xCC, 0xC9};
|
||||
static symbol s_4_15[1] = {0xCA};
|
||||
static symbol s_4_16[2] = {0xC5, 0xCA};
|
||||
static symbol s_4_17[2] = {0xD5, 0xCA};
|
||||
static symbol s_4_18[1] = {0xCC};
|
||||
static symbol s_4_19[2] = {0xC9, 0xCC};
|
||||
static symbol s_4_20[2] = {0xD9, 0xCC};
|
||||
static symbol s_4_21[2] = {0xC5, 0xCD};
|
||||
static symbol s_4_22[2] = {0xC9, 0xCD};
|
||||
static symbol s_4_23[2] = {0xD9, 0xCD};
|
||||
static symbol s_4_24[1] = {0xCE};
|
||||
static symbol s_4_25[2] = {0xC5, 0xCE};
|
||||
static symbol s_4_26[2] = {0xCC, 0xCF};
|
||||
static symbol s_4_27[3] = {0xC9, 0xCC, 0xCF};
|
||||
static symbol s_4_28[3] = {0xD9, 0xCC, 0xCF};
|
||||
static symbol s_4_29[2] = {0xCE, 0xCF};
|
||||
static symbol s_4_30[3] = {0xC5, 0xCE, 0xCF};
|
||||
static symbol s_4_31[3] = {0xCE, 0xCE, 0xCF};
|
||||
static symbol s_4_32[2] = {0xC0, 0xD4};
|
||||
static symbol s_4_33[3] = {0xD5, 0xC0, 0xD4};
|
||||
static symbol s_4_34[2] = {0xC5, 0xD4};
|
||||
static symbol s_4_35[3] = {0xD5, 0xC5, 0xD4};
|
||||
static symbol s_4_36[2] = {0xC9, 0xD4};
|
||||
static symbol s_4_37[2] = {0xD1, 0xD4};
|
||||
static symbol s_4_38[2] = {0xD9, 0xD4};
|
||||
static symbol s_4_39[2] = {0xD4, 0xD8};
|
||||
static symbol s_4_40[3] = {0xC9, 0xD4, 0xD8};
|
||||
static symbol s_4_41[3] = {0xD9, 0xD4, 0xD8};
|
||||
static symbol s_4_42[3] = {0xC5, 0xDB, 0xD8};
|
||||
static symbol s_4_43[3] = {0xC9, 0xDB, 0xD8};
|
||||
static symbol s_4_44[2] = {0xCE, 0xD9};
|
||||
static symbol s_4_45[3] = {0xC5, 0xCE, 0xD9};
|
||||
|
||||
static struct among a_4[46] =
|
||||
{
|
||||
/* 0 */ {1, s_4_0, -1, 2, 0},
|
||||
/* 1 */ {2, s_4_1, 0, 2, 0},
|
||||
/* 2 */ {2, s_4_2, -1, 1, 0},
|
||||
/* 3 */ {3, s_4_3, 2, 2, 0},
|
||||
/* 4 */ {3, s_4_4, 2, 2, 0},
|
||||
/* 5 */ {2, s_4_5, -1, 1, 0},
|
||||
/* 6 */ {3, s_4_6, 5, 2, 0},
|
||||
/* 7 */ {3, s_4_7, -1, 1, 0},
|
||||
/* 8 */ {3, s_4_8, -1, 2, 0},
|
||||
/* 9 */ {3, s_4_9, -1, 1, 0},
|
||||
/* 10 */ {4, s_4_10, 9, 2, 0},
|
||||
/* 11 */ {4, s_4_11, 9, 2, 0},
|
||||
/* 12 */ {2, s_4_12, -1, 1, 0},
|
||||
/* 13 */ {3, s_4_13, 12, 2, 0},
|
||||
/* 14 */ {3, s_4_14, 12, 2, 0},
|
||||
/* 15 */ {1, s_4_15, -1, 1, 0},
|
||||
/* 16 */ {2, s_4_16, 15, 2, 0},
|
||||
/* 17 */ {2, s_4_17, 15, 2, 0},
|
||||
/* 18 */ {1, s_4_18, -1, 1, 0},
|
||||
/* 19 */ {2, s_4_19, 18, 2, 0},
|
||||
/* 20 */ {2, s_4_20, 18, 2, 0},
|
||||
/* 21 */ {2, s_4_21, -1, 1, 0},
|
||||
/* 22 */ {2, s_4_22, -1, 2, 0},
|
||||
/* 23 */ {2, s_4_23, -1, 2, 0},
|
||||
/* 24 */ {1, s_4_24, -1, 1, 0},
|
||||
/* 25 */ {2, s_4_25, 24, 2, 0},
|
||||
/* 26 */ {2, s_4_26, -1, 1, 0},
|
||||
/* 27 */ {3, s_4_27, 26, 2, 0},
|
||||
/* 28 */ {3, s_4_28, 26, 2, 0},
|
||||
/* 29 */ {2, s_4_29, -1, 1, 0},
|
||||
/* 30 */ {3, s_4_30, 29, 2, 0},
|
||||
/* 31 */ {3, s_4_31, 29, 1, 0},
|
||||
/* 32 */ {2, s_4_32, -1, 1, 0},
|
||||
/* 33 */ {3, s_4_33, 32, 2, 0},
|
||||
/* 34 */ {2, s_4_34, -1, 1, 0},
|
||||
/* 35 */ {3, s_4_35, 34, 2, 0},
|
||||
/* 36 */ {2, s_4_36, -1, 2, 0},
|
||||
/* 37 */ {2, s_4_37, -1, 2, 0},
|
||||
/* 38 */ {2, s_4_38, -1, 2, 0},
|
||||
/* 39 */ {2, s_4_39, -1, 1, 0},
|
||||
/* 40 */ {3, s_4_40, 39, 2, 0},
|
||||
/* 41 */ {3, s_4_41, 39, 2, 0},
|
||||
/* 42 */ {3, s_4_42, -1, 1, 0},
|
||||
/* 43 */ {3, s_4_43, -1, 2, 0},
|
||||
/* 44 */ {2, s_4_44, -1, 1, 0},
|
||||
/* 45 */ {3, s_4_45, 44, 2, 0}
|
||||
};
|
||||
|
||||
static symbol s_5_0[1] = {0xC0};
|
||||
static symbol s_5_1[2] = {0xC9, 0xC0};
|
||||
static symbol s_5_2[2] = {0xD8, 0xC0};
|
||||
static symbol s_5_3[1] = {0xC1};
|
||||
static symbol s_5_4[1] = {0xC5};
|
||||
static symbol s_5_5[2] = {0xC9, 0xC5};
|
||||
static symbol s_5_6[2] = {0xD8, 0xC5};
|
||||
static symbol s_5_7[2] = {0xC1, 0xC8};
|
||||
static symbol s_5_8[2] = {0xD1, 0xC8};
|
||||
static symbol s_5_9[3] = {0xC9, 0xD1, 0xC8};
|
||||
static symbol s_5_10[1] = {0xC9};
|
||||
static symbol s_5_11[2] = {0xC5, 0xC9};
|
||||
static symbol s_5_12[2] = {0xC9, 0xC9};
|
||||
static symbol s_5_13[3] = {0xC1, 0xCD, 0xC9};
|
||||
static symbol s_5_14[3] = {0xD1, 0xCD, 0xC9};
|
||||
static symbol s_5_15[4] = {0xC9, 0xD1, 0xCD, 0xC9};
|
||||
static symbol s_5_16[1] = {0xCA};
|
||||
static symbol s_5_17[2] = {0xC5, 0xCA};
|
||||
static symbol s_5_18[3] = {0xC9, 0xC5, 0xCA};
|
||||
static symbol s_5_19[2] = {0xC9, 0xCA};
|
||||
static symbol s_5_20[2] = {0xCF, 0xCA};
|
||||
static symbol s_5_21[2] = {0xC1, 0xCD};
|
||||
static symbol s_5_22[2] = {0xC5, 0xCD};
|
||||
static symbol s_5_23[3] = {0xC9, 0xC5, 0xCD};
|
||||
static symbol s_5_24[2] = {0xCF, 0xCD};
|
||||
static symbol s_5_25[2] = {0xD1, 0xCD};
|
||||
static symbol s_5_26[3] = {0xC9, 0xD1, 0xCD};
|
||||
static symbol s_5_27[1] = {0xCF};
|
||||
static symbol s_5_28[1] = {0xD1};
|
||||
static symbol s_5_29[2] = {0xC9, 0xD1};
|
||||
static symbol s_5_30[2] = {0xD8, 0xD1};
|
||||
static symbol s_5_31[1] = {0xD5};
|
||||
static symbol s_5_32[2] = {0xC5, 0xD7};
|
||||
static symbol s_5_33[2] = {0xCF, 0xD7};
|
||||
static symbol s_5_34[1] = {0xD8};
|
||||
static symbol s_5_35[1] = {0xD9};
|
||||
|
||||
static struct among a_5[36] =
|
||||
{
|
||||
/* 0 */ {1, s_5_0, -1, 1, 0},
|
||||
/* 1 */ {2, s_5_1, 0, 1, 0},
|
||||
/* 2 */ {2, s_5_2, 0, 1, 0},
|
||||
/* 3 */ {1, s_5_3, -1, 1, 0},
|
||||
/* 4 */ {1, s_5_4, -1, 1, 0},
|
||||
/* 5 */ {2, s_5_5, 4, 1, 0},
|
||||
/* 6 */ {2, s_5_6, 4, 1, 0},
|
||||
/* 7 */ {2, s_5_7, -1, 1, 0},
|
||||
/* 8 */ {2, s_5_8, -1, 1, 0},
|
||||
/* 9 */ {3, s_5_9, 8, 1, 0},
|
||||
/* 10 */ {1, s_5_10, -1, 1, 0},
|
||||
/* 11 */ {2, s_5_11, 10, 1, 0},
|
||||
/* 12 */ {2, s_5_12, 10, 1, 0},
|
||||
/* 13 */ {3, s_5_13, 10, 1, 0},
|
||||
/* 14 */ {3, s_5_14, 10, 1, 0},
|
||||
/* 15 */ {4, s_5_15, 14, 1, 0},
|
||||
/* 16 */ {1, s_5_16, -1, 1, 0},
|
||||
/* 17 */ {2, s_5_17, 16, 1, 0},
|
||||
/* 18 */ {3, s_5_18, 17, 1, 0},
|
||||
/* 19 */ {2, s_5_19, 16, 1, 0},
|
||||
/* 20 */ {2, s_5_20, 16, 1, 0},
|
||||
/* 21 */ {2, s_5_21, -1, 1, 0},
|
||||
/* 22 */ {2, s_5_22, -1, 1, 0},
|
||||
/* 23 */ {3, s_5_23, 22, 1, 0},
|
||||
/* 24 */ {2, s_5_24, -1, 1, 0},
|
||||
/* 25 */ {2, s_5_25, -1, 1, 0},
|
||||
/* 26 */ {3, s_5_26, 25, 1, 0},
|
||||
/* 27 */ {1, s_5_27, -1, 1, 0},
|
||||
/* 28 */ {1, s_5_28, -1, 1, 0},
|
||||
/* 29 */ {2, s_5_29, 28, 1, 0},
|
||||
/* 30 */ {2, s_5_30, 28, 1, 0},
|
||||
/* 31 */ {1, s_5_31, -1, 1, 0},
|
||||
/* 32 */ {2, s_5_32, -1, 1, 0},
|
||||
/* 33 */ {2, s_5_33, -1, 1, 0},
|
||||
/* 34 */ {1, s_5_34, -1, 1, 0},
|
||||
/* 35 */ {1, s_5_35, -1, 1, 0}
|
||||
};
|
||||
|
||||
static symbol s_6_0[3] = {0xCF, 0xD3, 0xD4};
|
||||
static symbol s_6_1[4] = {0xCF, 0xD3, 0xD4, 0xD8};
|
||||
|
||||
static struct among a_6[2] =
|
||||
{
|
||||
/* 0 */ {3, s_6_0, -1, 1, 0},
|
||||
/* 1 */ {4, s_6_1, -1, 1, 0}
|
||||
};
|
||||
|
||||
static symbol s_7_0[4] = {0xC5, 0xCA, 0xDB, 0xC5};
|
||||
static symbol s_7_1[1] = {0xCE};
|
||||
static symbol s_7_2[1] = {0xD8};
|
||||
static symbol s_7_3[3] = {0xC5, 0xCA, 0xDB};
|
||||
|
||||
static struct among a_7[4] =
|
||||
{
|
||||
/* 0 */ {4, s_7_0, -1, 1, 0},
|
||||
/* 1 */ {1, s_7_1, -1, 2, 0},
|
||||
/* 2 */ {1, s_7_2, -1, 3, 0},
|
||||
/* 3 */ {3, s_7_3, -1, 1, 0}
|
||||
};
|
||||
|
||||
static unsigned char g_v[] = {35, 130, 34, 18};
|
||||
|
||||
static symbol s_0[] = {0xC1};
|
||||
static symbol s_1[] = {0xD1};
|
||||
static symbol s_2[] = {0xC1};
|
||||
static symbol s_3[] = {0xD1};
|
||||
static symbol s_4[] = {0xC1};
|
||||
static symbol s_5[] = {0xD1};
|
||||
static symbol s_6[] = {0xCE};
|
||||
static symbol s_7[] = {0xCE};
|
||||
static symbol s_8[] = {0xCE};
|
||||
static symbol s_9[] = {0xC9};
|
||||
|
||||
static int
|
||||
r_mark_regions(struct SN_env * z)
|
||||
{
|
||||
z->I[0] = z->l;
|
||||
z->I[1] = z->l;
|
||||
{
|
||||
int c = z->c; /* do, line 63 */
|
||||
|
||||
while (1)
|
||||
{ /* gopast, line 64 */
|
||||
if (!(in_grouping(z, g_v, 192, 220)))
|
||||
goto lab1;
|
||||
break;
|
||||
lab1:
|
||||
if (z->c >= z->l)
|
||||
goto lab0;
|
||||
z->c++; /* gopast, line 64 */
|
||||
}
|
||||
z->I[0] = z->c; /* setmark pV, line 64 */
|
||||
while (1)
|
||||
{ /* gopast, line 64 */
|
||||
if (!(out_grouping(z, g_v, 192, 220)))
|
||||
goto lab2;
|
||||
break;
|
||||
lab2:
|
||||
if (z->c >= z->l)
|
||||
goto lab0;
|
||||
z->c++; /* gopast, line 64 */
|
||||
}
|
||||
while (1)
|
||||
{ /* gopast, line 65 */
|
||||
if (!(in_grouping(z, g_v, 192, 220)))
|
||||
goto lab3;
|
||||
break;
|
||||
lab3:
|
||||
if (z->c >= z->l)
|
||||
goto lab0;
|
||||
z->c++; /* gopast, line 65 */
|
||||
}
|
||||
while (1)
|
||||
{ /* gopast, line 65 */
|
||||
if (!(out_grouping(z, g_v, 192, 220)))
|
||||
goto lab4;
|
||||
break;
|
||||
lab4:
|
||||
if (z->c >= z->l)
|
||||
goto lab0;
|
||||
z->c++; /* gopast, line 65 */
|
||||
}
|
||||
z->I[1] = z->c; /* setmark p2, line 65 */
|
||||
lab0:
|
||||
z->c = c;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_R2(struct SN_env * z)
|
||||
{
|
||||
if (!(z->I[1] <= z->c))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_perfective_gerund(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 74 */
|
||||
among_var = find_among_b(z, a_0, 9); /* substring, line 74 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 74 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* or, line 78 */
|
||||
if (!(eq_s_b(z, 1, s_0)))
|
||||
goto lab1;
|
||||
goto lab0;
|
||||
lab1:
|
||||
z->c = z->l - m;
|
||||
if (!(eq_s_b(z, 1, s_1)))
|
||||
return 0;
|
||||
}
|
||||
lab0:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 78 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 85 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_adjective(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 90 */
|
||||
among_var = find_among_b(z, a_1, 26); /* substring, line 90 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 90 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 99 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_adjectival(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
{
|
||||
int ret = r_adjective(z);
|
||||
|
||||
if (ret == 0)
|
||||
return 0; /* call adjective, line 104 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* try, line 111 */
|
||||
z->ket = z->c; /* [, line 112 */
|
||||
among_var = find_among_b(z, a_2, 8); /* substring, line 112 */
|
||||
if (!(among_var))
|
||||
{
|
||||
z->c = z->l - m;
|
||||
goto lab0;
|
||||
}
|
||||
z->bra = z->c; /* ], line 112 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
{
|
||||
z->c = z->l - m;
|
||||
goto lab0;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* or, line 117 */
|
||||
if (!(eq_s_b(z, 1, s_2)))
|
||||
goto lab2;
|
||||
goto lab1;
|
||||
lab2:
|
||||
z->c = z->l - m;
|
||||
if (!(eq_s_b(z, 1, s_3)))
|
||||
{
|
||||
z->c = z->l - m;
|
||||
goto lab0;
|
||||
}
|
||||
}
|
||||
lab1:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 117 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 124 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
lab0:
|
||||
;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_reflexive(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 131 */
|
||||
among_var = find_among_b(z, a_3, 2); /* substring, line 131 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 131 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 134 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_verb(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 139 */
|
||||
among_var = find_among_b(z, a_4, 46); /* substring, line 139 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 139 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* or, line 145 */
|
||||
if (!(eq_s_b(z, 1, s_4)))
|
||||
goto lab1;
|
||||
goto lab0;
|
||||
lab1:
|
||||
z->c = z->l - m;
|
||||
if (!(eq_s_b(z, 1, s_5)))
|
||||
return 0;
|
||||
}
|
||||
lab0:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 145 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 153 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_noun(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 162 */
|
||||
among_var = find_among_b(z, a_5, 36); /* substring, line 162 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 162 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 169 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_derivational(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 178 */
|
||||
among_var = find_among_b(z, a_6, 2); /* substring, line 178 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 178 */
|
||||
{
|
||||
int ret = r_R2(z);
|
||||
|
||||
if (ret == 0)
|
||||
return 0; /* call R2, line 178 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 181 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_tidy_up(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 186 */
|
||||
among_var = find_among_b(z, a_7, 4); /* substring, line 186 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 186 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 190 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
z->ket = z->c; /* [, line 191 */
|
||||
if (!(eq_s_b(z, 1, s_6)))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 191 */
|
||||
if (!(eq_s_b(z, 1, s_7)))
|
||||
return 0;
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 191 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (!(eq_s_b(z, 1, s_8)))
|
||||
return 0;
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 194 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 196 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
russian_KOI8_R_stem(struct SN_env * z)
|
||||
{
|
||||
{
|
||||
int c = z->c; /* do, line 203 */
|
||||
|
||||
{
|
||||
int ret = r_mark_regions(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab0; /* call mark_regions, line 203 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
lab0:
|
||||
z->c = c;
|
||||
}
|
||||
z->lb = z->c;
|
||||
z->c = z->l; /* backwards, line 204 */
|
||||
|
||||
{
|
||||
int m3; /* setlimit, line 204 */
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m;
|
||||
if (z->c < z->I[0])
|
||||
return 0;
|
||||
z->c = z->I[0]; /* tomark, line 204 */
|
||||
m3 = z->lb;
|
||||
z->lb = z->c;
|
||||
z->c = z->l - m;
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* do, line 205 */
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* or, line 206 */
|
||||
{
|
||||
int ret = r_perfective_gerund(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab3; /* call perfective_gerund, line 206 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
goto lab2;
|
||||
lab3:
|
||||
z->c = z->l - m;
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* try, line 207 */
|
||||
{
|
||||
int ret = r_reflexive(z);
|
||||
|
||||
if (ret == 0)
|
||||
{
|
||||
z->c = z->l - m;
|
||||
goto lab4;
|
||||
} /* call reflexive, line 207 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
lab4:
|
||||
;
|
||||
}
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* or, line 208 */
|
||||
{
|
||||
int ret = r_adjectival(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab6; /* call adjectival, line 208 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
goto lab5;
|
||||
lab6:
|
||||
z->c = z->l - m;
|
||||
{
|
||||
int ret = r_verb(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab7; /* call verb, line 208 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
goto lab5;
|
||||
lab7:
|
||||
z->c = z->l - m;
|
||||
{
|
||||
int ret = r_noun(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab1; /* call noun, line 208 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
lab5:
|
||||
;
|
||||
}
|
||||
lab2:
|
||||
lab1:
|
||||
z->c = z->l - m;
|
||||
}
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* try, line 211 */
|
||||
z->ket = z->c; /* [, line 211 */
|
||||
if (!(eq_s_b(z, 1, s_9)))
|
||||
{
|
||||
z->c = z->l - m;
|
||||
goto lab8;
|
||||
}
|
||||
z->bra = z->c; /* ], line 211 */
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 211 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
lab8:
|
||||
;
|
||||
}
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* do, line 214 */
|
||||
{
|
||||
int ret = r_derivational(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab9; /* call derivational, line 214 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
lab9:
|
||||
z->c = z->l - m;
|
||||
}
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* do, line 215 */
|
||||
{
|
||||
int ret = r_tidy_up(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab10; /* call tidy_up, line 215 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
lab10:
|
||||
z->c = z->l - m;
|
||||
}
|
||||
z->lb = m3;
|
||||
}
|
||||
z->c = z->lb;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * russian_KOI8_R_create_env
 *
 * Allocates a stemmer environment through SN_create_env(0, 2, 0) and
 * returns whatever that call returns.
 *
 * NOTE(review): the second argument presumably sizes the z->I[] array
 * (this stemmer uses I[0] and I[1]) -- confirm against SN_create_env().
 */
extern struct SN_env *
russian_KOI8_R_create_env(void)
{
    struct SN_env *env = SN_create_env(0, 2, 0);

    return env;
}
|
||||
|
||||
/*
 * russian_KOI8_R_close_env
 *
 * Hands the environment z to SN_close_env(); nothing else is done here.
 */
extern void
russian_KOI8_R_close_env(struct SN_env * z)
{
    SN_close_env(z);
    return;
}
|
|
@ -1,17 +0,0 @@
|
|||
|
||||
/* This file was generated automatically by the Snowball to ANSI C compiler */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
extern struct SN_env *russian_KOI8_R_create_env(void);
|
||||
extern void russian_KOI8_R_close_env(struct SN_env * z);
|
||||
|
||||
extern int russian_KOI8_R_stem(struct SN_env * z);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,942 +0,0 @@
|
|||
|
||||
/* This file was generated automatically by the Snowball to ANSI C compiler */
|
||||
|
||||
#include "header.h"
|
||||
|
||||
extern int russian_UTF_8_stem(struct SN_env * z);
|
||||
static int r_tidy_up(struct SN_env * z);
|
||||
static int r_derivational(struct SN_env * z);
|
||||
static int r_noun(struct SN_env * z);
|
||||
static int r_verb(struct SN_env * z);
|
||||
static int r_reflexive(struct SN_env * z);
|
||||
static int r_adjectival(struct SN_env * z);
|
||||
static int r_adjective(struct SN_env * z);
|
||||
static int r_perfective_gerund(struct SN_env * z);
|
||||
static int r_R2(struct SN_env * z);
|
||||
static int r_mark_regions(struct SN_env * z);
|
||||
|
||||
extern struct SN_env *russian_UTF_8_create_env(void);
|
||||
extern void russian_UTF_8_close_env(struct SN_env * z);
|
||||
|
||||
static symbol s_0_0[10] = {0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C};
|
||||
static symbol s_0_1[12] = {0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C};
|
||||
static symbol s_0_2[12] = {0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C};
|
||||
static symbol s_0_3[2] = {0xD0, 0xB2};
|
||||
static symbol s_0_4[4] = {0xD1, 0x8B, 0xD0, 0xB2};
|
||||
static symbol s_0_5[4] = {0xD0, 0xB8, 0xD0, 0xB2};
|
||||
static symbol s_0_6[6] = {0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8};
|
||||
static symbol s_0_7[8] = {0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8};
|
||||
static symbol s_0_8[8] = {0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8};
|
||||
|
||||
static struct among a_0[9] =
|
||||
{
|
||||
/* 0 */ {10, s_0_0, -1, 1, 0},
|
||||
/* 1 */ {12, s_0_1, 0, 2, 0},
|
||||
/* 2 */ {12, s_0_2, 0, 2, 0},
|
||||
/* 3 */ {2, s_0_3, -1, 1, 0},
|
||||
/* 4 */ {4, s_0_4, 3, 2, 0},
|
||||
/* 5 */ {4, s_0_5, 3, 2, 0},
|
||||
/* 6 */ {6, s_0_6, -1, 1, 0},
|
||||
/* 7 */ {8, s_0_7, 6, 2, 0},
|
||||
/* 8 */ {8, s_0_8, 6, 2, 0}
|
||||
};
|
||||
|
||||
static symbol s_1_0[6] = {0xD0, 0xB5, 0xD0, 0xBC, 0xD1, 0x83};
|
||||
static symbol s_1_1[6] = {0xD0, 0xBE, 0xD0, 0xBC, 0xD1, 0x83};
|
||||
static symbol s_1_2[4] = {0xD1, 0x8B, 0xD1, 0x85};
|
||||
static symbol s_1_3[4] = {0xD0, 0xB8, 0xD1, 0x85};
|
||||
static symbol s_1_4[4] = {0xD1, 0x83, 0xD1, 0x8E};
|
||||
static symbol s_1_5[4] = {0xD1, 0x8E, 0xD1, 0x8E};
|
||||
static symbol s_1_6[4] = {0xD0, 0xB5, 0xD1, 0x8E};
|
||||
static symbol s_1_7[4] = {0xD0, 0xBE, 0xD1, 0x8E};
|
||||
static symbol s_1_8[4] = {0xD1, 0x8F, 0xD1, 0x8F};
|
||||
static symbol s_1_9[4] = {0xD0, 0xB0, 0xD1, 0x8F};
|
||||
static symbol s_1_10[4] = {0xD1, 0x8B, 0xD0, 0xB5};
|
||||
static symbol s_1_11[4] = {0xD0, 0xB5, 0xD0, 0xB5};
|
||||
static symbol s_1_12[4] = {0xD0, 0xB8, 0xD0, 0xB5};
|
||||
static symbol s_1_13[4] = {0xD0, 0xBE, 0xD0, 0xB5};
|
||||
static symbol s_1_14[6] = {0xD1, 0x8B, 0xD0, 0xBC, 0xD0, 0xB8};
|
||||
static symbol s_1_15[6] = {0xD0, 0xB8, 0xD0, 0xBC, 0xD0, 0xB8};
|
||||
static symbol s_1_16[4] = {0xD1, 0x8B, 0xD0, 0xB9};
|
||||
static symbol s_1_17[4] = {0xD0, 0xB5, 0xD0, 0xB9};
|
||||
static symbol s_1_18[4] = {0xD0, 0xB8, 0xD0, 0xB9};
|
||||
static symbol s_1_19[4] = {0xD0, 0xBE, 0xD0, 0xB9};
|
||||
static symbol s_1_20[4] = {0xD1, 0x8B, 0xD0, 0xBC};
|
||||
static symbol s_1_21[4] = {0xD0, 0xB5, 0xD0, 0xBC};
|
||||
static symbol s_1_22[4] = {0xD0, 0xB8, 0xD0, 0xBC};
|
||||
static symbol s_1_23[4] = {0xD0, 0xBE, 0xD0, 0xBC};
|
||||
static symbol s_1_24[6] = {0xD0, 0xB5, 0xD0, 0xB3, 0xD0, 0xBE};
|
||||
static symbol s_1_25[6] = {0xD0, 0xBE, 0xD0, 0xB3, 0xD0, 0xBE};
|
||||
|
||||
static struct among a_1[26] =
|
||||
{
|
||||
/* 0 */ {6, s_1_0, -1, 1, 0},
|
||||
/* 1 */ {6, s_1_1, -1, 1, 0},
|
||||
/* 2 */ {4, s_1_2, -1, 1, 0},
|
||||
/* 3 */ {4, s_1_3, -1, 1, 0},
|
||||
/* 4 */ {4, s_1_4, -1, 1, 0},
|
||||
/* 5 */ {4, s_1_5, -1, 1, 0},
|
||||
/* 6 */ {4, s_1_6, -1, 1, 0},
|
||||
/* 7 */ {4, s_1_7, -1, 1, 0},
|
||||
/* 8 */ {4, s_1_8, -1, 1, 0},
|
||||
/* 9 */ {4, s_1_9, -1, 1, 0},
|
||||
/* 10 */ {4, s_1_10, -1, 1, 0},
|
||||
/* 11 */ {4, s_1_11, -1, 1, 0},
|
||||
/* 12 */ {4, s_1_12, -1, 1, 0},
|
||||
/* 13 */ {4, s_1_13, -1, 1, 0},
|
||||
/* 14 */ {6, s_1_14, -1, 1, 0},
|
||||
/* 15 */ {6, s_1_15, -1, 1, 0},
|
||||
/* 16 */ {4, s_1_16, -1, 1, 0},
|
||||
/* 17 */ {4, s_1_17, -1, 1, 0},
|
||||
/* 18 */ {4, s_1_18, -1, 1, 0},
|
||||
/* 19 */ {4, s_1_19, -1, 1, 0},
|
||||
/* 20 */ {4, s_1_20, -1, 1, 0},
|
||||
/* 21 */ {4, s_1_21, -1, 1, 0},
|
||||
/* 22 */ {4, s_1_22, -1, 1, 0},
|
||||
/* 23 */ {4, s_1_23, -1, 1, 0},
|
||||
/* 24 */ {6, s_1_24, -1, 1, 0},
|
||||
/* 25 */ {6, s_1_25, -1, 1, 0}
|
||||
};
|
||||
|
||||
static symbol s_2_0[4] = {0xD0, 0xB2, 0xD1, 0x88};
|
||||
static symbol s_2_1[6] = {0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88};
|
||||
static symbol s_2_2[6] = {0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88};
|
||||
static symbol s_2_3[2] = {0xD1, 0x89};
|
||||
static symbol s_2_4[4] = {0xD1, 0x8E, 0xD1, 0x89};
|
||||
static symbol s_2_5[6] = {0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x89};
|
||||
static symbol s_2_6[4] = {0xD0, 0xB5, 0xD0, 0xBC};
|
||||
static symbol s_2_7[4] = {0xD0, 0xBD, 0xD0, 0xBD};
|
||||
|
||||
static struct among a_2[8] =
|
||||
{
|
||||
/* 0 */ {4, s_2_0, -1, 1, 0},
|
||||
/* 1 */ {6, s_2_1, 0, 2, 0},
|
||||
/* 2 */ {6, s_2_2, 0, 2, 0},
|
||||
/* 3 */ {2, s_2_3, -1, 1, 0},
|
||||
/* 4 */ {4, s_2_4, 3, 1, 0},
|
||||
/* 5 */ {6, s_2_5, 4, 2, 0},
|
||||
/* 6 */ {4, s_2_6, -1, 1, 0},
|
||||
/* 7 */ {4, s_2_7, -1, 1, 0}
|
||||
};
|
||||
|
||||
static symbol s_3_0[4] = {0xD1, 0x81, 0xD1, 0x8C};
|
||||
static symbol s_3_1[4] = {0xD1, 0x81, 0xD1, 0x8F};
|
||||
|
||||
static struct among a_3[2] =
|
||||
{
|
||||
/* 0 */ {4, s_3_0, -1, 1, 0},
|
||||
/* 1 */ {4, s_3_1, -1, 1, 0}
|
||||
};
|
||||
|
||||
static symbol s_4_0[4] = {0xD1, 0x8B, 0xD1, 0x82};
|
||||
static symbol s_4_1[4] = {0xD1, 0x8E, 0xD1, 0x82};
|
||||
static symbol s_4_2[6] = {0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x82};
|
||||
static symbol s_4_3[4] = {0xD1, 0x8F, 0xD1, 0x82};
|
||||
static symbol s_4_4[4] = {0xD0, 0xB5, 0xD1, 0x82};
|
||||
static symbol s_4_5[6] = {0xD1, 0x83, 0xD0, 0xB5, 0xD1, 0x82};
|
||||
static symbol s_4_6[4] = {0xD0, 0xB8, 0xD1, 0x82};
|
||||
static symbol s_4_7[4] = {0xD0, 0xBD, 0xD1, 0x8B};
|
||||
static symbol s_4_8[6] = {0xD0, 0xB5, 0xD0, 0xBD, 0xD1, 0x8B};
|
||||
static symbol s_4_9[4] = {0xD1, 0x82, 0xD1, 0x8C};
|
||||
static symbol s_4_10[6] = {0xD1, 0x8B, 0xD1, 0x82, 0xD1, 0x8C};
|
||||
static symbol s_4_11[6] = {0xD0, 0xB8, 0xD1, 0x82, 0xD1, 0x8C};
|
||||
static symbol s_4_12[6] = {0xD0, 0xB5, 0xD1, 0x88, 0xD1, 0x8C};
|
||||
static symbol s_4_13[6] = {0xD0, 0xB8, 0xD1, 0x88, 0xD1, 0x8C};
|
||||
static symbol s_4_14[2] = {0xD1, 0x8E};
|
||||
static symbol s_4_15[4] = {0xD1, 0x83, 0xD1, 0x8E};
|
||||
static symbol s_4_16[4] = {0xD0, 0xBB, 0xD0, 0xB0};
|
||||
static symbol s_4_17[6] = {0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB0};
|
||||
static symbol s_4_18[6] = {0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xB0};
|
||||
static symbol s_4_19[4] = {0xD0, 0xBD, 0xD0, 0xB0};
|
||||
static symbol s_4_20[6] = {0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xB0};
|
||||
static symbol s_4_21[6] = {0xD0, 0xB5, 0xD1, 0x82, 0xD0, 0xB5};
|
||||
static symbol s_4_22[6] = {0xD0, 0xB8, 0xD1, 0x82, 0xD0, 0xB5};
|
||||
static symbol s_4_23[6] = {0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5};
|
||||
static symbol s_4_24[8] = {0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5};
|
||||
static symbol s_4_25[8] = {0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5};
|
||||
static symbol s_4_26[4] = {0xD0, 0xBB, 0xD0, 0xB8};
|
||||
static symbol s_4_27[6] = {0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB8};
|
||||
static symbol s_4_28[6] = {0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xB8};
|
||||
static symbol s_4_29[2] = {0xD0, 0xB9};
|
||||
static symbol s_4_30[4] = {0xD1, 0x83, 0xD0, 0xB9};
|
||||
static symbol s_4_31[4] = {0xD0, 0xB5, 0xD0, 0xB9};
|
||||
static symbol s_4_32[2] = {0xD0, 0xBB};
|
||||
static symbol s_4_33[4] = {0xD1, 0x8B, 0xD0, 0xBB};
|
||||
static symbol s_4_34[4] = {0xD0, 0xB8, 0xD0, 0xBB};
|
||||
static symbol s_4_35[4] = {0xD1, 0x8B, 0xD0, 0xBC};
|
||||
static symbol s_4_36[4] = {0xD0, 0xB5, 0xD0, 0xBC};
|
||||
static symbol s_4_37[4] = {0xD0, 0xB8, 0xD0, 0xBC};
|
||||
static symbol s_4_38[2] = {0xD0, 0xBD};
|
||||
static symbol s_4_39[4] = {0xD0, 0xB5, 0xD0, 0xBD};
|
||||
static symbol s_4_40[4] = {0xD0, 0xBB, 0xD0, 0xBE};
|
||||
static symbol s_4_41[6] = {0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xBE};
|
||||
static symbol s_4_42[6] = {0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xBE};
|
||||
static symbol s_4_43[4] = {0xD0, 0xBD, 0xD0, 0xBE};
|
||||
static symbol s_4_44[6] = {0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xBE};
|
||||
static symbol s_4_45[6] = {0xD0, 0xBD, 0xD0, 0xBD, 0xD0, 0xBE};
|
||||
|
||||
static struct among a_4[46] =
|
||||
{
|
||||
/* 0 */ {4, s_4_0, -1, 2, 0},
|
||||
/* 1 */ {4, s_4_1, -1, 1, 0},
|
||||
/* 2 */ {6, s_4_2, 1, 2, 0},
|
||||
/* 3 */ {4, s_4_3, -1, 2, 0},
|
||||
/* 4 */ {4, s_4_4, -1, 1, 0},
|
||||
/* 5 */ {6, s_4_5, 4, 2, 0},
|
||||
/* 6 */ {4, s_4_6, -1, 2, 0},
|
||||
/* 7 */ {4, s_4_7, -1, 1, 0},
|
||||
/* 8 */ {6, s_4_8, 7, 2, 0},
|
||||
/* 9 */ {4, s_4_9, -1, 1, 0},
|
||||
/* 10 */ {6, s_4_10, 9, 2, 0},
|
||||
/* 11 */ {6, s_4_11, 9, 2, 0},
|
||||
/* 12 */ {6, s_4_12, -1, 1, 0},
|
||||
/* 13 */ {6, s_4_13, -1, 2, 0},
|
||||
/* 14 */ {2, s_4_14, -1, 2, 0},
|
||||
/* 15 */ {4, s_4_15, 14, 2, 0},
|
||||
/* 16 */ {4, s_4_16, -1, 1, 0},
|
||||
/* 17 */ {6, s_4_17, 16, 2, 0},
|
||||
/* 18 */ {6, s_4_18, 16, 2, 0},
|
||||
/* 19 */ {4, s_4_19, -1, 1, 0},
|
||||
/* 20 */ {6, s_4_20, 19, 2, 0},
|
||||
/* 21 */ {6, s_4_21, -1, 1, 0},
|
||||
/* 22 */ {6, s_4_22, -1, 2, 0},
|
||||
/* 23 */ {6, s_4_23, -1, 1, 0},
|
||||
/* 24 */ {8, s_4_24, 23, 2, 0},
|
||||
/* 25 */ {8, s_4_25, 23, 2, 0},
|
||||
/* 26 */ {4, s_4_26, -1, 1, 0},
|
||||
/* 27 */ {6, s_4_27, 26, 2, 0},
|
||||
/* 28 */ {6, s_4_28, 26, 2, 0},
|
||||
/* 29 */ {2, s_4_29, -1, 1, 0},
|
||||
/* 30 */ {4, s_4_30, 29, 2, 0},
|
||||
/* 31 */ {4, s_4_31, 29, 2, 0},
|
||||
/* 32 */ {2, s_4_32, -1, 1, 0},
|
||||
/* 33 */ {4, s_4_33, 32, 2, 0},
|
||||
/* 34 */ {4, s_4_34, 32, 2, 0},
|
||||
/* 35 */ {4, s_4_35, -1, 2, 0},
|
||||
/* 36 */ {4, s_4_36, -1, 1, 0},
|
||||
/* 37 */ {4, s_4_37, -1, 2, 0},
|
||||
/* 38 */ {2, s_4_38, -1, 1, 0},
|
||||
/* 39 */ {4, s_4_39, 38, 2, 0},
|
||||
/* 40 */ {4, s_4_40, -1, 1, 0},
|
||||
/* 41 */ {6, s_4_41, 40, 2, 0},
|
||||
/* 42 */ {6, s_4_42, 40, 2, 0},
|
||||
/* 43 */ {4, s_4_43, -1, 1, 0},
|
||||
/* 44 */ {6, s_4_44, 43, 2, 0},
|
||||
/* 45 */ {6, s_4_45, 43, 1, 0}
|
||||
};
|
||||
|
||||
static symbol s_5_0[2] = {0xD1, 0x83};
|
||||
static symbol s_5_1[4] = {0xD1, 0x8F, 0xD1, 0x85};
|
||||
static symbol s_5_2[6] = {0xD0, 0xB8, 0xD1, 0x8F, 0xD1, 0x85};
|
||||
static symbol s_5_3[4] = {0xD0, 0xB0, 0xD1, 0x85};
|
||||
static symbol s_5_4[2] = {0xD1, 0x8B};
|
||||
static symbol s_5_5[2] = {0xD1, 0x8C};
|
||||
static symbol s_5_6[2] = {0xD1, 0x8E};
|
||||
static symbol s_5_7[4] = {0xD1, 0x8C, 0xD1, 0x8E};
|
||||
static symbol s_5_8[4] = {0xD0, 0xB8, 0xD1, 0x8E};
|
||||
static symbol s_5_9[2] = {0xD1, 0x8F};
|
||||
static symbol s_5_10[4] = {0xD1, 0x8C, 0xD1, 0x8F};
|
||||
static symbol s_5_11[4] = {0xD0, 0xB8, 0xD1, 0x8F};
|
||||
static symbol s_5_12[2] = {0xD0, 0xB0};
|
||||
static symbol s_5_13[4] = {0xD0, 0xB5, 0xD0, 0xB2};
|
||||
static symbol s_5_14[4] = {0xD0, 0xBE, 0xD0, 0xB2};
|
||||
static symbol s_5_15[2] = {0xD0, 0xB5};
|
||||
static symbol s_5_16[4] = {0xD1, 0x8C, 0xD0, 0xB5};
|
||||
static symbol s_5_17[4] = {0xD0, 0xB8, 0xD0, 0xB5};
|
||||
static symbol s_5_18[2] = {0xD0, 0xB8};
|
||||
static symbol s_5_19[4] = {0xD0, 0xB5, 0xD0, 0xB8};
|
||||
static symbol s_5_20[4] = {0xD0, 0xB8, 0xD0, 0xB8};
|
||||
static symbol s_5_21[6] = {0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8};
|
||||
static symbol s_5_22[8] = {0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8};
|
||||
static symbol s_5_23[6] = {0xD0, 0xB0, 0xD0, 0xBC, 0xD0, 0xB8};
|
||||
static symbol s_5_24[2] = {0xD0, 0xB9};
|
||||
static symbol s_5_25[4] = {0xD0, 0xB5, 0xD0, 0xB9};
|
||||
static symbol s_5_26[6] = {0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xB9};
|
||||
static symbol s_5_27[4] = {0xD0, 0xB8, 0xD0, 0xB9};
|
||||
static symbol s_5_28[4] = {0xD0, 0xBE, 0xD0, 0xB9};
|
||||
static symbol s_5_29[4] = {0xD1, 0x8F, 0xD0, 0xBC};
|
||||
static symbol s_5_30[6] = {0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC};
|
||||
static symbol s_5_31[4] = {0xD0, 0xB0, 0xD0, 0xBC};
|
||||
static symbol s_5_32[4] = {0xD0, 0xB5, 0xD0, 0xBC};
|
||||
static symbol s_5_33[6] = {0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xBC};
|
||||
static symbol s_5_34[4] = {0xD0, 0xBE, 0xD0, 0xBC};
|
||||
static symbol s_5_35[2] = {0xD0, 0xBE};
|
||||
|
||||
static struct among a_5[36] =
|
||||
{
|
||||
/* 0 */ {2, s_5_0, -1, 1, 0},
|
||||
/* 1 */ {4, s_5_1, -1, 1, 0},
|
||||
/* 2 */ {6, s_5_2, 1, 1, 0},
|
||||
/* 3 */ {4, s_5_3, -1, 1, 0},
|
||||
/* 4 */ {2, s_5_4, -1, 1, 0},
|
||||
/* 5 */ {2, s_5_5, -1, 1, 0},
|
||||
/* 6 */ {2, s_5_6, -1, 1, 0},
|
||||
/* 7 */ {4, s_5_7, 6, 1, 0},
|
||||
/* 8 */ {4, s_5_8, 6, 1, 0},
|
||||
/* 9 */ {2, s_5_9, -1, 1, 0},
|
||||
/* 10 */ {4, s_5_10, 9, 1, 0},
|
||||
/* 11 */ {4, s_5_11, 9, 1, 0},
|
||||
/* 12 */ {2, s_5_12, -1, 1, 0},
|
||||
/* 13 */ {4, s_5_13, -1, 1, 0},
|
||||
/* 14 */ {4, s_5_14, -1, 1, 0},
|
||||
/* 15 */ {2, s_5_15, -1, 1, 0},
|
||||
/* 16 */ {4, s_5_16, 15, 1, 0},
|
||||
/* 17 */ {4, s_5_17, 15, 1, 0},
|
||||
/* 18 */ {2, s_5_18, -1, 1, 0},
|
||||
/* 19 */ {4, s_5_19, 18, 1, 0},
|
||||
/* 20 */ {4, s_5_20, 18, 1, 0},
|
||||
/* 21 */ {6, s_5_21, 18, 1, 0},
|
||||
/* 22 */ {8, s_5_22, 21, 1, 0},
|
||||
/* 23 */ {6, s_5_23, 18, 1, 0},
|
||||
/* 24 */ {2, s_5_24, -1, 1, 0},
|
||||
/* 25 */ {4, s_5_25, 24, 1, 0},
|
||||
/* 26 */ {6, s_5_26, 25, 1, 0},
|
||||
/* 27 */ {4, s_5_27, 24, 1, 0},
|
||||
/* 28 */ {4, s_5_28, 24, 1, 0},
|
||||
/* 29 */ {4, s_5_29, -1, 1, 0},
|
||||
/* 30 */ {6, s_5_30, 29, 1, 0},
|
||||
/* 31 */ {4, s_5_31, -1, 1, 0},
|
||||
/* 32 */ {4, s_5_32, -1, 1, 0},
|
||||
/* 33 */ {6, s_5_33, 32, 1, 0},
|
||||
/* 34 */ {4, s_5_34, -1, 1, 0},
|
||||
/* 35 */ {2, s_5_35, -1, 1, 0}
|
||||
};
|
||||
|
||||
static symbol s_6_0[6] = {0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82};
|
||||
static symbol s_6_1[8] = {0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82, 0xD1, 0x8C};
|
||||
|
||||
static struct among a_6[2] =
|
||||
{
|
||||
/* 0 */ {6, s_6_0, -1, 1, 0},
|
||||
/* 1 */ {8, s_6_1, -1, 1, 0}
|
||||
};
|
||||
|
||||
static symbol s_7_0[6] = {0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x88};
|
||||
static symbol s_7_1[2] = {0xD1, 0x8C};
|
||||
static symbol s_7_2[8] = {0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x88, 0xD0, 0xB5};
|
||||
static symbol s_7_3[2] = {0xD0, 0xBD};
|
||||
|
||||
static struct among a_7[4] =
|
||||
{
|
||||
/* 0 */ {6, s_7_0, -1, 1, 0},
|
||||
/* 1 */ {2, s_7_1, -1, 3, 0},
|
||||
/* 2 */ {8, s_7_2, -1, 1, 0},
|
||||
/* 3 */ {2, s_7_3, -1, 2, 0}
|
||||
};
|
||||
|
||||
static unsigned char g_v[] = {33, 65, 8, 232};
|
||||
|
||||
static symbol s_0[] = {0xD0, 0xB0};
|
||||
static symbol s_1[] = {0xD1, 0x8F};
|
||||
static symbol s_2[] = {0xD0, 0xB0};
|
||||
static symbol s_3[] = {0xD1, 0x8F};
|
||||
static symbol s_4[] = {0xD0, 0xB0};
|
||||
static symbol s_5[] = {0xD1, 0x8F};
|
||||
static symbol s_6[] = {0xD0, 0xBD};
|
||||
static symbol s_7[] = {0xD0, 0xBD};
|
||||
static symbol s_8[] = {0xD0, 0xBD};
|
||||
static symbol s_9[] = {0xD0, 0xB8};
|
||||
|
||||
static int
|
||||
r_mark_regions(struct SN_env * z)
|
||||
{
|
||||
z->I[0] = z->l;
|
||||
z->I[1] = z->l;
|
||||
{
|
||||
int c = z->c; /* do, line 61 */
|
||||
|
||||
while (1)
|
||||
{ /* gopast, line 62 */
|
||||
if (!(in_grouping_U(z, g_v, 1072, 1103)))
|
||||
goto lab1;
|
||||
break;
|
||||
lab1:
|
||||
{
|
||||
int c = skip_utf8(z->p, z->c, 0, z->l, 1);
|
||||
|
||||
if (c < 0)
|
||||
goto lab0;
|
||||
z->c = c; /* gopast, line 62 */
|
||||
}
|
||||
}
|
||||
z->I[0] = z->c; /* setmark pV, line 62 */
|
||||
while (1)
|
||||
{ /* gopast, line 62 */
|
||||
if (!(out_grouping_U(z, g_v, 1072, 1103)))
|
||||
goto lab2;
|
||||
break;
|
||||
lab2:
|
||||
{
|
||||
int c = skip_utf8(z->p, z->c, 0, z->l, 1);
|
||||
|
||||
if (c < 0)
|
||||
goto lab0;
|
||||
z->c = c; /* gopast, line 62 */
|
||||
}
|
||||
}
|
||||
while (1)
|
||||
{ /* gopast, line 63 */
|
||||
if (!(in_grouping_U(z, g_v, 1072, 1103)))
|
||||
goto lab3;
|
||||
break;
|
||||
lab3:
|
||||
{
|
||||
int c = skip_utf8(z->p, z->c, 0, z->l, 1);
|
||||
|
||||
if (c < 0)
|
||||
goto lab0;
|
||||
z->c = c; /* gopast, line 63 */
|
||||
}
|
||||
}
|
||||
while (1)
|
||||
{ /* gopast, line 63 */
|
||||
if (!(out_grouping_U(z, g_v, 1072, 1103)))
|
||||
goto lab4;
|
||||
break;
|
||||
lab4:
|
||||
{
|
||||
int c = skip_utf8(z->p, z->c, 0, z->l, 1);
|
||||
|
||||
if (c < 0)
|
||||
goto lab0;
|
||||
z->c = c; /* gopast, line 63 */
|
||||
}
|
||||
}
|
||||
z->I[1] = z->c; /* setmark p2, line 63 */
|
||||
lab0:
|
||||
z->c = c;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_R2(struct SN_env * z)
|
||||
{
|
||||
if (!(z->I[1] <= z->c))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_perfective_gerund(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 72 */
|
||||
among_var = find_among_b(z, a_0, 9); /* substring, line 72 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 72 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* or, line 76 */
|
||||
if (!(eq_s_b(z, 2, s_0)))
|
||||
goto lab1;
|
||||
goto lab0;
|
||||
lab1:
|
||||
z->c = z->l - m;
|
||||
if (!(eq_s_b(z, 2, s_1)))
|
||||
return 0;
|
||||
}
|
||||
lab0:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 76 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 83 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_adjective(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 88 */
|
||||
among_var = find_among_b(z, a_1, 26); /* substring, line 88 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 88 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 97 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_adjectival(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
{
|
||||
int ret = r_adjective(z);
|
||||
|
||||
if (ret == 0)
|
||||
return 0; /* call adjective, line 102 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* try, line 109 */
|
||||
z->ket = z->c; /* [, line 110 */
|
||||
among_var = find_among_b(z, a_2, 8); /* substring, line 110 */
|
||||
if (!(among_var))
|
||||
{
|
||||
z->c = z->l - m;
|
||||
goto lab0;
|
||||
}
|
||||
z->bra = z->c; /* ], line 110 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
{
|
||||
z->c = z->l - m;
|
||||
goto lab0;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* or, line 115 */
|
||||
if (!(eq_s_b(z, 2, s_2)))
|
||||
goto lab2;
|
||||
goto lab1;
|
||||
lab2:
|
||||
z->c = z->l - m;
|
||||
if (!(eq_s_b(z, 2, s_3)))
|
||||
{
|
||||
z->c = z->l - m;
|
||||
goto lab0;
|
||||
}
|
||||
}
|
||||
lab1:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 115 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 122 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
lab0:
|
||||
;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_reflexive(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 129 */
|
||||
among_var = find_among_b(z, a_3, 2); /* substring, line 129 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 129 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 132 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_verb(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 137 */
|
||||
among_var = find_among_b(z, a_4, 46); /* substring, line 137 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 137 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* or, line 143 */
|
||||
if (!(eq_s_b(z, 2, s_4)))
|
||||
goto lab1;
|
||||
goto lab0;
|
||||
lab1:
|
||||
z->c = z->l - m;
|
||||
if (!(eq_s_b(z, 2, s_5)))
|
||||
return 0;
|
||||
}
|
||||
lab0:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 143 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 151 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_noun(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 160 */
|
||||
among_var = find_among_b(z, a_5, 36); /* substring, line 160 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 160 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 167 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_derivational(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 176 */
|
||||
among_var = find_among_b(z, a_6, 2); /* substring, line 176 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 176 */
|
||||
{
|
||||
int ret = r_R2(z);
|
||||
|
||||
if (ret == 0)
|
||||
return 0; /* call R2, line 176 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 179 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
r_tidy_up(struct SN_env * z)
|
||||
{
|
||||
int among_var;
|
||||
|
||||
z->ket = z->c; /* [, line 184 */
|
||||
among_var = find_among_b(z, a_7, 4); /* substring, line 184 */
|
||||
if (!(among_var))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 184 */
|
||||
switch (among_var)
|
||||
{
|
||||
case 0:
|
||||
return 0;
|
||||
case 1:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 188 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
z->ket = z->c; /* [, line 189 */
|
||||
if (!(eq_s_b(z, 2, s_6)))
|
||||
return 0;
|
||||
z->bra = z->c; /* ], line 189 */
|
||||
if (!(eq_s_b(z, 2, s_7)))
|
||||
return 0;
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 189 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (!(eq_s_b(z, 2, s_8)))
|
||||
return 0;
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 192 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 194 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
russian_UTF_8_stem(struct SN_env * z)
|
||||
{
|
||||
{
|
||||
int c = z->c; /* do, line 201 */
|
||||
|
||||
{
|
||||
int ret = r_mark_regions(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab0; /* call mark_regions, line 201 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
lab0:
|
||||
z->c = c;
|
||||
}
|
||||
z->lb = z->c;
|
||||
z->c = z->l; /* backwards, line 202 */
|
||||
|
||||
{
|
||||
int m3; /* setlimit, line 202 */
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m;
|
||||
if (z->c < z->I[0])
|
||||
return 0;
|
||||
z->c = z->I[0]; /* tomark, line 202 */
|
||||
m3 = z->lb;
|
||||
z->lb = z->c;
|
||||
z->c = z->l - m;
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* do, line 203 */
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* or, line 204 */
|
||||
{
|
||||
int ret = r_perfective_gerund(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab3; /* call perfective_gerund, line 204 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
goto lab2;
|
||||
lab3:
|
||||
z->c = z->l - m;
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* try, line 205 */
|
||||
{
|
||||
int ret = r_reflexive(z);
|
||||
|
||||
if (ret == 0)
|
||||
{
|
||||
z->c = z->l - m;
|
||||
goto lab4;
|
||||
} /* call reflexive, line 205 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
lab4:
|
||||
;
|
||||
}
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* or, line 206 */
|
||||
{
|
||||
int ret = r_adjectival(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab6; /* call adjectival, line 206 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
goto lab5;
|
||||
lab6:
|
||||
z->c = z->l - m;
|
||||
{
|
||||
int ret = r_verb(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab7; /* call verb, line 206 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
goto lab5;
|
||||
lab7:
|
||||
z->c = z->l - m;
|
||||
{
|
||||
int ret = r_noun(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab1; /* call noun, line 206 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
lab5:
|
||||
;
|
||||
}
|
||||
lab2:
|
||||
lab1:
|
||||
z->c = z->l - m;
|
||||
}
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* try, line 209 */
|
||||
z->ket = z->c; /* [, line 209 */
|
||||
if (!(eq_s_b(z, 2, s_9)))
|
||||
{
|
||||
z->c = z->l - m;
|
||||
goto lab8;
|
||||
}
|
||||
z->bra = z->c; /* ], line 209 */
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = slice_del(z); /* delete, line 209 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
lab8:
|
||||
;
|
||||
}
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* do, line 212 */
|
||||
{
|
||||
int ret = r_derivational(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab9; /* call derivational, line 212 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
lab9:
|
||||
z->c = z->l - m;
|
||||
}
|
||||
{
|
||||
int m = z->l - z->c;
|
||||
|
||||
(void) m; /* do, line 213 */
|
||||
{
|
||||
int ret = r_tidy_up(z);
|
||||
|
||||
if (ret == 0)
|
||||
goto lab10; /* call tidy_up, line 213 */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
lab10:
|
||||
z->c = z->l - m;
|
||||
}
|
||||
z->lb = m3;
|
||||
}
|
||||
z->c = z->lb;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Allocate the working environment for this stemmer via
 * SN_create_env(0, 2, 0).
 * NOTE(review): the middle argument presumably sizes z->I, of which this
 * stemmer uses I[0] and I[1] -- confirm against SN_create_env.
 */
extern struct SN_env *
russian_UTF_8_create_env(void)
{
    struct SN_env *env = SN_create_env(0, 2, 0);

    return env;
}
|
||||
|
||||
/* Release an environment obtained from russian_UTF_8_create_env. */
extern void
russian_UTF_8_close_env(struct SN_env * z)
{
    SN_close_env(z);
}
|
|
@ -1,17 +0,0 @@
|
|||
|
||||
/* This file was generated automatically by the Snowball to ANSI C compiler */

/* Public interface of the Russian (UTF-8) stemmer; usable from C and C++. */

#ifdef __cplusplus
extern "C"
{
#endif

/* Create / destroy the stemmer's working environment. */
extern struct SN_env *russian_UTF_8_create_env(void);
extern void russian_UTF_8_close_env(struct SN_env * z);

/* Stem the word held in z in place. */
extern int russian_UTF_8_stem(struct SN_env * z);

#ifdef __cplusplus
}
#endif
|
|
@ -1,656 +0,0 @@
|
|||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "header.h"
|
||||
|
||||
#define unless(C) if(!(C))
|
||||
|
||||
#define CREATE_SIZE 1
|
||||
|
||||
extern symbol *
|
||||
create_s(void)
|
||||
{
|
||||
symbol *p;
|
||||
void *mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
|
||||
|
||||
if (mem == NULL)
|
||||
return NULL;
|
||||
p = (symbol *) (HEAD + (char *) mem);
|
||||
CAPACITY(p) = CREATE_SIZE;
|
||||
SET_SIZE(p, CREATE_SIZE);
|
||||
return p;
|
||||
}
|
||||
|
||||
extern void
|
||||
lose_s(symbol * p)
|
||||
{
|
||||
if (p == NULL)
|
||||
return;
|
||||
free((char *) p - HEAD);
|
||||
}
|
||||
|
||||
/*
|
||||
new_p = X_skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c
|
||||
if n +ve, or n characters backwards from p +c - 1 if n -ve. new_p is the new
|
||||
position, or 0 on failure.
|
||||
|
||||
-- used to implement hop and next in the utf8 case.
|
||||
*/
|
||||
|
||||
extern int
|
||||
skip_utf8(const symbol * p, int c, int lb, int l, int n)
|
||||
{
|
||||
int b;
|
||||
|
||||
if (n >= 0)
|
||||
{
|
||||
for (; n > 0; n--)
|
||||
{
|
||||
if (c >= l)
|
||||
return -1;
|
||||
b = p[c++];
|
||||
if (b >= 0xC0)
|
||||
{ /* 1100 0000 */
|
||||
while (c < l)
|
||||
{
|
||||
b = p[c];
|
||||
if (b >= 0xC0 || b < 0x80)
|
||||
break;
|
||||
/* break unless b is 10------ */
|
||||
c++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (; n < 0; n++)
|
||||
{
|
||||
if (c <= lb)
|
||||
return -1;
|
||||
b = p[--c];
|
||||
if (b >= 0x80)
|
||||
{ /* 1000 0000 */
|
||||
while (c > lb)
|
||||
{
|
||||
b = p[c];
|
||||
if (b >= 0xC0)
|
||||
break; /* 1100 0000 */
|
||||
c--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/* Code for character groupings: utf8 cases */
|
||||
|
||||
static int
|
||||
get_utf8(const symbol * p, int c, int l, int *slot)
|
||||
{
|
||||
int b0,
|
||||
b1;
|
||||
|
||||
if (c >= l)
|
||||
return 0;
|
||||
b0 = p[c++];
|
||||
if (b0 < 0xC0 || c == l)
|
||||
{ /* 1100 0000 */
|
||||
*slot = b0;
|
||||
return 1;
|
||||
}
|
||||
b1 = p[c++];
|
||||
if (b0 < 0xE0 || c == l)
|
||||
{ /* 1110 0000 */
|
||||
*slot = (b0 & 0x1F) << 6 | (b1 & 0x3F);
|
||||
return 2;
|
||||
}
|
||||
*slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (*p & 0x3F);
|
||||
return 3;
|
||||
}
|
||||
|
||||
static int
|
||||
get_b_utf8(const symbol * p, int c, int lb, int *slot)
|
||||
{
|
||||
int b0,
|
||||
b1;
|
||||
|
||||
if (c <= lb)
|
||||
return 0;
|
||||
b0 = p[--c];
|
||||
if (b0 < 0x80 || c == lb)
|
||||
{ /* 1000 0000 */
|
||||
*slot = b0;
|
||||
return 1;
|
||||
}
|
||||
b1 = p[--c];
|
||||
if (b1 >= 0xC0 || c == lb)
|
||||
{ /* 1100 0000 */
|
||||
*slot = (b1 & 0x1F) << 6 | (b0 & 0x3F);
|
||||
return 2;
|
||||
}
|
||||
*slot = (*p & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F);
|
||||
return 3;
|
||||
}
|
||||
|
||||
extern int
|
||||
in_grouping_U(struct SN_env * z, unsigned char *s, int min, int max)
|
||||
{
|
||||
int ch;
|
||||
int w = get_utf8(z->p, z->c, z->l, &ch);
|
||||
|
||||
unless(w) return 0;
|
||||
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return 0;
|
||||
z->c += w;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
in_grouping_b_U(struct SN_env * z, unsigned char *s, int min, int max)
|
||||
{
|
||||
int ch;
|
||||
int w = get_b_utf8(z->p, z->c, z->lb, &ch);
|
||||
|
||||
unless(w) return 0;
|
||||
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return 0;
|
||||
z->c -= w;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
out_grouping_U(struct SN_env * z, unsigned char *s, int min, int max)
|
||||
{
|
||||
int ch;
|
||||
int w = get_utf8(z->p, z->c, z->l, &ch);
|
||||
|
||||
unless(w) return 0;
|
||||
unless(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
||||
z->c += w;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
out_grouping_b_U(struct SN_env * z, unsigned char *s, int min, int max)
|
||||
{
|
||||
int ch;
|
||||
int w = get_b_utf8(z->p, z->c, z->lb, &ch);
|
||||
|
||||
unless(w) return 0;
|
||||
unless(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
||||
z->c -= w;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Code for character groupings: non-utf8 cases */
|
||||
|
||||
extern int
|
||||
in_grouping(struct SN_env * z, unsigned char *s, int min, int max)
|
||||
{
|
||||
int ch;
|
||||
|
||||
if (z->c >= z->l)
|
||||
return 0;
|
||||
ch = z->p[z->c];
|
||||
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return 0;
|
||||
z->c++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
in_grouping_b(struct SN_env * z, unsigned char *s, int min, int max)
|
||||
{
|
||||
int ch;
|
||||
|
||||
if (z->c <= z->lb)
|
||||
return 0;
|
||||
ch = z->p[z->c - 1];
|
||||
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return 0;
|
||||
z->c--;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
out_grouping(struct SN_env * z, unsigned char *s, int min, int max)
|
||||
{
|
||||
int ch;
|
||||
|
||||
if (z->c >= z->l)
|
||||
return 0;
|
||||
ch = z->p[z->c];
|
||||
unless(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
||||
z->c++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
out_grouping_b(struct SN_env * z, unsigned char *s, int min, int max)
|
||||
{
|
||||
int ch;
|
||||
|
||||
if (z->c <= z->lb)
|
||||
return 0;
|
||||
ch = z->p[z->c - 1];
|
||||
unless(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
||||
z->c--;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
eq_s(struct SN_env * z, int s_size, symbol * s)
|
||||
{
|
||||
if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0)
|
||||
return 0;
|
||||
z->c += s_size;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
eq_s_b(struct SN_env * z, int s_size, symbol * s)
|
||||
{
|
||||
if (z->c - z->lb < s_size || memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0)
|
||||
return 0;
|
||||
z->c -= s_size;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
eq_v(struct SN_env * z, symbol * p)
|
||||
{
|
||||
return eq_s(z, SIZE(p), p);
|
||||
}
|
||||
|
||||
extern int
|
||||
eq_v_b(struct SN_env * z, symbol * p)
|
||||
{
|
||||
return eq_s_b(z, SIZE(p), p);
|
||||
}
|
||||
|
||||
extern int
|
||||
find_among(struct SN_env * z, struct among * v, int v_size)
|
||||
{
|
||||
|
||||
int i = 0;
|
||||
int j = v_size;
|
||||
|
||||
int c = z->c;
|
||||
int l = z->l;
|
||||
symbol *q = z->p + c;
|
||||
|
||||
struct among *w;
|
||||
|
||||
int common_i = 0;
|
||||
int common_j = 0;
|
||||
|
||||
int first_key_inspected = 0;
|
||||
|
||||
while (1)
|
||||
{
|
||||
int k = i + ((j - i) >> 1);
|
||||
int diff = 0;
|
||||
int common = common_i < common_j ? common_i : common_j; /* smaller */
|
||||
|
||||
w = v + k;
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = common; i < w->s_size; i++)
|
||||
{
|
||||
if (c + common == l)
|
||||
{
|
||||
diff = -1;
|
||||
break;
|
||||
}
|
||||
diff = q[common] - w->s[i];
|
||||
if (diff != 0)
|
||||
break;
|
||||
common++;
|
||||
}
|
||||
}
|
||||
if (diff < 0)
|
||||
{
|
||||
j = k;
|
||||
common_j = common;
|
||||
}
|
||||
else
|
||||
{
|
||||
i = k;
|
||||
common_i = common;
|
||||
}
|
||||
if (j - i <= 1)
|
||||
{
|
||||
if (i > 0)
|
||||
break; /* v->s has been inspected */
|
||||
if (j == i)
|
||||
break; /* only one item in v */
|
||||
|
||||
/*
|
||||
* - but now we need to go round once more to get v->s inspected.
|
||||
* This looks messy, but is actually the optimal approach.
|
||||
*/
|
||||
|
||||
if (first_key_inspected)
|
||||
break;
|
||||
first_key_inspected = 1;
|
||||
}
|
||||
}
|
||||
while (1)
|
||||
{
|
||||
w = v + i;
|
||||
if (common_i >= w->s_size)
|
||||
{
|
||||
z->c = c + w->s_size;
|
||||
if (w->function == 0)
|
||||
return w->result;
|
||||
{
|
||||
int res = w->function(z);
|
||||
|
||||
z->c = c + w->s_size;
|
||||
if (res)
|
||||
return w->result;
|
||||
}
|
||||
}
|
||||
i = w->substring_i;
|
||||
if (i < 0)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* find_among_b is for backwards processing. Same comments apply */
|
||||
|
||||
extern int
|
||||
find_among_b(struct SN_env * z, struct among * v, int v_size)
|
||||
{
|
||||
|
||||
int i = 0;
|
||||
int j = v_size;
|
||||
|
||||
int c = z->c;
|
||||
int lb = z->lb;
|
||||
symbol *q = z->p + c - 1;
|
||||
|
||||
struct among *w;
|
||||
|
||||
int common_i = 0;
|
||||
int common_j = 0;
|
||||
|
||||
int first_key_inspected = 0;
|
||||
|
||||
while (1)
|
||||
{
|
||||
int k = i + ((j - i) >> 1);
|
||||
int diff = 0;
|
||||
int common = common_i < common_j ? common_i : common_j;
|
||||
|
||||
w = v + k;
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = w->s_size - 1 - common; i >= 0; i--)
|
||||
{
|
||||
if (c - common == lb)
|
||||
{
|
||||
diff = -1;
|
||||
break;
|
||||
}
|
||||
diff = q[-common] - w->s[i];
|
||||
if (diff != 0)
|
||||
break;
|
||||
common++;
|
||||
}
|
||||
}
|
||||
if (diff < 0)
|
||||
{
|
||||
j = k;
|
||||
common_j = common;
|
||||
}
|
||||
else
|
||||
{
|
||||
i = k;
|
||||
common_i = common;
|
||||
}
|
||||
if (j - i <= 1)
|
||||
{
|
||||
if (i > 0)
|
||||
break;
|
||||
if (j == i)
|
||||
break;
|
||||
if (first_key_inspected)
|
||||
break;
|
||||
first_key_inspected = 1;
|
||||
}
|
||||
}
|
||||
while (1)
|
||||
{
|
||||
w = v + i;
|
||||
if (common_i >= w->s_size)
|
||||
{
|
||||
z->c = c - w->s_size;
|
||||
if (w->function == 0)
|
||||
return w->result;
|
||||
{
|
||||
int res = w->function(z);
|
||||
|
||||
z->c = c - w->s_size;
|
||||
if (res)
|
||||
return w->result;
|
||||
}
|
||||
}
|
||||
i = w->substring_i;
|
||||
if (i < 0)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Increase the size of the buffer pointed to by p to at least n symbols.
|
||||
* If insufficient memory, returns NULL and frees the old buffer.
|
||||
*/
|
||||
static symbol *
|
||||
increase_size(symbol * p, int n)
|
||||
{
|
||||
symbol *q;
|
||||
int new_size = n + 20;
|
||||
void *mem = realloc((char *) p - HEAD,
|
||||
HEAD + (new_size + 1) * sizeof(symbol));
|
||||
|
||||
if (mem == NULL)
|
||||
{
|
||||
lose_s(p);
|
||||
return NULL;
|
||||
}
|
||||
q = (symbol *) (HEAD + (char *) mem);
|
||||
CAPACITY(q) = new_size;
|
||||
return q;
|
||||
}
|
||||
|
||||
/* to replace symbols between c_bra and c_ket in z->p by the
|
||||
s_size symbols at s.
|
||||
Returns 0 on success, -1 on error.
|
||||
Also, frees z->p (and sets it to NULL) on error.
|
||||
*/
|
||||
extern int
|
||||
replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int *adjptr)
|
||||
{
|
||||
int adjustment;
|
||||
int len;
|
||||
|
||||
if (z->p == NULL)
|
||||
{
|
||||
z->p = create_s();
|
||||
if (z->p == NULL)
|
||||
return -1;
|
||||
}
|
||||
adjustment = s_size - (c_ket - c_bra);
|
||||
len = SIZE(z->p);
|
||||
if (adjustment != 0)
|
||||
{
|
||||
if (adjustment + len > CAPACITY(z->p))
|
||||
{
|
||||
z->p = increase_size(z->p, adjustment + len);
|
||||
if (z->p == NULL)
|
||||
return -1;
|
||||
}
|
||||
memmove(z->p + c_ket + adjustment,
|
||||
z->p + c_ket,
|
||||
(len - c_ket) * sizeof(symbol));
|
||||
SET_SIZE(z->p, adjustment + len);
|
||||
z->l += adjustment;
|
||||
if (z->c >= c_ket)
|
||||
z->c += adjustment;
|
||||
else if (z->c > c_bra)
|
||||
z->c = c_bra;
|
||||
}
|
||||
unless(s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
|
||||
if (adjptr != NULL)
|
||||
*adjptr = adjustment;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
slice_check(struct SN_env * z)
|
||||
{
|
||||
|
||||
if (z->bra < 0 ||
|
||||
z->bra > z->ket ||
|
||||
z->ket > z->l ||
|
||||
z->p == NULL ||
|
||||
z->l > SIZE(z->p)) /* this line could be removed */
|
||||
{
|
||||
#if 0
|
||||
fprintf(stderr, "faulty slice operation:\n");
|
||||
debug(z, -1, 0);
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
slice_from_s(struct SN_env * z, int s_size, symbol * s)
|
||||
{
|
||||
if (slice_check(z))
|
||||
return -1;
|
||||
return replace_s(z, z->bra, z->ket, s_size, s, NULL);
|
||||
}
|
||||
|
||||
extern int
|
||||
slice_from_v(struct SN_env * z, symbol * p)
|
||||
{
|
||||
return slice_from_s(z, SIZE(p), p);
|
||||
}
|
||||
|
||||
/* Delete the current slice, i.e. replace it by an empty string. */
extern int
slice_del(struct SN_env * z)
{
    return slice_from_s(z, 0, NULL);
}
|
||||
|
||||
extern int
|
||||
insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
|
||||
{
|
||||
int adjustment;
|
||||
|
||||
if (replace_s(z, bra, ket, s_size, s, &adjustment))
|
||||
return -1;
|
||||
if (bra <= z->bra)
|
||||
z->bra += adjustment;
|
||||
if (bra <= z->ket)
|
||||
z->ket += adjustment;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
insert_v(struct SN_env * z, int bra, int ket, symbol * p)
|
||||
{
|
||||
int adjustment;
|
||||
|
||||
if (replace_s(z, bra, ket, SIZE(p), p, &adjustment))
|
||||
return -1;
|
||||
if (bra <= z->bra)
|
||||
z->bra += adjustment;
|
||||
if (bra <= z->ket)
|
||||
z->ket += adjustment;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Copy the current slice [bra, ket) into buffer p, growing p if needed.
 * Returns the (possibly relocated) buffer.  On an invalid slice p is freed
 * and NULL is returned; NULL is also returned if growing p fails.
 */
extern symbol *
slice_to(struct SN_env * z, symbol * p)
{
    int         len;

    if (slice_check(z) != 0)
    {
        lose_s(p);
        return NULL;
    }

    len = z->ket - z->bra;
    if (CAPACITY(p) < len)
    {
        p = increase_size(p, len);
        if (p == NULL)
            return NULL;
    }
    memmove(p, z->p + z->bra, len * sizeof(symbol));
    SET_SIZE(p, len);
    return p;
}
|
||||
|
||||
/*
 * Copy the first z->l symbols of z->p into buffer p, growing p if needed.
 * Returns the (possibly relocated) buffer, or NULL if growing p fails.
 */
extern symbol *
assign_to(struct SN_env * z, symbol * p)
{
    int         count = z->l;

    if (CAPACITY(p) < count)
    {
        p = increase_size(p, count);
        if (p == NULL)
            return NULL;
    }
    memmove(p, z->p, count * sizeof(symbol));
    SET_SIZE(p, count);
    return p;
}
|
||||
|
||||
#if 0
/*
 * Diagnostic dump of z->p (compiled out).  Marker characters are printed
 * in front of the positions they denote: '{' lb, '[' bra, '|' cursor,
 * ']' ket, '}' l.  Zero symbols are shown as '#'.  The header with number
 * and line_count is printed only when number is non-negative.
 */
extern void
debug(struct SN_env * z, int number, int line_count)
{
    int         limit = SIZE(z->p);
    int         pos;

    /* if (number >= 0) printf("%3d (line %4d): '", number, line_count); */
    if (number >= 0)
        printf("%3d (line %4d): [%d]'", number, line_count, limit);

    for (pos = 0; pos <= limit; pos++)
    {
        if (pos == z->lb)
            printf("{");
        if (pos == z->bra)
            printf("[");
        if (pos == z->c)
            printf("|");
        if (pos == z->ket)
            printf("]");
        if (pos == z->l)
            printf("}");
        if (pos < limit)
        {
            int         ch = z->p[pos];

            printf("%c", ch == 0 ? '#' : ch);
        }
    }
    printf("'\n");
}

#endif
|
|
@ -88,9 +88,9 @@ Moscow moskva | moscow
|
|||
\set ECHO all
|
||||
|
||||
alter table test_tsquery add column keyword tsquery;
|
||||
update test_tsquery set keyword = to_tsquery('default', txtkeyword);
|
||||
update test_tsquery set keyword = to_tsquery('english', txtkeyword);
|
||||
alter table test_tsquery add column sample tsquery;
|
||||
update test_tsquery set sample = to_tsquery('default', txtsample::text);
|
||||
update test_tsquery set sample = to_tsquery('english', txtsample::text);
|
||||
|
||||
create unique index bt_tsq on test_tsquery (keyword);
|
||||
|
||||
|
@ -116,21 +116,10 @@ select rewrite('moscow', 'select keyword, sample from test_tsquery'::text );
|
|||
select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::text );
|
||||
select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text );
|
||||
|
||||
select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery;
|
||||
select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery;
|
||||
select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery;
|
||||
|
||||
|
||||
select keyword from test_tsquery where keyword @> 'new';
|
||||
select keyword from test_tsquery where keyword @> 'moscow';
|
||||
select keyword from test_tsquery where keyword <@ 'new';
|
||||
select keyword from test_tsquery where keyword <@ 'moscow';
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword <@ query;
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword <@ query;
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword <@ query;
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @> keyword;
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @> keyword;
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @> keyword;
|
||||
|
||||
create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops);
|
||||
set enable_seqscan='off';
|
||||
|
@ -139,48 +128,43 @@ select keyword from test_tsquery where keyword @> 'new';
|
|||
select keyword from test_tsquery where keyword @> 'moscow';
|
||||
select keyword from test_tsquery where keyword <@ 'new';
|
||||
select keyword from test_tsquery where keyword <@ 'moscow';
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword <@ query;
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword <@ query;
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword <@ query;
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @> keyword;
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @> keyword;
|
||||
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @> keyword;
|
||||
|
||||
set enable_seqscan='on';
|
||||
|
||||
|
||||
|
||||
select lexize('simple', 'ASD56 hsdkf');
|
||||
select lexize('en_stem', 'SKIES Problems identity');
|
||||
select lexize('english_stem', 'SKIES Problems identity');
|
||||
|
||||
select * from token_type('default');
|
||||
select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
||||
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
||||
<i <b> wow < jqw <> qwerty');
|
||||
|
||||
SELECT to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
||||
SELECT to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
||||
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
||||
<i <b> wow < jqw <> qwerty');
|
||||
|
||||
SELECT length(to_tsvector('default', '345 qw'));
|
||||
SELECT length(to_tsvector('english', '345 qw'));
|
||||
|
||||
SELECT length(to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
||||
SELECT length(to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
||||
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
||||
<i <b> wow < jqw <> qwerty'));
|
||||
|
||||
|
||||
select to_tsquery('default', 'qwe & sKies ');
|
||||
select to_tsquery('english', 'qwe & sKies ');
|
||||
select to_tsquery('simple', 'qwe & sKies ');
|
||||
select to_tsquery('default', '''the wether'':dc & '' sKies '':BC ');
|
||||
select to_tsquery('default', 'asd&(and|fghj)');
|
||||
select to_tsquery('default', '(asd&and)|fghj');
|
||||
select to_tsquery('default', '(asd&!and)|fghj');
|
||||
select to_tsquery('default', '(the|and&(i&1))&fghj');
|
||||
select to_tsquery('english', '''the wether'':dc & '' sKies '':BC ');
|
||||
select to_tsquery('english', 'asd&(and|fghj)');
|
||||
select to_tsquery('english', '(asd&and)|fghj');
|
||||
select to_tsquery('english', '(asd&!and)|fghj');
|
||||
select to_tsquery('english', '(the|and&(i&1))&fghj');
|
||||
|
||||
select plainto_tsquery('default', 'the and z 1))& fghj');
|
||||
select plainto_tsquery('default', 'foo bar') && plainto_tsquery('default', 'asd');
|
||||
select plainto_tsquery('default', 'foo bar') || plainto_tsquery('default', 'asd fg');
|
||||
select plainto_tsquery('default', 'foo bar') || !!plainto_tsquery('default', 'asd fg');
|
||||
select plainto_tsquery('default', 'foo bar') && 'asd | fg';
|
||||
select plainto_tsquery('english', 'the and z 1))& fghj');
|
||||
select plainto_tsquery('english', 'foo bar') && plainto_tsquery('english', 'asd');
|
||||
select plainto_tsquery('english', 'foo bar') || plainto_tsquery('english', 'asd fg');
|
||||
select plainto_tsquery('english', 'foo bar') || !!plainto_tsquery('english', 'asd fg');
|
||||
select plainto_tsquery('english', 'foo bar') && 'asd | fg';
|
||||
|
||||
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
|
||||
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
|
||||
|
@ -209,7 +193,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|||
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
||||
|
||||
select set_curcfg('default');
|
||||
select set_curcfg('english');
|
||||
|
||||
CREATE TRIGGER tsvectorupdate
|
||||
BEFORE UPDATE OR INSERT ON test_tsvector
|
||||
|
@ -225,11 +209,7 @@ UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
|
|||
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
||||
|
||||
drop trigger tsvectorupdate on test_tsvector;
|
||||
-- Helper for the trigger test: returns its text argument with ' copyright' appended.
create function wow(text) returns text as 'select $1 || '' copyright''; ' language sql;
|
||||
create trigger tsvectorupdate before update or insert on test_tsvector
|
||||
for each row execute procedure tsearch2(a, wow, t);
|
||||
insert into test_tsvector (t) values ('345 qwerty');
|
||||
insert into test_tsvector (t) values ('345 qwerty copyright');
|
||||
select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
||||
select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
|
||||
|
||||
|
@ -251,8 +231,7 @@ select * from stat('select a from test_tsvector','c') order by ndoc desc, nentry
|
|||
select * from stat('select a from test_tsvector','d') order by ndoc desc, nentry desc, word;
|
||||
select * from stat('select a from test_tsvector','ad') order by ndoc desc, nentry desc, word;
|
||||
|
||||
select reset_tsearch();
|
||||
select to_tsquery('default', 'skies & books');
|
||||
select to_tsquery('english', 'skies & books');
|
||||
|
||||
select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
|
||||
A thousand years to trace
|
||||
|
@ -284,36 +263,6 @@ The sculpture of these granite seams,
|
|||
Upon a woman s face. E. J. Pratt (1882 1964)
|
||||
'), to_tsquery('sea'));
|
||||
|
||||
select get_covers(to_tsvector('Erosion It took the sea a thousand years,
|
||||
A thousand years to trace
|
||||
The granite features of this cliff
|
||||
In crag and scarp and base.
|
||||
It took the sea an hour one night
|
||||
An hour of storm to place
|
||||
The sculpture of these granite seams,
|
||||
Upon a woman s face. E. J. Pratt (1882 1964)
|
||||
'), to_tsquery('sea&thousand&years'));
|
||||
|
||||
select get_covers(to_tsvector('Erosion It took the sea a thousand years,
|
||||
A thousand years to trace
|
||||
The granite features of this cliff
|
||||
In crag and scarp and base.
|
||||
It took the sea an hour one night
|
||||
An hour of storm to place
|
||||
The sculpture of these granite seams,
|
||||
Upon a woman s face. E. J. Pratt (1882 1964)
|
||||
'), to_tsquery('granite&sea'));
|
||||
|
||||
select get_covers(to_tsvector('Erosion It took the sea a thousand years,
|
||||
A thousand years to trace
|
||||
The granite features of this cliff
|
||||
In crag and scarp and base.
|
||||
It took the sea an hour one night
|
||||
An hour of storm to place
|
||||
The sculpture of these granite seams,
|
||||
Upon a woman s face. E. J. Pratt (1882 1964)
|
||||
'), to_tsquery('sea'));
|
||||
|
||||
select headline('Erosion It took the sea a thousand years,
|
||||
A thousand years to trace
|
||||
The granite features of this cliff
|
||||
|
@ -359,7 +308,7 @@ ff-bg
|
|||
</html>',
|
||||
to_tsquery('sea&foo'), 'HighlightAll=true');
|
||||
--check debug
|
||||
select * from ts_debug('Tsearch module for PostgreSQL 7.3.3');
|
||||
select * from public.ts_debug('Tsearch module for PostgreSQL 7.3.3');
|
||||
|
||||
--check ordering
|
||||
insert into test_tsvector values (null, null);
|
||||
|
|
|
@ -1,118 +0,0 @@
|
|||
/*
|
||||
* stopword library
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "common.h"
|
||||
#include "dict.h"
|
||||
#include "ts_locale.h"
|
||||
|
||||
#define STOPBUFLEN 4096
|
||||
|
||||
void
|
||||
freestoplist(StopList * s)
|
||||
{
|
||||
char **ptr = s->stop;
|
||||
|
||||
if (ptr)
|
||||
while (*ptr && s->len > 0)
|
||||
{
|
||||
free(*ptr);
|
||||
ptr++;
|
||||
s->len--;
|
||||
free(s->stop);
|
||||
}
|
||||
memset(s, 0, sizeof(StopList));
|
||||
}
|
||||
|
||||
void
|
||||
readstoplist(text *in, StopList * s)
|
||||
{
|
||||
char **stop = NULL;
|
||||
|
||||
s->len = 0;
|
||||
if (in && VARSIZE(in) - VARHDRSZ > 0)
|
||||
{
|
||||
char *filename = to_absfilename(text2char(in));
|
||||
FILE *hin;
|
||||
char buf[STOPBUFLEN], *pbuf;
|
||||
int reallen = 0;
|
||||
|
||||
if ((hin = fopen(filename, "r")) == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("could not open file \"%s\": %m",
|
||||
filename)));
|
||||
|
||||
while (fgets(buf, sizeof(buf), hin))
|
||||
{
|
||||
pbuf = buf;
|
||||
while( *pbuf && !isspace((unsigned char) *pbuf ) )
|
||||
pbuf++;
|
||||
*pbuf = '\0';
|
||||
|
||||
pg_verifymbstr(buf, strlen(buf), false);
|
||||
if (*buf == '\0' || *buf=='\n' || *buf=='\r')
|
||||
continue;
|
||||
|
||||
if (s->len >= reallen)
|
||||
{
|
||||
char **tmp;
|
||||
|
||||
reallen = (reallen) ? reallen * 2 : 16;
|
||||
tmp = (char **) realloc((void *) stop, sizeof(char *) * reallen);
|
||||
if (!tmp)
|
||||
{
|
||||
freestoplist(s);
|
||||
fclose(hin);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
stop = tmp;
|
||||
}
|
||||
|
||||
if (s->wordop)
|
||||
{
|
||||
pbuf = s->wordop(buf);
|
||||
stop[s->len] = strdup(pbuf);
|
||||
pfree(pbuf);
|
||||
} else
|
||||
stop[s->len] = strdup(buf);
|
||||
|
||||
if (!stop[s->len])
|
||||
{
|
||||
freestoplist(s);
|
||||
fclose(hin);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
|
||||
(s->len)++;
|
||||
}
|
||||
fclose(hin);
|
||||
pfree(filename);
|
||||
}
|
||||
s->stop = stop;
|
||||
}
|
||||
|
||||
/*
 * comparestr
 *		qsort()/bsearch() comparator for arrays of C strings: each argument
 *		points at a "char *" element, and the strings are ordered by strcmp().
 */
static int
comparestr(const void *a, const void *b)
{
	const char *left = *(char **) a;
	const char *right = *(char **) b;

	return strcmp(left, right);
}
|
||||
|
||||
void
|
||||
sortstoplist(StopList * s)
|
||||
{
|
||||
if (s->stop && s->len > 0)
|
||||
qsort(s->stop, s->len, sizeof(char *), comparestr);
|
||||
}
|
||||
|
||||
bool
|
||||
searchstoplist(StopList * s, char *key)
|
||||
{
|
||||
return (s->stop && s->len > 0 && bsearch(&key, s->stop, s->len, sizeof(char *), comparestr)) ? true : false;
|
||||
}
|
|
@ -1,128 +0,0 @@
|
|||
i
|
||||
me
|
||||
my
|
||||
myself
|
||||
we
|
||||
our
|
||||
ours
|
||||
ourselves
|
||||
you
|
||||
your
|
||||
yours
|
||||
yourself
|
||||
yourselves
|
||||
he
|
||||
him
|
||||
his
|
||||
himself
|
||||
she
|
||||
her
|
||||
hers
|
||||
herself
|
||||
it
|
||||
its
|
||||
itself
|
||||
they
|
||||
them
|
||||
their
|
||||
theirs
|
||||
themselves
|
||||
what
|
||||
which
|
||||
who
|
||||
whom
|
||||
this
|
||||
that
|
||||
these
|
||||
those
|
||||
am
|
||||
is
|
||||
are
|
||||
was
|
||||
were
|
||||
be
|
||||
been
|
||||
being
|
||||
have
|
||||
has
|
||||
had
|
||||
having
|
||||
do
|
||||
does
|
||||
did
|
||||
doing
|
||||
a
|
||||
an
|
||||
the
|
||||
and
|
||||
but
|
||||
if
|
||||
or
|
||||
because
|
||||
as
|
||||
until
|
||||
while
|
||||
of
|
||||
at
|
||||
by
|
||||
for
|
||||
with
|
||||
about
|
||||
against
|
||||
between
|
||||
into
|
||||
through
|
||||
during
|
||||
before
|
||||
after
|
||||
above
|
||||
below
|
||||
to
|
||||
from
|
||||
up
|
||||
down
|
||||
in
|
||||
out
|
||||
on
|
||||
off
|
||||
over
|
||||
under
|
||||
again
|
||||
further
|
||||
then
|
||||
once
|
||||
here
|
||||
there
|
||||
when
|
||||
where
|
||||
why
|
||||
how
|
||||
all
|
||||
any
|
||||
both
|
||||
each
|
||||
few
|
||||
more
|
||||
most
|
||||
other
|
||||
some
|
||||
such
|
||||
no
|
||||
nor
|
||||
not
|
||||
only
|
||||
own
|
||||
same
|
||||
so
|
||||
than
|
||||
too
|
||||
very
|
||||
s
|
||||
t
|
||||
can
|
||||
will
|
||||
just
|
||||
don
|
||||
should
|
||||
now
|
||||
|
|
@ -1,151 +0,0 @@
|
|||
и
|
||||
в
|
||||
во
|
||||
не
|
||||
что
|
||||
он
|
||||
на
|
||||
я
|
||||
с
|
||||
со
|
||||
как
|
||||
а
|
||||
то
|
||||
все
|
||||
она
|
||||
так
|
||||
его
|
||||
но
|
||||
да
|
||||
ты
|
||||
к
|
||||
у
|
||||
же
|
||||
вы
|
||||
за
|
||||
бы
|
||||
по
|
||||
только
|
||||
ее
|
||||
мне
|
||||
было
|
||||
вот
|
||||
от
|
||||
меня
|
||||
еще
|
||||
нет
|
||||
о
|
||||
из
|
||||
ему
|
||||
теперь
|
||||
когда
|
||||
даже
|
||||
ну
|
||||
вдруг
|
||||
ли
|
||||
если
|
||||
уже
|
||||
или
|
||||
ни
|
||||
быть
|
||||
был
|
||||
него
|
||||
до
|
||||
вас
|
||||
нибудь
|
||||
опять
|
||||
уж
|
||||
вам
|
||||
ведь
|
||||
там
|
||||
потом
|
||||
себя
|
||||
ничего
|
||||
ей
|
||||
может
|
||||
они
|
||||
тут
|
||||
где
|
||||
есть
|
||||
надо
|
||||
ней
|
||||
для
|
||||
мы
|
||||
тебя
|
||||
их
|
||||
чем
|
||||
была
|
||||
сам
|
||||
чтоб
|
||||
без
|
||||
будто
|
||||
чего
|
||||
раз
|
||||
тоже
|
||||
себе
|
||||
под
|
||||
будет
|
||||
ж
|
||||
тогда
|
||||
кто
|
||||
этот
|
||||
того
|
||||
потому
|
||||
этого
|
||||
какой
|
||||
совсем
|
||||
ним
|
||||
здесь
|
||||
этом
|
||||
один
|
||||
почти
|
||||
мой
|
||||
тем
|
||||
чтобы
|
||||
нее
|
||||
сейчас
|
||||
были
|
||||
куда
|
||||
зачем
|
||||
всех
|
||||
никогда
|
||||
можно
|
||||
при
|
||||
наконец
|
||||
два
|
||||
об
|
||||
другой
|
||||
хоть
|
||||
после
|
||||
над
|
||||
больше
|
||||
тот
|
||||
через
|
||||
эти
|
||||
нас
|
||||
про
|
||||
всего
|
||||
них
|
||||
какая
|
||||
много
|
||||
разве
|
||||
три
|
||||
эту
|
||||
моя
|
||||
впрочем
|
||||
хорошо
|
||||
свою
|
||||
этой
|
||||
перед
|
||||
иногда
|
||||
лучше
|
||||
чуть
|
||||
том
|
||||
нельзя
|
||||
такой
|
||||
им
|
||||
более
|
||||
всегда
|
||||
конечно
|
||||
всю
|
||||
между
|
|
@ -1,151 +0,0 @@
|
|||
и
|
||||
в
|
||||
во
|
||||
не
|
||||
что
|
||||
он
|
||||
на
|
||||
я
|
||||
с
|
||||
со
|
||||
как
|
||||
а
|
||||
то
|
||||
все
|
||||
она
|
||||
так
|
||||
его
|
||||
но
|
||||
да
|
||||
ты
|
||||
к
|
||||
у
|
||||
же
|
||||
вы
|
||||
за
|
||||
бы
|
||||
по
|
||||
только
|
||||
ее
|
||||
мне
|
||||
было
|
||||
вот
|
||||
от
|
||||
меня
|
||||
еще
|
||||
нет
|
||||
о
|
||||
из
|
||||
ему
|
||||
теперь
|
||||
когда
|
||||
даже
|
||||
ну
|
||||
вдруг
|
||||
ли
|
||||
если
|
||||
уже
|
||||
или
|
||||
ни
|
||||
быть
|
||||
был
|
||||
него
|
||||
до
|
||||
вас
|
||||
нибудь
|
||||
опять
|
||||
уж
|
||||
вам
|
||||
ведь
|
||||
там
|
||||
потом
|
||||
себя
|
||||
ничего
|
||||
ей
|
||||
может
|
||||
они
|
||||
тут
|
||||
где
|
||||
есть
|
||||
надо
|
||||
ней
|
||||
для
|
||||
мы
|
||||
тебя
|
||||
их
|
||||
чем
|
||||
была
|
||||
сам
|
||||
чтоб
|
||||
без
|
||||
будто
|
||||
чего
|
||||
раз
|
||||
тоже
|
||||
себе
|
||||
под
|
||||
будет
|
||||
ж
|
||||
тогда
|
||||
кто
|
||||
этот
|
||||
того
|
||||
потому
|
||||
этого
|
||||
какой
|
||||
совсем
|
||||
ним
|
||||
здесь
|
||||
этом
|
||||
один
|
||||
почти
|
||||
мой
|
||||
тем
|
||||
чтобы
|
||||
нее
|
||||
сейчас
|
||||
были
|
||||
куда
|
||||
зачем
|
||||
всех
|
||||
никогда
|
||||
можно
|
||||
при
|
||||
наконец
|
||||
два
|
||||
об
|
||||
другой
|
||||
хоть
|
||||
после
|
||||
над
|
||||
больше
|
||||
тот
|
||||
через
|
||||
эти
|
||||
нас
|
||||
про
|
||||
всего
|
||||
них
|
||||
какая
|
||||
много
|
||||
разве
|
||||
три
|
||||
эту
|
||||
моя
|
||||
впрочем
|
||||
хорошо
|
||||
свою
|
||||
этой
|
||||
перед
|
||||
иногда
|
||||
лучше
|
||||
чуть
|
||||
том
|
||||
нельзя
|
||||
такой
|
||||
им
|
||||
более
|
||||
всегда
|
||||
конечно
|
||||
всю
|
||||
между
|
|
@ -1,21 +0,0 @@
|
|||
#
|
||||
# Thesaurus config file. Character ':' splits
|
||||
# a string into parts, for example:
|
||||
# sample-words : substitute-words
|
||||
#
|
||||
# Any substitute-word can be marked by preceding '*' character,
|
||||
# which means do not lexize this word
|
||||
# Docs: http://www.sai.msu.su/~megera/oddmuse/index.cgi/Thesaurus_dictionary
|
||||
|
||||
#one two three : *123
|
||||
#one two : *12
|
||||
#one : *1
|
||||
#two : *2
|
||||
|
||||
#foo bar : blah blah
|
||||
#f bar : fbar
|
||||
#e bar : ebar
|
||||
#g bar bar : gbarbar
|
||||
#asd:sdffff
|
||||
#qwerty:qwer wert erty
|
||||
|
|
@ -1,648 +0,0 @@
|
|||
/*
|
||||
* interface functions to tscfg
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <locale.h>
|
||||
|
||||
#include "catalog/pg_type.h"
|
||||
#include "executor/spi.h"
|
||||
#include "fmgr.h"
|
||||
#include "utils/array.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
#include "ts_cfg.h"
|
||||
#include "dict.h"
|
||||
#include "wparser.h"
|
||||
#include "snmap.h"
|
||||
#include "common.h"
|
||||
#include "tsvector.h"
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
#define IGNORE_LONGLEXEME 1
|
||||
|
||||
/*********top interface**********/
|
||||
|
||||
static Oid current_cfg_id = 0;
|
||||
|
||||
void
|
||||
init_cfg(Oid id, TSCfgInfo * cfg)
|
||||
{
|
||||
Oid arg[2];
|
||||
bool isnull;
|
||||
Datum pars[2];
|
||||
int stat,
|
||||
i,
|
||||
j;
|
||||
text *ptr;
|
||||
text *prsname = NULL;
|
||||
char *nsp = get_namespace(TSNSP_FunctionOid);
|
||||
char buf[1024];
|
||||
MemoryContext oldcontext;
|
||||
void *plan;
|
||||
|
||||
arg[0] = OIDOID;
|
||||
arg[1] = OIDOID;
|
||||
pars[0] = ObjectIdGetDatum(id);
|
||||
pars[1] = ObjectIdGetDatum(id);
|
||||
|
||||
memset(cfg, 0, sizeof(TSCfgInfo));
|
||||
SPI_connect();
|
||||
|
||||
sprintf(buf, "select prs_name from %s.pg_ts_cfg where oid = $1", nsp);
|
||||
plan = SPI_prepare(buf, 1, arg);
|
||||
if (!plan)
|
||||
ts_error(ERROR, "SPI_prepare() failed");
|
||||
|
||||
stat = SPI_execp(plan, pars, " ", 1);
|
||||
if (stat < 0)
|
||||
ts_error(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
{
|
||||
prsname = DatumGetTextP(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
|
||||
prsname = ptextdup(prsname);
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
|
||||
cfg->id = id;
|
||||
}
|
||||
else
|
||||
ts_error(ERROR, "No tsearch cfg with id %d", id);
|
||||
|
||||
SPI_freeplan(plan);
|
||||
|
||||
arg[0] = TEXTOID;
|
||||
sprintf(buf, "select lt.tokid, map.dict_name from %s.pg_ts_cfgmap as map, %s.pg_ts_cfg as cfg, %s.token_type( $1 ) as lt where lt.alias = map.tok_alias and map.ts_name = cfg.ts_name and cfg.oid= $2 order by lt.tokid desc;", nsp, nsp, nsp);
|
||||
plan = SPI_prepare(buf, 2, arg);
|
||||
if (!plan)
|
||||
ts_error(ERROR, "SPI_prepare() failed");
|
||||
|
||||
pars[0] = PointerGetDatum(prsname);
|
||||
stat = SPI_execp(plan, pars, " ", 0);
|
||||
if (stat < 0)
|
||||
ts_error(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed <= 0)
|
||||
ts_error(ERROR, "No parser with id %d", id);
|
||||
|
||||
for (i = 0; i < SPI_processed; i++)
|
||||
{
|
||||
int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
ArrayType *toasted_a = (ArrayType *) PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
|
||||
ArrayType *a;
|
||||
|
||||
if (!cfg->map)
|
||||
{
|
||||
cfg->len = lexid + 1;
|
||||
cfg->map = (ListDictionary *) malloc(sizeof(ListDictionary) * cfg->len);
|
||||
if (!cfg->map)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
memset(cfg->map, 0, sizeof(ListDictionary) * cfg->len);
|
||||
}
|
||||
|
||||
if (isnull)
|
||||
continue;
|
||||
|
||||
a = (ArrayType *) PointerGetDatum(PG_DETOAST_DATUM(DatumGetPointer(toasted_a)));
|
||||
|
||||
if (ARR_NDIM(a) != 1)
|
||||
ts_error(ERROR, "Wrong dimension");
|
||||
if (ARRNELEMS(a) < 1)
|
||||
continue;
|
||||
if (ARR_HASNULL(a))
|
||||
ts_error(ERROR, "Array must not contain nulls");
|
||||
|
||||
cfg->map[lexid].len = ARRNELEMS(a);
|
||||
cfg->map[lexid].dict_id = (Datum *) malloc(sizeof(Datum) * cfg->map[lexid].len);
|
||||
if (!cfg->map[lexid].dict_id)
|
||||
ts_error(ERROR, "No memory");
|
||||
|
||||
memset(cfg->map[lexid].dict_id, 0, sizeof(Datum) * cfg->map[lexid].len);
|
||||
ptr = (text *) ARR_DATA_PTR(a);
|
||||
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
|
||||
for (j = 0; j < cfg->map[lexid].len; j++)
|
||||
{
|
||||
cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
|
||||
ptr = NEXTVAL(ptr);
|
||||
}
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
|
||||
if (a != toasted_a)
|
||||
pfree(a);
|
||||
}
|
||||
|
||||
SPI_freeplan(plan);
|
||||
SPI_finish();
|
||||
cfg->prs_id = name2id_prs(prsname);
|
||||
pfree(prsname);
|
||||
pfree(nsp);
|
||||
for (i = 0; i < cfg->len; i++)
|
||||
{
|
||||
for (j = 0; j < cfg->map[i].len; j++)
|
||||
{
|
||||
ptr = (text *) DatumGetPointer(cfg->map[i].dict_id[j]);
|
||||
cfg->map[i].dict_id[j] = ObjectIdGetDatum(name2id_dict(ptr));
|
||||
pfree(ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
TSCfgInfo *last_cfg;
|
||||
int len;
|
||||
int reallen;
|
||||
TSCfgInfo *list;
|
||||
SNMap name2id_map;
|
||||
} CFGList;
|
||||
|
||||
static CFGList CList = {NULL, 0, 0, NULL, {0, 0, NULL}};
|
||||
|
||||
void
|
||||
reset_cfg(void)
|
||||
{
|
||||
freeSNMap(&(CList.name2id_map));
|
||||
if (CList.list)
|
||||
{
|
||||
int i,
|
||||
j;
|
||||
|
||||
for (i = 0; i < CList.len; i++)
|
||||
if (CList.list[i].map)
|
||||
{
|
||||
for (j = 0; j < CList.list[i].len; j++)
|
||||
if (CList.list[i].map[j].dict_id)
|
||||
free(CList.list[i].map[j].dict_id);
|
||||
free(CList.list[i].map);
|
||||
}
|
||||
free(CList.list);
|
||||
}
|
||||
memset(&CList, 0, sizeof(CFGList));
|
||||
}
|
||||
|
||||
static int
|
||||
comparecfg(const void *a, const void *b)
|
||||
{
|
||||
if (((TSCfgInfo *) a)->id == ((TSCfgInfo *) b)->id)
|
||||
return 0;
|
||||
return (((TSCfgInfo *) a)->id < ((TSCfgInfo *) b)->id) ? -1 : 1;
|
||||
}
|
||||
|
||||
TSCfgInfo *
|
||||
findcfg(Oid id)
|
||||
{
|
||||
/* last used cfg */
|
||||
if (CList.last_cfg && CList.last_cfg->id == id)
|
||||
return CList.last_cfg;
|
||||
|
||||
/* already used cfg */
|
||||
if (CList.len != 0)
|
||||
{
|
||||
TSCfgInfo key;
|
||||
|
||||
key.id = id;
|
||||
CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
|
||||
if (CList.last_cfg != NULL)
|
||||
return CList.last_cfg;
|
||||
}
|
||||
|
||||
/* last chance */
|
||||
if (CList.len == CList.reallen)
|
||||
{
|
||||
TSCfgInfo *tmp;
|
||||
int reallen = (CList.reallen) ? 2 * CList.reallen : 16;
|
||||
|
||||
tmp = (TSCfgInfo *) realloc(CList.list, sizeof(TSCfgInfo) * reallen);
|
||||
if (!tmp)
|
||||
ts_error(ERROR, "No memory");
|
||||
CList.reallen = reallen;
|
||||
CList.list = tmp;
|
||||
}
|
||||
init_cfg(id, &(CList.list[CList.len]) );
|
||||
CList.last_cfg = &(CList.list[CList.len]);
|
||||
CList.len++;
|
||||
qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
|
||||
return findcfg(id); /* qsort changed order!! */ ;
|
||||
}
|
||||
|
||||
|
||||
Oid
|
||||
name2id_cfg(text *name)
|
||||
{
|
||||
Oid arg[1];
|
||||
bool isnull;
|
||||
Datum pars[1];
|
||||
int stat;
|
||||
Oid id = findSNMap_t(&(CList.name2id_map), name);
|
||||
void *plan;
|
||||
char *nsp;
|
||||
char buf[1024];
|
||||
|
||||
arg[0] = TEXTOID;
|
||||
pars[0] = PointerGetDatum(name);
|
||||
|
||||
if (id)
|
||||
return id;
|
||||
|
||||
nsp = get_namespace(TSNSP_FunctionOid);
|
||||
SPI_connect();
|
||||
sprintf(buf, "select oid from %s.pg_ts_cfg where ts_name = $1", nsp);
|
||||
plan = SPI_prepare(buf, 1, arg);
|
||||
if (!plan)
|
||||
/* internal error */
|
||||
elog(ERROR, "SPI_prepare() failed");
|
||||
|
||||
stat = SPI_execp(plan, pars, " ", 1);
|
||||
if (stat < 0)
|
||||
/* internal error */
|
||||
elog(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
{
|
||||
id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
if (isnull)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("null id for tsearch config")));
|
||||
}
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("no tsearch config")));
|
||||
|
||||
SPI_freeplan(plan);
|
||||
SPI_finish();
|
||||
addSNMap_t(&(CList.name2id_map), name, id);
|
||||
return id;
|
||||
}
|
||||
|
||||
void
|
||||
parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
|
||||
{
|
||||
int type,
|
||||
lenlemm;
|
||||
char *lemm = NULL;
|
||||
WParserInfo *prsobj = findprs(cfg->prs_id);
|
||||
LexizeData ldata;
|
||||
TSLexeme *norms;
|
||||
|
||||
prsobj->prs = (void *) DatumGetPointer(
|
||||
FunctionCall2(
|
||||
&(prsobj->start_info),
|
||||
PointerGetDatum(buf),
|
||||
Int32GetDatum(buflen)
|
||||
)
|
||||
);
|
||||
|
||||
LexizeInit(&ldata, cfg);
|
||||
|
||||
do
|
||||
{
|
||||
type = DatumGetInt32(FunctionCall3(
|
||||
&(prsobj->getlexeme_info),
|
||||
PointerGetDatum(prsobj->prs),
|
||||
PointerGetDatum(&lemm),
|
||||
PointerGetDatum(&lenlemm)));
|
||||
|
||||
if (type > 0 && lenlemm >= MAXSTRLEN)
|
||||
{
|
||||
#ifdef IGNORE_LONGLEXEME
|
||||
ereport(NOTICE,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("A word you are indexing is too long. It will be ignored.")));
|
||||
continue;
|
||||
#else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("A word you are indexing is too long")));
|
||||
#endif
|
||||
}
|
||||
|
||||
LexizeAddLemm(&ldata, type, lemm, lenlemm);
|
||||
|
||||
while ((norms = LexizeExec(&ldata, NULL)) != NULL)
|
||||
{
|
||||
TSLexeme *ptr = norms;
|
||||
|
||||
prs->pos++; /* set pos */
|
||||
|
||||
while (ptr->lexeme)
|
||||
{
|
||||
if (prs->curwords == prs->lenwords)
|
||||
{
|
||||
prs->lenwords *= 2;
|
||||
prs->words = (TSWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(TSWORD));
|
||||
}
|
||||
|
||||
if (ptr->flags & TSL_ADDPOS)
|
||||
prs->pos++;
|
||||
prs->words[prs->curwords].len = strlen(ptr->lexeme);
|
||||
prs->words[prs->curwords].word = ptr->lexeme;
|
||||
prs->words[prs->curwords].nvariant = ptr->nvariant;
|
||||
prs->words[prs->curwords].alen = 0;
|
||||
prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
|
||||
ptr++;
|
||||
prs->curwords++;
|
||||
}
|
||||
pfree(norms);
|
||||
}
|
||||
} while (type > 0);
|
||||
|
||||
FunctionCall1(
|
||||
&(prsobj->end_info),
|
||||
PointerGetDatum(prsobj->prs)
|
||||
);
|
||||
}
|
||||
|
||||
static void
|
||||
hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type)
|
||||
{
|
||||
while (prs->curwords >= prs->lenwords)
|
||||
{
|
||||
prs->lenwords *= 2;
|
||||
prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
|
||||
}
|
||||
memset(&(prs->words[prs->curwords]), 0, sizeof(HLWORD));
|
||||
prs->words[prs->curwords].type = (uint8) type;
|
||||
prs->words[prs->curwords].len = buflen;
|
||||
prs->words[prs->curwords].word = palloc(buflen);
|
||||
memcpy(prs->words[prs->curwords].word, buf, buflen);
|
||||
prs->curwords++;
|
||||
}
|
||||
|
||||
static void
|
||||
hlfinditem(HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int buflen)
|
||||
{
|
||||
int i;
|
||||
ITEM *item = GETQUERY(query);
|
||||
HLWORD *word;
|
||||
|
||||
while (prs->curwords + query->size >= prs->lenwords)
|
||||
{
|
||||
prs->lenwords *= 2;
|
||||
prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
|
||||
}
|
||||
|
||||
word = &(prs->words[prs->curwords - 1]);
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0)
|
||||
{
|
||||
if (word->item)
|
||||
{
|
||||
memcpy(&(prs->words[prs->curwords]), word, sizeof(HLWORD));
|
||||
prs->words[prs->curwords].item = item;
|
||||
prs->words[prs->curwords].repeated = 1;
|
||||
prs->curwords++;
|
||||
}
|
||||
else
|
||||
word->item = item;
|
||||
}
|
||||
item++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
addHLParsedLex(HLPRSTEXT * prs, QUERYTYPE * query, ParsedLex * lexs, TSLexeme * norms)
|
||||
{
|
||||
ParsedLex *tmplexs;
|
||||
TSLexeme *ptr;
|
||||
|
||||
while (lexs)
|
||||
{
|
||||
|
||||
if (lexs->type > 0)
|
||||
hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
|
||||
|
||||
ptr = norms;
|
||||
while (ptr && ptr->lexeme)
|
||||
{
|
||||
hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme));
|
||||
ptr++;
|
||||
}
|
||||
|
||||
tmplexs = lexs->next;
|
||||
pfree(lexs);
|
||||
lexs = tmplexs;
|
||||
}
|
||||
|
||||
if (norms)
|
||||
{
|
||||
ptr = norms;
|
||||
while (ptr->lexeme)
|
||||
{
|
||||
pfree(ptr->lexeme);
|
||||
ptr++;
|
||||
}
|
||||
pfree(norms);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen)
|
||||
{
|
||||
int type,
|
||||
lenlemm;
|
||||
char *lemm = NULL;
|
||||
WParserInfo *prsobj = findprs(cfg->prs_id);
|
||||
LexizeData ldata;
|
||||
TSLexeme *norms;
|
||||
ParsedLex *lexs;
|
||||
|
||||
prsobj->prs = (void *) DatumGetPointer(
|
||||
FunctionCall2(
|
||||
&(prsobj->start_info),
|
||||
PointerGetDatum(buf),
|
||||
Int32GetDatum(buflen)
|
||||
)
|
||||
);
|
||||
|
||||
LexizeInit(&ldata, cfg);
|
||||
|
||||
do
|
||||
{
|
||||
type = DatumGetInt32(FunctionCall3(
|
||||
&(prsobj->getlexeme_info),
|
||||
PointerGetDatum(prsobj->prs),
|
||||
PointerGetDatum(&lemm),
|
||||
PointerGetDatum(&lenlemm)));
|
||||
|
||||
if (type > 0 && lenlemm >= MAXSTRLEN)
|
||||
{
|
||||
#ifdef IGNORE_LONGLEXEME
|
||||
ereport(NOTICE,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("A word you are indexing is too long. It will be ignored.")));
|
||||
continue;
|
||||
#else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("A word you are indexing is too long")));
|
||||
#endif
|
||||
}
|
||||
|
||||
LexizeAddLemm(&ldata, type, lemm, lenlemm);
|
||||
|
||||
do
|
||||
{
|
||||
if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
|
||||
addHLParsedLex(prs, query, lexs, norms);
|
||||
else
|
||||
addHLParsedLex(prs, query, lexs, NULL);
|
||||
} while (norms);
|
||||
|
||||
} while (type > 0);
|
||||
|
||||
FunctionCall1(
|
||||
&(prsobj->end_info),
|
||||
PointerGetDatum(prsobj->prs)
|
||||
);
|
||||
}
|
||||
|
||||
text *
|
||||
genhl(HLPRSTEXT * prs)
|
||||
{
|
||||
text *out;
|
||||
int len = 128;
|
||||
char *ptr;
|
||||
HLWORD *wrd = prs->words;
|
||||
|
||||
out = (text *) palloc(len);
|
||||
ptr = ((char *) out) + VARHDRSZ;
|
||||
|
||||
while (wrd - prs->words < prs->curwords)
|
||||
{
|
||||
while (wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char *) out)) >= len)
|
||||
{
|
||||
int dist = ptr - ((char *) out);
|
||||
|
||||
len *= 2;
|
||||
out = (text *) repalloc(out, len);
|
||||
ptr = ((char *) out) + dist;
|
||||
}
|
||||
|
||||
if (wrd->in && !wrd->repeated)
|
||||
{
|
||||
if (wrd->replace)
|
||||
{
|
||||
*ptr = ' ';
|
||||
ptr++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (wrd->selected)
|
||||
{
|
||||
memcpy(ptr, prs->startsel, prs->startsellen);
|
||||
ptr += prs->startsellen;
|
||||
}
|
||||
memcpy(ptr, wrd->word, wrd->len);
|
||||
ptr += wrd->len;
|
||||
if (wrd->selected)
|
||||
{
|
||||
memcpy(ptr, prs->stopsel, prs->stopsellen);
|
||||
ptr += prs->stopsellen;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!wrd->repeated)
|
||||
pfree(wrd->word);
|
||||
|
||||
wrd++;
|
||||
}
|
||||
|
||||
SET_VARSIZE(out, ptr - ((char *) out));
|
||||
return out;
|
||||
}
|
||||
|
||||
int
|
||||
get_currcfg(void)
|
||||
{
|
||||
Oid arg[1] = {TEXTOID};
|
||||
const char *curlocale;
|
||||
Datum pars[1];
|
||||
bool isnull;
|
||||
int stat;
|
||||
char buf[1024];
|
||||
char *nsp;
|
||||
void *plan;
|
||||
|
||||
if (current_cfg_id > 0)
|
||||
return current_cfg_id;
|
||||
|
||||
nsp = get_namespace(TSNSP_FunctionOid);
|
||||
SPI_connect();
|
||||
sprintf(buf, "select oid from %s.pg_ts_cfg where locale = $1 ", nsp);
|
||||
pfree(nsp);
|
||||
plan = SPI_prepare(buf, 1, arg);
|
||||
if (!plan)
|
||||
/* internal error */
|
||||
elog(ERROR, "SPI_prepare() failed");
|
||||
|
||||
curlocale = setlocale(LC_CTYPE, NULL);
|
||||
pars[0] = PointerGetDatum(char2text((char *) curlocale));
|
||||
stat = SPI_execp(plan, pars, " ", 1);
|
||||
|
||||
if (stat < 0)
|
||||
/* internal error */
|
||||
elog(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
current_cfg_id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("could not find tsearch config by locale")));
|
||||
|
||||
pfree(DatumGetPointer(pars[0]));
|
||||
SPI_freeplan(plan);
|
||||
SPI_finish();
|
||||
return current_cfg_id;
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_curcfg);
|
||||
Datum set_curcfg(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
set_curcfg(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SET_FUNCOID();
|
||||
findcfg(PG_GETARG_OID(0));
|
||||
current_cfg_id = PG_GETARG_OID(0);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_curcfg_byname);
|
||||
Datum set_curcfg_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
set_curcfg_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *name = PG_GETARG_TEXT_P(0);
|
||||
|
||||
SET_FUNCOID();
|
||||
DirectFunctionCall1(
|
||||
set_curcfg,
|
||||
ObjectIdGetDatum(name2id_cfg(name))
|
||||
);
|
||||
PG_FREE_IF_COPY(name, 0);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(show_curcfg);
|
||||
Datum show_curcfg(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
show_curcfg(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SET_FUNCOID();
|
||||
PG_RETURN_OID(get_currcfg());
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(reset_tsearch);
|
||||
Datum reset_tsearch(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
reset_tsearch(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SET_FUNCOID();
|
||||
ts_error(NOTICE, "TSearch cache cleaned");
|
||||
PG_RETURN_VOID();
|
||||
}
|
|
@ -1,78 +0,0 @@
|
|||
#ifndef __TS_CFG_H__
#define __TS_CFG_H__

#include "postgres.h"
#include "query.h"


/*
 * Dictionaries to try for one lexeme type: dict_id[0 .. len-1], each entry
 * holding a dictionary oid as a Datum.
 */
typedef struct
{
	int			len;
	Datum	   *dict_id;
} ListDictionary;

/*
 * One configuration: its own id, the id of its parser, and a map with one
 * ListDictionary per lexeme type (valid indexes are 0 .. len-1).
 */
typedef struct
{
	Oid			id;
	Oid			prs_id;
	int			len;
	ListDictionary *map;
} TSCfgInfo;

Oid			name2id_cfg(text *name);
TSCfgInfo  *findcfg(Oid id);
void		init_cfg(Oid id, TSCfgInfo * cfg);
void		reset_cfg(void);

/*
 * A parsed word.  NOTE(review): presumably pos.pos holds a single position
 * and pos.apos an array of positions sized by alen -- confirm in the parser
 * code, which is not part of this header.
 */
typedef struct
{
	uint16		len;
	uint16		nvariant;
	union
	{
		uint16		pos;
		uint16	   *apos;
	}			pos;
	char	   *word;
	uint32		alen;
} TSWORD;

/* Growable array of TSWORD (lenwords allocated, curwords used). */
typedef struct
{
	TSWORD	   *words;
	int4		lenwords;
	int4		curwords;
	int4		pos;
} PRSTEXT;

/*
 * One word of a headline.  The flags and the length share a single 32-bit
 * bit-field unit.
 */
typedef struct
{
	uint32		selected:1;		/* wrap in start/stop markers on output */
	uint32		in:1;			/* word belongs to the headline */
	uint32		replace:1;		/* emit a single space instead of the word */
	uint32		repeated:1;		/* word is neither emitted nor freed by genhl */
	uint32		unused:4;
	uint32		type:8;
	uint32		len:16;			/* length of word in bytes */
	char	   *word;
	ITEM	   *item;
} HLWORD;

/* Headline under construction: the words plus the selection markers. */
typedef struct
{
	HLWORD	   *words;
	int4		lenwords;
	int4		curwords;
	char	   *startsel;
	char	   *stopsel;
	int2		startsellen;
	int2		stopsellen;
} HLPRSTEXT;

void		hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen);
text	   *genhl(HLPRSTEXT * prs);

void		parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen);
int			get_currcfg(void);

#endif
|
|
@ -1,297 +0,0 @@
|
|||
/*
|
||||
* lexize stream of lexemes
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <locale.h>
|
||||
|
||||
#include "ts_cfg.h"
|
||||
#include "dict.h"
|
||||
|
||||
void
|
||||
LexizeInit(LexizeData * ld, TSCfgInfo * cfg)
|
||||
{
|
||||
ld->cfg = cfg;
|
||||
ld->curDictId = InvalidOid;
|
||||
ld->posDict = 0;
|
||||
ld->towork.head = ld->towork.tail = ld->curSub = NULL;
|
||||
ld->waste.head = ld->waste.tail = NULL;
|
||||
ld->lastRes = NULL;
|
||||
ld->tmpRes = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
LPLAddTail(ListParsedLex * list, ParsedLex * newpl)
|
||||
{
|
||||
if (list->tail)
|
||||
{
|
||||
list->tail->next = newpl;
|
||||
list->tail = newpl;
|
||||
}
|
||||
else
|
||||
list->head = list->tail = newpl;
|
||||
newpl->next = NULL;
|
||||
}
|
||||
|
||||
static ParsedLex *
|
||||
LPLRemoveHead(ListParsedLex * list)
|
||||
{
|
||||
ParsedLex *res = list->head;
|
||||
|
||||
if (list->head)
|
||||
list->head = list->head->next;
|
||||
|
||||
if (list->head == NULL)
|
||||
list->tail = NULL;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm)
|
||||
{
|
||||
ParsedLex *newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
|
||||
|
||||
newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
|
||||
newpl->type = type;
|
||||
newpl->lemm = lemm;
|
||||
newpl->lenlemm = lenlemm;
|
||||
LPLAddTail(&ld->towork, newpl);
|
||||
ld->curSub = ld->towork.tail;
|
||||
}
|
||||
|
||||
static void
|
||||
RemoveHead(LexizeData * ld)
|
||||
{
|
||||
LPLAddTail(&ld->waste, LPLRemoveHead(&ld->towork));
|
||||
|
||||
ld->posDict = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
setCorrLex(LexizeData * ld, ParsedLex ** correspondLexem)
|
||||
{
|
||||
if (correspondLexem)
|
||||
{
|
||||
*correspondLexem = ld->waste.head;
|
||||
}
|
||||
else
|
||||
{
|
||||
ParsedLex *tmp,
|
||||
*ptr = ld->waste.head;
|
||||
|
||||
while (ptr)
|
||||
{
|
||||
tmp = ptr->next;
|
||||
pfree(ptr);
|
||||
ptr = tmp;
|
||||
}
|
||||
}
|
||||
ld->waste.head = ld->waste.tail = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
moveToWaste(LexizeData * ld, ParsedLex * stop)
|
||||
{
|
||||
bool go = true;
|
||||
|
||||
while (ld->towork.head && go)
|
||||
{
|
||||
if (ld->towork.head == stop)
|
||||
{
|
||||
ld->curSub = stop->next;
|
||||
go = false;
|
||||
}
|
||||
RemoveHead(ld);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
setNewTmpRes(LexizeData * ld, ParsedLex * lex, TSLexeme * res)
|
||||
{
|
||||
if (ld->tmpRes)
|
||||
{
|
||||
TSLexeme *ptr;
|
||||
|
||||
for (ptr = ld->tmpRes; ptr->lexeme; ptr++)
|
||||
pfree(ptr->lexeme);
|
||||
pfree(ld->tmpRes);
|
||||
}
|
||||
ld->tmpRes = res;
|
||||
ld->lastRes = lex;
|
||||
}
|
||||
|
||||
TSLexeme *
|
||||
LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
|
||||
{
|
||||
int i;
|
||||
ListDictionary *map;
|
||||
DictInfo *dict;
|
||||
TSLexeme *res;
|
||||
|
||||
if (ld->curDictId == InvalidOid)
|
||||
{
|
||||
/*
|
||||
* usial mode: dictionary wants only one word, but we should keep in
|
||||
* mind that we should go through all stack
|
||||
*/
|
||||
|
||||
while (ld->towork.head)
|
||||
{
|
||||
ParsedLex *curVal = ld->towork.head;
|
||||
|
||||
map = ld->cfg->map + curVal->type;
|
||||
|
||||
if (curVal->type == 0 || curVal->type >= ld->cfg->len || map->len == 0)
|
||||
{
|
||||
/* skip this type of lexeme */
|
||||
RemoveHead(ld);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (i = ld->posDict; i < map->len; i++)
|
||||
{
|
||||
dict = finddict(DatumGetObjectId(map->dict_id[i]));
|
||||
|
||||
ld->dictState.isend = ld->dictState.getnext = false;
|
||||
ld->dictState.private = NULL;
|
||||
res = (TSLexeme *) DatumGetPointer(FunctionCall4(
|
||||
&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(curVal->lemm),
|
||||
Int32GetDatum(curVal->lenlemm),
|
||||
PointerGetDatum(&ld->dictState)
|
||||
));
|
||||
|
||||
if (ld->dictState.getnext)
|
||||
{
|
||||
/*
|
||||
* dictinary wants next word, so setup and store current
|
||||
* position and go to multiword mode
|
||||
*/
|
||||
|
||||
ld->curDictId = DatumGetObjectId(map->dict_id[i]);
|
||||
ld->posDict = i + 1;
|
||||
ld->curSub = curVal->next;
|
||||
if (res)
|
||||
setNewTmpRes(ld, curVal, res);
|
||||
return LexizeExec(ld, correspondLexem);
|
||||
}
|
||||
|
||||
if (!res) /* dictionary doesn't know this lexeme */
|
||||
continue;
|
||||
|
||||
RemoveHead(ld);
|
||||
setCorrLex(ld, correspondLexem);
|
||||
return res;
|
||||
}
|
||||
|
||||
RemoveHead(ld);
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* curDictId is valid */
|
||||
dict = finddict(ld->curDictId);
|
||||
|
||||
/*
|
||||
* Dictionary ld->curDictId asks us about following words
|
||||
*/
|
||||
|
||||
while (ld->curSub)
|
||||
{
|
||||
ParsedLex *curVal = ld->curSub;
|
||||
|
||||
map = ld->cfg->map + curVal->type;
|
||||
|
||||
if (curVal->type != 0)
|
||||
{
|
||||
bool dictExists = false;
|
||||
|
||||
if (curVal->type >= ld->cfg->len || map->len == 0)
|
||||
{
|
||||
/* skip this type of lexeme */
|
||||
ld->curSub = curVal->next;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* We should be sure that current type of lexeme is recognized
|
||||
* by our dictinonary: we just check is it exist in list of
|
||||
* dictionaries ?
|
||||
*/
|
||||
for (i = 0; i < map->len && !dictExists; i++)
|
||||
if (ld->curDictId == DatumGetObjectId(map->dict_id[i]))
|
||||
dictExists = true;
|
||||
|
||||
if (!dictExists)
|
||||
{
|
||||
/*
|
||||
* Dictionary can't work with current tpe of lexeme,
|
||||
* return to basic mode and redo all stored lexemes
|
||||
*/
|
||||
ld->curDictId = InvalidOid;
|
||||
return LexizeExec(ld, correspondLexem);
|
||||
}
|
||||
}
|
||||
|
||||
ld->dictState.isend = (curVal->type == 0) ? true : false;
|
||||
ld->dictState.getnext = false;
|
||||
|
||||
res = (TSLexeme *) DatumGetPointer(FunctionCall4(
|
||||
&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(curVal->lemm),
|
||||
Int32GetDatum(curVal->lenlemm),
|
||||
PointerGetDatum(&ld->dictState)
|
||||
));
|
||||
|
||||
if (ld->dictState.getnext)
|
||||
{
|
||||
/* Dictionary wants one more */
|
||||
ld->curSub = curVal->next;
|
||||
if (res)
|
||||
setNewTmpRes(ld, curVal, res);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (res || ld->tmpRes)
|
||||
{
|
||||
/*
|
||||
* Dictionary normalizes lexemes, so we remove from stack all
|
||||
* used lexemes , return to basic mode and redo end of stack
|
||||
* (if it exists)
|
||||
*/
|
||||
if (res)
|
||||
{
|
||||
moveToWaste(ld, ld->curSub);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = ld->tmpRes;
|
||||
moveToWaste(ld, ld->lastRes);
|
||||
}
|
||||
|
||||
/* reset to initial state */
|
||||
ld->curDictId = InvalidOid;
|
||||
ld->posDict = 0;
|
||||
ld->lastRes = NULL;
|
||||
ld->tmpRes = NULL;
|
||||
setCorrLex(ld, correspondLexem);
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* Dict don't want next lexem and didn't recognize anything, redo
|
||||
* from ld->towork.head
|
||||
*/
|
||||
ld->curDictId = InvalidOid;
|
||||
return LexizeExec(ld, correspondLexem);
|
||||
}
|
||||
}
|
||||
|
||||
setCorrLex(ld, correspondLexem);
|
||||
return NULL;
|
||||
}
|
|
@ -1,191 +0,0 @@
|
|||
#include "ts_locale.h"
|
||||
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/pg_locale.h"
|
||||
#include "mb/pg_wchar.h"
|
||||
|
||||
|
||||
#ifdef TS_USE_WIDE
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
size_t
|
||||
wchar2char(char *to, const wchar_t *from, size_t len)
|
||||
{
|
||||
if (len == 0)
|
||||
return 0;
|
||||
|
||||
if (GetDatabaseEncoding() == PG_UTF8)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, len,
|
||||
NULL, NULL);
|
||||
|
||||
if (r == 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
||||
errmsg("UTF-16 to UTF-8 translation failed: %lu",
|
||||
GetLastError())));
|
||||
Assert(r <= len);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
return wcstombs(to, from, len);
|
||||
}
|
||||
#endif /* WIN32 */
|
||||
|
||||
size_t
|
||||
char2wchar(wchar_t *to, const char *from, size_t len)
|
||||
{
|
||||
if (len == 0)
|
||||
return 0;
|
||||
|
||||
#ifdef WIN32
|
||||
if (GetDatabaseEncoding() == PG_UTF8)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = MultiByteToWideChar(CP_UTF8, 0, from, len, to, len);
|
||||
|
||||
if (!r)
|
||||
{
|
||||
pg_verifymbstr(from, strlen(from), false);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
||||
errmsg("invalid multibyte character for locale"),
|
||||
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
|
||||
}
|
||||
|
||||
Assert(r <= len);
|
||||
|
||||
return r;
|
||||
}
|
||||
else
|
||||
#endif /* WIN32 */
|
||||
if ( lc_ctype_is_c() )
|
||||
{
|
||||
/*
|
||||
* pg_mb2wchar_with_len always adds trailing '\0', so
|
||||
* 'to' should be allocated with sufficient space
|
||||
*/
|
||||
return pg_mb2wchar_with_len(from, (pg_wchar *)to, len);
|
||||
}
|
||||
|
||||
return mbstowcs(to, from, len);
|
||||
}
|
||||
|
||||
/*
 * _t_isalpha
 *		Alphabetic test for the character at ptr.  In the C locale the first
 *		byte is classified with isalpha(); otherwise one character is
 *		converted with char2wchar() and classified with iswalpha().
 */
int
_t_isalpha(const char *ptr)
{
	wchar_t		wide[2];

	if (!lc_ctype_is_c())
	{
		char2wchar(wide, ptr, 1);
		return iswalpha((wint_t) wide[0]);
	}

	return isalpha(TOUCHAR(ptr));
}
|
||||
|
||||
/*
 * _t_isprint
 *		Printable test for the character at ptr.  In the C locale the first
 *		byte is classified with isprint(); otherwise one character is
 *		converted with char2wchar() and classified with iswprint().
 */
int
_t_isprint(const char *ptr)
{
	wchar_t		wide[2];

	if (!lc_ctype_is_c())
	{
		char2wchar(wide, ptr, 1);
		return iswprint((wint_t) wide[0]);
	}

	return isprint(TOUCHAR(ptr));
}
|
||||
#endif /* TS_USE_WIDE */
|
||||
|
||||
/*
 * lowerstr
 *		Return a newly palloc'd, lower-cased copy of the C string str.
 *
 * With TS_USE_WIDE, a multibyte database encoding and a non-C LC_CTYPE the
 * string is converted to wide characters, folded with towlower() and
 * converted back; otherwise it is folded byte by byte with tolower().
 * An empty input yields a fresh empty string.
 */
char *
lowerstr(char *str)
{
	int			nbytes = strlen(str);
	char	   *result;

	if (nbytes == 0)
		return pstrdup("");

#ifdef TS_USE_WIDE

	/*
	 * Use wide char code only when max encoding length > 1 and ctype != C.
	 * Some operating systems fail with multi-byte encodings and a C locale.
	 * Also, for a C locale there is no need to process as multibyte.  (Taken
	 * from backend/utils/adt/oracle_compat.c.)
	 */
	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
	{
		wchar_t    *wide;
		int			converted;
		int			i;

		/*
		 * Worst case: one wchar_t per input byte, plus one for the
		 * terminating zero that wchar2char (really wcstombs) requires.
		 */
		wide = (wchar_t *) palloc(sizeof(wchar_t) * (nbytes + 1));

		/* str must be a C string; "converted" is a character count */
		converted = char2wchar(wide, str, nbytes);
		if (converted < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("translation failed from server encoding to wchar_t")));

		Assert(converted <= nbytes);
		wide[converted] = 0;

		for (i = 0; wide[i] != 0; i++)
			wide[i] = towlower((wint_t) wide[i]);

		/* result buffer for the worst case, including the trailing '\0' */
		nbytes = sizeof(char) * pg_database_encoding_max_length() * (converted + 1);
		result = (char *) palloc(nbytes);

		/* from here on "converted" is a byte count */
		converted = wchar2char(result, wide, nbytes);
		pfree(wide);

		if (converted < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("translation failed from wchar_t to server encoding %d", errno)));
		Assert(converted <= nbytes);
		result[converted] = '\0';
	}
	else
#endif
	{
		char	   *src;
		char	   *dst;

		result = (char *) palloc(sizeof(char) * (nbytes + 1));

		for (src = str, dst = result; *src; src++, dst++)
			*dst = tolower(*(unsigned char *) src);
		*dst = '\0';
	}

	return result;
}
|
|
@ -1,79 +0,0 @@
|
|||
#ifndef __TSLOCALE_H__
#define __TSLOCALE_H__

#include "postgres.h"
#include "utils/pg_locale.h"
#include "mb/pg_wchar.h"

#include <ctype.h>
#include <limits.h>

/*
 * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
 * declare them in <wchar.h>.
 */
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif
#ifdef HAVE_WCTYPE_H
#include <wctype.h>
#endif

/* Wide-character support is used only when both functions are available. */
#if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
#define TS_USE_WIDE
#endif

/* Read or write the byte at x as an unsigned char. */
#define TOUCHAR(x)	(*((unsigned char*)(x)))

#ifdef TS_USE_WIDE

size_t		char2wchar(wchar_t *to, const char *from, size_t len);

#ifdef WIN32

size_t		wchar2char(char *to, const wchar_t *from, size_t len);

#else							/* WIN32 */

/* correct wcstombs */
#define wchar2char wcstombs

#endif   /* WIN32 */

/*
 * Character-class tests on the (possibly multibyte) character at x.
 * Multibyte characters are never digits or spaces; alpha and print tests on
 * them are delegated to the wide-character helpers.
 */
#define t_isdigit(x)	( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) )
#define t_isspace(x)	( pg_mblen(x)==1 && isspace( TOUCHAR(x) ) )

extern int	_t_isalpha(const char *ptr);
#define t_isalpha(x)	( (pg_mblen(x)==1) ? isalpha( TOUCHAR(x) ) : _t_isalpha(x) )

extern int	_t_isprint(const char *ptr);
#define t_isprint(x)	( (pg_mblen(x)==1) ? isprint( TOUCHAR(x) ) : _t_isprint(x) )

/*
 * t_iseq() should be called only for ASCII symbols
 */
#define t_iseq(x,c)		( (pg_mblen(x)==1) ? ( TOUCHAR(x) == ((unsigned char)(c)) ) : false )

/* Copy one whole (possibly multibyte) character from s to d. */
#define COPYCHAR(d,s)	do {					\
	int lll = pg_mblen( s );					\
												\
	while( lll-- )								\
		TOUCHAR((d)+lll) = TOUCHAR((s)+lll);	\
} while(0)

#else							/* not def TS_USE_WIDE */

/* Single-byte fallbacks: classify or copy exactly one byte. */
#define t_isdigit(x)	isdigit( TOUCHAR(x) )
#define t_isspace(x)	isspace( TOUCHAR(x) )
#define t_isalpha(x)	isalpha( TOUCHAR(x) )
#define t_isprint(x)	isprint( TOUCHAR(x) )
#define t_iseq(x,c)		( TOUCHAR(x) == ((unsigned char)(c)) )

#define COPYCHAR(d,s)	TOUCHAR(d) = TOUCHAR(s)

#endif

char	   *lowerstr(char *str);

#endif   /* __TSLOCALE_H__ */
|
|
@ -1,567 +0,0 @@
|
|||
/*
|
||||
* stat functions
|
||||
*/
|
||||
|
||||
#include "tsvector.h"
|
||||
#include "ts_stat.h"
|
||||
#include "funcapi.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "executor/spi.h"
|
||||
#include "common.h"
|
||||
#include "ts_locale.h"
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsstat_in);
|
||||
Datum tsstat_in(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
tsstat_in(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsstat *stat = palloc(STATHDRSIZE);
|
||||
|
||||
SET_VARSIZE(stat, STATHDRSIZE);
|
||||
stat->size = 0;
|
||||
stat->weight = 0;
|
||||
PG_RETURN_POINTER(stat);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsstat_out);
|
||||
Datum tsstat_out(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
tsstat_out(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("tsstat_out not implemented")));
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
static int
|
||||
check_weight(tsvector * txt, WordEntry * wptr, int8 weight)
|
||||
{
|
||||
int len = POSDATALEN(txt, wptr);
|
||||
int num = 0;
|
||||
WordEntryPos *ptr = POSDATAPTR(txt, wptr);
|
||||
|
||||
while (len--)
|
||||
{
|
||||
if (weight & (1 << WEP_GETWEIGHT(*ptr)))
|
||||
num++;
|
||||
ptr++;
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
||||
static WordEntry **
|
||||
SEI_realloc(WordEntry ** in, uint32 *len)
|
||||
{
|
||||
if (*len == 0 || in == NULL)
|
||||
{
|
||||
*len = 8;
|
||||
in = palloc(sizeof(WordEntry *) * (*len));
|
||||
}
|
||||
else
|
||||
{
|
||||
*len *= 2;
|
||||
in = repalloc(in, sizeof(WordEntry *) * (*len));
|
||||
}
|
||||
return in;
|
||||
}
|
||||
|
||||
static int
|
||||
compareStatWord(StatEntry * a, WordEntry * b, tsstat * stat, tsvector * txt)
|
||||
{
|
||||
if (a->len == b->len)
|
||||
return strncmp(
|
||||
STATSTRPTR(stat) + a->pos,
|
||||
STRPTR(txt) + b->pos,
|
||||
a->len
|
||||
);
|
||||
return (a->len > b->len) ? 1 : -1;
|
||||
}
|
||||
|
||||
static tsstat *
|
||||
formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
|
||||
{
|
||||
tsstat *newstat;
|
||||
uint32 totallen,
|
||||
nentry;
|
||||
uint32 slen = 0;
|
||||
WordEntry **ptr = entry;
|
||||
char *curptr;
|
||||
StatEntry *sptr,
|
||||
*nptr;
|
||||
|
||||
while (ptr - entry < len)
|
||||
{
|
||||
slen += (*ptr)->len;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
nentry = stat->size + len;
|
||||
slen += STATSTRSIZE(stat);
|
||||
totallen = CALCSTATSIZE(nentry, slen);
|
||||
newstat = palloc(totallen);
|
||||
SET_VARSIZE(newstat, totallen);
|
||||
newstat->weight = stat->weight;
|
||||
newstat->size = nentry;
|
||||
|
||||
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
|
||||
curptr = STATSTRPTR(newstat) + STATSTRSIZE(stat);
|
||||
|
||||
ptr = entry;
|
||||
sptr = STATPTR(stat);
|
||||
nptr = STATPTR(newstat);
|
||||
|
||||
if (len == 1)
|
||||
{
|
||||
StatEntry *StopLow = STATPTR(stat);
|
||||
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
|
||||
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
sptr = StopLow + (StopHigh - StopLow) / 2;
|
||||
if (compareStatWord(sptr, *ptr, stat, txt) < 0)
|
||||
StopLow = sptr + 1;
|
||||
else
|
||||
StopHigh = sptr;
|
||||
}
|
||||
nptr = STATPTR(newstat) + (StopLow - STATPTR(stat));
|
||||
memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat)));
|
||||
if ((*ptr)->haspos)
|
||||
nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
||||
else
|
||||
nptr->nentry = 1;
|
||||
nptr->ndoc = 1;
|
||||
nptr->len = (*ptr)->len;
|
||||
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
||||
nptr->pos = curptr - STATSTRPTR(newstat);
|
||||
memcpy(nptr + 1, StopLow, sizeof(StatEntry) * (((StatEntry *) STATSTRPTR(stat)) - StopLow));
|
||||
}
|
||||
else
|
||||
{
|
||||
while (sptr - STATPTR(stat) < stat->size && ptr - entry < len)
|
||||
{
|
||||
if (compareStatWord(sptr, *ptr, stat, txt) < 0)
|
||||
{
|
||||
memcpy(nptr, sptr, sizeof(StatEntry));
|
||||
sptr++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((*ptr)->haspos)
|
||||
nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
||||
else
|
||||
nptr->nentry = 1;
|
||||
nptr->ndoc = 1;
|
||||
nptr->len = (*ptr)->len;
|
||||
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
||||
nptr->pos = curptr - STATSTRPTR(newstat);
|
||||
curptr += nptr->len;
|
||||
ptr++;
|
||||
}
|
||||
nptr++;
|
||||
}
|
||||
|
||||
memcpy(nptr, sptr, sizeof(StatEntry) * (stat->size - (sptr - STATPTR(stat))));
|
||||
|
||||
while (ptr - entry < len)
|
||||
{
|
||||
if ((*ptr)->haspos)
|
||||
nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
||||
else
|
||||
nptr->nentry = 1;
|
||||
nptr->ndoc = 1;
|
||||
nptr->len = (*ptr)->len;
|
||||
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
||||
nptr->pos = curptr - STATSTRPTR(newstat);
|
||||
curptr += nptr->len;
|
||||
ptr++;
|
||||
nptr++;
|
||||
}
|
||||
}
|
||||
|
||||
return newstat;
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(ts_accum);
|
||||
Datum ts_accum(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
ts_accum(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsstat *newstat,
|
||||
*stat = (tsstat *) PG_GETARG_POINTER(0);
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
WordEntry **newentry = NULL;
|
||||
uint32 len = 0,
|
||||
cur = 0;
|
||||
StatEntry *sptr;
|
||||
WordEntry *wptr;
|
||||
int n = 0;
|
||||
|
||||
if (stat == NULL || PG_ARGISNULL(0))
|
||||
{ /* Init in first */
|
||||
stat = palloc(STATHDRSIZE);
|
||||
SET_VARSIZE(stat, STATHDRSIZE);
|
||||
stat->size = 0;
|
||||
stat->weight = 0;
|
||||
}
|
||||
|
||||
/* simple check of correctness */
|
||||
if (txt == NULL || PG_ARGISNULL(1) || txt->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_RETURN_POINTER(stat);
|
||||
}
|
||||
|
||||
sptr = STATPTR(stat);
|
||||
wptr = ARRPTR(txt);
|
||||
|
||||
if (stat->size < 100 * txt->size)
|
||||
{ /* merge */
|
||||
while (sptr - STATPTR(stat) < stat->size && wptr - ARRPTR(txt) < txt->size)
|
||||
{
|
||||
int cmp = compareStatWord(sptr, wptr, stat, txt);
|
||||
|
||||
if (cmp < 0)
|
||||
sptr++;
|
||||
else if (cmp == 0)
|
||||
{
|
||||
if (stat->weight == 0)
|
||||
{
|
||||
sptr->ndoc++;
|
||||
sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
|
||||
}
|
||||
else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0)
|
||||
{
|
||||
sptr->ndoc++;
|
||||
sptr->nentry += n;
|
||||
}
|
||||
sptr++;
|
||||
wptr++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
|
||||
{
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
cur++;
|
||||
}
|
||||
wptr++;
|
||||
}
|
||||
}
|
||||
|
||||
while (wptr - ARRPTR(txt) < txt->size)
|
||||
{
|
||||
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
|
||||
{
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
cur++;
|
||||
}
|
||||
wptr++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* search */
|
||||
while (wptr - ARRPTR(txt) < txt->size)
|
||||
{
|
||||
StatEntry *StopLow = STATPTR(stat);
|
||||
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
|
||||
int cmp;
|
||||
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
sptr = StopLow + (StopHigh - StopLow) / 2;
|
||||
cmp = compareStatWord(sptr, wptr, stat, txt);
|
||||
if (cmp == 0)
|
||||
{
|
||||
if (stat->weight == 0)
|
||||
{
|
||||
sptr->ndoc++;
|
||||
sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
|
||||
}
|
||||
else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0)
|
||||
{
|
||||
sptr->ndoc++;
|
||||
sptr->nentry += n;
|
||||
}
|
||||
break;
|
||||
}
|
||||
else if (cmp < 0)
|
||||
StopLow = sptr + 1;
|
||||
else
|
||||
StopHigh = sptr;
|
||||
}
|
||||
|
||||
if (StopLow >= StopHigh)
|
||||
{ /* not found */
|
||||
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
|
||||
{
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
wptr++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (cur == 0)
|
||||
{ /* no new words */
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_RETURN_POINTER(stat);
|
||||
}
|
||||
|
||||
newstat = formstat(stat, txt, newentry, cur);
|
||||
pfree(newentry);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
/* pfree(stat); */
|
||||
|
||||
PG_RETURN_POINTER(newstat);
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32 cur;
|
||||
tsvector *stat;
|
||||
} StatStorage;
|
||||
|
||||
static void
|
||||
ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
|
||||
tsstat * stat)
|
||||
{
|
||||
TupleDesc tupdesc;
|
||||
MemoryContext oldcontext;
|
||||
StatStorage *st;
|
||||
|
||||
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
||||
st = palloc(sizeof(StatStorage));
|
||||
st->cur = 0;
|
||||
st->stat = palloc(VARSIZE(stat));
|
||||
memcpy(st->stat, stat, VARSIZE(stat));
|
||||
funcctx->user_fctx = (void *) st;
|
||||
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
|
||||
elog(ERROR, "return type must be a row type");
|
||||
tupdesc = CreateTupleDescCopy(tupdesc);
|
||||
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
}
|
||||
|
||||
|
||||
static Datum
|
||||
ts_process_call(FuncCallContext *funcctx)
|
||||
{
|
||||
StatStorage *st;
|
||||
|
||||
st = (StatStorage *) funcctx->user_fctx;
|
||||
|
||||
if (st->cur < st->stat->size)
|
||||
{
|
||||
Datum result;
|
||||
char *values[3];
|
||||
char ndoc[16];
|
||||
char nentry[16];
|
||||
StatEntry *entry = STATPTR(st->stat) + st->cur;
|
||||
HeapTuple tuple;
|
||||
|
||||
values[1] = ndoc;
|
||||
sprintf(ndoc, "%d", entry->ndoc);
|
||||
values[2] = nentry;
|
||||
sprintf(nentry, "%d", entry->nentry);
|
||||
values[0] = palloc(entry->len + 1);
|
||||
memcpy(values[0], STATSTRPTR(st->stat) + entry->pos, entry->len);
|
||||
(values[0])[entry->len] = '\0';
|
||||
|
||||
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
|
||||
result = HeapTupleGetDatum(tuple);
|
||||
|
||||
pfree(values[0]);
|
||||
st->cur++;
|
||||
return result;
|
||||
}
|
||||
else
|
||||
{
|
||||
pfree(st->stat);
|
||||
pfree(st);
|
||||
}
|
||||
|
||||
return (Datum) 0;
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(ts_accum_finish);
|
||||
Datum ts_accum_finish(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
ts_accum_finish(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
ts_setup_firstcall(fcinfo, funcctx, (tsstat *) PG_GETARG_POINTER(0));
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
if ((result = ts_process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
static Oid tiOid = InvalidOid;
|
||||
|
||||
static void
|
||||
get_ti_Oid(void)
|
||||
{
|
||||
int ret;
|
||||
bool isnull;
|
||||
|
||||
if ((ret = SPI_exec("select oid from pg_type where typname='tsvector'", 1)) < 0)
|
||||
/* internal error */
|
||||
elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
|
||||
|
||||
if (SPI_processed < 1)
|
||||
/* internal error */
|
||||
elog(ERROR, "there is no tsvector type");
|
||||
tiOid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
if (tiOid == InvalidOid)
|
||||
/* internal error */
|
||||
elog(ERROR, "tsvector type has InvalidOid");
|
||||
}
|
||||
|
||||
static tsstat *
|
||||
ts_stat_sql(text *txt, text *ws)
|
||||
{
|
||||
char *query = text2char(txt);
|
||||
int i;
|
||||
tsstat *newstat,
|
||||
*stat;
|
||||
bool isnull;
|
||||
Portal portal;
|
||||
void *plan;
|
||||
|
||||
if (tiOid == InvalidOid)
|
||||
get_ti_Oid();
|
||||
|
||||
if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
|
||||
/* internal error */
|
||||
elog(ERROR, "SPI_prepare('%s') returns NULL", query);
|
||||
|
||||
if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL)
|
||||
/* internal error */
|
||||
elog(ERROR, "SPI_cursor_open('%s') returns NULL", query);
|
||||
|
||||
SPI_cursor_fetch(portal, true, 100);
|
||||
|
||||
if (SPI_tuptable->tupdesc->natts != 1)
|
||||
/* internal error */
|
||||
elog(ERROR, "number of fields doesn't equal to 1");
|
||||
|
||||
if (SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid)
|
||||
/* internal error */
|
||||
elog(ERROR, "column isn't of tsvector type");
|
||||
|
||||
stat = palloc(STATHDRSIZE);
|
||||
SET_VARSIZE(stat, STATHDRSIZE);
|
||||
stat->size = 0;
|
||||
stat->weight = 0;
|
||||
|
||||
if (ws)
|
||||
{
|
||||
char *buf;
|
||||
|
||||
buf = VARDATA(ws);
|
||||
while (buf - VARDATA(ws) < VARSIZE(ws) - VARHDRSZ)
|
||||
{
|
||||
if (pg_mblen(buf) == 1)
|
||||
{
|
||||
switch (*buf)
|
||||
{
|
||||
case 'A':
|
||||
case 'a':
|
||||
stat->weight |= 1 << 3;
|
||||
break;
|
||||
case 'B':
|
||||
case 'b':
|
||||
stat->weight |= 1 << 2;
|
||||
break;
|
||||
case 'C':
|
||||
case 'c':
|
||||
stat->weight |= 1 << 1;
|
||||
break;
|
||||
case 'D':
|
||||
case 'd':
|
||||
stat->weight |= 1;
|
||||
break;
|
||||
default:
|
||||
stat->weight |= 0;
|
||||
}
|
||||
}
|
||||
buf += pg_mblen(buf);
|
||||
}
|
||||
}
|
||||
|
||||
while (SPI_processed > 0)
|
||||
{
|
||||
for (i = 0; i < SPI_processed; i++)
|
||||
{
|
||||
Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
|
||||
|
||||
if (!isnull)
|
||||
{
|
||||
newstat = (tsstat *) DatumGetPointer(DirectFunctionCall2(
|
||||
ts_accum,
|
||||
PointerGetDatum(stat),
|
||||
data
|
||||
));
|
||||
if (stat != newstat && stat)
|
||||
pfree(stat);
|
||||
stat = newstat;
|
||||
}
|
||||
}
|
||||
|
||||
SPI_freetuptable(SPI_tuptable);
|
||||
SPI_cursor_fetch(portal, true, 100);
|
||||
}
|
||||
|
||||
SPI_freetuptable(SPI_tuptable);
|
||||
SPI_cursor_close(portal);
|
||||
SPI_freeplan(plan);
|
||||
pfree(query);
|
||||
|
||||
return stat;
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(ts_stat);
|
||||
Datum ts_stat(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
ts_stat(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
tsstat *stat;
|
||||
text *txt = PG_GETARG_TEXT_P(0);
|
||||
text *ws = (PG_NARGS() > 1) ? PG_GETARG_TEXT_P(1) : NULL;
|
||||
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
SPI_connect();
|
||||
stat = ts_stat_sql(txt, ws);
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
if (PG_NARGS() > 1)
|
||||
PG_FREE_IF_COPY(ws, 1);
|
||||
ts_setup_firstcall(fcinfo, funcctx, stat);
|
||||
SPI_finish();
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
if ((result = ts_process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
#ifndef __TXTIDX_STAT_H__
|
||||
#define __TXTIDX_STAT_H__
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/gist.h"
|
||||
#include "access/itup.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "tsvector.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32 len;
|
||||
uint32 pos;
|
||||
uint32 ndoc;
|
||||
uint32 nentry;
|
||||
} StatEntry;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int32 vl_len_; /* varlena header (do not touch directly!) */
|
||||
int4 size;
|
||||
int4 weight;
|
||||
char data[1];
|
||||
} tsstat;
|
||||
|
||||
#define STATHDRSIZE (sizeof(int4) * 4)
|
||||
#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
|
||||
#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
|
||||
#define STATSTRPTR(x) ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsvector*)(x))->size ) )
|
||||
#define STATSTRSIZE(x) ( VARSIZE((tsvector*)(x)) - STATHDRSIZE - ( sizeof(StatEntry) * ((tsvector*)(x))->size ) )
|
||||
|
||||
#endif
|
|
@ -0,0 +1,441 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* tsearch2.c
|
||||
* Backwards-compatibility package for old contrib/tsearch2 API
|
||||
*
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/contrib/tsearch2/tsearch2.c,v 1.1 2007/11/13 21:02:29 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "catalog/namespace.h"
|
||||
#include "commands/trigger.h"
|
||||
#include "fmgr.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/guc.h"
|
||||
#include "utils/syscache.h"
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
static Oid current_dictionary_oid = InvalidOid;
|
||||
static Oid current_parser_oid = InvalidOid;
|
||||
|
||||
/* insert given value at argument position 0 */
|
||||
#define INSERT_ARGUMENT0(argument, isnull) \
|
||||
do { \
|
||||
int i; \
|
||||
for (i = fcinfo->nargs; i > 0; i--) \
|
||||
{ \
|
||||
fcinfo->arg[i] = fcinfo->arg[i-1]; \
|
||||
fcinfo->argnull[i] = fcinfo->argnull[i-1]; \
|
||||
} \
|
||||
fcinfo->arg[0] = (argument); \
|
||||
fcinfo->argnull[0] = (isnull); \
|
||||
fcinfo->nargs++; \
|
||||
} while (0)
|
||||
|
||||
#define TextPGetCString(t) \
|
||||
DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(t)))
|
||||
#define CStringGetTextP(c) \
|
||||
DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(c)))
|
||||
|
||||
#define TextGetObjectId(infunction, text) \
|
||||
DatumGetObjectId(DirectFunctionCall1(infunction, \
|
||||
DirectFunctionCall1(textout, PointerGetDatum(text))))
|
||||
|
||||
#define UNSUPPORTED_FUNCTION(name) \
|
||||
Datum name(PG_FUNCTION_ARGS); \
|
||||
Datum \
|
||||
name(PG_FUNCTION_ARGS) \
|
||||
{ \
|
||||
ereport(ERROR, \
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),\
|
||||
errmsg("function %s is no longer supported", \
|
||||
format_procedure(fcinfo->flinfo->fn_oid)), \
|
||||
errhint("Switch to new tsearch functionality."))); \
|
||||
/* keep compiler quiet */ \
|
||||
PG_RETURN_NULL(); \
|
||||
} \
|
||||
PG_FUNCTION_INFO_V1(name)
|
||||
|
||||
static Oid GetCurrentDict(void);
|
||||
static Oid GetCurrentParser(void);
|
||||
|
||||
Datum tsa_lexize_byname(PG_FUNCTION_ARGS);
|
||||
Datum tsa_lexize_bycurrent(PG_FUNCTION_ARGS);
|
||||
Datum tsa_set_curdict(PG_FUNCTION_ARGS);
|
||||
Datum tsa_set_curdict_byname(PG_FUNCTION_ARGS);
|
||||
Datum tsa_token_type_current(PG_FUNCTION_ARGS);
|
||||
Datum tsa_set_curprs(PG_FUNCTION_ARGS);
|
||||
Datum tsa_set_curprs_byname(PG_FUNCTION_ARGS);
|
||||
Datum tsa_parse_current(PG_FUNCTION_ARGS);
|
||||
Datum tsa_set_curcfg(PG_FUNCTION_ARGS);
|
||||
Datum tsa_set_curcfg_byname(PG_FUNCTION_ARGS);
|
||||
Datum tsa_show_curcfg(PG_FUNCTION_ARGS);
|
||||
Datum tsa_to_tsvector_name(PG_FUNCTION_ARGS);
|
||||
Datum tsa_to_tsquery_name(PG_FUNCTION_ARGS);
|
||||
Datum tsa_plainto_tsquery_name(PG_FUNCTION_ARGS);
|
||||
Datum tsa_headline_byname(PG_FUNCTION_ARGS);
|
||||
Datum tsa_ts_stat(PG_FUNCTION_ARGS);
|
||||
Datum tsa_tsearch2(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsa_lexize_byname);
|
||||
PG_FUNCTION_INFO_V1(tsa_lexize_bycurrent);
|
||||
PG_FUNCTION_INFO_V1(tsa_set_curdict);
|
||||
PG_FUNCTION_INFO_V1(tsa_set_curdict_byname);
|
||||
PG_FUNCTION_INFO_V1(tsa_token_type_current);
|
||||
PG_FUNCTION_INFO_V1(tsa_set_curprs);
|
||||
PG_FUNCTION_INFO_V1(tsa_set_curprs_byname);
|
||||
PG_FUNCTION_INFO_V1(tsa_parse_current);
|
||||
PG_FUNCTION_INFO_V1(tsa_set_curcfg);
|
||||
PG_FUNCTION_INFO_V1(tsa_set_curcfg_byname);
|
||||
PG_FUNCTION_INFO_V1(tsa_show_curcfg);
|
||||
PG_FUNCTION_INFO_V1(tsa_to_tsvector_name);
|
||||
PG_FUNCTION_INFO_V1(tsa_to_tsquery_name);
|
||||
PG_FUNCTION_INFO_V1(tsa_plainto_tsquery_name);
|
||||
PG_FUNCTION_INFO_V1(tsa_headline_byname);
|
||||
PG_FUNCTION_INFO_V1(tsa_ts_stat);
|
||||
PG_FUNCTION_INFO_V1(tsa_tsearch2);
|
||||
|
||||
|
||||
/*
|
||||
* List of unsupported functions
|
||||
*
|
||||
* The parser and dictionary functions are defined only so that the former
|
||||
* contents of pg_ts_parser and pg_ts_dict can be loaded into the system,
|
||||
* for ease of reference while creating the new tsearch configuration.
|
||||
*/
|
||||
|
||||
UNSUPPORTED_FUNCTION(tsa_dex_init);
|
||||
UNSUPPORTED_FUNCTION(tsa_dex_lexize);
|
||||
|
||||
UNSUPPORTED_FUNCTION(tsa_snb_en_init);
|
||||
UNSUPPORTED_FUNCTION(tsa_snb_lexize);
|
||||
UNSUPPORTED_FUNCTION(tsa_snb_ru_init_koi8);
|
||||
UNSUPPORTED_FUNCTION(tsa_snb_ru_init_utf8);
|
||||
|
||||
UNSUPPORTED_FUNCTION(tsa_spell_init);
|
||||
UNSUPPORTED_FUNCTION(tsa_spell_lexize);
|
||||
|
||||
UNSUPPORTED_FUNCTION(tsa_syn_init);
|
||||
UNSUPPORTED_FUNCTION(tsa_syn_lexize);
|
||||
|
||||
UNSUPPORTED_FUNCTION(tsa_thesaurus_init);
|
||||
UNSUPPORTED_FUNCTION(tsa_thesaurus_lexize);
|
||||
|
||||
UNSUPPORTED_FUNCTION(tsa_prsd_start);
|
||||
UNSUPPORTED_FUNCTION(tsa_prsd_getlexeme);
|
||||
UNSUPPORTED_FUNCTION(tsa_prsd_end);
|
||||
UNSUPPORTED_FUNCTION(tsa_prsd_lextype);
|
||||
UNSUPPORTED_FUNCTION(tsa_prsd_headline);
|
||||
|
||||
UNSUPPORTED_FUNCTION(tsa_reset_tsearch);
|
||||
UNSUPPORTED_FUNCTION(tsa_get_covers);
|
||||
|
||||
UNSUPPORTED_FUNCTION(tsa_rewrite_accum);
|
||||
UNSUPPORTED_FUNCTION(tsa_rewrite_finish);
|
||||
|
||||
|
||||
/*
|
||||
* list of redefined functions
|
||||
*/
|
||||
|
||||
/* lexize(text, text) */
|
||||
Datum
|
||||
tsa_lexize_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *dictname = PG_GETARG_TEXT_P(0);
|
||||
Datum arg1 = PG_GETARG_DATUM(1);
|
||||
|
||||
return DirectFunctionCall2(ts_lexize,
|
||||
ObjectIdGetDatum(TextGetObjectId(regdictionaryin, dictname)),
|
||||
arg1);
|
||||
}
|
||||
|
||||
/* lexize(text) */
|
||||
Datum
|
||||
tsa_lexize_bycurrent(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Datum arg0 = PG_GETARG_DATUM(0);
|
||||
Oid id = GetCurrentDict();
|
||||
|
||||
return DirectFunctionCall2(ts_lexize,
|
||||
ObjectIdGetDatum(id),
|
||||
arg0);
|
||||
}
|
||||
|
||||
/* set_curdict(int) */
|
||||
Datum
|
||||
tsa_set_curdict(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid dict_oid = PG_GETARG_OID(0);
|
||||
|
||||
if (!SearchSysCacheExists(TSDICTOID,
|
||||
ObjectIdGetDatum(dict_oid),
|
||||
0, 0, 0))
|
||||
elog(ERROR, "cache lookup failed for text search dictionary %u",
|
||||
dict_oid);
|
||||
|
||||
current_dictionary_oid = dict_oid;
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/* set_curdict(text) */
|
||||
Datum
|
||||
tsa_set_curdict_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *name = PG_GETARG_TEXT_P(0);
|
||||
Oid dict_oid;
|
||||
|
||||
dict_oid = TSDictionaryGetDictid(stringToQualifiedNameList(TextPGetCString(name)), false);
|
||||
|
||||
current_dictionary_oid = dict_oid;
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/* token_type() */
|
||||
Datum
|
||||
tsa_token_type_current(PG_FUNCTION_ARGS)
|
||||
{
|
||||
INSERT_ARGUMENT0(ObjectIdGetDatum(GetCurrentParser()), false);
|
||||
return ts_token_type_byid(fcinfo);
|
||||
}
|
||||
|
||||
/* set_curprs(int) */
|
||||
Datum
|
||||
tsa_set_curprs(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid parser_oid = PG_GETARG_OID(0);
|
||||
|
||||
if (!SearchSysCacheExists(TSPARSEROID,
|
||||
ObjectIdGetDatum(parser_oid),
|
||||
0, 0, 0))
|
||||
elog(ERROR, "cache lookup failed for text search parser %u",
|
||||
parser_oid);
|
||||
|
||||
current_parser_oid = parser_oid;
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/* set_curprs(text) */
|
||||
Datum
|
||||
tsa_set_curprs_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *name = PG_GETARG_TEXT_P(0);
|
||||
Oid parser_oid;
|
||||
|
||||
parser_oid = TSParserGetPrsid(stringToQualifiedNameList(TextPGetCString(name)), false);
|
||||
|
||||
current_parser_oid = parser_oid;
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/* parse(text) */
|
||||
Datum
|
||||
tsa_parse_current(PG_FUNCTION_ARGS)
|
||||
{
|
||||
INSERT_ARGUMENT0(ObjectIdGetDatum(GetCurrentParser()), false);
|
||||
return ts_parse_byid(fcinfo);
|
||||
}
|
||||
|
||||
/* set_curcfg(int) */
|
||||
Datum
|
||||
tsa_set_curcfg(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid arg0 = PG_GETARG_OID(0);
|
||||
char *name;
|
||||
|
||||
name = DatumGetCString(DirectFunctionCall1(regconfigout,
|
||||
ObjectIdGetDatum(arg0)));
|
||||
|
||||
set_config_option("default_text_search_config", name,
|
||||
PGC_USERSET,
|
||||
PGC_S_SESSION,
|
||||
GUC_ACTION_SET,
|
||||
true);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/* set_curcfg(text) */
|
||||
Datum
|
||||
tsa_set_curcfg_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *arg0 = PG_GETARG_TEXT_P(0);
|
||||
char *name;
|
||||
|
||||
name = TextPGetCString(arg0);
|
||||
set_config_option("default_text_search_config", name,
|
||||
PGC_USERSET,
|
||||
PGC_S_SESSION,
|
||||
GUC_ACTION_SET,
|
||||
true);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/* show_curcfg() */
|
||||
Datum
|
||||
tsa_show_curcfg(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char *cfgname;
|
||||
Oid config_oid;
|
||||
|
||||
cfgname = GetConfigOptionByName("default_text_search_config", NULL);
|
||||
config_oid = DatumGetObjectId(DirectFunctionCall1(regconfigin,
|
||||
CStringGetDatum(cfgname)));
|
||||
|
||||
PG_RETURN_OID(config_oid);
|
||||
}
|
||||
|
||||
/* to_tsvector(text, text) */
|
||||
Datum
|
||||
tsa_to_tsvector_name(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *cfgname = PG_GETARG_TEXT_P(0);
|
||||
Datum arg1 = PG_GETARG_DATUM(1);
|
||||
Oid config_oid;
|
||||
|
||||
config_oid = TextGetObjectId(regconfigin, cfgname);
|
||||
|
||||
return DirectFunctionCall2(to_tsvector_byid,
|
||||
ObjectIdGetDatum(config_oid), arg1);
|
||||
}
|
||||
|
||||
/* to_tsquery(text, text) */
|
||||
Datum
|
||||
tsa_to_tsquery_name(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *cfgname = PG_GETARG_TEXT_P(0);
|
||||
Datum arg1 = PG_GETARG_DATUM(1);
|
||||
Oid config_oid;
|
||||
|
||||
config_oid = TextGetObjectId(regconfigin, cfgname);
|
||||
|
||||
return DirectFunctionCall2(to_tsquery_byid,
|
||||
ObjectIdGetDatum(config_oid), arg1);
|
||||
}
|
||||
|
||||
|
||||
/* plainto_tsquery(text, text) */
|
||||
Datum
|
||||
tsa_plainto_tsquery_name(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *cfgname = PG_GETARG_TEXT_P(0);
|
||||
Datum arg1 = PG_GETARG_DATUM(1);
|
||||
Oid config_oid;
|
||||
|
||||
config_oid = TextGetObjectId(regconfigin, cfgname);
|
||||
|
||||
return DirectFunctionCall2(plainto_tsquery_byid,
|
||||
ObjectIdGetDatum(config_oid), arg1);
|
||||
}
|
||||
|
||||
/* headline(text, text, tsquery [,text]) */
|
||||
Datum
|
||||
tsa_headline_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Datum arg0 = PG_GETARG_DATUM(0);
|
||||
Datum arg1 = PG_GETARG_DATUM(1);
|
||||
Datum arg2 = PG_GETARG_DATUM(2);
|
||||
Datum result;
|
||||
Oid config_oid;
|
||||
|
||||
/* first parameter has to be converted to oid */
|
||||
config_oid = DatumGetObjectId(DirectFunctionCall1(regconfigin,
|
||||
DirectFunctionCall1(textout, arg0)));
|
||||
|
||||
if (PG_NARGS() == 3)
|
||||
result = DirectFunctionCall3(ts_headline_byid,
|
||||
ObjectIdGetDatum(config_oid), arg1, arg2);
|
||||
else
|
||||
{
|
||||
Datum arg3 = PG_GETARG_DATUM(3);
|
||||
|
||||
result = DirectFunctionCall4(ts_headline_byid_opt,
|
||||
ObjectIdGetDatum(config_oid),
|
||||
arg1, arg2, arg3);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* tsearch2 version of update trigger
|
||||
*
|
||||
* We pass this on to the core trigger after inserting the default text
|
||||
* search configuration name as the second argument. Note that this isn't
|
||||
* a complete implementation of the original functionality; tsearch2 allowed
|
||||
* transformation function names to be included in the list. However, that
|
||||
* is deliberately removed as being a security risk.
|
||||
*/
|
||||
Datum
|
||||
tsa_tsearch2(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TriggerData *trigdata;
|
||||
Trigger *trigger;
|
||||
char **tgargs;
|
||||
int i;
|
||||
|
||||
/* Check call context */
|
||||
if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
|
||||
elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
|
||||
|
||||
trigdata = (TriggerData *) fcinfo->context;
|
||||
trigger = trigdata->tg_trigger;
|
||||
|
||||
if (trigger->tgnargs < 2)
|
||||
elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
|
||||
|
||||
/* create space for configuration name */
|
||||
tgargs = (char **) palloc((trigger->tgnargs + 1) * sizeof(char *));
|
||||
tgargs[0] = trigger->tgargs[0];
|
||||
for (i = 1; i < trigger->tgnargs; i++)
|
||||
tgargs[i+1] = trigger->tgargs[i];
|
||||
|
||||
tgargs[1] = pstrdup(GetConfigOptionByName("default_text_search_config",
|
||||
NULL));
|
||||
trigger->tgargs = tgargs;
|
||||
trigger->tgnargs++;
|
||||
|
||||
return tsvector_update_trigger_byid(fcinfo);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get Oid of current dictionary
|
||||
*/
|
||||
static Oid
|
||||
GetCurrentDict(void)
|
||||
{
|
||||
if (current_dictionary_oid == InvalidOid)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("no current dictionary"),
|
||||
errhint("Execute SELECT set_curdict(...).")));
|
||||
|
||||
return current_dictionary_oid;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get Oid of current parser
|
||||
*
|
||||
* Here, it seems reasonable to select the "default" parser if none has been
|
||||
* set.
|
||||
*/
|
||||
static Oid
|
||||
GetCurrentParser(void)
|
||||
{
|
||||
if (current_parser_oid == InvalidOid)
|
||||
current_parser_oid = TSParserGetPrsid(stringToQualifiedNameList("pg_catalog.default"), false);
|
||||
return current_parser_oid;
|
||||
}
|
|
@ -0,0 +1,570 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/tsearch2.sql.in,v 1.1 2007/11/13 21:02:29 tgl Exp $ */
|
||||
|
||||
-- Adjust this setting to control where the objects get created.
|
||||
SET search_path = public;
|
||||
|
||||
-- These domains are just to catch schema-qualified references to the
|
||||
-- old data types.
|
||||
CREATE DOMAIN tsvector AS pg_catalog.tsvector;
|
||||
CREATE DOMAIN tsquery AS pg_catalog.tsquery;
|
||||
CREATE DOMAIN gtsvector AS pg_catalog.gtsvector;
|
||||
CREATE DOMAIN gtsq AS pg_catalog.text;
|
||||
|
||||
--dict interface
|
||||
CREATE FUNCTION lexize(oid, text)
|
||||
RETURNS _text
|
||||
as 'ts_lexize'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION lexize(text, text)
|
||||
RETURNS _text
|
||||
as 'MODULE_PATHNAME', 'tsa_lexize_byname'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION lexize(text)
|
||||
RETURNS _text
|
||||
as 'MODULE_PATHNAME', 'tsa_lexize_bycurrent'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION set_curdict(int)
|
||||
RETURNS void
|
||||
as 'MODULE_PATHNAME', 'tsa_set_curdict'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION set_curdict(text)
|
||||
RETURNS void
|
||||
as 'MODULE_PATHNAME', 'tsa_set_curdict_byname'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
--built-in dictionaries
|
||||
CREATE FUNCTION dex_init(internal)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_dex_init'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION dex_lexize(internal,internal,int4)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_dex_lexize'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION snb_en_init(internal)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_snb_en_init'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION snb_lexize(internal,internal,int4)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_snb_lexize'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION snb_ru_init_koi8(internal)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_snb_ru_init_koi8'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION snb_ru_init_utf8(internal)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_snb_ru_init_utf8'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION spell_init(internal)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_spell_init'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION spell_lexize(internal,internal,int4)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_spell_lexize'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION syn_init(internal)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_syn_init'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION syn_lexize(internal,internal,int4)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_syn_lexize'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION thesaurus_init(internal)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_thesaurus_init'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION thesaurus_lexize(internal,internal,int4,internal)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_thesaurus_lexize'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
--sql-level interface
|
||||
CREATE TYPE tokentype
|
||||
as (tokid int4, alias text, descr text);
|
||||
|
||||
CREATE FUNCTION token_type(int4)
|
||||
RETURNS setof tokentype
|
||||
as 'ts_token_type_byid'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT
|
||||
ROWS 16;
|
||||
|
||||
CREATE FUNCTION token_type(text)
|
||||
RETURNS setof tokentype
|
||||
as 'ts_token_type_byname'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT
|
||||
ROWS 16;
|
||||
|
||||
CREATE FUNCTION token_type()
|
||||
RETURNS setof tokentype
|
||||
as 'MODULE_PATHNAME', 'tsa_token_type_current'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT
|
||||
ROWS 16;
|
||||
|
||||
CREATE FUNCTION set_curprs(int)
|
||||
RETURNS void
|
||||
as 'MODULE_PATHNAME', 'tsa_set_curprs'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION set_curprs(text)
|
||||
RETURNS void
|
||||
as 'MODULE_PATHNAME', 'tsa_set_curprs_byname'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE TYPE tokenout
|
||||
as (tokid int4, token text);
|
||||
|
||||
CREATE FUNCTION parse(oid,text)
|
||||
RETURNS setof tokenout
|
||||
as 'ts_parse_byid'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION parse(text,text)
|
||||
RETURNS setof tokenout
|
||||
as 'ts_parse_byname'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION parse(text)
|
||||
RETURNS setof tokenout
|
||||
as 'MODULE_PATHNAME', 'tsa_parse_current'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
--default parser
|
||||
CREATE FUNCTION prsd_start(internal,int4)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_prsd_start'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
|
||||
RETURNS int4
|
||||
as 'MODULE_PATHNAME', 'tsa_prsd_getlexeme'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION prsd_end(internal)
|
||||
RETURNS void
|
||||
as 'MODULE_PATHNAME', 'tsa_prsd_end'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION prsd_lextype(internal)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_prsd_lextype'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION prsd_headline(internal,internal,internal)
|
||||
RETURNS internal
|
||||
as 'MODULE_PATHNAME', 'tsa_prsd_headline'
|
||||
LANGUAGE C;
|
||||
|
||||
--tsearch config
|
||||
CREATE FUNCTION set_curcfg(int)
|
||||
RETURNS void
|
||||
as 'MODULE_PATHNAME', 'tsa_set_curcfg'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION set_curcfg(text)
|
||||
RETURNS void
|
||||
as 'MODULE_PATHNAME', 'tsa_set_curcfg_byname'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION show_curcfg()
|
||||
RETURNS oid
|
||||
as 'MODULE_PATHNAME', 'tsa_show_curcfg'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION length(tsvector)
|
||||
RETURNS int4
|
||||
AS 'tsvector_length'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION to_tsvector(oid, text)
|
||||
RETURNS tsvector
|
||||
AS 'to_tsvector_byid'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION to_tsvector(text, text)
|
||||
RETURNS tsvector
|
||||
AS 'MODULE_PATHNAME', 'tsa_to_tsvector_name'
|
||||
LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION to_tsvector(text)
|
||||
RETURNS tsvector
|
||||
AS 'to_tsvector'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION strip(tsvector)
|
||||
RETURNS tsvector
|
||||
AS 'tsvector_strip'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION setweight(tsvector,"char")
|
||||
RETURNS tsvector
|
||||
AS 'tsvector_setweight'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION concat(tsvector,tsvector)
|
||||
RETURNS tsvector
|
||||
AS 'tsvector_concat'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION querytree(tsquery)
|
||||
RETURNS text
|
||||
AS 'tsquerytree'
|
||||
LANGUAGE INTERNAL RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION to_tsquery(oid, text)
|
||||
RETURNS tsquery
|
||||
AS 'to_tsquery_byid'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION to_tsquery(text, text)
|
||||
RETURNS tsquery
|
||||
AS 'MODULE_PATHNAME','tsa_to_tsquery_name'
|
||||
LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION to_tsquery(text)
|
||||
RETURNS tsquery
|
||||
AS 'to_tsquery'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION plainto_tsquery(oid, text)
|
||||
RETURNS tsquery
|
||||
AS 'plainto_tsquery_byid'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION plainto_tsquery(text, text)
|
||||
RETURNS tsquery
|
||||
AS 'MODULE_PATHNAME','tsa_plainto_tsquery_name'
|
||||
LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION plainto_tsquery(text)
|
||||
RETURNS tsquery
|
||||
AS 'plainto_tsquery'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
--Trigger
|
||||
CREATE FUNCTION tsearch2()
|
||||
RETURNS trigger
|
||||
AS 'MODULE_PATHNAME', 'tsa_tsearch2'
|
||||
LANGUAGE C;
|
||||
|
||||
--Relevance
|
||||
CREATE FUNCTION rank(float4[], tsvector, tsquery)
|
||||
RETURNS float4
|
||||
AS 'ts_rank_wtt'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
|
||||
RETURNS float4
|
||||
AS 'ts_rank_wttf'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION rank(tsvector, tsquery)
|
||||
RETURNS float4
|
||||
AS 'ts_rank_tt'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION rank(tsvector, tsquery, int4)
|
||||
RETURNS float4
|
||||
AS 'ts_rank_ttf'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION rank_cd(float4[], tsvector, tsquery)
|
||||
RETURNS float4
|
||||
AS 'ts_rankcd_wtt'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION rank_cd(float4[], tsvector, tsquery, int4)
|
||||
RETURNS float4
|
||||
AS 'ts_rankcd_wttf'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION rank_cd(tsvector, tsquery)
|
||||
RETURNS float4
|
||||
AS 'ts_rankcd_tt'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
|
||||
RETURNS float4
|
||||
AS 'ts_rankcd_ttf'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION headline(oid, text, tsquery, text)
|
||||
RETURNS text
|
||||
AS 'ts_headline_byid_opt'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION headline(oid, text, tsquery)
|
||||
RETURNS text
|
||||
AS 'ts_headline_byid'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION headline(text, text, tsquery, text)
|
||||
RETURNS text
|
||||
AS 'MODULE_PATHNAME', 'tsa_headline_byname'
|
||||
LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION headline(text, text, tsquery)
|
||||
RETURNS text
|
||||
AS 'MODULE_PATHNAME', 'tsa_headline_byname'
|
||||
LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION headline(text, tsquery, text)
|
||||
RETURNS text
|
||||
AS 'ts_headline_opt'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE FUNCTION headline(text, tsquery)
|
||||
RETURNS text
|
||||
AS 'ts_headline'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
-- CREATE the OPERATOR class
|
||||
CREATE OPERATOR CLASS gist_tsvector_ops
|
||||
FOR TYPE tsvector USING gist
|
||||
AS
|
||||
OPERATOR 1 @@ (tsvector, tsquery) RECHECK ,
|
||||
FUNCTION 1 gtsvector_consistent (gtsvector, internal, int4),
|
||||
FUNCTION 2 gtsvector_union (internal, internal),
|
||||
FUNCTION 3 gtsvector_compress (internal),
|
||||
FUNCTION 4 gtsvector_decompress (internal),
|
||||
FUNCTION 5 gtsvector_penalty (internal, internal, internal),
|
||||
FUNCTION 6 gtsvector_picksplit (internal, internal),
|
||||
FUNCTION 7 gtsvector_same (gtsvector, gtsvector, internal),
|
||||
STORAGE gtsvector;
|
||||
|
||||
--stat info
|
||||
CREATE TYPE statinfo
|
||||
as (word text, ndoc int4, nentry int4);
|
||||
|
||||
CREATE FUNCTION stat(text)
|
||||
RETURNS setof statinfo
|
||||
as 'ts_stat1'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION stat(text,text)
|
||||
RETURNS setof statinfo
|
||||
as 'ts_stat2'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
--reset - just for debugging
|
||||
CREATE FUNCTION reset_tsearch()
|
||||
RETURNS void
|
||||
as 'MODULE_PATHNAME', 'tsa_reset_tsearch'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
--get cover (debug for rank_cd)
|
||||
CREATE FUNCTION get_covers(tsvector,tsquery)
|
||||
RETURNS text
|
||||
as 'MODULE_PATHNAME', 'tsa_get_covers'
|
||||
LANGUAGE C
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
|
||||
--debug function
|
||||
create type tsdebug as (
|
||||
ts_name text,
|
||||
tok_type text,
|
||||
description text,
|
||||
token text,
|
||||
dict_name text[],
|
||||
"tsvector" tsvector
|
||||
);
|
||||
|
||||
-- Name of the parser used by the configuration that show_curcfg() reports.
-- Declared STABLE, not IMMUTABLE: the result depends on the current
-- default_text_search_config setting and on catalog contents, so it must
-- not be treated as a constant across statements.
CREATE or replace FUNCTION _get_parser_from_curcfg()
RETURNS text as
$$select prsname::text from pg_catalog.pg_ts_parser p join pg_catalog.pg_ts_config c on cfgparser = p.oid where c.oid = show_curcfg();$$
LANGUAGE SQL RETURNS NULL ON NULL INPUT STABLE;
|
||||
|
||||
CREATE FUNCTION ts_debug(text)
|
||||
RETURNS setof tsdebug as $$
|
||||
select
|
||||
(select c.cfgname::text from pg_catalog.pg_ts_config as c
|
||||
where c.oid = show_curcfg()),
|
||||
t.alias as tok_type,
|
||||
t.descr as description,
|
||||
p.token,
|
||||
ARRAY ( SELECT m.mapdict::pg_catalog.regdictionary::pg_catalog.text
|
||||
FROM pg_catalog.pg_ts_config_map AS m
|
||||
WHERE m.mapcfg = show_curcfg() AND m.maptokentype = p.tokid
|
||||
ORDER BY m.mapseqno )
|
||||
AS dict_name,
|
||||
strip(to_tsvector(p.token)) as tsvector
|
||||
from
|
||||
parse( _get_parser_from_curcfg(), $1 ) as p,
|
||||
token_type() as t
|
||||
where
|
||||
t.tokid = p.tokid
|
||||
$$ LANGUAGE SQL RETURNS NULL ON NULL INPUT;
|
||||
|
||||
CREATE OR REPLACE FUNCTION numnode(tsquery)
|
||||
RETURNS int4
|
||||
as 'tsquery_numnode'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION tsquery_and(tsquery,tsquery)
|
||||
RETURNS tsquery
|
||||
as 'tsquery_and'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION tsquery_or(tsquery,tsquery)
|
||||
RETURNS tsquery
|
||||
as 'tsquery_or'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION tsquery_not(tsquery)
|
||||
RETURNS tsquery
|
||||
as 'tsquery_not'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
--------------rewrite subsystem
|
||||
|
||||
CREATE OR REPLACE FUNCTION rewrite(tsquery, text)
|
||||
RETURNS tsquery
|
||||
as 'tsquery_rewrite_query'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION rewrite(tsquery, tsquery, tsquery)
|
||||
RETURNS tsquery
|
||||
as 'tsquery_rewrite'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION rewrite_accum(tsquery,tsquery[])
|
||||
RETURNS tsquery
|
||||
AS 'MODULE_PATHNAME', 'tsa_rewrite_accum'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE OR REPLACE FUNCTION rewrite_finish(tsquery)
|
||||
RETURNS tsquery
|
||||
as 'MODULE_PATHNAME', 'tsa_rewrite_finish'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE AGGREGATE rewrite (
|
||||
BASETYPE = tsquery[],
|
||||
SFUNC = rewrite_accum,
|
||||
STYPE = tsquery,
|
||||
FINALFUNC = rewrite_finish
|
||||
);
|
||||
|
||||
CREATE OR REPLACE FUNCTION tsq_mcontains(tsquery, tsquery)
|
||||
RETURNS bool
|
||||
as 'tsq_mcontains'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION tsq_mcontained(tsquery, tsquery)
|
||||
RETURNS bool
|
||||
as 'tsq_mcontained'
|
||||
LANGUAGE INTERNAL
|
||||
RETURNS NULL ON NULL INPUT IMMUTABLE;
|
||||
|
||||
CREATE OPERATOR CLASS gist_tp_tsquery_ops
|
||||
FOR TYPE tsquery USING gist
|
||||
AS
|
||||
OPERATOR 7 @> (tsquery, tsquery) RECHECK,
|
||||
OPERATOR 8 <@ (tsquery, tsquery) RECHECK,
|
||||
FUNCTION 1 gtsquery_consistent (bigint, internal, int4),
|
||||
FUNCTION 2 gtsquery_union (internal, internal),
|
||||
FUNCTION 3 gtsquery_compress (internal),
|
||||
FUNCTION 4 gtsquery_decompress (internal),
|
||||
FUNCTION 5 gtsquery_penalty (internal, internal, internal),
|
||||
FUNCTION 6 gtsquery_picksplit (internal, internal),
|
||||
FUNCTION 7 gtsquery_same (bigint, bigint, internal),
|
||||
STORAGE bigint;
|
||||
|
||||
CREATE OPERATOR CLASS gin_tsvector_ops
|
||||
FOR TYPE tsvector USING gin
|
||||
AS
|
||||
OPERATOR 1 @@ (tsvector, tsquery),
|
||||
OPERATOR 2 @@@ (tsvector, tsquery) RECHECK,
|
||||
FUNCTION 1 bttextcmp(text, text),
|
||||
FUNCTION 2 gin_extract_tsvector(tsvector,internal),
|
||||
FUNCTION 3 gin_extract_query(internal,internal,smallint),
|
||||
FUNCTION 4 gin_ts_consistent(internal,smallint,internal),
|
||||
STORAGE text;
|
||||
|
||||
CREATE OPERATOR CLASS tsvector_ops
|
||||
FOR TYPE tsvector USING btree AS
|
||||
OPERATOR 1 < ,
|
||||
OPERATOR 2 <= ,
|
||||
OPERATOR 3 = ,
|
||||
OPERATOR 4 >= ,
|
||||
OPERATOR 5 > ,
|
||||
FUNCTION 1 tsvector_cmp(tsvector, tsvector);
|
||||
|
||||
CREATE OPERATOR CLASS tsquery_ops
|
||||
FOR TYPE tsquery USING btree AS
|
||||
OPERATOR 1 < ,
|
||||
OPERATOR 2 <= ,
|
||||
OPERATOR 3 = ,
|
||||
OPERATOR 4 >= ,
|
||||
OPERATOR 5 > ,
|
||||
FUNCTION 1 tsquery_cmp(tsquery, tsquery);
|
File diff suppressed because it is too large
Load Diff
|
@ -1,101 +0,0 @@
|
|||
#ifndef __TXTIDX_H__
|
||||
#define __TXTIDX_H__
|
||||
|
||||
/*
|
||||
#define TXTIDX_DEBUG
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/gist.h"
|
||||
#include "access/itup.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "storage/bufpage.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32
|
||||
haspos:1,
|
||||
len:11, /* MAX 2Kb */
|
||||
pos:20; /* MAX 1Mb */
|
||||
} WordEntry;
|
||||
|
||||
#define MAXSTRLEN ( 1<<11 )
|
||||
#define MAXSTRPOS ( 1<<20 )
|
||||
|
||||
/*
|
||||
Equivalent to
|
||||
typedef struct
|
||||
{
|
||||
uint16
|
||||
weight:2,
|
||||
pos:14;
|
||||
} WordEntryPos;
|
||||
|
||||
*/
|
||||
|
||||
typedef uint16 WordEntryPos;
|
||||
|
||||
/*
 * Accessors for a WordEntryPos value: the upper 2 bits carry the weight,
 * the lower 14 bits carry the position.
 */
#define WEP_GETWEIGHT(x)	( (x) >> 14 )
#define WEP_GETPOS(x)		( (x) & 0x3fff )

/*
 * Setters assign to their first argument.  The whole expansion is wrapped in
 * parentheses so that the macro behaves as a single expression wherever it
 * is used (e.g. next to ==, ?: or another operator); without them the
 * trailing operand of the '|' would bind to the surrounding operator.
 */
#define WEP_SETWEIGHT(x,v)	( (x) = ( (v) << 14 ) | ( (x) & 0x3fff ) )
#define WEP_SETPOS(x,v)		( (x) = ( (x) & 0xc000 ) | ( (v) & 0x3fff ) )


#define MAXENTRYPOS (1<<14)		/* positions are 14-bit: valid range 0 .. MAXENTRYPOS-1 */
#define MAXNUMPOS	256			/* cap on positions stored per lexeme */
/* Clamp a position to the largest representable value */
#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
|
||||
|
||||
/*
|
||||
* Structure of tsvector datatype:
|
||||
* 1) standard varlena header
|
||||
* 2) int4 size - number of lexemes or WordEntry array, which is the same
|
||||
* 3) Array of WordEntry - sorted array, comparison based on word's length
|
||||
* and strncmp(). WordEntry->pos points number of
|
||||
* bytes from end of WordEntry array to start of
|
||||
* corresponding lexeme.
|
||||
* 4) Lexeme's storage:
|
||||
* SHORTALIGNED(lexeme) and position information if it exists
|
||||
* Position information: first int2 - is a number of positions and it
|
||||
* follows array of WordEntryPos
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int32 vl_len_; /* varlena header (do not touch directly!) */
|
||||
int4 size;
|
||||
char data[1];
|
||||
} tsvector;
|
||||
|
||||
/* Fixed part of a tsvector: varlena header plus the int4 entry count */
#define DATAHDRSIZE (VARHDRSZ + sizeof(int4))
/* Total size of a tsvector with x entries and lenstr bytes of lexeme storage */
#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
/* Start of the WordEntry array */
#define ARRPTR(x)	( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
/* Start of the lexeme storage, immediately after the WordEntry array */
#define STRPTR(x)	( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
/*
 * Number of bytes of lexeme storage.  The total length lives in the varlena
 * header and must be read with VARSIZE(); the tsvector struct has no "len"
 * member.
 */
#define STRSIZE(x)	( VARSIZE(x) - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
/* Address of the position block (16-bit count + WordEntryPos array) of entry e */
#define _POSDATAPTR(x,e)	(STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
/* Number of positions stored for entry e, or 0 when it has none */
#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
/* First WordEntryPos of entry e (just past the 16-bit count) */
#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
WordEntry entry;
|
||||
WordEntryPos *pos;
|
||||
} WordEntryIN;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char *prsbuf;
|
||||
char *word;
|
||||
char *curpos;
|
||||
int4 len;
|
||||
int4 state;
|
||||
int4 alen;
|
||||
WordEntryPos *pos;
|
||||
bool oprisdelim;
|
||||
} TI_IN_STATE;
|
||||
|
||||
int4 gettoken_tsvector(TI_IN_STATE * state);
|
||||
|
||||
#endif
|
|
@ -1,330 +0,0 @@
|
|||
/*
|
||||
* Operations for tsvector type
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
|
||||
#include "access/gist.h"
|
||||
#include "access/itup.h"
|
||||
#include "catalog/namespace.h"
|
||||
#include "commands/trigger.h"
|
||||
#include "executor/spi.h"
|
||||
#include "nodes/pg_list.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/pg_locale.h"
|
||||
|
||||
#include "tsvector.h"
|
||||
#include "query.h"
|
||||
#include "ts_cfg.h"
|
||||
#include "common.h"
|
||||
|
||||
PG_FUNCTION_INFO_V1(strip);
|
||||
Datum strip(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(setweight);
|
||||
Datum setweight(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(concat);
|
||||
Datum concat(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
strip(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
tsvector *out;
|
||||
int i,
|
||||
len = 0;
|
||||
WordEntry *arrin = ARRPTR(in),
|
||||
*arrout;
|
||||
char *cur;
|
||||
|
||||
for (i = 0; i < in->size; i++)
|
||||
len += SHORTALIGN(arrin[i].len);
|
||||
|
||||
len = CALCDATASIZE(in->size, len);
|
||||
out = (tsvector *) palloc0(len);
|
||||
SET_VARSIZE(out, len);
|
||||
out->size = in->size;
|
||||
arrout = ARRPTR(out);
|
||||
cur = STRPTR(out);
|
||||
for (i = 0; i < in->size; i++)
|
||||
{
|
||||
memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
|
||||
arrout[i].haspos = 0;
|
||||
arrout[i].len = arrin[i].len;
|
||||
arrout[i].pos = cur - STRPTR(out);
|
||||
cur += SHORTALIGN(arrout[i].len);
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
||||
|
||||
Datum
|
||||
setweight(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
char cw = PG_GETARG_CHAR(1);
|
||||
tsvector *out;
|
||||
int i,
|
||||
j;
|
||||
WordEntry *entry;
|
||||
WordEntryPos *p;
|
||||
int w = 0;
|
||||
|
||||
switch (cw)
|
||||
{
|
||||
case 'A':
|
||||
case 'a':
|
||||
w = 3;
|
||||
break;
|
||||
case 'B':
|
||||
case 'b':
|
||||
w = 2;
|
||||
break;
|
||||
case 'C':
|
||||
case 'c':
|
||||
w = 1;
|
||||
break;
|
||||
case 'D':
|
||||
case 'd':
|
||||
w = 0;
|
||||
break;
|
||||
/* internal error */
|
||||
default:
|
||||
elog(ERROR, "unrecognized weight");
|
||||
}
|
||||
|
||||
out = (tsvector *) palloc(VARSIZE(in));
|
||||
memcpy(out, in, VARSIZE(in));
|
||||
entry = ARRPTR(out);
|
||||
i = out->size;
|
||||
while (i--)
|
||||
{
|
||||
if ((j = POSDATALEN(out, entry)) != 0)
|
||||
{
|
||||
p = POSDATAPTR(out, entry);
|
||||
while (j--)
|
||||
{
|
||||
WEP_SETWEIGHT(*p, w);
|
||||
p++;
|
||||
}
|
||||
}
|
||||
entry++;
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
||||
|
||||
static int
|
||||
compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b)
|
||||
{
|
||||
if (a->len == b->len)
|
||||
{
|
||||
return strncmp(
|
||||
ptra + a->pos,
|
||||
ptrb + b->pos,
|
||||
a->len);
|
||||
}
|
||||
return (a->len > b->len) ? 1 : -1;
|
||||
}
|
||||
|
||||
static int4
|
||||
add_pos(tsvector * src, WordEntry * srcptr, tsvector * dest, WordEntry * destptr, int4 maxpos)
|
||||
{
|
||||
uint16 *clen = (uint16 *) _POSDATAPTR(dest, destptr);
|
||||
int i;
|
||||
uint16 slen = POSDATALEN(src, srcptr),
|
||||
startlen;
|
||||
WordEntryPos *spos = POSDATAPTR(src, srcptr),
|
||||
*dpos = POSDATAPTR(dest, destptr);
|
||||
|
||||
if (!destptr->haspos)
|
||||
*clen = 0;
|
||||
|
||||
startlen = *clen;
|
||||
for (i = 0; i < slen && *clen < MAXNUMPOS && (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1); i++)
|
||||
{
|
||||
WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
|
||||
WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
|
||||
(*clen)++;
|
||||
}
|
||||
|
||||
if (*clen != startlen)
|
||||
destptr->haspos = 1;
|
||||
return *clen - startlen;
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
concat(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
tsvector *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
tsvector *out;
|
||||
WordEntry *ptr;
|
||||
WordEntry *ptr1,
|
||||
*ptr2;
|
||||
WordEntryPos *p;
|
||||
int maxpos = 0,
|
||||
i,
|
||||
j,
|
||||
i1,
|
||||
i2;
|
||||
char *cur;
|
||||
char *data,
|
||||
*data1,
|
||||
*data2;
|
||||
|
||||
ptr = ARRPTR(in1);
|
||||
i = in1->size;
|
||||
while (i--)
|
||||
{
|
||||
if ((j = POSDATALEN(in1, ptr)) != 0)
|
||||
{
|
||||
p = POSDATAPTR(in1, ptr);
|
||||
while (j--)
|
||||
{
|
||||
if (WEP_GETPOS(*p) > maxpos)
|
||||
maxpos = WEP_GETPOS(*p);
|
||||
p++;
|
||||
}
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
|
||||
ptr1 = ARRPTR(in1);
|
||||
ptr2 = ARRPTR(in2);
|
||||
data1 = STRPTR(in1);
|
||||
data2 = STRPTR(in2);
|
||||
i1 = in1->size;
|
||||
i2 = in2->size;
|
||||
out = (tsvector *) palloc0(VARSIZE(in1) + VARSIZE(in2));
|
||||
SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2));
|
||||
out->size = in1->size + in2->size;
|
||||
data = cur = STRPTR(out);
|
||||
ptr = ARRPTR(out);
|
||||
while (i1 && i2)
|
||||
{
|
||||
int cmp = compareEntry(data1, ptr1, data2, ptr2);
|
||||
|
||||
if (cmp < 0)
|
||||
{ /* in1 first */
|
||||
ptr->haspos = ptr1->haspos;
|
||||
ptr->len = ptr1->len;
|
||||
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
||||
ptr->pos = cur - data;
|
||||
cur += SHORTALIGN(ptr1->len);
|
||||
if (ptr->haspos)
|
||||
{
|
||||
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
||||
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
||||
}
|
||||
ptr++;
|
||||
ptr1++;
|
||||
i1--;
|
||||
}
|
||||
else if (cmp > 0)
|
||||
{ /* in2 first */
|
||||
ptr->haspos = ptr2->haspos;
|
||||
ptr->len = ptr2->len;
|
||||
memcpy(cur, data2 + ptr2->pos, ptr2->len);
|
||||
ptr->pos = cur - data;
|
||||
cur += SHORTALIGN(ptr2->len);
|
||||
if (ptr->haspos)
|
||||
{
|
||||
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
||||
|
||||
if (addlen == 0)
|
||||
ptr->haspos = 0;
|
||||
else
|
||||
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
||||
}
|
||||
ptr++;
|
||||
ptr2++;
|
||||
i2--;
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr->haspos = ptr1->haspos | ptr2->haspos;
|
||||
ptr->len = ptr1->len;
|
||||
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
||||
ptr->pos = cur - data;
|
||||
cur += SHORTALIGN(ptr1->len);
|
||||
if (ptr->haspos)
|
||||
{
|
||||
if (ptr1->haspos)
|
||||
{
|
||||
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
||||
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
||||
if (ptr2->haspos)
|
||||
cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
|
||||
}
|
||||
else if (ptr2->haspos)
|
||||
{
|
||||
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
||||
|
||||
if (addlen == 0)
|
||||
ptr->haspos = 0;
|
||||
else
|
||||
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
||||
}
|
||||
}
|
||||
ptr++;
|
||||
ptr1++;
|
||||
ptr2++;
|
||||
i1--;
|
||||
i2--;
|
||||
}
|
||||
}
|
||||
|
||||
while (i1)
|
||||
{
|
||||
ptr->haspos = ptr1->haspos;
|
||||
ptr->len = ptr1->len;
|
||||
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
||||
ptr->pos = cur - data;
|
||||
cur += SHORTALIGN(ptr1->len);
|
||||
if (ptr->haspos)
|
||||
{
|
||||
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
||||
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
||||
}
|
||||
ptr++;
|
||||
ptr1++;
|
||||
i1--;
|
||||
}
|
||||
|
||||
while (i2)
|
||||
{
|
||||
ptr->haspos = ptr2->haspos;
|
||||
ptr->len = ptr2->len;
|
||||
memcpy(cur, data2 + ptr2->pos, ptr2->len);
|
||||
ptr->pos = cur - data;
|
||||
cur += SHORTALIGN(ptr2->len);
|
||||
if (ptr->haspos)
|
||||
{
|
||||
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
||||
|
||||
if (addlen == 0)
|
||||
ptr->haspos = 0;
|
||||
else
|
||||
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
||||
}
|
||||
ptr++;
|
||||
ptr2++;
|
||||
i2--;
|
||||
}
|
||||
|
||||
out->size = ptr - ARRPTR(out);
|
||||
SET_VARSIZE(out, CALCDATASIZE(out->size, cur - data));
|
||||
if (data != STRPTR(out))
|
||||
memmove(STRPTR(out), data, cur - data);
|
||||
|
||||
PG_FREE_IF_COPY(in1, 0);
|
||||
PG_FREE_IF_COPY(in2, 1);
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
|
@ -0,0 +1,95 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/uninstall_tsearch2.sql,v 1.1 2007/11/13 21:02:29 tgl Exp $ */

-- Adjust this setting to control where the objects get dropped.
SET search_path = public, pg_catalog;

-- Domains
DROP DOMAIN tsvector CASCADE;
DROP DOMAIN tsquery CASCADE;
DROP DOMAIN gtsvector CASCADE;
DROP DOMAIN gtsq CASCADE;

-- Types
DROP TYPE tokentype CASCADE;
DROP TYPE tokenout CASCADE;
DROP TYPE statinfo CASCADE;
DROP TYPE tsdebug CASCADE;

-- Operator classes
DROP OPERATOR CLASS tsquery_ops USING btree CASCADE;
DROP OPERATOR CLASS tsvector_ops USING btree CASCADE;
DROP OPERATOR CLASS gin_tsvector_ops USING gin CASCADE;
DROP OPERATOR CLASS gist_tp_tsquery_ops USING gist CASCADE;
DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;

-- Dictionary functions
DROP FUNCTION lexize(oid, text);
DROP FUNCTION lexize(text, text);
DROP FUNCTION lexize(text);
DROP FUNCTION set_curdict(int);
DROP FUNCTION set_curdict(text);
DROP FUNCTION dex_init(internal);
DROP FUNCTION dex_lexize(internal,internal,int4);
DROP FUNCTION snb_en_init(internal);
DROP FUNCTION snb_lexize(internal,internal,int4);
DROP FUNCTION snb_ru_init_koi8(internal);
DROP FUNCTION snb_ru_init_utf8(internal);
DROP FUNCTION spell_init(internal);
DROP FUNCTION spell_lexize(internal,internal,int4);
DROP FUNCTION syn_init(internal);
DROP FUNCTION syn_lexize(internal,internal,int4);
DROP FUNCTION thesaurus_init(internal);
DROP FUNCTION thesaurus_lexize(internal,internal,int4,internal);

-- Parser functions
DROP FUNCTION set_curprs(int);
DROP FUNCTION set_curprs(text);
DROP FUNCTION prsd_start(internal,int4);
DROP FUNCTION prsd_getlexeme(internal,internal,internal);
DROP FUNCTION prsd_end(internal);
DROP FUNCTION prsd_lextype(internal);
DROP FUNCTION prsd_headline(internal,internal,internal);

-- Configuration functions
DROP FUNCTION set_curcfg(int);
DROP FUNCTION set_curcfg(text);
DROP FUNCTION show_curcfg();

-- tsvector functions
DROP FUNCTION length(tsvector);
DROP FUNCTION to_tsvector(oid, text);
DROP FUNCTION to_tsvector(text, text);
DROP FUNCTION to_tsvector(text);
DROP FUNCTION strip(tsvector);
DROP FUNCTION setweight(tsvector,"char");
DROP FUNCTION concat(tsvector,tsvector);

-- tsquery functions
DROP FUNCTION querytree(tsquery);
DROP FUNCTION to_tsquery(oid, text);
DROP FUNCTION to_tsquery(text, text);
DROP FUNCTION to_tsquery(text);
DROP FUNCTION plainto_tsquery(oid, text);
DROP FUNCTION plainto_tsquery(text, text);
DROP FUNCTION plainto_tsquery(text);

-- Trigger function (CASCADE also removes dependent objects)
DROP FUNCTION tsearch2() CASCADE;

-- Ranking functions
DROP FUNCTION rank(float4[], tsvector, tsquery);
DROP FUNCTION rank(float4[], tsvector, tsquery, int4);
DROP FUNCTION rank(tsvector, tsquery);
DROP FUNCTION rank(tsvector, tsquery, int4);
DROP FUNCTION rank_cd(float4[], tsvector, tsquery);
DROP FUNCTION rank_cd(float4[], tsvector, tsquery, int4);
DROP FUNCTION rank_cd(tsvector, tsquery);
DROP FUNCTION rank_cd(tsvector, tsquery, int4);

-- Headline functions
DROP FUNCTION headline(oid, text, tsquery, text);
DROP FUNCTION headline(oid, text, tsquery);
DROP FUNCTION headline(text, text, tsquery, text);
DROP FUNCTION headline(text, text, tsquery);
DROP FUNCTION headline(text, tsquery, text);
DROP FUNCTION headline(text, tsquery);

-- Miscellaneous
DROP FUNCTION get_covers(tsvector,tsquery);
DROP FUNCTION _get_parser_from_curcfg();
DROP FUNCTION numnode(tsquery);
DROP FUNCTION tsquery_and(tsquery,tsquery);
DROP FUNCTION tsquery_or(tsquery,tsquery);
DROP FUNCTION tsquery_not(tsquery);

-- Query rewriting
DROP FUNCTION rewrite(tsquery, text);
DROP FUNCTION rewrite(tsquery, tsquery, tsquery);
DROP AGGREGATE rewrite (tsquery[]);
DROP FUNCTION rewrite_accum(tsquery,tsquery[]);
DROP FUNCTION rewrite_finish(tsquery);

-- tsquery containment
DROP FUNCTION tsq_mcontains(tsquery, tsquery);
DROP FUNCTION tsq_mcontained(tsquery, tsquery);

DROP FUNCTION reset_tsearch();
|
|
@ -1,71 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/untsearch.sql.in,v 1.10 2007/11/13 04:24:29 momjian Exp $ */

-- Adjust this setting to control where the objects get dropped.
SET search_path = public;

-- Operator classes
DROP OPERATOR CLASS gin_tsvector_ops USING gin CASCADE;
DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;

-- Operators
DROP OPERATOR || (tsvector, tsvector);
DROP OPERATOR @@ (tsvector, tsquery);
DROP OPERATOR @@ (tsquery, tsvector);

--DROP AGGREGATE stat(tsvector);

-- Configuration tables
DROP TABLE pg_ts_dict;
DROP TABLE pg_ts_parser;
DROP TABLE pg_ts_cfg;
DROP TABLE pg_ts_cfgmap;

-- Types
DROP TYPE tokentype CASCADE;
DROP TYPE tokenout CASCADE;
DROP TYPE tsvector CASCADE;
DROP TYPE tsquery CASCADE;
DROP TYPE gtsvector CASCADE;
--DROP TYPE tsstat CASCADE;
DROP TYPE statinfo CASCADE;
DROP TYPE tsdebug CASCADE;
DROP TYPE gtsq CASCADE;

-- Dictionary functions
DROP FUNCTION lexize(oid, text);
DROP FUNCTION lexize(text, text);
DROP FUNCTION lexize(text);
DROP FUNCTION set_curdict(int);
DROP FUNCTION set_curdict(text);
DROP FUNCTION dex_init(internal);
DROP FUNCTION dex_lexize(internal,internal,int4);
DROP FUNCTION snb_en_init(internal);
DROP FUNCTION snb_lexize(internal,internal,int4);
DROP FUNCTION snb_ru_init_koi8(internal);
DROP FUNCTION snb_ru_init_utf8(internal);
DROP FUNCTION spell_init(internal);
DROP FUNCTION spell_lexize(internal,internal,int4);
DROP FUNCTION thesaurus_init(internal);
DROP FUNCTION thesaurus_lexize(internal,internal,int4,internal);
DROP FUNCTION syn_init(internal);
DROP FUNCTION syn_lexize(internal,internal,int4);

-- Parser functions
DROP FUNCTION set_curprs(int);
DROP FUNCTION set_curprs(text);
DROP FUNCTION prsd_start(internal,int4);
DROP FUNCTION prsd_getlexeme(internal,internal,internal);
DROP FUNCTION prsd_end(internal);
DROP FUNCTION prsd_lextype(internal);
DROP FUNCTION prsd_headline(internal,internal,internal);

-- Configuration functions
DROP FUNCTION set_curcfg(int);
DROP FUNCTION set_curcfg(text);
DROP FUNCTION show_curcfg();

-- GiST support functions
DROP FUNCTION gtsvector_compress(internal);
DROP FUNCTION gtsvector_decompress(internal);
DROP FUNCTION gtsvector_penalty(internal,internal,internal);
DROP FUNCTION gtsvector_picksplit(internal, internal);
DROP FUNCTION gtsvector_union(internal, internal);
DROP FUNCTION gtsq_compress(internal);
DROP FUNCTION gtsq_decompress(internal);
DROP FUNCTION gtsq_penalty(internal,internal,internal);
DROP FUNCTION gtsq_picksplit(internal, internal);
DROP FUNCTION gtsq_union(bytea, internal);

-- Miscellaneous
DROP FUNCTION reset_tsearch();
DROP FUNCTION tsearch2() CASCADE;
DROP FUNCTION _get_parser_from_curcfg();
|
|
@ -1,27 +0,0 @@
|
|||
# $PostgreSQL: pgsql/contrib/tsearch2/wordparser/Makefile,v 1.10 2007/06/26 22:05:03 tgl Exp $

# Objects that are combined into this directory's SUBSYS.o
SUBOBJS = parser.o deflex.o

# Have "clean" remove everything this makefile builds
EXTRA_CLEAN = SUBSYS.o $(SUBOBJS)

# Headers shared with the parent directory
PG_CPPFLAGS = -I$(srcdir)/..

# Build either against an installed server (PGXS) or inside the source tree
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/tsearch2/wordparser
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

# Compile with the shared-library flags as well
override CFLAGS += $(CFLAGS_SL)

all: SUBSYS.o

# Link the objects into one relocatable object file
SUBSYS.o: $(SUBOBJS)
	$(LD) $(LDREL) $(LDOUT) $@ $^
|
||||
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/deflex.c,v 1.4 2006/03/11 04:38:30 momjian Exp $ */
|
||||
|
||||
#include "deflex.h"
|
||||
|
||||
/*
 * Human-readable description of each token type, indexed by token id.
 * Slot 0 is unused; there are 24 slots in total (ids 0 .. 23).
 */
const char *lex_descr[] = {
	/*  0 */ "",
	/*  1 */ "Latin word",
	/*  2 */ "Non-latin word",
	/*  3 */ "Word",
	/*  4 */ "Email",
	/*  5 */ "URL",
	/*  6 */ "Host",
	/*  7 */ "Scientific notation",
	/*  8 */ "VERSION",
	/*  9 */ "Part of hyphenated word",
	/* 10 */ "Non-latin part of hyphenated word",
	/* 11 */ "Latin part of hyphenated word",
	/* 12 */ "Space symbols",
	/* 13 */ "HTML Tag",
	/* 14 */ "Protocol head",
	/* 15 */ "Hyphenated word",
	/* 16 */ "Latin hyphenated word",
	/* 17 */ "Non-latin hyphenated word",
	/* 18 */ "URI",
	/* 19 */ "File or path name",
	/* 20 */ "Decimal notation",
	/* 21 */ "Signed integer",
	/* 22 */ "Unsigned integer",
	/* 23 */ "HTML Entity"
};
|
||||
|
||||
/*
 * Short alias of each token type, indexed by token id.
 * Slot 0 is unused; there are 24 slots in total (ids 0 .. 23).
 */
const char *tok_alias[] = {
	/*  0 */ "",
	/*  1 */ "lword",
	/*  2 */ "nlword",
	/*  3 */ "word",
	/*  4 */ "email",
	/*  5 */ "url",
	/*  6 */ "host",
	/*  7 */ "sfloat",
	/*  8 */ "version",
	/*  9 */ "part_hword",
	/* 10 */ "nlpart_hword",
	/* 11 */ "lpart_hword",
	/* 12 */ "blank",
	/* 13 */ "tag",
	/* 14 */ "protocol",
	/* 15 */ "hword",
	/* 16 */ "lhword",
	/* 17 */ "nlhword",
	/* 18 */ "uri",
	/* 19 */ "file",
	/* 20 */ "float",
	/* 21 */ "int",
	/* 22 */ "uint",
	/* 23 */ "entity"
};
|
|
@ -1,36 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/deflex.h,v 1.3 2006/03/11 04:38:30 momjian Exp $ */
|
||||
|
||||
#ifndef __DEFLEX_H__
|
||||
#define __DEFLEX_H__
|
||||
|
||||
/*
 * Remember: LASTNUM is the highest token id defined below; update it
 * whenever a token type is added.
 */
#define LASTNUM				23

/* Token type ids; lex_descr[] and tok_alias[] are declared for them below */
#define LATWORD				1
#define CYRWORD				2
#define UWORD				3
#define EMAIL				4
#define FURL				5
#define HOST				6
#define SCIENTIFIC			7
#define VERSIONNUMBER		8
#define PARTHYPHENWORD		9
#define CYRPARTHYPHENWORD	10
#define LATPARTHYPHENWORD	11
#define SPACE				12
#define TAG					13
#define PROTOCOL			14
#define HYPHENWORD			15
#define LATHYPHENWORD		16
#define CYRHYPHENWORD		17
#define URI					18
#define FILEPATH			19
#define DECIMAL				20
#define SIGNEDINT			21
#define UNSIGNEDINT			22
#define HTMLENTITY			23

/* Per-token description and alias tables */
extern const char *lex_descr[];
extern const char *tok_alias[];
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -1,167 +0,0 @@
|
|||
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.h,v 1.11 2006/03/11 04:38:30 momjian Exp $ */
|
||||
|
||||
#ifndef __PARSER_H__
|
||||
#define __PARSER_H__
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include "ts_locale.h"
|
||||
|
||||
/*
 * States of the word parser's state machine.  Values are consecutive
 * starting at 0; TPS_Null is a sentinel that must stay last.
 */
typedef enum
{
	TPS_Base = 0,
	/* plain words */
	TPS_InUWord, TPS_InLatWord, TPS_InCyrWord,
	/* integers and blanks */
	TPS_InUnsignedInt, TPS_InSignedIntFirst, TPS_InSignedInt, TPS_InSpace,
	/* decimals */
	TPS_InUDecimalFirst, TPS_InUDecimal, TPS_InDecimalFirst, TPS_InDecimal,
	/* version numbers */
	TPS_InVerVersion, TPS_InSVerVersion, TPS_InVersionFirst, TPS_InVersion,
	/* mantissa */
	TPS_InMantissaFirst, TPS_InMantissaSign, TPS_InMantissa,
	/* HTML entities */
	TPS_InHTMLEntityFirst, TPS_InHTMLEntity, TPS_InHTMLEntityNumFirst,
	TPS_InHTMLEntityNum, TPS_InHTMLEntityEnd,
	/* tags */
	TPS_InTagFirst, TPS_InXMLBegin, TPS_InTagCloseFirst, TPS_InTagName,
	TPS_InTagBeginEnd, TPS_InTag, TPS_InTagEscapeK, TPS_InTagEscapeKK,
	TPS_InTagBackSleshed, TPS_InTagEnd,
	/* comments */
	TPS_InCommentFirst, TPS_InCommentLast, TPS_InComment,
	TPS_InCloseCommentFirst, TPS_InCloseCommentLast, TPS_InCommentEnd,
	/* hosts, ports, e-mail */
	TPS_InHostFirstDomain, TPS_InHostDomainSecond, TPS_InHostDomain,
	TPS_InPortFirst, TPS_InPort, TPS_InHostFirstAN, TPS_InHost, TPS_InEmail,
	/* files and paths */
	TPS_InFileFirst, TPS_InFileTwiddle, TPS_InPathFirst, TPS_InPathFirstFirst,
	TPS_InPathSecond, TPS_InFile, TPS_InFileNext,
	/* URIs, URLs, protocols */
	TPS_InURIFirst, TPS_InURIStart, TPS_InURI, TPS_InFURL,
	TPS_InProtocolFirst, TPS_InProtocolSecond, TPS_InProtocolEnd,
	/* hyphenated words */
	TPS_InHyphenLatWordFirst, TPS_InHyphenLatWord,
	TPS_InHyphenCyrWordFirst, TPS_InHyphenCyrWord,
	TPS_InHyphenUWordFirst, TPS_InHyphenUWord,
	TPS_InHyphenValueFirst, TPS_InHyphenValue, TPS_InHyphenValueExact,
	/* parts of hyphenated words */
	TPS_InParseHyphen, TPS_InParseHyphenHyphen,
	TPS_InHyphenCyrWordPart, TPS_InHyphenLatWordPart, TPS_InHyphenUWordPart,
	TPS_InHyphenUnsignedInt,
	TPS_InHDecimalPartFirst, TPS_InHDecimalPart,
	TPS_InHVersionPartFirst, TPS_InHVersionPart,
	TPS_Null					/* last state (fake value) */
} TParserState;
|
||||
|
||||
/* forward declaration */
|
||||
struct TParser;
|
||||
|
||||
|
||||
typedef int (*TParserCharTest) (struct TParser *); /* any p_is* functions
|
||||
* except p_iseq */
|
||||
typedef void (*TParserSpecial) (struct TParser *); /* special handler for
|
||||
* special cases... */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
TParserCharTest isclass;
|
||||
char c;
|
||||
uint16 flags;
|
||||
TParserState tostate;
|
||||
int type;
|
||||
TParserSpecial special;
|
||||
} TParserStateActionItem;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
TParserState state;
|
||||
TParserStateActionItem *action;
|
||||
} TParserStateAction;
|
||||
|
||||
typedef struct TParserPosition
|
||||
{
|
||||
int posbyte; /* position of parser in bytes */
|
||||
int poschar; /* osition of parser in characters */
|
||||
int charlen; /* length of current char */
|
||||
int lenbytelexeme;
|
||||
int lencharlexeme;
|
||||
TParserState state;
|
||||
struct TParserPosition *prev;
|
||||
int flags;
|
||||
TParserStateActionItem *pushedAtAction;
|
||||
} TParserPosition;
|
||||
|
||||
typedef struct TParser
|
||||
{
|
||||
/* string and position information */
|
||||
char *str; /* multibyte string */
|
||||
int lenstr; /* length of mbstring */
|
||||
#ifdef TS_USE_WIDE
|
||||
wchar_t *wstr; /* wide character string */
|
||||
int lenwstr; /* length of wsting */
|
||||
#endif
|
||||
|
||||
/* State of parse */
|
||||
int charmaxlen;
|
||||
bool usewide;
|
||||
TParserPosition *state;
|
||||
bool ignore;
|
||||
bool wanthost;
|
||||
|
||||
/* silly char */
|
||||
char c;
|
||||
|
||||
/* out */
|
||||
char *lexeme;
|
||||
int lenbytelexeme;
|
||||
int lencharlexeme;
|
||||
int type;
|
||||
|
||||
} TParser;
|
||||
|
||||
|
||||
TParser *TParserInit(char *, int);
|
||||
bool TParserGet(TParser *);
|
||||
void TParserClose(TParser *);
|
||||
|
||||
#endif
|
|
@ -1,611 +0,0 @@
|
|||
/*
|
||||
* interface functions to parser
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "catalog/pg_type.h"
|
||||
#include "executor/spi.h"
|
||||
#include "fmgr.h"
|
||||
#include "funcapi.h"
|
||||
#include "utils/array.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
#include "wparser.h"
|
||||
#include "ts_cfg.h"
|
||||
#include "snmap.h"
|
||||
#include "common.h"
|
||||
|
||||
/*********top interface**********/
|
||||
|
||||
static Oid current_parser_id = InvalidOid;
|
||||
|
||||
void
|
||||
init_prs(Oid id, WParserInfo * prs)
|
||||
{
|
||||
Oid arg[1];
|
||||
bool isnull;
|
||||
Datum pars[1];
|
||||
int stat;
|
||||
void *plan;
|
||||
char buf[1024],
|
||||
*nsp;
|
||||
|
||||
arg[0] = OIDOID;
|
||||
pars[0] = ObjectIdGetDatum(id);
|
||||
|
||||
memset(prs, 0, sizeof(WParserInfo));
|
||||
SPI_connect();
|
||||
nsp = get_namespace(TSNSP_FunctionOid);
|
||||
sprintf(buf, "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from %s.pg_ts_parser where oid = $1", nsp);
|
||||
pfree(nsp);
|
||||
plan = SPI_prepare(buf, 1, arg);
|
||||
if (!plan)
|
||||
ts_error(ERROR, "SPI_prepare() failed");
|
||||
|
||||
stat = SPI_execp(plan, pars, " ", 1);
|
||||
if (stat < 0)
|
||||
ts_error(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
{
|
||||
Oid oid = InvalidOid;
|
||||
|
||||
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
|
||||
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull));
|
||||
fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
|
||||
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
|
||||
fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
|
||||
prs->lextype = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull));
|
||||
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull));
|
||||
fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
|
||||
prs->prs_id = id;
|
||||
}
|
||||
else
|
||||
ts_error(ERROR, "No parser with id %d", id);
|
||||
SPI_freeplan(plan);
|
||||
SPI_finish();
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
WParserInfo *last_prs;
|
||||
int len;
|
||||
int reallen;
|
||||
WParserInfo *list;
|
||||
SNMap name2id_map;
|
||||
} PrsList;
|
||||
|
||||
static PrsList PList = {NULL, 0, 0, NULL, {0, 0, NULL}};
|
||||
|
||||
void
|
||||
reset_prs(void)
|
||||
{
|
||||
freeSNMap(&(PList.name2id_map));
|
||||
if (PList.list)
|
||||
free(PList.list);
|
||||
memset(&PList, 0, sizeof(PrsList));
|
||||
}
|
||||
|
||||
static int
|
||||
compareprs(const void *a, const void *b)
|
||||
{
|
||||
if (((WParserInfo *) a)->prs_id == ((WParserInfo *) b)->prs_id)
|
||||
return 0;
|
||||
return (((WParserInfo *) a)->prs_id < ((WParserInfo *) b)->prs_id) ? -1 : 1;
|
||||
}
|
||||
|
||||
WParserInfo *
|
||||
findprs(Oid id)
|
||||
{
|
||||
/* last used prs */
|
||||
if (PList.last_prs && PList.last_prs->prs_id == id)
|
||||
return PList.last_prs;
|
||||
|
||||
/* already used prs */
|
||||
if (PList.len != 0)
|
||||
{
|
||||
WParserInfo key;
|
||||
|
||||
key.prs_id = id;
|
||||
PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
|
||||
if (PList.last_prs != NULL)
|
||||
return PList.last_prs;
|
||||
}
|
||||
|
||||
/* last chance */
|
||||
if (PList.len == PList.reallen)
|
||||
{
|
||||
WParserInfo *tmp;
|
||||
int reallen = (PList.reallen) ? 2 * PList.reallen : 16;
|
||||
|
||||
tmp = (WParserInfo *) realloc(PList.list, sizeof(WParserInfo) * reallen);
|
||||
if (!tmp)
|
||||
ts_error(ERROR, "No memory");
|
||||
PList.reallen = reallen;
|
||||
PList.list = tmp;
|
||||
}
|
||||
init_prs(id, &(PList.list[PList.len]) );
|
||||
PList.last_prs = &(PList.list[PList.len]);
|
||||
PList.len++;
|
||||
qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
|
||||
return findprs(id); /* qsort changed order!! */ ;
|
||||
}
|
||||
|
||||
Oid
|
||||
name2id_prs(text *name)
|
||||
{
|
||||
Oid arg[1];
|
||||
bool isnull;
|
||||
Datum pars[1];
|
||||
int stat;
|
||||
Oid id = findSNMap_t(&(PList.name2id_map), name);
|
||||
char buf[1024],
|
||||
*nsp;
|
||||
void *plan;
|
||||
|
||||
arg[0] = TEXTOID;
|
||||
pars[0] = PointerGetDatum(name);
|
||||
|
||||
if (id)
|
||||
return id;
|
||||
|
||||
SPI_connect();
|
||||
nsp = get_namespace(TSNSP_FunctionOid);
|
||||
sprintf(buf, "select oid from %s.pg_ts_parser where prs_name = $1", nsp);
|
||||
pfree(nsp);
|
||||
plan = SPI_prepare(buf, 1, arg);
|
||||
if (!plan)
|
||||
ts_error(ERROR, "SPI_prepare() failed");
|
||||
|
||||
stat = SPI_execp(plan, pars, " ", 1);
|
||||
if (stat < 0)
|
||||
ts_error(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
else
|
||||
ts_error(ERROR, "No parser '%s'", text2char(name));
|
||||
SPI_freeplan(plan);
|
||||
SPI_finish();
|
||||
addSNMap_t(&(PList.name2id_map), name, id);
|
||||
return id;
|
||||
}
|
||||
|
||||
|
||||
/******sql-level interface******/
|
||||
typedef struct
|
||||
{
|
||||
int cur;
|
||||
LexDescr *list;
|
||||
} TypeStorage;
|
||||
|
||||
static void
|
||||
setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx, Oid prsid)
|
||||
{
|
||||
TupleDesc tupdesc;
|
||||
MemoryContext oldcontext;
|
||||
TypeStorage *st;
|
||||
WParserInfo *prs = findprs(prsid);
|
||||
|
||||
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
||||
|
||||
st = (TypeStorage *) palloc(sizeof(TypeStorage));
|
||||
st->cur = 0;
|
||||
st->list = (LexDescr *) DatumGetPointer(
|
||||
OidFunctionCall1(prs->lextype, PointerGetDatum(prs->prs))
|
||||
);
|
||||
funcctx->user_fctx = (void *) st;
|
||||
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
|
||||
elog(ERROR, "return type must be a row type");
|
||||
tupdesc = CreateTupleDescCopy(tupdesc);
|
||||
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
}
|
||||
|
||||
static Datum
|
||||
process_call(FuncCallContext *funcctx)
|
||||
{
|
||||
TypeStorage *st;
|
||||
|
||||
st = (TypeStorage *) funcctx->user_fctx;
|
||||
if (st->list && st->list[st->cur].lexid)
|
||||
{
|
||||
Datum result;
|
||||
char *values[3];
|
||||
char txtid[16];
|
||||
HeapTuple tuple;
|
||||
|
||||
values[0] = txtid;
|
||||
sprintf(txtid, "%d", st->list[st->cur].lexid);
|
||||
values[1] = st->list[st->cur].alias;
|
||||
values[2] = st->list[st->cur].descr;
|
||||
|
||||
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
|
||||
result = HeapTupleGetDatum(tuple);
|
||||
|
||||
pfree(values[1]);
|
||||
pfree(values[2]);
|
||||
st->cur++;
|
||||
return result;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (st->list)
|
||||
pfree(st->list);
|
||||
pfree(st);
|
||||
}
|
||||
return (Datum) 0;
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(token_type);
|
||||
Datum token_type(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
token_type(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
SET_FUNCOID();
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
setup_firstcall(fcinfo, funcctx, PG_GETARG_OID(0));
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
|
||||
if ((result = process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(token_type_byname);
|
||||
Datum token_type_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
token_type_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
SET_FUNCOID();
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
text *name = PG_GETARG_TEXT_P(0);
|
||||
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
setup_firstcall(fcinfo, funcctx, name2id_prs(name));
|
||||
PG_FREE_IF_COPY(name, 0);
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
|
||||
if ((result = process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(token_type_current);
|
||||
Datum token_type_current(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
token_type_current(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
SET_FUNCOID();
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
if (current_parser_id == InvalidOid)
|
||||
current_parser_id = name2id_prs(char2text("default"));
|
||||
setup_firstcall(fcinfo, funcctx, current_parser_id);
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
|
||||
if ((result = process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_curprs);
|
||||
Datum set_curprs(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
set_curprs(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SET_FUNCOID();
|
||||
findprs(PG_GETARG_OID(0));
|
||||
current_parser_id = PG_GETARG_OID(0);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_curprs_byname);
|
||||
Datum set_curprs_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
set_curprs_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *name = PG_GETARG_TEXT_P(0);
|
||||
|
||||
SET_FUNCOID();
|
||||
DirectFunctionCall1(
|
||||
set_curprs,
|
||||
ObjectIdGetDatum(name2id_prs(name))
|
||||
);
|
||||
PG_FREE_IF_COPY(name, 0);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/* One parsed token: its type code and a NUL-terminated copy of its text. */
typedef struct
{
	int			type;			/* token type as reported by the parser */
	char	   *lexeme;			/* token text */
} LexemeEntry;

/*
 * Iteration state of the parse*() set-returning functions: the complete
 * list of tokens plus a cursor into it.
 */
typedef struct
{
	int			cur;			/* index of the next entry of list */
	int			len;			/* allocated size while the list is being
								 * built, number of entries afterwards */
	LexemeEntry *list;			/* the tokens */
} PrsStorage;
|
||||
|
||||
|
||||
static void
|
||||
prs_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
|
||||
int prsid, text *txt)
|
||||
{
|
||||
TupleDesc tupdesc;
|
||||
MemoryContext oldcontext;
|
||||
PrsStorage *st;
|
||||
WParserInfo *prs = findprs(prsid);
|
||||
char *lex = NULL;
|
||||
int llen = 0,
|
||||
type = 0;
|
||||
|
||||
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
||||
|
||||
st = (PrsStorage *) palloc(sizeof(PrsStorage));
|
||||
st->cur = 0;
|
||||
st->len = 16;
|
||||
st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len);
|
||||
|
||||
prs->prs = (void *) DatumGetPointer(
|
||||
FunctionCall2(
|
||||
&(prs->start_info),
|
||||
PointerGetDatum(VARDATA(txt)),
|
||||
Int32GetDatum(VARSIZE(txt) - VARHDRSZ)
|
||||
)
|
||||
);
|
||||
|
||||
while ((type = DatumGetInt32(FunctionCall3(
|
||||
&(prs->getlexeme_info),
|
||||
PointerGetDatum(prs->prs),
|
||||
PointerGetDatum(&lex),
|
||||
PointerGetDatum(&llen)))) != 0)
|
||||
{
|
||||
|
||||
if (st->cur >= st->len)
|
||||
{
|
||||
st->len = 2 * st->len;
|
||||
st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len);
|
||||
}
|
||||
st->list[st->cur].lexeme = palloc(llen + 1);
|
||||
memcpy(st->list[st->cur].lexeme, lex, llen);
|
||||
st->list[st->cur].lexeme[llen] = '\0';
|
||||
st->list[st->cur].type = type;
|
||||
st->cur++;
|
||||
}
|
||||
|
||||
FunctionCall1(
|
||||
&(prs->end_info),
|
||||
PointerGetDatum(prs->prs)
|
||||
);
|
||||
|
||||
st->len = st->cur;
|
||||
st->cur = 0;
|
||||
|
||||
funcctx->user_fctx = (void *) st;
|
||||
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
|
||||
elog(ERROR, "return type must be a row type");
|
||||
tupdesc = CreateTupleDescCopy(tupdesc);
|
||||
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
}
|
||||
|
||||
static Datum
|
||||
prs_process_call(FuncCallContext *funcctx)
|
||||
{
|
||||
PrsStorage *st;
|
||||
|
||||
st = (PrsStorage *) funcctx->user_fctx;
|
||||
if (st->cur < st->len)
|
||||
{
|
||||
Datum result;
|
||||
char *values[2];
|
||||
char tid[16];
|
||||
HeapTuple tuple;
|
||||
|
||||
values[0] = tid;
|
||||
sprintf(tid, "%d", st->list[st->cur].type);
|
||||
values[1] = st->list[st->cur].lexeme;
|
||||
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
|
||||
result = HeapTupleGetDatum(tuple);
|
||||
|
||||
pfree(values[1]);
|
||||
st->cur++;
|
||||
return result;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (st->list)
|
||||
pfree(st->list);
|
||||
pfree(st);
|
||||
}
|
||||
return (Datum) 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(parse);
|
||||
Datum parse(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
parse(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
SET_FUNCOID();
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
text *txt = PG_GETARG_TEXT_P(1);
|
||||
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
prs_setup_firstcall(fcinfo, funcctx, PG_GETARG_OID(0), txt);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
|
||||
if ((result = prs_process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(parse_byname);
|
||||
Datum parse_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
parse_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
SET_FUNCOID();
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
text *name = PG_GETARG_TEXT_P(0);
|
||||
text *txt = PG_GETARG_TEXT_P(1);
|
||||
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
prs_setup_firstcall(fcinfo, funcctx, name2id_prs(name), txt);
|
||||
PG_FREE_IF_COPY(name, 0);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
|
||||
if ((result = prs_process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(parse_current);
|
||||
Datum parse_current(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
parse_current(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
SET_FUNCOID();
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
text *txt = PG_GETARG_TEXT_P(0);
|
||||
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
if (current_parser_id == InvalidOid)
|
||||
current_parser_id = name2id_prs(char2text("default"));
|
||||
prs_setup_firstcall(fcinfo, funcctx, current_parser_id, txt);
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
|
||||
if ((result = prs_process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(headline);
|
||||
Datum headline(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
headline(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_P(1);
|
||||
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
|
||||
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
|
||||
HLPRSTEXT prs;
|
||||
text *out;
|
||||
TSCfgInfo *cfg;
|
||||
WParserInfo *prsobj;
|
||||
|
||||
SET_FUNCOID();
|
||||
cfg = findcfg(PG_GETARG_OID(0));
|
||||
prsobj = findprs(cfg->prs_id);
|
||||
|
||||
memset(&prs, 0, sizeof(HLPRSTEXT));
|
||||
prs.lenwords = 32;
|
||||
prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
|
||||
hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
|
||||
|
||||
|
||||
FunctionCall3(
|
||||
&(prsobj->headline_info),
|
||||
PointerGetDatum(&prs),
|
||||
PointerGetDatum(opt),
|
||||
PointerGetDatum(query)
|
||||
);
|
||||
|
||||
out = genhl(&prs);
|
||||
|
||||
PG_FREE_IF_COPY(in, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
if (opt)
|
||||
PG_FREE_IF_COPY(opt, 3);
|
||||
pfree(prs.words);
|
||||
pfree(prs.startsel);
|
||||
pfree(prs.stopsel);
|
||||
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(headline_byname);
|
||||
Datum headline_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
headline_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *cfg = PG_GETARG_TEXT_P(0);
|
||||
|
||||
Datum out;
|
||||
|
||||
SET_FUNCOID();
|
||||
out = DirectFunctionCall4(
|
||||
headline,
|
||||
ObjectIdGetDatum(name2id_cfg(cfg)),
|
||||
PG_GETARG_DATUM(1),
|
||||
PG_GETARG_DATUM(2),
|
||||
(PG_NARGS() > 3) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
|
||||
);
|
||||
|
||||
PG_FREE_IF_COPY(cfg, 0);
|
||||
PG_RETURN_DATUM(out);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(headline_current);
|
||||
Datum headline_current(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
headline_current(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SET_FUNCOID();
|
||||
PG_RETURN_DATUM(DirectFunctionCall4(
|
||||
headline,
|
||||
ObjectIdGetDatum(get_currcfg()),
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1),
|
||||
(PG_NARGS() > 2) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
|
||||
));
|
||||
}
|
|
@ -1,30 +0,0 @@
|
|||
#ifndef __WPARSER_H__
|
||||
#define __WPARSER_H__
|
||||
#include "postgres.h"
|
||||
#include "fmgr.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
Oid prs_id;
|
||||
FmgrInfo start_info;
|
||||
FmgrInfo getlexeme_info;
|
||||
FmgrInfo end_info;
|
||||
FmgrInfo headline_info;
|
||||
Oid lextype;
|
||||
void *prs;
|
||||
} WParserInfo;
|
||||
|
||||
void init_prs(Oid id, WParserInfo * prs);
|
||||
WParserInfo *findprs(Oid id);
|
||||
Oid name2id_prs(text *name);
|
||||
void reset_prs(void);
|
||||
|
||||
|
||||
/*
 * Description of one token type.  Arrays of these are terminated by an
 * entry whose lexid is 0.
 */
typedef struct
{
	int			lexid;			/* numeric token type; 0 ends an array */
	char	   *alias;			/* short name of the token type */
	char	   *descr;			/* human-readable description */
} LexDescr;
|
||||
|
||||
#endif
|
|
@ -1,390 +0,0 @@
|
|||
/*
|
||||
* default word parser
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "utils/builtins.h"
|
||||
|
||||
#include "dict.h"
|
||||
#include "wparser.h"
|
||||
#include "common.h"
|
||||
#include "ts_cfg.h"
|
||||
#include "wordparser/parser.h"
|
||||
#include "wordparser/deflex.h"
|
||||
|
||||
PG_FUNCTION_INFO_V1(prsd_lextype);
|
||||
Datum prsd_lextype(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
prsd_lextype(PG_FUNCTION_ARGS)
|
||||
{
|
||||
LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1));
|
||||
int i;
|
||||
|
||||
for (i = 1; i <= LASTNUM; i++)
|
||||
{
|
||||
descr[i - 1].lexid = i;
|
||||
descr[i - 1].alias = pstrdup(tok_alias[i]);
|
||||
descr[i - 1].descr = pstrdup(lex_descr[i]);
|
||||
}
|
||||
|
||||
descr[LASTNUM].lexid = 0;
|
||||
|
||||
PG_RETURN_POINTER(descr);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(prsd_start);
|
||||
Datum prsd_start(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
prsd_start(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_POINTER(TParserInit((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1)));
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(prsd_getlexeme);
|
||||
Datum prsd_getlexeme(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
prsd_getlexeme(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TParser *p = (TParser *) PG_GETARG_POINTER(0);
|
||||
char **t = (char **) PG_GETARG_POINTER(1);
|
||||
int *tlen = (int *) PG_GETARG_POINTER(2);
|
||||
|
||||
if (!TParserGet(p))
|
||||
PG_RETURN_INT32(0);
|
||||
|
||||
*t = p->lexeme;
|
||||
*tlen = p->lenbytelexeme;
|
||||
|
||||
PG_RETURN_INT32(p->type);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(prsd_end);
|
||||
Datum prsd_end(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
prsd_end(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TParser *p = (TParser *) PG_GETARG_POINTER(0);
|
||||
|
||||
TParserClose(p);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
/*
 * Token-class predicates used by headline generation.  The numbers are
 * token type codes of the default parser (presumably the values declared in
 * wordparser/deflex.h -- confirm there).  Each macro may evaluate its
 * argument more than once.
 */
#define LEAVETOKEN(x)		((x) == 12)
#define COMPLEXTOKEN(x)		((x) == 5 || (x) == 15 || (x) == 16 || (x) == 17)
#define ENDPUNCTOKEN(x)		LEAVETOKEN(x)

/* token classes that are ignored in various contexts */
#define TS_IDIGNORE(x)		((x) == 12 || (x) == 13 || (x) == 14 || (x) == 23)
#define HLIDIGNORE(x)		((x) == 13 || COMPLEXTOKEN(x))
#define HTMLHLIDIGNORE(x)	COMPLEXTOKEN(x)

/* tokens that do not count as words / may not end a headline */
#define NONWORDTOKEN(x)		((x) == 12 || HLIDIGNORE(x))
#define NOENDTOKEN(x)		(NONWORDTOKEN(x) || TS_IDIGNORE(x) || \
							 (x) == 7 || (x) == 8 || \
							 (x) == 20 || (x) == 21 || (x) == 22)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
HLWORD *words;
|
||||
int len;
|
||||
} hlCheck;
|
||||
|
||||
static bool
|
||||
checkcondition_HL(void *checkval, ITEM * val)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ((hlCheck *) checkval)->len; i++)
|
||||
{
|
||||
if (((hlCheck *) checkval)->words[i].item == val)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
hlCover(HLPRSTEXT * prs, QUERYTYPE * query, int *p, int *q)
|
||||
{
|
||||
int i,
|
||||
j;
|
||||
ITEM *item = GETQUERY(query);
|
||||
int pos = *p;
|
||||
|
||||
*q = 0;
|
||||
*p = 0x7fffffff;
|
||||
|
||||
for (j = 0; j < query->size; j++)
|
||||
{
|
||||
if (item->type != VAL)
|
||||
{
|
||||
item++;
|
||||
continue;
|
||||
}
|
||||
for (i = pos; i < prs->curwords; i++)
|
||||
{
|
||||
if (prs->words[i].item == item)
|
||||
{
|
||||
if (i > *q)
|
||||
*q = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
item++;
|
||||
}
|
||||
|
||||
if (*q == 0)
|
||||
return false;
|
||||
|
||||
item = GETQUERY(query);
|
||||
for (j = 0; j < query->size; j++)
|
||||
{
|
||||
if (item->type != VAL)
|
||||
{
|
||||
item++;
|
||||
continue;
|
||||
}
|
||||
for (i = *q; i >= pos; i--)
|
||||
{
|
||||
if (prs->words[i].item == item)
|
||||
{
|
||||
if (i < *p)
|
||||
*p = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
item++;
|
||||
}
|
||||
|
||||
if (*p <= *q)
|
||||
{
|
||||
hlCheck ch;
|
||||
|
||||
ch.words = &(prs->words[*p]);
|
||||
ch.len = *q - *p + 1;
|
||||
if (TS_execute(GETQUERY(query), &ch, false, checkcondition_HL))
|
||||
return true;
|
||||
else
|
||||
{
|
||||
(*p)++;
|
||||
return hlCover(prs, query, p, q);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(prsd_headline);
|
||||
Datum prsd_headline(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
prsd_headline(PG_FUNCTION_ARGS)
|
||||
{
|
||||
HLPRSTEXT *prs = (HLPRSTEXT *) PG_GETARG_POINTER(0);
|
||||
text *opt = (text *) PG_GETARG_POINTER(1); /* can't be toasted */
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_GETARG_POINTER(2); /* can't be toasted */
|
||||
|
||||
/* from opt + start and and tag */
|
||||
int min_words = 15;
|
||||
int max_words = 35;
|
||||
int shortword = 3;
|
||||
|
||||
int p = 0,
|
||||
q = 0;
|
||||
int bestb = -1,
|
||||
beste = -1;
|
||||
int bestlen = -1;
|
||||
int pose = 0,
|
||||
posb,
|
||||
poslen,
|
||||
curlen;
|
||||
|
||||
int i;
|
||||
int highlight = 0;
|
||||
|
||||
/* config */
|
||||
prs->startsel = NULL;
|
||||
prs->stopsel = NULL;
|
||||
if (opt)
|
||||
{
|
||||
Map *map,
|
||||
*mptr;
|
||||
|
||||
parse_cfgdict(opt, &map);
|
||||
mptr = map;
|
||||
|
||||
while (mptr && mptr->key)
|
||||
{
|
||||
if (pg_strcasecmp(mptr->key, "MaxWords") == 0)
|
||||
max_words = pg_atoi(mptr->value, 4, 1);
|
||||
else if (pg_strcasecmp(mptr->key, "MinWords") == 0)
|
||||
min_words = pg_atoi(mptr->value, 4, 1);
|
||||
else if (pg_strcasecmp(mptr->key, "ShortWord") == 0)
|
||||
shortword = pg_atoi(mptr->value, 4, 1);
|
||||
else if (pg_strcasecmp(mptr->key, "StartSel") == 0)
|
||||
prs->startsel = pstrdup(mptr->value);
|
||||
else if (pg_strcasecmp(mptr->key, "StopSel") == 0)
|
||||
prs->stopsel = pstrdup(mptr->value);
|
||||
else if (pg_strcasecmp(mptr->key, "HighlightAll") == 0)
|
||||
highlight = (
|
||||
pg_strcasecmp(mptr->value, "1") == 0 ||
|
||||
pg_strcasecmp(mptr->value, "on") == 0 ||
|
||||
pg_strcasecmp(mptr->value, "true") == 0 ||
|
||||
pg_strcasecmp(mptr->value, "t") == 0 ||
|
||||
pg_strcasecmp(mptr->value, "y") == 0 ||
|
||||
pg_strcasecmp(mptr->value, "yes") == 0) ?
|
||||
1 : 0;
|
||||
|
||||
pfree(mptr->key);
|
||||
pfree(mptr->value);
|
||||
|
||||
mptr++;
|
||||
}
|
||||
pfree(map);
|
||||
|
||||
if (highlight == 0)
|
||||
{
|
||||
if (min_words >= max_words)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("MinWords should be less than MaxWords")));
|
||||
if (min_words <= 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("MinWords should be positive")));
|
||||
if (shortword < 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("ShortWord should be >= 0")));
|
||||
}
|
||||
}
|
||||
|
||||
if (highlight == 0)
|
||||
{
|
||||
while (hlCover(prs, query, &p, &q))
|
||||
{
|
||||
/* find cover len in words */
|
||||
curlen = 0;
|
||||
poslen = 0;
|
||||
for (i = p; i <= q && curlen < max_words; i++)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen++;
|
||||
pose = i;
|
||||
}
|
||||
|
||||
if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))
|
||||
{
|
||||
/* best already finded, so try one more cover */
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
|
||||
posb = p;
|
||||
if (curlen < max_words)
|
||||
{ /* find good end */
|
||||
for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
|
||||
{
|
||||
if (i != q)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen++;
|
||||
}
|
||||
pose = i;
|
||||
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
|
||||
continue;
|
||||
if (curlen >= min_words)
|
||||
break;
|
||||
}
|
||||
if (curlen < min_words && i >= prs->curwords)
|
||||
{ /* got end of text and our cover is shoter
|
||||
* than min_words */
|
||||
for (i = p; i >= 0; i--)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen++;
|
||||
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
|
||||
continue;
|
||||
if (curlen >= min_words)
|
||||
break;
|
||||
}
|
||||
posb = (i >= 0) ? i : 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* shorter cover :((( */
|
||||
for (; curlen > min_words; i--)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen--;
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen--;
|
||||
pose = i;
|
||||
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
|
||||
(bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
|
||||
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)))
|
||||
{
|
||||
bestb = posb;
|
||||
beste = pose;
|
||||
bestlen = poslen;
|
||||
}
|
||||
|
||||
p++;
|
||||
}
|
||||
|
||||
if (bestlen < 0)
|
||||
{
|
||||
curlen = 0;
|
||||
for (i = 0; i < prs->curwords && curlen < min_words; i++)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
pose = i;
|
||||
}
|
||||
bestb = 0;
|
||||
beste = pose;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
bestb = 0;
|
||||
beste = prs->curwords - 1;
|
||||
}
|
||||
|
||||
for (i = bestb; i <= beste; i++)
|
||||
{
|
||||
if (prs->words[i].item)
|
||||
prs->words[i].selected = 1;
|
||||
if (highlight == 0)
|
||||
{
|
||||
if (HLIDIGNORE(prs->words[i].type))
|
||||
prs->words[i].replace = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (HTMLHLIDIGNORE(prs->words[i].type))
|
||||
prs->words[i].replace = 1;
|
||||
}
|
||||
|
||||
prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
|
||||
}
|
||||
|
||||
if (!prs->startsel)
|
||||
prs->startsel = pstrdup("<b>");
|
||||
if (!prs->stopsel)
|
||||
prs->stopsel = pstrdup("</b>");
|
||||
prs->startsellen = strlen(prs->startsel);
|
||||
prs->stopsellen = strlen(prs->stopsel);
|
||||
|
||||
PG_RETURN_POINTER(prs);
|
||||
}
|
Loading…
Reference in New Issue