Add GIN support for pg_trgm. From Guillaume Smet <guillaume.smet@gmail.com>
with minor editorization by me.
This commit is contained in:
parent
547e41cdf8
commit
15f91f2789
@ -1,7 +1,7 @@
|
|||||||
# $PostgreSQL: pgsql/contrib/pg_trgm/Makefile,v 1.6 2007/02/09 17:24:33 petere Exp $
|
# $PostgreSQL: pgsql/contrib/pg_trgm/Makefile,v 1.7 2007/03/14 14:15:40 teodor Exp $
|
||||||
|
|
||||||
MODULE_big = pg_trgm
|
MODULE_big = pg_trgm
|
||||||
OBJS = trgm_op.o trgm_gist.o
|
OBJS = trgm_op.o trgm_gist.o trgm_gin.o
|
||||||
|
|
||||||
DATA_built = pg_trgm.sql
|
DATA_built = pg_trgm.sql
|
||||||
DATA = uninstall_pg_trgm.sql
|
DATA = uninstall_pg_trgm.sql
|
||||||
|
@ -113,6 +113,8 @@ Tsearch2 Integration
|
|||||||
Next, create a trigram index on the word column:
|
Next, create a trigram index on the word column:
|
||||||
|
|
||||||
CREATE INDEX words_idx ON words USING gist(word gist_trgm_ops);
|
CREATE INDEX words_idx ON words USING gist(word gist_trgm_ops);
|
||||||
|
or
|
||||||
|
CREATE INDEX words_idx ON words USING gin(word gin_trgm_ops);
|
||||||
|
|
||||||
Now, a SELECT query similar to the example above can be used to
|
Now, a SELECT query similar to the example above can be used to
|
||||||
suggest spellings for misspelled words in user search terms. A
|
suggest spellings for misspelled words in user search terms. A
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -36,7 +36,7 @@ CREATE OPERATOR % (
|
|||||||
JOIN = contjoinsel
|
JOIN = contjoinsel
|
||||||
);
|
);
|
||||||
|
|
||||||
--gist key
|
-- gist key
|
||||||
CREATE FUNCTION gtrgm_in(cstring)
|
CREATE FUNCTION gtrgm_in(cstring)
|
||||||
RETURNS gtrgm
|
RETURNS gtrgm
|
||||||
AS 'MODULE_PATHNAME'
|
AS 'MODULE_PATHNAME'
|
||||||
@ -53,7 +53,7 @@ CREATE TYPE gtrgm (
|
|||||||
OUTPUT = gtrgm_out
|
OUTPUT = gtrgm_out
|
||||||
);
|
);
|
||||||
|
|
||||||
-- support functions
|
-- support functions for gist
|
||||||
CREATE FUNCTION gtrgm_consistent(gtrgm,internal,int4)
|
CREATE FUNCTION gtrgm_consistent(gtrgm,internal,int4)
|
||||||
RETURNS bool
|
RETURNS bool
|
||||||
AS 'MODULE_PATHNAME'
|
AS 'MODULE_PATHNAME'
|
||||||
@ -89,7 +89,7 @@ RETURNS internal
|
|||||||
AS 'MODULE_PATHNAME'
|
AS 'MODULE_PATHNAME'
|
||||||
LANGUAGE C;
|
LANGUAGE C;
|
||||||
|
|
||||||
-- create the operator class
|
-- create the operator class for gist
|
||||||
CREATE OPERATOR CLASS gist_trgm_ops
|
CREATE OPERATOR CLASS gist_trgm_ops
|
||||||
FOR TYPE text USING gist
|
FOR TYPE text USING gist
|
||||||
AS
|
AS
|
||||||
@ -103,5 +103,31 @@ AS
|
|||||||
FUNCTION 7 gtrgm_same (gtrgm, gtrgm, internal),
|
FUNCTION 7 gtrgm_same (gtrgm, gtrgm, internal),
|
||||||
STORAGE gtrgm;
|
STORAGE gtrgm;
|
||||||
|
|
||||||
|
-- support functions for gin
|
||||||
|
-- gin_extract_trgm is declared under two SQL signatures. Neither names an
-- explicit link symbol, so both resolve to the C function of the same name
-- in MODULE_PATHNAME. The gin_trgm_ops operator class uses the two-argument
-- form as support function 2 and the three-argument form as support
-- function 3.
CREATE FUNCTION gin_extract_trgm(text, internal)
|
||||||
|
RETURNS internal
|
||||||
|
AS 'MODULE_PATHNAME'
|
||||||
|
LANGUAGE C;
|
||||||
|
|
||||||
|
CREATE FUNCTION gin_extract_trgm(text, internal, internal)
|
||||||
|
RETURNS internal
|
||||||
|
AS 'MODULE_PATHNAME'
|
||||||
|
LANGUAGE C;
|
||||||
|
|
||||||
|
-- GIN consistent function for trigram similarity.
-- The C implementation finishes with PG_RETURN_BOOL, so the SQL-level
-- result type is bool (not internal). The argument list is unchanged so
-- the operator class and the uninstall script keep matching this signature.
CREATE FUNCTION gin_trgm_consistent(internal, internal, text)
RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE C;
|
||||||
|
|
||||||
|
-- create the operator class for gin
|
||||||
|
-- GIN operator class for text columns.
--   * Index keys are stored as int4 (STORAGE) and ordered with btint4cmp
--     (support function 1).
--   * gin_extract_trgm is registered twice: the two-argument form as support
--     function 2 and the three-argument form as support function 3.
--   * gin_trgm_consistent is support function 4.
--   * The % operator is strategy 1 and is flagged RECHECK.
CREATE OPERATOR CLASS gin_trgm_ops
|
||||||
|
FOR TYPE text USING gin
|
||||||
|
AS
|
||||||
|
OPERATOR 1 % (text, text) RECHECK,
|
||||||
|
FUNCTION 1 btint4cmp (int4, int4),
|
||||||
|
FUNCTION 2 gin_extract_trgm (text, internal),
|
||||||
|
FUNCTION 3 gin_extract_trgm (text, internal, internal),
|
||||||
|
FUNCTION 4 gin_trgm_consistent (internal, internal, text),
|
||||||
|
STORAGE int4;
|
||||||
|
|
||||||
COMMIT;
|
COMMIT;
|
||||||
|
@ -28,3 +28,11 @@ select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu098
|
|||||||
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
|
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
|
||||||
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
|
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
|
||||||
|
|
||||||
|
drop index trgm_idx;
|
||||||
|
create index trgm_idx on test_trgm using gin (t gin_trgm_ops);
|
||||||
|
set enable_seqscan=off;
|
||||||
|
|
||||||
|
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
|
||||||
|
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
|
||||||
|
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@ typedef char trgm[3];
|
|||||||
*(((char*)(a))+2) = *(((char*)(b))+2); \
|
*(((char*)(a))+2) = *(((char*)(b))+2); \
|
||||||
} while(0);
|
} while(0);
|
||||||
|
|
||||||
|
/*
 * TRGMINT - pack the three bytes of a trigram into one non-negative int:
 * byte 0 in bits 0..7, byte 1 in bits 8..15, byte 2 in bits 16..23.
 *
 * The bytes are read as unsigned char so that values >= 0x80 are not
 * sign-extended on platforms where plain char is signed; that also avoids
 * left-shifting a negative value and makes the result identical regardless
 * of the platform's char signedness.
 *
 * Note: the argument is evaluated three times; do not pass an expression
 * with side effects.
 */
#define TRGMINT(a) ( (*(((unsigned char*)(a))+2)<<16)+(*(((unsigned char*)(a))+1)<<8)+*(((unsigned char*)(a))+0) )
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
|
77
contrib/pg_trgm/trgm_gin.c
Normal file
77
contrib/pg_trgm/trgm_gin.c
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
#include "trgm.h"
|
||||||
|
|
||||||
|
#include "access/gin.h"
|
||||||
|
#include "access/itup.h"
|
||||||
|
#include "access/tuptoaster.h"
|
||||||
|
#include "storage/bufpage.h"
|
||||||
|
#include "utils/array.h"
|
||||||
|
#include "utils/builtins.h"
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(gin_extract_trgm);
|
||||||
|
Datum gin_extract_trgm(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(gin_trgm_consistent);
|
||||||
|
Datum gin_trgm_consistent(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
Datum
|
||||||
|
gin_extract_trgm(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
text *val = (text *) PG_GETARG_TEXT_P(0);
|
||||||
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
||||||
|
Datum *entries = NULL;
|
||||||
|
TRGM *trg;
|
||||||
|
int4 trglen;
|
||||||
|
|
||||||
|
*nentries = 0;
|
||||||
|
|
||||||
|
trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
|
||||||
|
trglen = ARRNELEM(trg);
|
||||||
|
|
||||||
|
if (trglen > 0)
|
||||||
|
{
|
||||||
|
trgm *ptr;
|
||||||
|
int4 i = 0,
|
||||||
|
item;
|
||||||
|
|
||||||
|
*nentries = (int32) trglen;
|
||||||
|
entries = (Datum *) palloc(sizeof(Datum) * trglen);
|
||||||
|
|
||||||
|
ptr = GETARR(trg);
|
||||||
|
while (ptr - GETARR(trg) < ARRNELEM(trg))
|
||||||
|
{
|
||||||
|
item = TRGMINT(ptr);
|
||||||
|
entries[i++] = Int32GetDatum(item);
|
||||||
|
|
||||||
|
ptr++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(entries);
|
||||||
|
}
|
||||||
|
|
||||||
|
Datum
|
||||||
|
gin_trgm_consistent(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
bool *check = (bool *) PG_GETARG_POINTER(0);
|
||||||
|
text *query = (text *) PG_GETARG_TEXT_P(2);
|
||||||
|
bool res = FALSE;
|
||||||
|
TRGM *trg;
|
||||||
|
int4 i,
|
||||||
|
trglen,
|
||||||
|
ntrue = 0;
|
||||||
|
|
||||||
|
trg = generate_trgm(VARDATA(query), VARSIZE(query) - VARHDRSZ);
|
||||||
|
trglen = ARRNELEM(trg);
|
||||||
|
|
||||||
|
for (i = 0; i < trglen; i++)
|
||||||
|
if (check[i])
|
||||||
|
ntrue ++;
|
||||||
|
|
||||||
|
#ifdef DIVUNION
|
||||||
|
res = (trglen == ntrue) ? true : ((((((float4) ntrue) / ((float4) (trglen - ntrue)))) >= trgm_limit) ? true : false);
|
||||||
|
#else
|
||||||
|
res = (trglen == 0) ? false : ((((((float4) ntrue) / ((float4) trglen))) >= trgm_limit) ? true : false);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
PG_RETURN_BOOL(res);
|
||||||
|
}
|
@ -20,6 +20,14 @@ DROP FUNCTION gtrgm_consistent(gtrgm,internal,int4);
|
|||||||
|
|
||||||
DROP TYPE gtrgm CASCADE;
|
DROP TYPE gtrgm CASCADE;
|
||||||
|
|
||||||
|
DROP OPERATOR CLASS gin_trgm_ops USING gin;
|
||||||
|
|
||||||
|
DROP FUNCTION gin_extract_trgm(text, internal);
|
||||||
|
|
||||||
|
DROP FUNCTION gin_extract_trgm(text, internal, internal);
|
||||||
|
|
||||||
|
DROP FUNCTION gin_trgm_consistent(internal, internal, text);
|
||||||
|
|
||||||
DROP OPERATOR % (text, text);
|
DROP OPERATOR % (text, text);
|
||||||
|
|
||||||
DROP FUNCTION similarity_op(text,text);
|
DROP FUNCTION similarity_op(text,text);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user