Add KNNGIST support to contrib/pg_trgm.
Teodor Sigaev, with some revision by Tom
This commit is contained in:
parent
b576757d7e
commit
b525bf771e
@ -1187,6 +1187,13 @@ select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198
|
|||||||
qwertyu0988 | 0.333333
|
qwertyu0988 | 0.333333
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
|
||||||
|
?column? | t
|
||||||
|
----------+-------------
|
||||||
|
0.411765 | qwertyu0988
|
||||||
|
0.5 | qwertyu0987
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
create index trgm_idx on test_trgm using gist (t gist_trgm_ops);
|
create index trgm_idx on test_trgm using gist (t gist_trgm_ops);
|
||||||
set enable_seqscan=off;
|
set enable_seqscan=off;
|
||||||
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
|
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
|
||||||
@ -2315,6 +2322,22 @@ select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198
|
|||||||
qwertyu0988 | 0.333333
|
qwertyu0988 | 0.333333
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
explain (costs off)
|
||||||
|
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
|
||||||
|
QUERY PLAN
|
||||||
|
---------------------------------------------------
|
||||||
|
Limit
|
||||||
|
-> Index Scan using trgm_idx on test_trgm
|
||||||
|
Order By: (t <-> 'q0987wertyu0988'::text)
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
|
||||||
|
?column? | t
|
||||||
|
----------+-------------
|
||||||
|
0.411765 | qwertyu0988
|
||||||
|
0.5 | qwertyu0987
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
drop index trgm_idx;
|
drop index trgm_idx;
|
||||||
create index trgm_idx on test_trgm using gin (t gin_trgm_ops);
|
create index trgm_idx on test_trgm using gin (t gin_trgm_ops);
|
||||||
set enable_seqscan=off;
|
set enable_seqscan=off;
|
||||||
|
@ -26,7 +26,7 @@ LANGUAGE C STRICT IMMUTABLE;
|
|||||||
CREATE OR REPLACE FUNCTION similarity_op(text,text)
|
CREATE OR REPLACE FUNCTION similarity_op(text,text)
|
||||||
RETURNS bool
|
RETURNS bool
|
||||||
AS 'MODULE_PATHNAME'
|
AS 'MODULE_PATHNAME'
|
||||||
LANGUAGE C STRICT STABLE;
|
LANGUAGE C STRICT STABLE; -- stable because depends on trgm_limit
|
||||||
|
|
||||||
CREATE OPERATOR % (
|
CREATE OPERATOR % (
|
||||||
LEFTARG = text,
|
LEFTARG = text,
|
||||||
@ -37,6 +37,18 @@ CREATE OPERATOR % (
|
|||||||
JOIN = contjoinsel
|
JOIN = contjoinsel
|
||||||
);
|
);
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION similarity_dist(text,text)
|
||||||
|
RETURNS float4
|
||||||
|
AS 'MODULE_PATHNAME'
|
||||||
|
LANGUAGE C STRICT IMMUTABLE;
|
||||||
|
|
||||||
|
CREATE OPERATOR <-> (
|
||||||
|
LEFTARG = text,
|
||||||
|
RIGHTARG = text,
|
||||||
|
PROCEDURE = similarity_dist,
|
||||||
|
COMMUTATOR = '<->'
|
||||||
|
);
|
||||||
|
|
||||||
-- gist key
|
-- gist key
|
||||||
CREATE OR REPLACE FUNCTION gtrgm_in(cstring)
|
CREATE OR REPLACE FUNCTION gtrgm_in(cstring)
|
||||||
RETURNS gtrgm
|
RETURNS gtrgm
|
||||||
@ -60,6 +72,11 @@ RETURNS bool
|
|||||||
AS 'MODULE_PATHNAME'
|
AS 'MODULE_PATHNAME'
|
||||||
LANGUAGE C IMMUTABLE STRICT;
|
LANGUAGE C IMMUTABLE STRICT;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION gtrgm_distance(internal,text,int,oid)
|
||||||
|
RETURNS float8
|
||||||
|
AS 'MODULE_PATHNAME'
|
||||||
|
LANGUAGE C IMMUTABLE STRICT;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION gtrgm_compress(internal)
|
CREATE OR REPLACE FUNCTION gtrgm_compress(internal)
|
||||||
RETURNS internal
|
RETURNS internal
|
||||||
AS 'MODULE_PATHNAME'
|
AS 'MODULE_PATHNAME'
|
||||||
@ -95,6 +112,7 @@ CREATE OPERATOR CLASS gist_trgm_ops
|
|||||||
FOR TYPE text USING gist
|
FOR TYPE text USING gist
|
||||||
AS
|
AS
|
||||||
OPERATOR 1 % (text, text),
|
OPERATOR 1 % (text, text),
|
||||||
|
OPERATOR 2 <-> (text, text) FOR ORDER BY pg_catalog.float_ops,
|
||||||
FUNCTION 1 gtrgm_consistent (internal, text, int, oid, internal),
|
FUNCTION 1 gtrgm_consistent (internal, text, int, oid, internal),
|
||||||
FUNCTION 2 gtrgm_union (bytea, internal),
|
FUNCTION 2 gtrgm_union (bytea, internal),
|
||||||
FUNCTION 3 gtrgm_compress (internal),
|
FUNCTION 3 gtrgm_compress (internal),
|
||||||
@ -102,6 +120,7 @@ AS
|
|||||||
FUNCTION 5 gtrgm_penalty (internal, internal, internal),
|
FUNCTION 5 gtrgm_penalty (internal, internal, internal),
|
||||||
FUNCTION 6 gtrgm_picksplit (internal, internal),
|
FUNCTION 6 gtrgm_picksplit (internal, internal),
|
||||||
FUNCTION 7 gtrgm_same (gtrgm, gtrgm, internal),
|
FUNCTION 7 gtrgm_same (gtrgm, gtrgm, internal),
|
||||||
|
FUNCTION 8 gtrgm_distance (internal, text, int, oid),
|
||||||
STORAGE gtrgm;
|
STORAGE gtrgm;
|
||||||
|
|
||||||
-- support functions for gin
|
-- support functions for gin
|
||||||
|
@ -26,6 +26,7 @@ CREATE TABLE test_trgm(t text);
|
|||||||
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
|
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
|
||||||
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
|
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
|
||||||
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
|
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
|
||||||
|
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
|
||||||
|
|
||||||
create index trgm_idx on test_trgm using gist (t gist_trgm_ops);
|
create index trgm_idx on test_trgm using gist (t gist_trgm_ops);
|
||||||
set enable_seqscan=off;
|
set enable_seqscan=off;
|
||||||
@ -33,6 +34,9 @@ set enable_seqscan=off;
|
|||||||
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
|
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
|
||||||
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
|
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
|
||||||
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
|
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
|
||||||
|
explain (costs off)
|
||||||
|
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
|
||||||
|
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
|
||||||
|
|
||||||
drop index trgm_idx;
|
drop index trgm_idx;
|
||||||
create index trgm_idx on test_trgm using gin (t gin_trgm_ops);
|
create index trgm_idx on test_trgm using gin (t gin_trgm_ops);
|
||||||
|
@ -4,12 +4,10 @@
|
|||||||
#ifndef __TRGM_H__
|
#ifndef __TRGM_H__
|
||||||
#define __TRGM_H__
|
#define __TRGM_H__
|
||||||
|
|
||||||
#include "postgres.h"
|
|
||||||
|
|
||||||
#include "access/gist.h"
|
#include "access/gist.h"
|
||||||
#include "access/itup.h"
|
#include "access/itup.h"
|
||||||
#include "utils/builtins.h"
|
|
||||||
#include "storage/bufpage.h"
|
#include "storage/bufpage.h"
|
||||||
|
#include "utils/builtins.h"
|
||||||
|
|
||||||
/* options */
|
/* options */
|
||||||
#define LPADDING 2
|
#define LPADDING 2
|
||||||
@ -18,6 +16,10 @@
|
|||||||
#define IGNORECASE
|
#define IGNORECASE
|
||||||
#define DIVUNION
|
#define DIVUNION
|
||||||
|
|
||||||
|
/* operator strategy numbers */
|
||||||
|
#define SimilarityStrategyNumber 1
|
||||||
|
#define DistanceStrategyNumber 2
|
||||||
|
|
||||||
|
|
||||||
typedef char trgm[3];
|
typedef char trgm[3];
|
||||||
|
|
||||||
@ -89,4 +91,4 @@ extern float4 trgm_limit;
|
|||||||
TRGM *generate_trgm(char *str, int slen);
|
TRGM *generate_trgm(char *str, int slen);
|
||||||
float4 cnt_sml(TRGM *trg1, TRGM *trg2);
|
float4 cnt_sml(TRGM *trg1, TRGM *trg2);
|
||||||
|
|
||||||
#endif
|
#endif /* __TRGM_H__ */
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
* contrib/pg_trgm/trgm_gin.c
|
* contrib/pg_trgm/trgm_gin.c
|
||||||
*/
|
*/
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "trgm.h"
|
#include "trgm.h"
|
||||||
|
|
||||||
#include "access/gin.h"
|
#include "access/gin.h"
|
||||||
@ -10,6 +12,7 @@
|
|||||||
#include "utils/array.h"
|
#include "utils/array.h"
|
||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
|
|
||||||
|
|
||||||
PG_FUNCTION_INFO_V1(gin_extract_trgm);
|
PG_FUNCTION_INFO_V1(gin_extract_trgm);
|
||||||
Datum gin_extract_trgm(PG_FUNCTION_ARGS);
|
Datum gin_extract_trgm(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
@ -1,15 +1,19 @@
|
|||||||
/*
|
/*
|
||||||
* contrib/pg_trgm/trgm_gist.c
|
* contrib/pg_trgm/trgm_gist.c
|
||||||
*/
|
*/
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
#include "trgm.h"
|
#include "trgm.h"
|
||||||
|
|
||||||
#include "access/gist.h"
|
#include "access/gist.h"
|
||||||
#include "access/itup.h"
|
#include "access/itup.h"
|
||||||
|
#include "access/skey.h"
|
||||||
#include "access/tuptoaster.h"
|
#include "access/tuptoaster.h"
|
||||||
#include "storage/bufpage.h"
|
#include "storage/bufpage.h"
|
||||||
#include "utils/array.h"
|
#include "utils/array.h"
|
||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
|
|
||||||
|
|
||||||
PG_FUNCTION_INFO_V1(gtrgm_in);
|
PG_FUNCTION_INFO_V1(gtrgm_in);
|
||||||
Datum gtrgm_in(PG_FUNCTION_ARGS);
|
Datum gtrgm_in(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
@ -25,6 +29,9 @@ Datum gtrgm_decompress(PG_FUNCTION_ARGS);
|
|||||||
PG_FUNCTION_INFO_V1(gtrgm_consistent);
|
PG_FUNCTION_INFO_V1(gtrgm_consistent);
|
||||||
Datum gtrgm_consistent(PG_FUNCTION_ARGS);
|
Datum gtrgm_consistent(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(gtrgm_distance);
|
||||||
|
Datum gtrgm_distance(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
PG_FUNCTION_INFO_V1(gtrgm_union);
|
PG_FUNCTION_INFO_V1(gtrgm_union);
|
||||||
Datum gtrgm_union(PG_FUNCTION_ARGS);
|
Datum gtrgm_union(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
@ -159,18 +166,35 @@ gtrgm_decompress(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int4
|
||||||
|
cnt_sml_sign_common(TRGM *qtrg, BITVECP sign)
|
||||||
|
{
|
||||||
|
int4 count = 0;
|
||||||
|
int4 k,
|
||||||
|
len = ARRNELEM(qtrg);
|
||||||
|
trgm *ptr = GETARR(qtrg);
|
||||||
|
int4 tmp = 0;
|
||||||
|
|
||||||
|
for (k = 0; k < len; k++)
|
||||||
|
{
|
||||||
|
CPTRGM(((char *) &tmp), ptr + k);
|
||||||
|
count += GETBIT(sign, HASHVAL(tmp));
|
||||||
|
}
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
gtrgm_consistent(PG_FUNCTION_ARGS)
|
gtrgm_consistent(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
||||||
text *query = PG_GETARG_TEXT_P(1);
|
text *query = PG_GETARG_TEXT_P(1);
|
||||||
|
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
|
||||||
/* StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); */
|
|
||||||
/* Oid subtype = PG_GETARG_OID(3); */
|
/* Oid subtype = PG_GETARG_OID(3); */
|
||||||
bool *recheck = (bool *) PG_GETARG_POINTER(4);
|
bool *recheck = (bool *) PG_GETARG_POINTER(4);
|
||||||
TRGM *key = (TRGM *) DatumGetPointer(entry->key);
|
TRGM *key = (TRGM *) DatumGetPointer(entry->key);
|
||||||
TRGM *qtrg;
|
TRGM *qtrg;
|
||||||
bool res = false;
|
bool res;
|
||||||
char *cache = (char *) fcinfo->flinfo->fn_extra;
|
char *cache = (char *) fcinfo->flinfo->fn_extra;
|
||||||
|
|
||||||
/* All cases served by this function are exact */
|
/* All cases served by this function are exact */
|
||||||
@ -193,6 +217,9 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
qtrg = (TRGM *) (cache + MAXALIGN(VARSIZE(query)));
|
qtrg = (TRGM *) (cache + MAXALIGN(VARSIZE(query)));
|
||||||
|
|
||||||
|
switch (strategy)
|
||||||
|
{
|
||||||
|
case SimilarityStrategyNumber:
|
||||||
if (GIST_LEAF(entry))
|
if (GIST_LEAF(entry))
|
||||||
{ /* all leafs contains orig trgm */
|
{ /* all leafs contains orig trgm */
|
||||||
float4 tmpsml = cnt_sml(key, qtrg);
|
float4 tmpsml = cnt_sml(key, qtrg);
|
||||||
@ -206,28 +233,81 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{ /* non-leaf contains signature */
|
{ /* non-leaf contains signature */
|
||||||
int4 count = 0;
|
int4 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
|
||||||
int4 k,
|
int4 len = ARRNELEM(qtrg);
|
||||||
len = ARRNELEM(qtrg);
|
|
||||||
trgm *ptr = GETARR(qtrg);
|
|
||||||
BITVECP sign = GETSIGN(key);
|
|
||||||
int4 tmp = 0;
|
|
||||||
|
|
||||||
for (k = 0; k < len; k++)
|
if (len == 0)
|
||||||
{
|
res = false;
|
||||||
CPTRGM(((char *) &tmp), ptr + k);
|
else
|
||||||
count += GETBIT(sign, HASHVAL(tmp));
|
res = (((((float8) count) / ((float8) len))) >= trgm_limit) ? true : false;
|
||||||
}
|
}
|
||||||
#ifdef DIVUNION
|
break;
|
||||||
res = (len == count) ? true : ((((((float4) count) / ((float4) (len - count)))) >= trgm_limit) ? true : false);
|
default:
|
||||||
#else
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
||||||
res = (len == 0) ? false : ((((((float4) count) / ((float4) len))) >= trgm_limit) ? true : false);
|
res = false; /* keep compiler quiet */
|
||||||
#endif
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
PG_RETURN_BOOL(res);
|
PG_RETURN_BOOL(res);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Datum
|
||||||
|
gtrgm_distance(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
||||||
|
text *query = PG_GETARG_TEXT_P(1);
|
||||||
|
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
|
||||||
|
/* Oid subtype = PG_GETARG_OID(3); */
|
||||||
|
TRGM *key = (TRGM *) DatumGetPointer(entry->key);
|
||||||
|
TRGM *qtrg;
|
||||||
|
float8 res;
|
||||||
|
char *cache = (char *) fcinfo->flinfo->fn_extra;
|
||||||
|
|
||||||
|
if (cache == NULL || VARSIZE(cache) != VARSIZE(query) || memcmp(cache, query, VARSIZE(query)) != 0)
|
||||||
|
{
|
||||||
|
qtrg = generate_trgm(VARDATA(query), VARSIZE(query) - VARHDRSZ);
|
||||||
|
|
||||||
|
if (cache)
|
||||||
|
pfree(cache);
|
||||||
|
|
||||||
|
fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
|
||||||
|
MAXALIGN(VARSIZE(query)) + VARSIZE(qtrg));
|
||||||
|
cache = (char *) fcinfo->flinfo->fn_extra;
|
||||||
|
|
||||||
|
memcpy(cache, query, VARSIZE(query));
|
||||||
|
memcpy(cache + MAXALIGN(VARSIZE(query)), qtrg, VARSIZE(qtrg));
|
||||||
|
}
|
||||||
|
|
||||||
|
qtrg = (TRGM *) (cache + MAXALIGN(VARSIZE(query)));
|
||||||
|
|
||||||
|
switch (strategy)
|
||||||
|
{
|
||||||
|
case DistanceStrategyNumber:
|
||||||
|
if (GIST_LEAF(entry))
|
||||||
|
{ /* all leafs contains orig trgm */
|
||||||
|
res = 1.0 - cnt_sml(key, qtrg);
|
||||||
|
}
|
||||||
|
else if (ISALLTRUE(key))
|
||||||
|
{ /* all leafs contains orig trgm */
|
||||||
|
res = 0.0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{ /* non-leaf contains signature */
|
||||||
|
int4 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
|
||||||
|
int4 len = ARRNELEM(qtrg);
|
||||||
|
|
||||||
|
res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
||||||
|
res = 0; /* keep compiler quiet */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
PG_RETURN_FLOAT8(res);
|
||||||
|
}
|
||||||
|
|
||||||
static int4
|
static int4
|
||||||
unionkey(BITVECP sbase, TRGM *add)
|
unionkey(BITVECP sbase, TRGM *add)
|
||||||
{
|
{
|
||||||
|
@ -1,11 +1,16 @@
|
|||||||
/*
|
/*
|
||||||
* contrib/pg_trgm/trgm_op.c
|
* contrib/pg_trgm/trgm_op.c
|
||||||
*/
|
*/
|
||||||
#include "trgm.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include "utils/array.h"
|
|
||||||
|
#include "trgm.h"
|
||||||
|
|
||||||
#include "catalog/pg_type.h"
|
#include "catalog/pg_type.h"
|
||||||
#include "tsearch/ts_locale.h"
|
#include "tsearch/ts_locale.h"
|
||||||
|
#include "utils/array.h"
|
||||||
|
|
||||||
|
|
||||||
PG_MODULE_MAGIC;
|
PG_MODULE_MAGIC;
|
||||||
|
|
||||||
@ -359,16 +364,25 @@ similarity(PG_FUNCTION_ARGS)
|
|||||||
PG_RETURN_FLOAT4(res);
|
PG_RETURN_FLOAT4(res);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(similarity_dist);
|
||||||
|
Datum similarity_dist(PG_FUNCTION_ARGS);
|
||||||
|
Datum
|
||||||
|
similarity_dist(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
float4 res = DatumGetFloat4(DirectFunctionCall2(similarity,
|
||||||
|
PG_GETARG_DATUM(0),
|
||||||
|
PG_GETARG_DATUM(1)));
|
||||||
|
PG_RETURN_FLOAT4(1.0 - res);
|
||||||
|
}
|
||||||
|
|
||||||
PG_FUNCTION_INFO_V1(similarity_op);
|
PG_FUNCTION_INFO_V1(similarity_op);
|
||||||
Datum similarity_op(PG_FUNCTION_ARGS);
|
Datum similarity_op(PG_FUNCTION_ARGS);
|
||||||
Datum
|
Datum
|
||||||
similarity_op(PG_FUNCTION_ARGS)
|
similarity_op(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
float4 res = DatumGetFloat4(DirectFunctionCall2(
|
float4 res = DatumGetFloat4(DirectFunctionCall2(similarity,
|
||||||
similarity,
|
|
||||||
PG_GETARG_DATUM(0),
|
PG_GETARG_DATUM(0),
|
||||||
PG_GETARG_DATUM(1)
|
PG_GETARG_DATUM(1)));
|
||||||
));
|
|
||||||
|
|
||||||
PG_RETURN_BOOL(res >= trgm_limit);
|
PG_RETURN_BOOL(res >= trgm_limit);
|
||||||
}
|
}
|
||||||
|
@ -19,6 +19,8 @@ DROP FUNCTION gtrgm_compress(internal);
|
|||||||
|
|
||||||
DROP FUNCTION gtrgm_consistent(internal,text,int,oid,internal);
|
DROP FUNCTION gtrgm_consistent(internal,text,int,oid,internal);
|
||||||
|
|
||||||
|
DROP FUNCTION gtrgm_distance(internal,text,int,oid);
|
||||||
|
|
||||||
DROP TYPE gtrgm CASCADE;
|
DROP TYPE gtrgm CASCADE;
|
||||||
|
|
||||||
DROP OPERATOR CLASS gin_trgm_ops USING gin;
|
DROP OPERATOR CLASS gin_trgm_ops USING gin;
|
||||||
@ -33,6 +35,10 @@ DROP OPERATOR % (text, text);
|
|||||||
|
|
||||||
DROP FUNCTION similarity_op(text,text);
|
DROP FUNCTION similarity_op(text,text);
|
||||||
|
|
||||||
|
DROP OPERATOR <-> (text, text);
|
||||||
|
|
||||||
|
DROP FUNCTION similarity_dist(text,text);
|
||||||
|
|
||||||
DROP FUNCTION similarity(text,text);
|
DROP FUNCTION similarity(text,text);
|
||||||
|
|
||||||
DROP FUNCTION show_trgm(text);
|
DROP FUNCTION show_trgm(text);
|
||||||
|
@ -117,6 +117,14 @@
|
|||||||
<function>set_limit</>.
|
<function>set_limit</>.
|
||||||
</entry>
|
</entry>
|
||||||
</row>
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry><type>text</> <literal><-></literal> <type>text</></entry>
|
||||||
|
<entry><type>real</type></entry>
|
||||||
|
<entry>
|
||||||
|
Returns the <quote>distance</> between the arguments, that is
|
||||||
|
one minus the <function>similarity()</> value.
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
</tbody>
|
</tbody>
|
||||||
</tgroup>
|
</tgroup>
|
||||||
</table>
|
</table>
|
||||||
@ -129,7 +137,7 @@
|
|||||||
The <filename>pg_trgm</filename> module provides GiST and GIN index
|
The <filename>pg_trgm</filename> module provides GiST and GIN index
|
||||||
operator classes that allow you to create an index over a text column for
|
operator classes that allow you to create an index over a text column for
|
||||||
the purpose of very fast similarity searches. These index types support
|
the purpose of very fast similarity searches. These index types support
|
||||||
the <literal>%</> similarity operator (and no other operators, so you may
|
the above-described similarity operators (and no other operators, so you may
|
||||||
want a regular B-tree index too).
|
want a regular B-tree index too).
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
@ -161,6 +169,18 @@ SELECT t, similarity(t, '<replaceable>word</>') AS sml
|
|||||||
sets.
|
sets.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
A variant of the above query is
|
||||||
|
<programlisting>
|
||||||
|
SELECT t, t <-> '<replaceable>word</>' AS dist
|
||||||
|
FROM test_trgm
|
||||||
|
ORDER BY dist LIMIT 10;
|
||||||
|
</programlisting>
|
||||||
|
This can be implemented quite efficiently by GiST indexes, but not
|
||||||
|
by GIN indexes. It will usually beat the first formulation when only
|
||||||
|
a small number of the closest matches is wanted.
|
||||||
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
The choice between GiST and GIN indexing depends on the relative
|
The choice between GiST and GIN indexing depends on the relative
|
||||||
performance characteristics of GiST and GIN, which are discussed elsewhere.
|
performance characteristics of GiST and GIN, which are discussed elsewhere.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user