This adds support for the triconsistent function to the pg_trgm GIN opclass

to make it faster to execute indexed queries where some keys are
common and some are rare.

Patch by Jeff Janes
This commit is contained in:
Teodor Sigaev 2015-07-20 18:18:48 +03:00
parent 1a51180080
commit 97f3014647
5 changed files with 117 additions and 3 deletions

View File

@ -4,7 +4,7 @@ MODULE_big = pg_trgm
OBJS = trgm_op.o trgm_gist.o trgm_gin.o trgm_regexp.o $(WIN32RES) OBJS = trgm_op.o trgm_gist.o trgm_gin.o trgm_regexp.o $(WIN32RES)
EXTENSION = pg_trgm EXTENSION = pg_trgm
DATA = pg_trgm--1.1.sql pg_trgm--1.0--1.1.sql pg_trgm--unpackaged--1.0.sql DATA = pg_trgm--1.2.sql pg_trgm--1.0--1.1.sql pg_trgm--1.1--1.2.sql pg_trgm--unpackaged--1.0.sql
PGFILEDESC = "pg_trgm - trigram matching" PGFILEDESC = "pg_trgm - trigram matching"
REGRESS = pg_trgm REGRESS = pg_trgm

View File

@ -0,0 +1,12 @@
/* contrib/pg_trgm/pg_trgm--1.1--1.2.sql */
-- Upgrade script taking pg_trgm from version 1.1 to 1.2: it creates the
-- gin_trgm_triconsistent function and attaches it to the gin_trgm_ops
-- operator family.
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.2'" to load this file. \quit
-- SQL-level declaration of the C function gin_trgm_triconsistent, which is
-- loaded from the extension's shared library (MODULE_PATHNAME).
CREATE FUNCTION gin_trgm_triconsistent(internal, int2, text, int4, internal, internal, internal)
RETURNS "char"
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;
-- Register the function as support function 6 of the GIN operator family
-- gin_trgm_ops for the (text, text) type pair.
ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
FUNCTION 6 (text, text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal);

View File

@ -1,4 +1,4 @@
/* contrib/pg_trgm/pg_trgm--1.1.sql */ /* contrib/pg_trgm/pg_trgm--1.2.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION -- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION pg_trgm" to load this file. \quit \echo Use "CREATE EXTENSION pg_trgm" to load this file. \quit
@ -176,3 +176,13 @@ ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
OPERATOR 5 pg_catalog.~ (text, text), OPERATOR 5 pg_catalog.~ (text, text),
OPERATOR 6 pg_catalog.~* (text, text); OPERATOR 6 pg_catalog.~* (text, text);
-- Add functions that are new in 9.6 (pg_trgm 1.2).
-- SQL-level declaration of the C function gin_trgm_triconsistent, which is
-- loaded from the extension's shared library (MODULE_PATHNAME).
CREATE FUNCTION gin_trgm_triconsistent(internal, int2, text, int4, internal, internal, internal)
RETURNS "char"
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;
-- Register the function as support function 6 of the GIN operator family
-- gin_trgm_ops for the (text, text) type pair.
ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
FUNCTION 6 (text,text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal);

View File

@ -1,5 +1,5 @@
# pg_trgm extension # pg_trgm extension
comment = 'text similarity measurement and index searching based on trigrams' comment = 'text similarity measurement and index searching based on trigrams'
default_version = '1.1' default_version = '1.2'
module_pathname = '$libdir/pg_trgm' module_pathname = '$libdir/pg_trgm'
relocatable = true relocatable = true

View File

@ -14,6 +14,7 @@ PG_FUNCTION_INFO_V1(gin_extract_trgm);
PG_FUNCTION_INFO_V1(gin_extract_value_trgm); PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
PG_FUNCTION_INFO_V1(gin_extract_query_trgm); PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
PG_FUNCTION_INFO_V1(gin_trgm_consistent); PG_FUNCTION_INFO_V1(gin_trgm_consistent);
PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
/* /*
* This function can only be called if a pre-9.1 version of the GIN operator * This function can only be called if a pre-9.1 version of the GIN operator
@ -235,3 +236,94 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(res); PG_RETURN_BOOL(res);
} }
/*
* In all cases, GIN_TRUE is at least as favorable to inclusion as
* GIN_MAYBE. If no better option is available, simply treat
* GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
* consistent function.
*/
Datum
gin_trgm_triconsistent(PG_FUNCTION_ARGS)
{
GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
StrategyNumber strategy = PG_GETARG_UINT16(1);
/* text *query = PG_GETARG_TEXT_P(2); */
int32 nkeys = PG_GETARG_INT32(3);
Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
GinTernaryValue res = GIN_MAYBE;
int32 i,
ntrue;
bool *boolcheck;
switch (strategy)
{
case SimilarityStrategyNumber:
/* Count the matches */
ntrue = 0;
for (i = 0; i < nkeys; i++)
{
if (check[i] != GIN_FALSE)
ntrue++;
}
#ifdef DIVUNION
res = (nkeys == ntrue) ? GIN_MAYBE : (((((float4) ntrue) / ((float4) (nkeys - ntrue))) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE);
#else
res = (nkeys == 0) ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE);
#endif
break;
case ILikeStrategyNumber:
#ifndef IGNORECASE
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
#endif
/* FALL THRU */
case LikeStrategyNumber:
/* Check if all extracted trigrams are presented. */
res = GIN_MAYBE;
for (i = 0; i < nkeys; i++)
{
if (check[i] == GIN_FALSE)
{
res = GIN_FALSE;
break;
}
}
break;
case RegExpICaseStrategyNumber:
#ifndef IGNORECASE
elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
#endif
/* FALL THRU */
case RegExpStrategyNumber:
if (nkeys < 1)
{
/* Regex processing gave no result: do full index scan */
res = GIN_MAYBE;
}
else
{
/*
* As trigramsMatchGraph implements a montonic boolean function,
* promoting all GIN_MAYBE keys to GIN_TRUE will give a
* conservative result.
*/
boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
for (i = 0; i < nkeys; i++)
boolcheck[i] = (check[i] != GIN_FALSE);
if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
boolcheck))
res = GIN_FALSE;
pfree(boolcheck);
}
break;
default:
elog(ERROR, "unrecognized strategy number: %d", strategy);
res = GIN_FALSE; /* keep compiler quiet */
break;
}
/* All cases served by this function are inexact */
Assert(res != GIN_TRUE);
PG_RETURN_GIN_TERNARY_VALUE(res);
}