From 97f3014647a5bd570032abd2b809d3233003f13f Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Mon, 20 Jul 2015 18:18:48 +0300 Subject: [PATCH] This supports the triconsistent function for pg_trgm GIN opclass to make it faster to implement indexed queries where some keys are common and some are rare. Patch by Jeff Janes --- contrib/pg_trgm/Makefile | 2 +- contrib/pg_trgm/pg_trgm--1.1--1.2.sql | 12 +++ .../{pg_trgm--1.1.sql => pg_trgm--1.2.sql} | 12 ++- contrib/pg_trgm/pg_trgm.control | 2 +- contrib/pg_trgm/trgm_gin.c | 92 +++++++++++++++++++ 5 files changed, 117 insertions(+), 3 deletions(-) create mode 100644 contrib/pg_trgm/pg_trgm--1.1--1.2.sql rename contrib/pg_trgm/{pg_trgm--1.1.sql => pg_trgm--1.2.sql} (92%) diff --git a/contrib/pg_trgm/Makefile b/contrib/pg_trgm/Makefile index e081a1e5e9..1e38753622 100644 --- a/contrib/pg_trgm/Makefile +++ b/contrib/pg_trgm/Makefile @@ -4,7 +4,7 @@ MODULE_big = pg_trgm OBJS = trgm_op.o trgm_gist.o trgm_gin.o trgm_regexp.o $(WIN32RES) EXTENSION = pg_trgm -DATA = pg_trgm--1.1.sql pg_trgm--1.0--1.1.sql pg_trgm--unpackaged--1.0.sql +DATA = pg_trgm--1.2.sql pg_trgm--1.0--1.1.sql pg_trgm--1.1--1.2.sql pg_trgm--unpackaged--1.0.sql PGFILEDESC = "pg_trgm - trigram matching" REGRESS = pg_trgm diff --git a/contrib/pg_trgm/pg_trgm--1.1--1.2.sql b/contrib/pg_trgm/pg_trgm--1.1--1.2.sql new file mode 100644 index 0000000000..c101f21061 --- /dev/null +++ b/contrib/pg_trgm/pg_trgm--1.1--1.2.sql @@ -0,0 +1,12 @@ +/* contrib/pg_trgm/pg_trgm--1.1--1.2.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.2'" to load this file. 
\quit + +CREATE FUNCTION gin_trgm_triconsistent(internal, int2, text, int4, internal, internal, internal) +RETURNS "char" +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD + FUNCTION 6 (text, text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal); diff --git a/contrib/pg_trgm/pg_trgm--1.1.sql b/contrib/pg_trgm/pg_trgm--1.2.sql similarity index 92% rename from contrib/pg_trgm/pg_trgm--1.1.sql rename to contrib/pg_trgm/pg_trgm--1.2.sql index 34b37e4787..03d46d07f9 100644 --- a/contrib/pg_trgm/pg_trgm--1.1.sql +++ b/contrib/pg_trgm/pg_trgm--1.2.sql @@ -1,4 +1,4 @@ -/* contrib/pg_trgm/pg_trgm--1.1.sql */ +/* contrib/pg_trgm/pg_trgm--1.2.sql */ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION pg_trgm" to load this file. \quit @@ -176,3 +176,13 @@ ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD OPERATOR 5 pg_catalog.~ (text, text), OPERATOR 6 pg_catalog.~* (text, text); + +-- Add functions that are new in 9.6 (pg_trgm 1.2). 
+ +CREATE FUNCTION gin_trgm_triconsistent(internal, int2, text, int4, internal, internal, internal) +RETURNS "char" +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD + FUNCTION 6 (text,text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal); diff --git a/contrib/pg_trgm/pg_trgm.control b/contrib/pg_trgm/pg_trgm.control index 2ac51e6890..cbf5a186d7 100644 --- a/contrib/pg_trgm/pg_trgm.control +++ b/contrib/pg_trgm/pg_trgm.control @@ -1,5 +1,5 @@ # pg_trgm extension comment = 'text similarity measurement and index searching based on trigrams' -default_version = '1.1' +default_version = '1.2' module_pathname = '$libdir/pg_trgm' relocatable = true diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c index d524ceaa19..6a0731d44e 100644 --- a/contrib/pg_trgm/trgm_gin.c +++ b/contrib/pg_trgm/trgm_gin.c @@ -14,6 +14,7 @@ PG_FUNCTION_INFO_V1(gin_extract_trgm); PG_FUNCTION_INFO_V1(gin_extract_value_trgm); PG_FUNCTION_INFO_V1(gin_extract_query_trgm); PG_FUNCTION_INFO_V1(gin_trgm_consistent); +PG_FUNCTION_INFO_V1(gin_trgm_triconsistent); /* * This function can only be called if a pre-9.1 version of the GIN operator @@ -235,3 +236,94 @@ gin_trgm_consistent(PG_FUNCTION_ARGS) PG_RETURN_BOOL(res); } + +/* + * In all cases, GIN_TRUE is at least as favorable to inclusion as + * GIN_MAYBE. If no better option is available, simply treat + * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary + * consistent function. 
+ */ /* Ternary consistency check: yields GIN_FALSE when the checked keys rule the item out and GIN_MAYBE otherwise; it never yields GIN_TRUE. */ +Datum +gin_trgm_triconsistent(PG_FUNCTION_ARGS) +{ + GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* text *query = PG_GETARG_TEXT_P(2); (argument 2 is not read by this function) */ + int32 nkeys = PG_GETARG_INT32(3); + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + GinTernaryValue res = GIN_MAYBE; + int32 i, + ntrue; + bool *boolcheck; + + switch (strategy) + { + case SimilarityStrategyNumber: + /* Count the matches; a GIN_MAYBE key is counted the same as GIN_TRUE */ + ntrue = 0; + for (i = 0; i < nkeys; i++) + { + if (check[i] != GIN_FALSE) + ntrue++; + } +#ifdef DIVUNION + res = (nkeys == ntrue) ? GIN_MAYBE : (((((float4) ntrue) / ((float4) (nkeys - ntrue))) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE); +#else + res = (nkeys == 0) ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE); +#endif + break; + case ILikeStrategyNumber: +#ifndef IGNORECASE + elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); +#endif + /* FALL THRU */ + case LikeStrategyNumber: + /* Check if all extracted trigrams are present: a single GIN_FALSE key rules the item out. */ + res = GIN_MAYBE; + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE) + { + res = GIN_FALSE; + break; + } + } + break; + case RegExpICaseStrategyNumber: +#ifndef IGNORECASE + elog(ERROR, "cannot handle ~* with case-sensitive trigrams"); +#endif + /* FALL THRU */ + case RegExpStrategyNumber: + if (nkeys < 1) + { + /* Regex processing gave no result: do full index scan */ + res = GIN_MAYBE; + } + else + { + /* + * As trigramsMatchGraph implements a monotonic boolean function, + * promoting all GIN_MAYBE keys to GIN_TRUE will give a + * conservative result. 
+ */ + boolcheck = (bool *) palloc(sizeof(bool) * nkeys); + for (i = 0; i < nkeys; i++) + boolcheck[i] = (check[i] != GIN_FALSE); + if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0], + boolcheck)) + res = GIN_FALSE; + pfree(boolcheck); + } + break; + default: + elog(ERROR, "unrecognized strategy number: %d", strategy); + res = GIN_FALSE; /* keep compiler quiet */ + break; + } + + /* All cases served by this function are inexact, so res is only ever GIN_MAYBE or GIN_FALSE here */ + Assert(res != GIN_TRUE); + PG_RETURN_GIN_TERNARY_VALUE(res); +}