6e2f3ae884
Unlike Btree-based LIKE optimization, this works for non-left-anchored search patterns. The effectiveness of the search depends on how many trigrams can be extracted from the pattern. (The worst case, with no trigrams, degrades to a full-table scan, so this isn't a panacea. But it can be very useful.) Alexander Korotkov, reviewed by Jan Urbanski
199 lines
4.5 KiB
C
199 lines
4.5 KiB
C
/*
|
|
* contrib/pg_trgm/trgm_gin.c
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "trgm.h"
|
|
|
|
#include "access/gin.h"
|
|
#include "access/itup.h"
|
|
#include "access/skey.h"
|
|
#include "access/tuptoaster.h"
|
|
#include "storage/bufpage.h"
|
|
#include "utils/array.h"
|
|
#include "utils/builtins.h"
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(gin_extract_trgm);
|
|
Datum gin_extract_trgm(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
|
|
Datum gin_extract_value_trgm(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
|
|
Datum gin_extract_query_trgm(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(gin_trgm_consistent);
|
|
Datum gin_trgm_consistent(PG_FUNCTION_ARGS);
|
|
|
|
/*
|
|
* This function can only be called if a pre-9.1 version of the GIN operator
|
|
* class definition is present in the catalogs (probably as a consequence
|
|
* of upgrade-in-place). Complain.
|
|
*/
|
|
Datum
|
|
gin_extract_trgm(PG_FUNCTION_ARGS)
|
|
{
|
|
ereport(ERROR,
|
|
(errmsg("GIN operator class for pg_trgm is out of date"),
|
|
errhint("Please drop and re-create the pg_trgm catalog entries.")));
|
|
PG_RETURN_NULL();
|
|
}
|
|
|
|
Datum
|
|
gin_extract_value_trgm(PG_FUNCTION_ARGS)
|
|
{
|
|
text *val = (text *) PG_GETARG_TEXT_P(0);
|
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
|
Datum *entries = NULL;
|
|
TRGM *trg;
|
|
int32 trglen;
|
|
|
|
*nentries = 0;
|
|
|
|
trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
|
|
trglen = ARRNELEM(trg);
|
|
|
|
if (trglen > 0)
|
|
{
|
|
trgm *ptr;
|
|
int32 i;
|
|
|
|
*nentries = trglen;
|
|
entries = (Datum *) palloc(sizeof(Datum) * trglen);
|
|
|
|
ptr = GETARR(trg);
|
|
for (i = 0; i < trglen; i++)
|
|
{
|
|
int32 item = trgm2int(ptr);
|
|
|
|
entries[i] = Int32GetDatum(item);
|
|
ptr++;
|
|
}
|
|
}
|
|
|
|
PG_RETURN_POINTER(entries);
|
|
}
|
|
|
|
Datum
|
|
gin_extract_query_trgm(PG_FUNCTION_ARGS)
|
|
{
|
|
text *val = (text *) PG_GETARG_TEXT_P(0);
|
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
|
StrategyNumber strategy = PG_GETARG_UINT16(2);
|
|
/* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
|
|
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
|
|
/* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
|
|
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
|
|
Datum *entries = NULL;
|
|
TRGM *trg;
|
|
int32 trglen;
|
|
trgm *ptr;
|
|
int32 i;
|
|
|
|
switch (strategy)
|
|
{
|
|
case SimilarityStrategyNumber:
|
|
trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
|
|
break;
|
|
case ILikeStrategyNumber:
|
|
#ifndef IGNORECASE
|
|
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
|
|
#endif
|
|
/* FALL THRU */
|
|
case LikeStrategyNumber:
|
|
/*
|
|
* For wildcard search we extract all the trigrams that every
|
|
* potentially-matching string must include.
|
|
*/
|
|
trg = generate_wildcard_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
|
|
break;
|
|
default:
|
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
|
trg = NULL; /* keep compiler quiet */
|
|
break;
|
|
}
|
|
|
|
trglen = ARRNELEM(trg);
|
|
*nentries = trglen;
|
|
|
|
if (trglen > 0)
|
|
{
|
|
entries = (Datum *) palloc(sizeof(Datum) * trglen);
|
|
ptr = GETARR(trg);
|
|
for (i = 0; i < trglen; i++)
|
|
{
|
|
int32 item = trgm2int(ptr);
|
|
|
|
entries[i] = Int32GetDatum(item);
|
|
ptr++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If no trigram was extracted then we have to scan all the index.
|
|
*/
|
|
if (trglen == 0)
|
|
*searchMode = GIN_SEARCH_MODE_ALL;
|
|
|
|
PG_RETURN_POINTER(entries);
|
|
}
|
|
|
|
Datum
|
|
gin_trgm_consistent(PG_FUNCTION_ARGS)
|
|
{
|
|
bool *check = (bool *) PG_GETARG_POINTER(0);
|
|
StrategyNumber strategy = PG_GETARG_UINT16(1);
|
|
/* text *query = PG_GETARG_TEXT_P(2); */
|
|
int32 nkeys = PG_GETARG_INT32(3);
|
|
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
|
|
bool *recheck = (bool *) PG_GETARG_POINTER(5);
|
|
bool res;
|
|
int32 i,
|
|
ntrue;
|
|
|
|
/* All cases served by this function are inexact */
|
|
*recheck = true;
|
|
|
|
switch (strategy)
|
|
{
|
|
case SimilarityStrategyNumber:
|
|
/* Count the matches */
|
|
ntrue = 0;
|
|
for (i = 0; i < nkeys; i++)
|
|
{
|
|
if (check[i])
|
|
ntrue++;
|
|
}
|
|
#ifdef DIVUNION
|
|
res = (nkeys == ntrue) ? true : ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >= trgm_limit) ? true : false);
|
|
#else
|
|
res = (nkeys == 0) ? false : ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_limit) ? true : false);
|
|
#endif
|
|
break;
|
|
case ILikeStrategyNumber:
|
|
#ifndef IGNORECASE
|
|
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
|
|
#endif
|
|
/* FALL THRU */
|
|
case LikeStrategyNumber:
|
|
/* Check if all extracted trigrams are presented. */
|
|
res = true;
|
|
for (i = 0; i < nkeys; i++)
|
|
{
|
|
if (!check[i])
|
|
{
|
|
res = false;
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
|
res = false; /* keep compiler quiet */
|
|
break;
|
|
}
|
|
|
|
PG_RETURN_BOOL(res);
|
|
}
|