Add optional compression method to SP-GiST
Patch allows to have different types of column and value stored in leaf tuples of SP-GiST. The main application of feature is to transform complex column type to simple indexed type or for truncating too long value, transformation could be lossy. Simple example: polygons are converted to their bounding boxes, this opclass follows. Authors: me, Heikki Linnakangas, Alexander Korotkov, Nikita Glukhov Reviewed-By: all authors + Darafei Praliaskouski Discussions: https://www.postgresql.org/message-id/5447B3FF.2080406@sigaev.ru https://www.postgresql.org/message-id/flat/54907069.1030506@sigaev.ru#54907069.1030506@sigaev.ru
This commit is contained in:
parent
9373baa0f7
commit
854823fa33
@ -240,20 +240,22 @@
|
||||
|
||||
<para>
|
||||
There are five user-defined methods that an index operator class for
|
||||
<acronym>SP-GiST</acronym> must provide. All five follow the convention
|
||||
of accepting two <type>internal</type> arguments, the first of which is a
|
||||
pointer to a C struct containing input values for the support method,
|
||||
while the second argument is a pointer to a C struct where output values
|
||||
must be placed. Four of the methods just return <type>void</type>, since
|
||||
all their results appear in the output struct; but
|
||||
<acronym>SP-GiST</acronym> must provide, and one is optional. All five
|
||||
mandatory methods follow the convention of accepting two <type>internal</type>
|
||||
arguments, the first of which is a pointer to a C struct containing input
|
||||
values for the support method, while the second argument is a pointer to a
|
||||
C struct where output values must be placed. Four of the mandatory methods just
|
||||
return <type>void</type>, since all their results appear in the output struct; but
|
||||
<function>leaf_consistent</function> additionally returns a <type>boolean</type> result.
|
||||
The methods must not modify any fields of their input structs. In all
|
||||
cases, the output struct is initialized to zeroes before calling the
|
||||
user-defined method.
|
||||
user-defined method. Optional sixth method <function>compress</function>
|
||||
accepts datum to be indexed as the only argument and returns value suitable
|
||||
for physical storage in leaf tuple.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The five user-defined methods are:
|
||||
The five mandatory user-defined methods are:
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
@ -283,6 +285,7 @@ typedef struct spgConfigOut
|
||||
{
|
||||
Oid prefixType; /* Data type of inner-tuple prefixes */
|
||||
Oid labelType; /* Data type of inner-tuple node labels */
|
||||
Oid leafType; /* Data type of leaf-tuple values */
|
||||
bool canReturnData; /* Opclass can reconstruct original data */
|
||||
bool longValuesOK; /* Opclass can cope with values > 1 page */
|
||||
} spgConfigOut;
|
||||
@ -305,6 +308,22 @@ typedef struct spgConfigOut
|
||||
class is capable of segmenting long values by repeated suffixing
|
||||
(see <xref linkend="spgist-limits"/>).
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<structfield>leafType</structfield> is typically the same as
|
||||
<structfield>attType</structfield>. For the reasons of backward
|
||||
compatibility, method <function>config</function> can
|
||||
leave <structfield>leafType</structfield> uninitialized; that would
|
||||
give the same effect as setting <structfield>leafType</structfield> equal
|
||||
to <structfield>attType</structfield>. When <structfield>attType</structfield>
|
||||
and <structfield>leafType</structfield> are different, then optional
|
||||
method <function>compress</function> must be provided.
|
||||
Method <function>compress</function> is responsible
|
||||
for transformation of datums to be indexed from <structfield>attType</structfield>
|
||||
to <structfield>leafType</structfield>.
|
||||
Note: both consistent functions will get <structfield>scankeys</structfield>
|
||||
unchanged, without transformation using <function>compress</function>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
@ -380,10 +399,16 @@ typedef struct spgChooseOut
|
||||
} spgChooseOut;
|
||||
</programlisting>
|
||||
|
||||
<structfield>datum</structfield> is the original datum that was to be inserted
|
||||
into the index.
|
||||
<structfield>leafDatum</structfield> is initially the same as
|
||||
<structfield>datum</structfield>, but can change at lower levels of the tree
|
||||
<structfield>datum</structfield> is the original datum of
|
||||
<structname>spgConfigIn</structname>.<structfield>attType</structfield>
|
||||
type that was to be inserted into the index.
|
||||
<structfield>leafDatum</structfield> is a value of
|
||||
<structname>spgConfigOut</structname>.<structfield>leafType</structfield>
|
||||
type which is initially an result of method
|
||||
<function>compress</function> applied to <structfield>datum</structfield>
|
||||
when method <function>compress</function> is provided, or same value as
|
||||
<structfield>datum</structfield> otherwise.
|
||||
<structfield>leafDatum</structfield> can change at lower levels of the tree
|
||||
if the <function>choose</function> or <function>picksplit</function>
|
||||
methods change it. When the insertion search reaches a leaf page,
|
||||
the current value of <structfield>leafDatum</structfield> is what will be stored
|
||||
@ -418,7 +443,7 @@ typedef struct spgChooseOut
|
||||
Set <structfield>levelAdd</structfield> to the increment in
|
||||
<structfield>level</structfield> caused by descending through that node,
|
||||
or leave it as zero if the operator class does not use levels.
|
||||
Set <structfield>restDatum</structfield> to equal <structfield>datum</structfield>
|
||||
Set <structfield>restDatum</structfield> to equal <structfield>leafDatum</structfield>
|
||||
if the operator class does not modify datums from one level to the
|
||||
next, or otherwise set it to the modified value to be used as
|
||||
<structfield>leafDatum</structfield> at the next level.
|
||||
@ -509,7 +534,9 @@ typedef struct spgPickSplitOut
|
||||
</programlisting>
|
||||
|
||||
<structfield>nTuples</structfield> is the number of leaf tuples provided.
|
||||
<structfield>datums</structfield> is an array of their datum values.
|
||||
<structfield>datums</structfield> is an array of their datum values of
|
||||
<structname>spgConfigOut</structname>.<structfield>leafType</structfield>
|
||||
type.
|
||||
<structfield>level</structfield> is the current level that all the leaf tuples
|
||||
share, which will become the level of the new inner tuple.
|
||||
</para>
|
||||
@ -624,7 +651,8 @@ typedef struct spgInnerConsistentOut
|
||||
<structfield>reconstructedValue</structfield> is the value reconstructed for the
|
||||
parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the
|
||||
<function>inner_consistent</function> function did not provide a value at the
|
||||
parent level.
|
||||
parent level. <structfield>reconstructedValue</structfield> is always of
|
||||
<structname>spgConfigOut</structname>.<structfield>leafType</structfield> type.
|
||||
<structfield>traversalValue</structfield> is a pointer to any traverse data
|
||||
passed down from the previous call of <function>inner_consistent</function>
|
||||
on the parent index tuple, or NULL at the root level.
|
||||
@ -659,6 +687,7 @@ typedef struct spgInnerConsistentOut
|
||||
necessarily so, so an array is used.)
|
||||
If value reconstruction is needed, set
|
||||
<structfield>reconstructedValues</structfield> to an array of the values
|
||||
of <structname>spgConfigOut</structname>.<structfield>leafType</structfield> type
|
||||
reconstructed for each child node to be visited; otherwise, leave
|
||||
<structfield>reconstructedValues</structfield> as NULL.
|
||||
If it is desired to pass down additional out-of-band information
|
||||
@ -730,7 +759,8 @@ typedef struct spgLeafConsistentOut
|
||||
<structfield>reconstructedValue</structfield> is the value reconstructed for the
|
||||
parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the
|
||||
<function>inner_consistent</function> function did not provide a value at the
|
||||
parent level.
|
||||
parent level. <structfield>reconstructedValue</structfield> is always of
|
||||
<structname>spgConfigOut</structname>.<structfield>leafType</structfield> type.
|
||||
<structfield>traversalValue</structfield> is a pointer to any traverse data
|
||||
passed down from the previous call of <function>inner_consistent</function>
|
||||
on the parent index tuple, or NULL at the root level.
|
||||
@ -739,16 +769,18 @@ typedef struct spgLeafConsistentOut
|
||||
<structfield>returnData</structfield> is <literal>true</literal> if reconstructed data is
|
||||
required for this query; this will only be so if the
|
||||
<function>config</function> function asserted <structfield>canReturnData</structfield>.
|
||||
<structfield>leafDatum</structfield> is the key value stored in the current
|
||||
leaf tuple.
|
||||
<structfield>leafDatum</structfield> is the key value of
|
||||
<structname>spgConfigOut</structname>.<structfield>leafType</structfield>
|
||||
stored in the current leaf tuple.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The function must return <literal>true</literal> if the leaf tuple matches the
|
||||
query, or <literal>false</literal> if not. In the <literal>true</literal> case,
|
||||
if <structfield>returnData</structfield> is <literal>true</literal> then
|
||||
<structfield>leafValue</structfield> must be set to the value originally supplied
|
||||
to be indexed for this leaf tuple. Also,
|
||||
<structfield>leafValue</structfield> must be set to the value of
|
||||
<structname>spgConfigIn</structname>.<structfield>attType</structfield> type
|
||||
originally supplied to be indexed for this leaf tuple. Also,
|
||||
<structfield>recheck</structfield> may be set to <literal>true</literal> if the match
|
||||
is uncertain and so the operator(s) must be re-applied to the actual
|
||||
heap tuple to verify the match.
|
||||
@ -757,6 +789,26 @@ typedef struct spgLeafConsistentOut
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
|
||||
<para>
|
||||
The optional user-defined method is:
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><function>Datum compress(Datum in)</function></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Converts the data item into a format suitable for physical storage in
|
||||
a leaf tuple of index page. It accepts
|
||||
<structname>spgConfigIn</structname>.<structfield>attType</structfield>
|
||||
value and return
|
||||
<structname>spgConfigOut</structname>.<structfield>leafType</structfield>
|
||||
value. Output value should not be toasted.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
|
||||
<para>
|
||||
All the SP-GiST support methods are normally called in a short-lived
|
||||
memory context; that is, <varname>CurrentMemoryContext</varname> will be reset
|
||||
|
@ -1906,14 +1906,37 @@ spgdoinsert(Relation index, SpGistState *state,
|
||||
procinfo = index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC);
|
||||
|
||||
/*
|
||||
* Since we don't use index_form_tuple in this AM, we have to make sure
|
||||
* Prepare the leaf datum to insert.
|
||||
*
|
||||
* If an optional "compress" method is provided, then call it to form
|
||||
* the leaf datum from the input datum. Otherwise store the input datum as
|
||||
* is. Since we don't use index_form_tuple in this AM, we have to make sure
|
||||
* value to be inserted is not toasted; FormIndexDatum doesn't guarantee
|
||||
* that.
|
||||
* that. But we assume the "compress" method to return an untoasted value.
|
||||
*/
|
||||
if (!isnull && state->attType.attlen == -1)
|
||||
datum = PointerGetDatum(PG_DETOAST_DATUM(datum));
|
||||
if (!isnull)
|
||||
{
|
||||
if (OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
|
||||
{
|
||||
FmgrInfo *compressProcinfo = NULL;
|
||||
|
||||
leafDatum = datum;
|
||||
compressProcinfo = index_getprocinfo(index, 1, SPGIST_COMPRESS_PROC);
|
||||
leafDatum = FunctionCall1Coll(compressProcinfo,
|
||||
index->rd_indcollation[0],
|
||||
datum);
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(state->attLeafType.type == state->attType.type);
|
||||
|
||||
if (state->attType.attlen == -1)
|
||||
leafDatum = PointerGetDatum(PG_DETOAST_DATUM(datum));
|
||||
else
|
||||
leafDatum = datum;
|
||||
}
|
||||
}
|
||||
else
|
||||
leafDatum = (Datum) 0;
|
||||
|
||||
/*
|
||||
* Compute space needed for a leaf tuple containing the given datum.
|
||||
@ -1923,7 +1946,7 @@ spgdoinsert(Relation index, SpGistState *state,
|
||||
*/
|
||||
if (!isnull)
|
||||
leafSize = SGLTHDRSZ + sizeof(ItemIdData) +
|
||||
SpGistGetTypeSize(&state->attType, leafDatum);
|
||||
SpGistGetTypeSize(&state->attLeafType, leafDatum);
|
||||
else
|
||||
leafSize = SGDTSIZE + sizeof(ItemIdData);
|
||||
|
||||
@ -2138,7 +2161,7 @@ spgdoinsert(Relation index, SpGistState *state,
|
||||
{
|
||||
leafDatum = out.result.matchNode.restDatum;
|
||||
leafSize = SGLTHDRSZ + sizeof(ItemIdData) +
|
||||
SpGistGetTypeSize(&state->attType, leafDatum);
|
||||
SpGistGetTypeSize(&state->attLeafType, leafDatum);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -40,7 +40,7 @@ typedef struct ScanStackEntry
|
||||
static void
|
||||
freeScanStackEntry(SpGistScanOpaque so, ScanStackEntry *stackEntry)
|
||||
{
|
||||
if (!so->state.attType.attbyval &&
|
||||
if (!so->state.attLeafType.attbyval &&
|
||||
DatumGetPointer(stackEntry->reconstructedValue) != NULL)
|
||||
pfree(DatumGetPointer(stackEntry->reconstructedValue));
|
||||
if (stackEntry->traversalValue)
|
||||
@ -527,8 +527,8 @@ redirect:
|
||||
if (out.reconstructedValues)
|
||||
newEntry->reconstructedValue =
|
||||
datumCopy(out.reconstructedValues[i],
|
||||
so->state.attType.attbyval,
|
||||
so->state.attType.attlen);
|
||||
so->state.attLeafType.attbyval,
|
||||
so->state.attLeafType.attlen);
|
||||
else
|
||||
newEntry->reconstructedValue = (Datum) 0;
|
||||
|
||||
|
@ -125,6 +125,22 @@ spgGetCache(Relation index)
|
||||
|
||||
/* Get the information we need about each relevant datatype */
|
||||
fillTypeDesc(&cache->attType, atttype);
|
||||
|
||||
if (OidIsValid(cache->config.leafType) &&
|
||||
cache->config.leafType != atttype)
|
||||
{
|
||||
if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("compress method must not defined when leaf type is different from input type")));
|
||||
|
||||
fillTypeDesc(&cache->attLeafType, cache->config.leafType);
|
||||
}
|
||||
else
|
||||
{
|
||||
cache->attLeafType = cache->attType;
|
||||
}
|
||||
|
||||
fillTypeDesc(&cache->attPrefixType, cache->config.prefixType);
|
||||
fillTypeDesc(&cache->attLabelType, cache->config.labelType);
|
||||
|
||||
@ -164,6 +180,7 @@ initSpGistState(SpGistState *state, Relation index)
|
||||
|
||||
state->config = cache->config;
|
||||
state->attType = cache->attType;
|
||||
state->attLeafType = cache->attLeafType;
|
||||
state->attPrefixType = cache->attPrefixType;
|
||||
state->attLabelType = cache->attLabelType;
|
||||
|
||||
@ -618,7 +635,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
|
||||
/* compute space needed (note result is already maxaligned) */
|
||||
size = SGLTHDRSZ;
|
||||
if (!isnull)
|
||||
size += SpGistGetTypeSize(&state->attType, datum);
|
||||
size += SpGistGetTypeSize(&state->attLeafType, datum);
|
||||
|
||||
/*
|
||||
* Ensure that we can replace the tuple with a dead tuple later. This
|
||||
@ -634,7 +651,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
|
||||
tup->nextOffset = InvalidOffsetNumber;
|
||||
tup->heapPtr = *heapPtr;
|
||||
if (!isnull)
|
||||
memcpyDatum(SGLTDATAPTR(tup), &state->attType, datum);
|
||||
memcpyDatum(SGLTDATAPTR(tup), &state->attLeafType, datum);
|
||||
|
||||
return tup;
|
||||
}
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "catalog/pg_opfamily.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/regproc.h"
|
||||
#include "utils/syscache.h"
|
||||
|
||||
@ -52,6 +53,10 @@ spgvalidate(Oid opclassoid)
|
||||
OpFamilyOpFuncGroup *opclassgroup;
|
||||
int i;
|
||||
ListCell *lc;
|
||||
spgConfigIn configIn;
|
||||
spgConfigOut configOut;
|
||||
Oid configOutLefttype = InvalidOid;
|
||||
Oid configOutRighttype = InvalidOid;
|
||||
|
||||
/* Fetch opclass information */
|
||||
classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid));
|
||||
@ -74,6 +79,7 @@ spgvalidate(Oid opclassoid)
|
||||
/* Fetch all operators and support functions of the opfamily */
|
||||
oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid));
|
||||
proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid));
|
||||
grouplist = identify_opfamily_groups(oprlist, proclist);
|
||||
|
||||
/* Check individual support functions */
|
||||
for (i = 0; i < proclist->n_members; i++)
|
||||
@ -100,6 +106,40 @@ spgvalidate(Oid opclassoid)
|
||||
switch (procform->amprocnum)
|
||||
{
|
||||
case SPGIST_CONFIG_PROC:
|
||||
ok = check_amproc_signature(procform->amproc, VOIDOID, true,
|
||||
2, 2, INTERNALOID, INTERNALOID);
|
||||
configIn.attType = procform->amproclefttype;
|
||||
memset(&configOut, 0, sizeof(configOut));
|
||||
|
||||
OidFunctionCall2(procform->amproc,
|
||||
PointerGetDatum(&configIn),
|
||||
PointerGetDatum(&configOut));
|
||||
|
||||
configOutLefttype = procform->amproclefttype;
|
||||
configOutRighttype = procform->amprocrighttype;
|
||||
|
||||
/*
|
||||
* When leaf and attribute types are the same, compress function
|
||||
* is not required and we set corresponding bit in functionset
|
||||
* for later group consistency check.
|
||||
*/
|
||||
if (!OidIsValid(configOut.leafType) ||
|
||||
configOut.leafType == configIn.attType)
|
||||
{
|
||||
foreach(lc, grouplist)
|
||||
{
|
||||
OpFamilyOpFuncGroup *group = lfirst(lc);
|
||||
|
||||
if (group->lefttype == procform->amproclefttype &&
|
||||
group->righttype == procform->amprocrighttype)
|
||||
{
|
||||
group->functionset |=
|
||||
((uint64) 1) << SPGIST_COMPRESS_PROC;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case SPGIST_CHOOSE_PROC:
|
||||
case SPGIST_PICKSPLIT_PROC:
|
||||
case SPGIST_INNER_CONSISTENT_PROC:
|
||||
@ -110,6 +150,15 @@ spgvalidate(Oid opclassoid)
|
||||
ok = check_amproc_signature(procform->amproc, BOOLOID, true,
|
||||
2, 2, INTERNALOID, INTERNALOID);
|
||||
break;
|
||||
case SPGIST_COMPRESS_PROC:
|
||||
if (configOutLefttype != procform->amproclefttype ||
|
||||
configOutRighttype != procform->amprocrighttype)
|
||||
ok = false;
|
||||
else
|
||||
ok = check_amproc_signature(procform->amproc,
|
||||
configOut.leafType, true,
|
||||
1, 1, procform->amproclefttype);
|
||||
break;
|
||||
default:
|
||||
ereport(INFO,
|
||||
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||
@ -178,7 +227,6 @@ spgvalidate(Oid opclassoid)
|
||||
}
|
||||
|
||||
/* Now check for inconsistent groups of operators/functions */
|
||||
grouplist = identify_opfamily_groups(oprlist, proclist);
|
||||
opclassgroup = NULL;
|
||||
foreach(lc, grouplist)
|
||||
{
|
||||
|
@ -30,7 +30,9 @@
|
||||
#define SPGIST_PICKSPLIT_PROC 3
|
||||
#define SPGIST_INNER_CONSISTENT_PROC 4
|
||||
#define SPGIST_LEAF_CONSISTENT_PROC 5
|
||||
#define SPGISTNProc 5
|
||||
#define SPGIST_COMPRESS_PROC 6
|
||||
#define SPGISTNRequiredProc 5
|
||||
#define SPGISTNProc 6
|
||||
|
||||
/*
|
||||
* Argument structs for spg_config method
|
||||
@ -44,6 +46,7 @@ typedef struct spgConfigOut
|
||||
{
|
||||
Oid prefixType; /* Data type of inner-tuple prefixes */
|
||||
Oid labelType; /* Data type of inner-tuple node labels */
|
||||
Oid leafType; /* Data type of leaf-tuple values */
|
||||
bool canReturnData; /* Opclass can reconstruct original data */
|
||||
bool longValuesOK; /* Opclass can cope with values > 1 page */
|
||||
} spgConfigOut;
|
||||
|
@ -119,7 +119,8 @@ typedef struct SpGistState
|
||||
{
|
||||
spgConfigOut config; /* filled in by opclass config method */
|
||||
|
||||
SpGistTypeDesc attType; /* type of input data and leaf values */
|
||||
SpGistTypeDesc attType; /* type of values to be indexed/restored */
|
||||
SpGistTypeDesc attLeafType; /* type of leaf-tuple values */
|
||||
SpGistTypeDesc attPrefixType; /* type of inner-tuple prefix values */
|
||||
SpGistTypeDesc attLabelType; /* type of node label values */
|
||||
|
||||
@ -178,7 +179,8 @@ typedef struct SpGistCache
|
||||
{
|
||||
spgConfigOut config; /* filled in by opclass config method */
|
||||
|
||||
SpGistTypeDesc attType; /* type of input data and leaf values */
|
||||
SpGistTypeDesc attType; /* type of values to be indexed/restored */
|
||||
SpGistTypeDesc attLeafType; /* type of leaf-tuple values */
|
||||
SpGistTypeDesc attPrefixType; /* type of inner-tuple prefix values */
|
||||
SpGistTypeDesc attLabelType; /* type of node label values */
|
||||
|
||||
@ -300,7 +302,7 @@ typedef SpGistLeafTupleData *SpGistLeafTuple;
|
||||
|
||||
#define SGLTHDRSZ MAXALIGN(sizeof(SpGistLeafTupleData))
|
||||
#define SGLTDATAPTR(x) (((char *) (x)) + SGLTHDRSZ)
|
||||
#define SGLTDATUM(x, s) ((s)->attType.attbyval ? \
|
||||
#define SGLTDATUM(x, s) ((s)->attLeafType.attbyval ? \
|
||||
*(Datum *) SGLTDATAPTR(x) : \
|
||||
PointerGetDatum(SGLTDATAPTR(x)))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user