Remove hardcoded hash opclass function signature exceptions

hashvalidate(), which validates the signatures of support functions
for the hash AM, contained several hardcoded exceptions.  For example,
hash/date_ops support function 1 was hashint4(), which would
ordinarily fail validation because the function argument is int4, not
date.  But this works internally because int4 and date are of the same
size.  There are several more exceptions like this that happen to work
and were allowed historically but would now fail the function
signature validation.

This patch removes those exceptions by providing new support functions
that have the proper declared signatures.  They internally share most
of the code with the "wrong" functions they replace, so the behavior
is still the same.

With the exceptions gone, hashvalidate() is now simplified and relies
fully on check_amproc_signature().

hashvarlena() and hashvarlenaextended() are kept in pg_proc.dat
because some extensions currently use them to build hash functions for
their own types, and we need to keep exposing these functions as
"LANGUAGE internal" functions for that to continue to work.

Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/29c3b746-69e7-482a-b37c-dbbf7e5b009b@eisentraut.org
This commit is contained in:
Peter Eisentraut 2024-09-12 12:52:37 +02:00
parent 5bb9ba2739
commit 23d0b48468
9 changed files with 174 additions and 122 deletions

View File

@ -376,6 +376,11 @@ hashtextextended(PG_FUNCTION_ARGS)
/*
* hashvarlena() can be used for any varlena datatype in which there are
* no non-significant bits, ie, distinct bitpatterns never compare as equal.
*
* (However, you need to define an SQL-level wrapper function around it with
* the concrete input data type; otherwise hashvalidate() won't accept it.
* Moreover, at least for built-in types, a C-level wrapper function is also
* recommended; otherwise, the opr_sanity test will get upset.)
*/
Datum
hashvarlena(PG_FUNCTION_ARGS)
@ -406,3 +411,15 @@ hashvarlenaextended(PG_FUNCTION_ARGS)
return result;
}
Datum
hashbytea(PG_FUNCTION_ARGS)
{
return hashvarlena(fcinfo);
}
Datum
hashbyteaextended(PG_FUNCTION_ARGS)
{
return hashvarlenaextended(fcinfo);
}

View File

@ -22,19 +22,13 @@
#include "catalog/pg_amproc.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "parser/parse_coerce.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/regproc.h"
#include "utils/syscache.h"
static bool check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype);
/*
* Validator for a hash opclass.
*
@ -90,6 +84,7 @@ hashvalidate(Oid opclassoid)
{
HeapTuple proctup = &proclist->members[i]->tuple;
Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup);
bool ok;
/*
* All hash functions should be registered with matching left/right
@ -109,29 +104,15 @@ hashvalidate(Oid opclassoid)
switch (procform->amprocnum)
{
case HASHSTANDARD_PROC:
ok = check_amproc_signature(procform->amproc, INT4OID, true,
1, 1, procform->amproclefttype);
break;
case HASHEXTENDED_PROC:
if (!check_hash_func_signature(procform->amproc, procform->amprocnum,
procform->amproclefttype))
{
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("operator family \"%s\" of access method %s contains function %s with wrong signature for support number %d",
opfamilyname, "hash",
format_procedure(procform->amproc),
procform->amprocnum)));
result = false;
}
else
{
/* Remember which types we can hash */
hashabletypes =
list_append_unique_oid(hashabletypes,
procform->amproclefttype);
}
ok = check_amproc_signature(procform->amproc, INT8OID, true,
2, 2, procform->amproclefttype, INT8OID);
break;
case HASHOPTIONS_PROC:
if (!check_amoptsproc_signature(procform->amproc))
result = false;
ok = check_amoptsproc_signature(procform->amproc);
break;
default:
ereport(INFO,
@ -141,7 +122,24 @@ hashvalidate(Oid opclassoid)
format_procedure(procform->amproc),
procform->amprocnum)));
result = false;
break;
continue; /* don't want additional message */
}
if (!ok)
{
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("operator family \"%s\" of access method %s contains function %s with wrong signature for support number %d",
opfamilyname, "hash",
format_procedure(procform->amproc),
procform->amprocnum)));
result = false;
}
/* Remember which types we can hash */
if (ok && (procform->amprocnum == HASHSTANDARD_PROC || procform->amprocnum == HASHEXTENDED_PROC))
{
hashabletypes = list_append_unique_oid(hashabletypes, procform->amproclefttype);
}
}
@ -267,84 +265,6 @@ hashvalidate(Oid opclassoid)
}
/*
* We need a custom version of check_amproc_signature because of assorted
* hacks in the core hash opclass definitions.
*/
static bool
check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype)
{
bool result = true;
Oid restype;
int16 nargs;
HeapTuple tp;
Form_pg_proc procform;
switch (amprocnum)
{
case HASHSTANDARD_PROC:
restype = INT4OID;
nargs = 1;
break;
case HASHEXTENDED_PROC:
restype = INT8OID;
nargs = 2;
break;
default:
elog(ERROR, "invalid amprocnum");
}
tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
if (!HeapTupleIsValid(tp))
elog(ERROR, "cache lookup failed for function %u", funcid);
procform = (Form_pg_proc) GETSTRUCT(tp);
if (procform->prorettype != restype || procform->proretset ||
procform->pronargs != nargs)
result = false;
if (!IsBinaryCoercible(argtype, procform->proargtypes.values[0]))
{
/*
* Some of the built-in hash opclasses cheat by using hash functions
* that are different from but physically compatible with the opclass
* datatype. In some of these cases, even a "binary coercible" check
* fails because there's no relevant cast. For the moment, fix it by
* having a list of allowed cases. Test the specific function
* identity, not just its input type, because hashvarlena() takes
* INTERNAL and allowing any such function seems too scary.
*/
if ((funcid == F_HASHINT4 || funcid == F_HASHINT4EXTENDED) &&
(argtype == DATEOID ||
argtype == XIDOID || argtype == CIDOID))
/* okay, allowed use of hashint4() */ ;
else if ((funcid == F_HASHINT8 || funcid == F_HASHINT8EXTENDED) &&
(argtype == XID8OID))
/* okay, allowed use of hashint8() */ ;
else if ((funcid == F_TIMESTAMP_HASH ||
funcid == F_TIMESTAMP_HASH_EXTENDED) &&
argtype == TIMESTAMPTZOID)
/* okay, allowed use of timestamp_hash() */ ;
else if ((funcid == F_HASHCHAR || funcid == F_HASHCHAREXTENDED) &&
argtype == BOOLOID)
/* okay, allowed use of hashchar() */ ;
else if ((funcid == F_HASHVARLENA || funcid == F_HASHVARLENAEXTENDED) &&
argtype == BYTEAOID)
/* okay, allowed use of hashvarlena() */ ;
else
result = false;
}
/* If function takes a second argument, it must be for a 64-bit salt. */
if (nargs == 2 && procform->proargtypes.values[1] != INT8OID)
result = false;
ReleaseSysCache(tp);
return result;
}
/*
* Prechecking function for adding operators/functions to a hash opfamily.
*/

View File

@ -17,6 +17,7 @@
#include <ctype.h>
#include "common/hashfn.h"
#include "libpq/pqformat.h"
#include "utils/builtins.h"
@ -273,6 +274,18 @@ boolge(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(arg1 >= arg2);
}
Datum
hashbool(PG_FUNCTION_ARGS)
{
return hash_uint32((int32) PG_GETARG_BOOL(0));
}
Datum
hashboolextended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended((int32) PG_GETARG_BOOL(0), PG_GETARG_INT64(1));
}
/*
* boolean-and and boolean-or aggregates.
*/

View File

@ -455,6 +455,18 @@ date_sortsupport(PG_FUNCTION_ARGS)
PG_RETURN_VOID();
}
Datum
hashdate(PG_FUNCTION_ARGS)
{
return hash_uint32(PG_GETARG_DATEADT(0));
}
Datum
hashdateextended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended(PG_GETARG_DATEADT(0), PG_GETARG_INT64(1));
}
Datum
date_finite(PG_FUNCTION_ARGS)
{

View File

@ -2298,6 +2298,18 @@ timestamp_hash_extended(PG_FUNCTION_ARGS)
return hashint8extended(fcinfo);
}
Datum
timestamptz_hash(PG_FUNCTION_ARGS)
{
return hashint8(fcinfo);
}
Datum
timestamptz_hash_extended(PG_FUNCTION_ARGS)
{
return hashint8extended(fcinfo);
}
/*
* Cross-type comparison functions for timestamp vs timestamptz
*/

View File

@ -19,6 +19,7 @@
#include "access/multixact.h"
#include "access/transam.h"
#include "access/xact.h"
#include "common/hashfn.h"
#include "common/int.h"
#include "libpq/pqformat.h"
#include "utils/builtins.h"
@ -97,6 +98,18 @@ xidneq(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(!TransactionIdEquals(xid1, xid2));
}
Datum
hashxid(PG_FUNCTION_ARGS)
{
return hash_uint32(PG_GETARG_TRANSACTIONID(0));
}
Datum
hashxidextended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended(PG_GETARG_TRANSACTIONID(0), PG_GETARG_INT64(1));
}
/*
* xid_age - compute age of an XID (relative to latest stable xid)
*/
@ -287,6 +300,18 @@ xid8cmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(-1);
}
Datum
hashxid8(PG_FUNCTION_ARGS)
{
return hashint8(fcinfo);
}
Datum
hashxid8extended(PG_FUNCTION_ARGS)
{
return hashint8extended(fcinfo);
}
Datum
xid8_larger(PG_FUNCTION_ARGS)
{
@ -374,3 +399,15 @@ cideq(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(arg1 == arg2);
}
Datum
hashcid(PG_FUNCTION_ARGS)
{
return hash_uint32(PG_GETARG_COMMANDID(0));
}
Datum
hashcidextended(PG_FUNCTION_ARGS)
{
return hash_uint32_extended(PG_GETARG_COMMANDID(0), PG_GETARG_INT64(1));
}

View File

@ -57,6 +57,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202409102
#define CATALOG_VERSION_NO 202409121
#endif

View File

@ -304,9 +304,9 @@
{ amprocfamily => 'hash/char_ops', amproclefttype => 'char',
amprocrighttype => 'char', amprocnum => '2', amproc => 'hashcharextended' },
{ amprocfamily => 'hash/date_ops', amproclefttype => 'date',
amprocrighttype => 'date', amprocnum => '1', amproc => 'hashint4' },
amprocrighttype => 'date', amprocnum => '1', amproc => 'hashdate' },
{ amprocfamily => 'hash/date_ops', amproclefttype => 'date',
amprocrighttype => 'date', amprocnum => '2', amproc => 'hashint4extended' },
amprocrighttype => 'date', amprocnum => '2', amproc => 'hashdateextended' },
{ amprocfamily => 'hash/array_ops', amproclefttype => 'anyarray',
amprocrighttype => 'anyarray', amprocnum => '1', amproc => 'hash_array' },
{ amprocfamily => 'hash/array_ops', amproclefttype => 'anyarray',
@ -376,10 +376,10 @@
amproc => 'hash_numeric_extended' },
{ amprocfamily => 'hash/timestamptz_ops', amproclefttype => 'timestamptz',
amprocrighttype => 'timestamptz', amprocnum => '1',
amproc => 'timestamp_hash' },
amproc => 'timestamptz_hash' },
{ amprocfamily => 'hash/timestamptz_ops', amproclefttype => 'timestamptz',
amprocrighttype => 'timestamptz', amprocnum => '2',
amproc => 'timestamp_hash_extended' },
amproc => 'timestamptz_hash_extended' },
{ amprocfamily => 'hash/timetz_ops', amproclefttype => 'timetz',
amprocrighttype => 'timetz', amprocnum => '1', amproc => 'timetz_hash' },
{ amprocfamily => 'hash/timetz_ops', amproclefttype => 'timetz',
@ -392,26 +392,25 @@
amprocrighttype => 'timestamp', amprocnum => '2',
amproc => 'timestamp_hash_extended' },
{ amprocfamily => 'hash/bool_ops', amproclefttype => 'bool',
amprocrighttype => 'bool', amprocnum => '1', amproc => 'hashchar' },
amprocrighttype => 'bool', amprocnum => '1', amproc => 'hashbool' },
{ amprocfamily => 'hash/bool_ops', amproclefttype => 'bool',
amprocrighttype => 'bool', amprocnum => '2', amproc => 'hashcharextended' },
amprocrighttype => 'bool', amprocnum => '2', amproc => 'hashboolextended' },
{ amprocfamily => 'hash/bytea_ops', amproclefttype => 'bytea',
amprocrighttype => 'bytea', amprocnum => '1', amproc => 'hashvarlena' },
amprocrighttype => 'bytea', amprocnum => '1', amproc => 'hashbytea' },
{ amprocfamily => 'hash/bytea_ops', amproclefttype => 'bytea',
amprocrighttype => 'bytea', amprocnum => '2',
amproc => 'hashvarlenaextended' },
amprocrighttype => 'bytea', amprocnum => '2', amproc => 'hashbyteaextended' },
{ amprocfamily => 'hash/xid_ops', amproclefttype => 'xid',
amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashint4' },
amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashxid' },
{ amprocfamily => 'hash/xid_ops', amproclefttype => 'xid',
amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashint4extended' },
amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashxidextended' },
{ amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8',
amprocrighttype => 'xid8', amprocnum => '1', amproc => 'hashint8' },
amprocrighttype => 'xid8', amprocnum => '1', amproc => 'hashxid8' },
{ amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8',
amprocrighttype => 'xid8', amprocnum => '2', amproc => 'hashint8extended' },
amprocrighttype => 'xid8', amprocnum => '2', amproc => 'hashxid8extended' },
{ amprocfamily => 'hash/cid_ops', amproclefttype => 'cid',
amprocrighttype => 'cid', amprocnum => '1', amproc => 'hashint4' },
amprocrighttype => 'cid', amprocnum => '1', amproc => 'hashcid' },
{ amprocfamily => 'hash/cid_ops', amproclefttype => 'cid',
amprocrighttype => 'cid', amprocnum => '2', amproc => 'hashint4extended' },
amprocrighttype => 'cid', amprocnum => '2', amproc => 'hashcidextended' },
{ amprocfamily => 'hash/tid_ops', amproclefttype => 'tid',
amprocrighttype => 'tid', amprocnum => '1', amproc => 'hashtid' },
{ amprocfamily => 'hash/tid_ops', amproclefttype => 'tid',
@ -819,7 +818,7 @@
amprocrighttype => 'bytea', amprocnum => '5',
amproc => 'brin_bloom_options' },
{ amprocfamily => 'brin/bytea_bloom_ops', amproclefttype => 'bytea',
amprocrighttype => 'bytea', amprocnum => '11', amproc => 'hashvarlena' },
amprocrighttype => 'bytea', amprocnum => '11', amproc => 'hashbytea' },
# minmax "char"
{ amprocfamily => 'brin/char_minmax_ops', amproclefttype => 'char',

View File

@ -1225,6 +1225,12 @@
{ oid => '772', descr => 'hash',
proname => 'hashvarlenaextended', prorettype => 'int8',
proargtypes => 'internal int8', prosrc => 'hashvarlenaextended' },
{ oid => '9708', descr => 'hash',
proname => 'hashbytea', prorettype => 'int4', proargtypes => 'bytea',
prosrc => 'hashbytea' },
{ oid => '9709', descr => 'hash',
proname => 'hashbyteaextended', prorettype => 'int8',
proargtypes => 'bytea int8', prosrc => 'hashbyteaextended' },
{ oid => '457', descr => 'hash',
proname => 'hashoidvector', prorettype => 'int4', proargtypes => 'oidvector',
prosrc => 'hashoidvector' },
@ -1261,6 +1267,36 @@
{ oid => '781', descr => 'hash',
proname => 'hashmacaddr8extended', prorettype => 'int8',
proargtypes => 'macaddr8 int8', prosrc => 'hashmacaddr8extended' },
{ oid => '9710', descr => 'hash',
proname => 'hashdate', prorettype => 'int4', proargtypes => 'date',
prosrc => 'hashdate' },
{ oid => '9711', descr => 'hash',
proname => 'hashdateextended', prorettype => 'int8',
proargtypes => 'date int8', prosrc => 'hashdateextended' },
{ oid => '9712', descr => 'hash',
proname => 'hashbool', prorettype => 'int4', proargtypes => 'bool',
prosrc => 'hashbool' },
{ oid => '9713', descr => 'hash',
proname => 'hashboolextended', prorettype => 'int8',
proargtypes => 'bool int8', prosrc => 'hashboolextended' },
{ oid => '9714', descr => 'hash',
proname => 'hashxid', prorettype => 'int4', proargtypes => 'xid',
prosrc => 'hashxid' },
{ oid => '9715', descr => 'hash',
proname => 'hashxidextended', prorettype => 'int8', proargtypes => 'xid int8',
prosrc => 'hashxidextended' },
{ oid => '9716', descr => 'hash',
proname => 'hashxid8', prorettype => 'int4', proargtypes => 'xid8',
prosrc => 'hashxid8' },
{ oid => '9717', descr => 'hash',
proname => 'hashxid8extended', prorettype => 'int8',
proargtypes => 'xid8 int8', prosrc => 'hashxid8extended' },
{ oid => '9718', descr => 'hash',
proname => 'hashcid', prorettype => 'int4', proargtypes => 'cid',
prosrc => 'hashcid' },
{ oid => '9719', descr => 'hash',
proname => 'hashcidextended', prorettype => 'int8', proargtypes => 'cid int8',
prosrc => 'hashcidextended' },
{ oid => '438', descr => 'count the number of NULL arguments',
proname => 'num_nulls', provariadic => 'any', proisstrict => 'f',
@ -6180,6 +6216,12 @@
{ oid => '3411', descr => 'hash',
proname => 'timestamp_hash_extended', prorettype => 'int8',
proargtypes => 'timestamp int8', prosrc => 'timestamp_hash_extended' },
{ oid => '9720', descr => 'hash',
proname => 'timestamptz_hash', prorettype => 'int4',
proargtypes => 'timestamptz', prosrc => 'timestamptz_hash' },
{ oid => '9721', descr => 'hash',
proname => 'timestamptz_hash_extended', prorettype => 'int8',
proargtypes => 'timestamptz int8', prosrc => 'timestamptz_hash_extended' },
{ oid => '2041', descr => 'intervals overlap?',
proname => 'overlaps', proisstrict => 'f', prorettype => 'bool',
proargtypes => 'timestamp timestamp timestamp timestamp',