diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 15669c8238..146ee8dd1e 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -1852,9 +1852,12 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt) /* * Make a SortGroupClause node for a SetOperationStmt's groupClauses + * + * If require_hash is true, the caller is indicating that they need hash + * support or they will fail. So look extra hard for hash support. */ SortGroupClause * -makeSortGroupClauseForSetOp(Oid rescoltype) +makeSortGroupClauseForSetOp(Oid rescoltype, bool require_hash) { SortGroupClause *grpcl = makeNode(SortGroupClause); Oid sortop; @@ -1867,6 +1870,15 @@ makeSortGroupClauseForSetOp(Oid rescoltype) &sortop, &eqop, NULL, &hashable); + /* + * The type cache doesn't believe that record is hashable (see + * cache_record_field_properties()), but if the caller really needs hash + * support, we can assume it does. Worst case, if any components of the + * record don't support hashing, we will fail at execution. + */ + if (require_hash && (rescoltype == RECORDOID || rescoltype == RECORDARRAYOID)) + hashable = true; + /* we don't have a tlist yet, so can't assign sortgrouprefs */ grpcl->tleSortGroupRef = 0; grpcl->eqop = eqop; @@ -2027,6 +2039,8 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, ListCell *ltl; ListCell *rtl; const char *context; + bool recursive = (pstate->p_parent_cte && + pstate->p_parent_cte->cterecursive); context = (stmt->op == SETOP_UNION ? "UNION" : (stmt->op == SETOP_INTERSECT ? "INTERSECT" : @@ -2048,9 +2062,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, * containing CTE as having those result columns. We should do this * only at the topmost setop of the CTE, of course. */ - if (isTopLevel && - pstate->p_parent_cte && - pstate->p_parent_cte->cterecursive) + if (isTopLevel && recursive) determineRecursiveColTypes(pstate, op->larg, ltargetlist); /* @@ -2182,8 +2194,9 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, setup_parser_errposition_callback(&pcbstate, pstate, bestlocation); + /* If it's a recursive union, we need to require hashing support. */ op->groupClauses = lappend(op->groupClauses, - makeSortGroupClauseForSetOp(rescoltype)); + makeSortGroupClauseForSetOp(rescoltype, recursive)); cancel_parser_errposition_callback(&pcbstate); } diff --git a/src/backend/rewrite/rewriteSearchCycle.c b/src/backend/rewrite/rewriteSearchCycle.c index c50ebdba24..ef38f4025a 100644 --- a/src/backend/rewrite/rewriteSearchCycle.c +++ b/src/backend/rewrite/rewriteSearchCycle.c @@ -594,7 +594,7 @@ rewriteSearchAndCycle(CommonTableExpr *cte) sos->colCollations = lappend_oid(sos->colCollations, InvalidOid); if (!sos->all) sos->groupClauses = lappend(sos->groupClauses, - makeSortGroupClauseForSetOp(search_seq_type)); + makeSortGroupClauseForSetOp(search_seq_type, true)); } if (cte->cycle_clause) { @@ -603,14 +603,14 @@ rewriteSearchAndCycle(CommonTableExpr *cte) sos->colCollations = lappend_oid(sos->colCollations, cte->cycle_clause->cycle_mark_collation); if (!sos->all) sos->groupClauses = lappend(sos->groupClauses, - makeSortGroupClauseForSetOp(cte->cycle_clause->cycle_mark_type)); + makeSortGroupClauseForSetOp(cte->cycle_clause->cycle_mark_type, true)); sos->colTypes = lappend_oid(sos->colTypes, RECORDARRAYOID); sos->colTypmods = lappend_int(sos->colTypmods, -1); sos->colCollations = lappend_oid(sos->colCollations, InvalidOid); if (!sos->all) sos->groupClauses = lappend(sos->groupClauses, - makeSortGroupClauseForSetOp(RECORDARRAYOID)); + makeSortGroupClauseForSetOp(RECORDARRAYOID, true)); } /* diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 04d487c544..bc2f30bae1 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -29,6 +29,7 @@ #include "utils/arrayaccess.h" #include "utils/builtins.h" #include "utils/datum.h" +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/selfuncs.h" @@ -3973,13 +3974,46 @@ hash_array(PG_FUNCTION_ARGS) { typentry = lookup_type_cache(element_type, TYPECACHE_HASH_PROC_FINFO); - if (!OidIsValid(typentry->hash_proc_finfo.fn_oid)) + if (!OidIsValid(typentry->hash_proc_finfo.fn_oid) && element_type != RECORDOID) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("could not identify a hash function for type %s", format_type_be(element_type)))); + + /* + * The type cache doesn't believe that record is hashable (see + * cache_record_field_properties()), but since we're here, we're + * committed to hashing, so we can assume it does. Worst case, if any + * components of the record don't support hashing, we will fail at + * execution. + */ + if (element_type == RECORDOID) + { + MemoryContext oldcontext; + TypeCacheEntry *record_typentry; + + oldcontext = MemoryContextSwitchTo(CacheMemoryContext); + + /* + * Make fake type cache entry structure. Note that we can't just + * modify typentry, since that points directly into the type cache. + */ + record_typentry = palloc(sizeof(*record_typentry)); + + /* fill in what we need below */ + record_typentry->typlen = typentry->typlen; + record_typentry->typbyval = typentry->typbyval; + record_typentry->typalign = typentry->typalign; + fmgr_info(F_HASH_RECORD, &record_typentry->hash_proc_finfo); + + MemoryContextSwitchTo(oldcontext); + + typentry = record_typentry; + } + fcinfo->flinfo->fn_extra = (void *) typentry; } + typlen = typentry->typlen; typbyval = typentry->typbyval; typalign = typentry->typalign; diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c index 326fae62e2..70e5c51297 100644 --- a/src/backend/utils/cache/typcache.c +++ b/src/backend/utils/cache/typcache.c @@ -1515,14 +1515,17 @@ cache_record_field_properties(TypeCacheEntry *typentry) /* * For type RECORD, we can't really tell what will work, since we don't * have access here to the specific anonymous type. Just assume that - * everything will (we may get a failure at runtime ...) + * equality and comparison will (we may get a failure at runtime). We + * could also claim that hashing works, but then if code that has the + * option between a comparison-based (sort-based) and a hash-based plan + * chooses hashing, stuff could fail that would otherwise work if it chose + * a comparison-based plan. In practice more types support comparison + * than hashing. */ if (typentry->type_id == RECORDOID) { typentry->flags |= (TCFLAGS_HAVE_FIELD_EQUALITY | - TCFLAGS_HAVE_FIELD_COMPARE | - TCFLAGS_HAVE_FIELD_HASHING | - TCFLAGS_HAVE_FIELD_EXTENDED_HASHING); + TCFLAGS_HAVE_FIELD_COMPARE); } else if (typentry->typtype == TYPTYPE_COMPOSITE) { diff --git a/src/include/parser/analyze.h b/src/include/parser/analyze.h index 6716db6c13..a0f0bd38d7 100644 --- a/src/include/parser/analyze.h +++ b/src/include/parser/analyze.h @@ -48,6 +48,6 @@ extern void applyLockingClause(Query *qry, Index rtindex, extern List *BuildOnConflictExcludedTargetlist(Relation targetrel, Index exclRelIndex); -extern SortGroupClause *makeSortGroupClauseForSetOp(Oid rescoltype); +extern SortGroupClause *makeSortGroupClauseForSetOp(Oid rescoltype, bool require_hash); #endif /* ANALYZE_H */ diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 75f78db8f5..dece7310cf 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -648,34 +648,37 @@ reset enable_hashagg; set enable_hashagg to on; explain (costs off) select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x); - QUERY PLAN ------------------------------------------ - HashAggregate - Group Key: "*VALUES*".column1 - -> Append - -> Values Scan on "*VALUES*" - -> Values Scan on "*VALUES*_1" -(5 rows) + QUERY PLAN +----------------------------------------------- + Unique + -> Sort + Sort Key: "*VALUES*".column1 + -> Append + -> Values Scan on "*VALUES*" + -> Values Scan on "*VALUES*_1" +(6 rows) select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x); x ------- - (1,4) - (1,3) (1,2) + (1,3) + (1,4) (3 rows) explain (costs off) select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x); - QUERY PLAN ------------------------------------------------ - HashSetOp Intersect - -> Append - -> Subquery Scan on "*SELECT* 1" - -> Values Scan on "*VALUES*" - -> Subquery Scan on "*SELECT* 2" - -> Values Scan on "*VALUES*_1" -(6 rows) + QUERY PLAN +----------------------------------------------------- + SetOp Intersect + -> Sort + Sort Key: "*SELECT* 1".x + -> Append + -> Subquery Scan on "*SELECT* 1" + -> Values Scan on "*VALUES*" + -> Subquery Scan on "*SELECT* 2" + -> Values Scan on "*VALUES*_1" +(8 rows) select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x); x @@ -685,15 +688,17 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va explain (costs off) select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x); - QUERY PLAN ------------------------------------------------ - HashSetOp Except - -> Append - -> Subquery Scan on "*SELECT* 1" - -> Values Scan on "*VALUES*" - -> Subquery Scan on "*SELECT* 2" - -> Values Scan on "*VALUES*_1" -(6 rows) + QUERY PLAN +----------------------------------------------------- + SetOp Except + -> Sort + Sort Key: "*SELECT* 1".x + -> Append + -> Subquery Scan on "*SELECT* 1" + -> Values Scan on "*VALUES*" + -> Subquery Scan on "*SELECT* 2" + -> Values Scan on "*VALUES*_1" +(8 rows) select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x); x @@ -702,21 +707,28 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value (1 row) -- non-hashable type --- With an anonymous row type, the typcache reports that the type is --- hashable, but then it will fail at run time. +-- With an anonymous row type, the typcache does not report that the +-- type is hashable. (Otherwise, this would fail at execution time.) explain (costs off) select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x); - QUERY PLAN ------------------------------------------ - HashAggregate - Group Key: "*VALUES*".column1 - -> Append - -> Values Scan on "*VALUES*" - -> Values Scan on "*VALUES*_1" -(5 rows) + QUERY PLAN +----------------------------------------------- + Unique + -> Sort + Sort Key: "*VALUES*".column1 + -> Append + -> Values Scan on "*VALUES*" + -> Values Scan on "*VALUES*_1" +(6 rows) select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x); -ERROR: could not identify a hash function for type money + x +----------- + ($100.00) + ($200.00) + ($300.00) +(3 rows) + -- With a defined row type, the typcache can inspect the type's fields -- for hashability. create type ct1 as (f1 money); diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql index ce22f34c71..ca8c9b4d12 100644 --- a/src/test/regress/sql/union.sql +++ b/src/test/regress/sql/union.sql @@ -218,8 +218,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value -- non-hashable type --- With an anonymous row type, the typcache reports that the type is --- hashable, but then it will fail at run time. +-- With an anonymous row type, the typcache does not report that the +-- type is hashable. (Otherwise, this would fail at execution time.) explain (costs off) select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x); select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);