diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c index d0f890c28a..2b9e7b320a 100644 --- a/src/backend/utils/adt/rangetypes.c +++ b/src/backend/utils/adt/rangetypes.c @@ -1622,6 +1622,24 @@ range_get_flags(RangeType *range) return *((char *) range + VARSIZE(range) - 1); } +/* + * range_set_contain_empty: set the RANGE_CONTAIN_EMPTY bit in the value. + * + * This is only needed in GiST operations, so we don't include a provision + * for setting it in range_serialize; rather, this function must be applied + * afterwards. + */ +void +range_set_contain_empty(RangeType *range) +{ + char *flagsp; + + /* flag byte is datum's last byte */ + flagsp = (char *) range + VARSIZE(range) - 1; + + *flagsp |= RANGE_CONTAIN_EMPTY; +} + /* * This both serializes and canonicalizes (if applicable) the range. * This should be used by most callers. diff --git a/src/backend/utils/adt/rangetypes_gist.c b/src/backend/utils/adt/rangetypes_gist.c index be59a5c4a3..b683958828 100644 --- a/src/backend/utils/adt/rangetypes_gist.c +++ b/src/backend/utils/adt/rangetypes_gist.c @@ -17,6 +17,7 @@ #include "access/gist.h" #include "access/skey.h" #include "utils/builtins.h" +#include "utils/datum.h" #include "utils/rangetypes.h" @@ -32,7 +33,11 @@ #define RANGESTRAT_CONTAINED_BY 8 #define RANGESTRAT_CONTAINS_ELEM 16 #define RANGESTRAT_EQ 18 -#define RANGESTRAT_NE 19 + +/* Copy a RangeType datum (hardwires typbyval and typlen for ranges...) */ +#define rangeCopy(r) \ + ((RangeType *) DatumGetPointer(datumCopy(PointerGetDatum(r), \ + false, -1))) /* * Auxiliary structure for picksplit method. @@ -145,6 +150,16 @@ range_gist_penalty(PG_FUNCTION_ARGS) subtype_diff = &typcache->rng_subdiff_finfo; + /* + * If new is or contains empty, and orig doesn't, apply infinite penalty. + * We really don't want to pollute an empty-free subtree with empties. + */ + if (RangeIsOrContainsEmpty(new) && !RangeIsOrContainsEmpty(orig)) + { + *penalty = get_float4_infinity(); + PG_RETURN_POINTER(penalty); + } + /* * We want to compare the size of "orig" to size of "orig union new". * The penalty will be the sum of the reduction in the lower bound plus @@ -163,30 +178,9 @@ range_gist_penalty(PG_FUNCTION_ARGS) } else if (empty1) { - if (lower2.infinite || upper2.infinite) - { - /* from empty to infinite */ - *penalty = get_float4_infinity(); - PG_RETURN_POINTER(penalty); - } - else if (OidIsValid(subtype_diff->fn_oid)) - { - /* from empty to upper2-lower2 */ - *penalty = DatumGetFloat8(FunctionCall2Coll(subtype_diff, - typcache->rng_collation, - upper2.val, - lower2.val)); - /* upper2 must be >= lower2 */ - if (*penalty < 0) - *penalty = 0; /* subtype_diff is broken */ - PG_RETURN_POINTER(penalty); - } - else - { - /* wild guess */ - *penalty = 1.0; - PG_RETURN_POINTER(penalty); - } + /* infinite penalty for pushing non-empty into all-empty subtree */ + *penalty = get_float4_infinity(); + PG_RETURN_POINTER(penalty); } /* if orig isn't empty, s_union can't be either */ @@ -334,15 +328,27 @@ range_gist_picksplit(PG_FUNCTION_ARGS) Datum range_gist_same(PG_FUNCTION_ARGS) { - /* Datum r1 = PG_GETARG_DATUM(0); */ - /* Datum r2 = PG_GETARG_DATUM(1); */ + RangeType *r1 = PG_GETARG_RANGE(0); + RangeType *r2 = PG_GETARG_RANGE(1); bool *result = (bool *) PG_GETARG_POINTER(2); /* - * We can safely call range_eq using our fcinfo directly; it won't notice - * the third argument. This allows it to use fn_extra for caching. + * range_eq will ignore the RANGE_CONTAIN_EMPTY flag, so we have to + * check that for ourselves. More generally, if the entries have been + * properly normalized, then unequal flags bytes must mean unequal ranges + * ... so let's just test all the flag bits at once. */ - *result = DatumGetBool(range_eq(fcinfo)); + if (range_get_flags(r1) != range_get_flags(r2)) + *result = false; + else + { + /* + * We can safely call range_eq using our fcinfo directly; it won't + * notice the third argument. This allows it to use fn_extra for + * caching. + */ + *result = DatumGetBool(range_eq(fcinfo)); + } PG_RETURN_POINTER(result); } @@ -356,27 +362,53 @@ range_gist_same(PG_FUNCTION_ARGS) /* * Return the smallest range that contains r1 and r2 * - * XXX would it be better to redefine range_union as working this way? + * This differs from regular range_union in two critical ways: + * 1. It won't throw an error for non-adjacent r1 and r2, but just absorb + * the intervening values into the result range. + * 2. We track whether any empty range has been union'd into the result, + * so that contained_by searches can be indexed. Note that this means + * that *all* unions formed within the GiST index must go through here. */ static RangeType * range_super_union(TypeCacheEntry *typcache, RangeType * r1, RangeType * r2) { + RangeType *result; RangeBound lower1, lower2; RangeBound upper1, upper2; bool empty1, empty2; + char flags1, + flags2; RangeBound *result_lower; RangeBound *result_upper; range_deserialize(typcache, r1, &lower1, &upper1, &empty1); range_deserialize(typcache, r2, &lower2, &upper2, &empty2); + flags1 = range_get_flags(r1); + flags2 = range_get_flags(r2); if (empty1) + { + /* We can return r2 as-is if it already is or contains empty */ + if (flags2 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY)) + return r2; + /* Else we'd better copy it (modify-in-place isn't safe) */ + r2 = rangeCopy(r2); + range_set_contain_empty(r2); return r2; + } if (empty2) + { + /* We can return r1 as-is if it already is or contains empty */ + if (flags1 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY)) + return r1; + /* Else we'd better copy it (modify-in-place isn't safe) */ + r1 = rangeCopy(r1); + range_set_contain_empty(r1); return r1; + } if (range_cmp_bounds(typcache, &lower1, &lower2) <= 0) result_lower = &lower1; @@ -389,12 +421,19 @@ range_super_union(TypeCacheEntry *typcache, RangeType * r1, RangeType * r2) result_upper = &upper2; /* optimization to avoid constructing a new range */ - if (result_lower == &lower1 && result_upper == &upper1) + if (result_lower == &lower1 && result_upper == &upper1 && + ((flags1 & RANGE_CONTAIN_EMPTY) || !(flags2 & RANGE_CONTAIN_EMPTY))) return r1; - if (result_lower == &lower2 && result_upper == &upper2) + if (result_lower == &lower2 && result_upper == &upper2 && + ((flags2 & RANGE_CONTAIN_EMPTY) || !(flags1 & RANGE_CONTAIN_EMPTY))) return r2; - return make_range(typcache, result_lower, result_upper, false); + result = make_range(typcache, result_lower, result_upper, false); + + if ((flags1 & RANGE_CONTAIN_EMPTY) || (flags2 & RANGE_CONTAIN_EMPTY)) + range_set_contain_empty(result); + + return result; } /* @@ -484,21 +523,26 @@ range_gist_consistent_int(FmgrInfo *flinfo, StrategyNumber strategy, break; case RANGESTRAT_CONTAINED_BY: /* - * Ideally we'd apply range_overlaps here, but at present it - * might fail to find empty ranges in the index, which should - * be reported as being contained by anything. This needs work. + * Empty ranges are contained by anything, so if key is or + * contains any empty ranges, we must descend into it. Otherwise, + * descend only if key overlaps the query. */ - return true; + if (RangeIsOrContainsEmpty(key)) + return true; + proc = range_overlaps; break; case RANGESTRAT_CONTAINS_ELEM: proc = range_contains_elem; break; case RANGESTRAT_EQ: + /* + * If query is empty, descend only if the key is or contains any + * empty ranges. Otherwise, descend if key contains query. + */ + if (RangeIsEmpty(DatumGetRangeType(query))) + return RangeIsOrContainsEmpty(key); proc = range_contains; break; - case RANGESTRAT_NE: - return true; - break; default: elog(ERROR, "unrecognized range strategy: %d", strategy); proc = NULL; /* keep compiler quiet */ @@ -555,9 +599,6 @@ range_gist_consistent_leaf(FmgrInfo *flinfo, StrategyNumber strategy, case RANGESTRAT_EQ: proc = range_eq; break; - case RANGESTRAT_NE: - proc = range_ne; - break; default: elog(ERROR, "unrecognized range strategy: %d", strategy); proc = NULL; /* keep compiler quiet */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 088c307660..5a73726ba3 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201111241 +#define CATALOG_VERSION_NO 201111271 #endif diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h index a95cf5fa50..1e8c9a289f 100644 --- a/src/include/catalog/pg_amop.h +++ b/src/include/catalog/pg_amop.h @@ -736,6 +736,5 @@ DATA(insert ( 3919 3831 3831 7 s 3890 783 0 )); DATA(insert ( 3919 3831 3831 8 s 3892 783 0 )); DATA(insert ( 3919 3831 2283 16 s 3889 783 0 )); DATA(insert ( 3919 3831 3831 18 s 3882 783 0 )); -DATA(insert ( 3919 3831 3831 19 s 3883 783 0 )); #endif /* PG_AMOP_H */ diff --git a/src/include/utils/rangetypes.h b/src/include/utils/rangetypes.h index c9a6834e6d..307476d976 100644 --- a/src/include/utils/rangetypes.h +++ b/src/include/utils/rangetypes.h @@ -33,13 +33,15 @@ typedef struct #define RangeTypeGetOid(r) ((r)->rangetypid) /* A range's flags byte contains these bits: */ -#define RANGE_EMPTY 0x01 /* range is empty */ -#define RANGE_LB_INC 0x02 /* lower bound is inclusive (vs exclusive) */ -#define RANGE_LB_NULL 0x04 /* lower bound is null (NOT CURRENTLY USED) */ -#define RANGE_LB_INF 0x08 /* lower bound is +/- infinity */ -#define RANGE_UB_INC 0x10 /* upper bound is inclusive (vs exclusive) */ -#define RANGE_UB_NULL 0x20 /* upper bound is null (NOT CURRENTLY USED) */ -#define RANGE_UB_INF 0x40 /* upper bound is +/- infinity */ +#define RANGE_EMPTY 0x01 /* range is empty */ +#define RANGE_LB_INC 0x02 /* lower bound is inclusive */ +#define RANGE_UB_INC 0x04 /* upper bound is inclusive */ +#define RANGE_LB_INF 0x08 /* lower bound is -infinity */ +#define RANGE_UB_INF 0x10 /* upper bound is +infinity */ +#define RANGE_LB_NULL 0x20 /* lower bound is null (NOT USED) */ +#define RANGE_UB_NULL 0x40 /* upper bound is null (NOT USED) */ +#define RANGE_CONTAIN_EMPTY 0x80 /* marks a GiST internal-page entry whose + * subtree contains some empty ranges */ #define RANGE_HAS_LBOUND(flags) (!((flags) & (RANGE_EMPTY | \ RANGE_LB_NULL | \ @@ -49,7 +51,9 @@ typedef struct RANGE_UB_NULL | \ RANGE_UB_INF))) -#define RangeIsEmpty(r) (range_get_flags(r) & RANGE_EMPTY) +#define RangeIsEmpty(r) ((range_get_flags(r) & RANGE_EMPTY) != 0) +#define RangeIsOrContainsEmpty(r) \ + ((range_get_flags(r) & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY)) != 0) /* Internal representation of either bound of a range (not what's on disk) */ @@ -152,6 +156,7 @@ extern void range_deserialize(TypeCacheEntry *typcache, RangeType *range, RangeBound *lower, RangeBound *upper, bool *empty); extern char range_get_flags(RangeType *range); +extern void range_set_contain_empty(RangeType *range); extern RangeType *make_range(TypeCacheEntry *typcache, RangeBound *lower, RangeBound *upper, bool empty); extern int range_cmp_bounds(TypeCacheEntry *typcache, RangeBound *b1, diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 9bce504dd3..b915134fff 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -1041,7 +1041,6 @@ ORDER BY 1, 2, 3; 783 | 15 | <-> 783 | 16 | @> 783 | 18 | = - 783 | 19 | <> 783 | 27 | @> 783 | 28 | <@ 783 | 47 | @> @@ -1054,7 +1053,7 @@ ORDER BY 1, 2, 3; 2742 | 2 | @@@ 2742 | 3 | <@ 2742 | 4 | = -(44 rows) +(43 rows) -- Check that all opclass search operators have selectivity estimators. -- This is not absolutely required, but it seems a reasonable thing