diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 2e8ccd0578..66b68fc71d 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -2785,7 +2785,6 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation, Oid expr_coll; Const *patt; Const *prefix = NULL; - Const *rest = NULL; Pattern_Prefix_Status pstatus = Pattern_Prefix_None; /* @@ -2814,13 +2813,13 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation, case OID_NAME_LIKE_OP: /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; case OID_BYTEA_LIKE_OP: pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; @@ -2829,7 +2828,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation, case OID_NAME_ICLIKE_OP: /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, expr_coll, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; @@ -2838,7 +2837,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation, case OID_NAME_REGEXEQ_OP: /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, expr_coll, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; @@ -2847,7 +2846,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation, case OID_NAME_ICREGEXEQ_OP: /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, expr_coll, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; @@ -3115,7 +3114,6 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation) Oid expr_coll = ((OpExpr *) clause)->inputcollid; Const *patt = (Const *) rightop; Const *prefix = NULL; - Const *rest = NULL; Pattern_Prefix_Status pstatus; /* @@ -3135,7 +3133,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation) if (!op_in_opfamily(expr_op, opfamily)) { pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll, - &prefix, &rest); + &prefix, NULL); return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus); } break; @@ -3147,7 +3145,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation) { /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, expr_coll, - &prefix, &rest); + &prefix, NULL); return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus); } break; @@ -3159,7 +3157,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation) { /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, expr_coll, - &prefix, &rest); + &prefix, NULL); return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus); } break; @@ -3171,7 +3169,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation) { /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, expr_coll, - &prefix, &rest); + &prefix, NULL); return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus); } break; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index dc38034104..7eb64cba4b 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -192,7 +192,10 @@ static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid vartype, Oid opfamily, Const *prefixcon); -static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype); +static Selectivity like_selectivity(const char *patt, int pattlen, + bool case_insensitive); +static Selectivity regex_selectivity(const char *patt, int pattlen, + bool case_insensitive); static Datum string_to_datum(const char *str, Oid datatype); static Const *string_to_const(const char *str, Oid datatype); static Const *string_to_bytea_const(const char *str, size_t str_len); @@ -1115,9 +1118,9 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) Oid vartype; Oid opfamily; Pattern_Prefix_Status pstatus; - Const *patt = NULL; + Const *patt; Const *prefix = NULL; - Const *rest = NULL; + Selectivity rest_selec = 0; double result; /* @@ -1207,8 +1210,9 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) } /* - * Divide pattern into fixed prefix and remainder. Unlike many of the - * other functions in this file, we use the pattern operator's actual + * Pull out any fixed prefix implied by the pattern, and estimate the + * fractional selectivity of the remainder of the pattern. Unlike many of + * the other functions in this file, we use the pattern operator's actual * collation for this step. This is not because we expect the collation * to make a big difference in the selectivity estimate (it seldom would), * but because we want to be sure we cache compiled regexps under the @@ -1216,11 +1220,10 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) */ patt = (Const *) other; pstatus = pattern_fixed_prefix(patt, ptype, collation, - &prefix, &rest); + &prefix, &rest_selec); /* - * If necessary, coerce the prefix constant to the right type. (The "rest" - * constant need not be changed.) + * If necessary, coerce the prefix constant to the right type. */ if (prefix && prefix->consttype != vartype) { @@ -1294,15 +1297,13 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) { Selectivity heursel; Selectivity prefixsel; - Selectivity restsel; if (pstatus == Pattern_Prefix_Partial) prefixsel = prefix_selectivity(root, &vardata, vartype, opfamily, prefix); else prefixsel = 1.0; - restsel = pattern_selectivity(rest, ptype); - heursel = prefixsel * restsel; + heursel = prefixsel * rest_selec; if (selec < 0) /* fewer than 10 histogram entries? */ selec = heursel; @@ -5133,9 +5134,9 @@ pattern_char_isalpha(char c, bool is_multibyte, * * *prefix is set to a palloc'd prefix string (in the form of a Const node), * or to NULL if no fixed prefix exists for the pattern. - * *rest is set to a palloc'd Const representing the remainder of the pattern - * after the portion describing the fixed prefix. - * Each of these has the same type (TEXT or BYTEA) as the given pattern Const. + * If rest_selec is not NULL, *rest_selec is set to an estimate of the + * selectivity of the remainder of the pattern (without any fixed prefix). + * The prefix Const has the same type (TEXT or BYTEA) as the input pattern. * * The return value distinguishes no fixed prefix, a partial prefix, * or an exact-match-only pattern. @@ -5143,12 +5144,11 @@ pattern_char_isalpha(char c, bool is_multibyte, static Pattern_Prefix_Status like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, - Const **prefix_const, Const **rest_const) + Const **prefix_const, Selectivity *rest_selec) { char *match; char *patt; int pattlen; - char *rest; Oid typeid = patt_const->consttype; int pos, match_pos; @@ -5228,18 +5228,15 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, } match[match_pos] = '\0'; - rest = &patt[pos]; if (typeid != BYTEAOID) - { *prefix_const = string_to_const(match, typeid); - *rest_const = string_to_const(rest, typeid); - } else - { *prefix_const = string_to_bytea_const(match, match_pos); - *rest_const = string_to_bytea_const(rest, pattlen - pos); - } + + if (rest_selec != NULL) + *rest_selec = like_selectivity(&patt[pos], pattlen - pos, + case_insensitive); pfree(patt); pfree(match); @@ -5256,7 +5253,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, static Pattern_Prefix_Status regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, - Const **prefix_const, Const **rest_const) + Const **prefix_const, Selectivity *rest_selec) { char *match; int pos, @@ -5318,10 +5315,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, /* Pattern must be anchored left */ if (patt[pos] != '^') { - rest = patt; - *prefix_const = NULL; - *rest_const = string_to_const(rest, typeid); + + if (rest_selec != NULL) + *rest_selec = regex_selectivity(patt, strlen(patt), + case_insensitive); return Pattern_Prefix_None; } @@ -5335,10 +5333,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, */ if (strchr(patt + pos, '|') != NULL) { - rest = patt; - *prefix_const = NULL; - *rest_const = string_to_const(rest, typeid); + + if (rest_selec != NULL) + *rest_selec = regex_selectivity(patt, strlen(patt), + case_insensitive); return Pattern_Prefix_None; } @@ -5434,10 +5433,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, if (patt[pos] == '$' && patt[pos + 1] == '\0') { - rest = &patt[pos + 1]; - *prefix_const = string_to_const(match, typeid); - *rest_const = string_to_const(rest, typeid); + + if (rest_selec != NULL) + *rest_selec = 1.0; pfree(patt); pfree(match); @@ -5446,7 +5445,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, } *prefix_const = string_to_const(match, typeid); - *rest_const = string_to_const(rest, typeid); + + if (rest_selec != NULL) + *rest_selec = regex_selectivity(rest, strlen(rest), + case_insensitive); pfree(patt); pfree(match); @@ -5459,23 +5461,27 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, - Const **prefix, Const **rest) + Const **prefix, Selectivity *rest_selec) { Pattern_Prefix_Status result; switch (ptype) { case Pattern_Type_Like: - result = like_fixed_prefix(patt, false, collation, prefix, rest); + result = like_fixed_prefix(patt, false, collation, + prefix, rest_selec); break; case Pattern_Type_Like_IC: - result = like_fixed_prefix(patt, true, collation, prefix, rest); + result = like_fixed_prefix(patt, true, collation, + prefix, rest_selec); break; case Pattern_Type_Regex: - result = regex_fixed_prefix(patt, false, collation, prefix, rest); + result = regex_fixed_prefix(patt, false, collation, + prefix, rest_selec); break; case Pattern_Type_Regex_IC: - result = regex_fixed_prefix(patt, true, collation, prefix, rest); + result = regex_fixed_prefix(patt, true, collation, + prefix, rest_selec); break; default: elog(ERROR, "unrecognized ptype: %d", (int) ptype); @@ -5590,7 +5596,8 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, /* * Estimate the selectivity of a pattern of the specified type. - * Note that any fixed prefix of the pattern will have been removed already. + * Note that any fixed prefix of the pattern will have been removed already, + * so actually we may be looking at just a fragment of the pattern. * * For now, we use a very simplistic approach: fixed characters reduce the * selectivity a good deal, character ranges reduce it a little, @@ -5604,37 +5611,10 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, #define PARTIAL_WILDCARD_SEL 2.0 static Selectivity -like_selectivity(Const *patt_const, bool case_insensitive) +like_selectivity(const char *patt, int pattlen, bool case_insensitive) { Selectivity sel = 1.0; int pos; - Oid typeid = patt_const->consttype; - char *patt; - int pattlen; - - /* the right-hand const is type text or bytea */ - Assert(typeid == BYTEAOID || typeid == TEXTOID); - - if (typeid == BYTEAOID && case_insensitive) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("case insensitive matching not supported on type bytea"))); - - if (typeid != BYTEAOID) - { - patt = TextDatumGetCString(patt_const->constvalue); - pattlen = strlen(patt); - } - else - { - bytea *bstr = DatumGetByteaP(patt_const->constvalue); - - pattlen = VARSIZE(bstr) - VARHDRSZ; - patt = (char *) palloc(pattlen); - memcpy(patt, VARDATA(bstr), pattlen); - if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue)) - pfree(bstr); - } /* Skip any leading wildcard; it's already factored into initial sel */ for (pos = 0; pos < pattlen; pos++) @@ -5664,13 +5644,11 @@ like_selectivity(Const *patt_const, bool case_insensitive) /* Could get sel > 1 if multiple wildcards */ if (sel > 1.0) sel = 1.0; - - pfree(patt); return sel; } static Selectivity -regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive) +regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive) { Selectivity sel = 1.0; int paren_depth = 0; @@ -5763,26 +5741,9 @@ regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive) } static Selectivity -regex_selectivity(Const *patt_const, bool case_insensitive) +regex_selectivity(const char *patt, int pattlen, bool case_insensitive) { Selectivity sel; - char *patt; - int pattlen; - Oid typeid = patt_const->consttype; - - /* - * Should be unnecessary, there are no bytea regex operators defined. As - * such, it should be noted that the rest of this function has *not* been - * made safe for binary (possibly NULL containing) strings. - */ - if (typeid == BYTEAOID) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("regular-expression matching not supported on type bytea"))); - - /* the right-hand const is type text for all of these */ - patt = TextDatumGetCString(patt_const->constvalue); - pattlen = strlen(patt); /* If patt doesn't end with $, consider it to have a trailing wildcard */ if (pattlen > 0 && patt[pattlen - 1] == '$' && @@ -5802,33 +5763,6 @@ regex_selectivity(Const *patt_const, bool case_insensitive) return sel; } -static Selectivity -pattern_selectivity(Const *patt, Pattern_Type ptype) -{ - Selectivity result; - - switch (ptype) - { - case Pattern_Type_Like: - result = like_selectivity(patt, false); - break; - case Pattern_Type_Like_IC: - result = like_selectivity(patt, true); - break; - case Pattern_Type_Regex: - result = regex_selectivity(patt, false); - break; - case Pattern_Type_Regex_IC: - result = regex_selectivity(patt, true); - break; - default: - elog(ERROR, "unrecognized ptype: %d", (int) ptype); - result = 1.0; /* keep compiler quiet */ - break; - } - return result; -} - /* * For bytea, the increment function need only increment the current byte diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 87c6554b32..aa5ee69d8b 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -134,7 +134,7 @@ extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, Const **prefix, - Const **rest); + Selectivity *rest_selec); extern Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation);