diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index b3f96eb773..b0660ad977 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -2137,7 +2137,6 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid expr_op; Const *patt; Const *prefix = NULL; - Const *rest = NULL; Pattern_Prefix_Status pstatus = Pattern_Prefix_None; /* @@ -2165,13 +2164,13 @@ match_special_index_operator(Expr *clause, Oid opfamily, case OID_NAME_LIKE_OP: /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; case OID_BYTEA_LIKE_OP: pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; @@ -2180,7 +2179,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, case OID_NAME_ICLIKE_OP: /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; @@ -2189,7 +2188,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, case OID_NAME_REGEXEQ_OP: /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; @@ -2198,7 +2197,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, case OID_NAME_ICREGEXEQ_OP: /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; @@ -2454,7 +2453,6 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily) Oid expr_op = ((OpExpr *) clause)->opno; Const *patt = (Const *) rightop; Const *prefix = NULL; - Const *rest = NULL; Pattern_Prefix_Status pstatus; /* @@ -2474,7 +2472,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily) if (!op_in_opfamily(expr_op, opfamily)) { pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, - &prefix, &rest); + &prefix, NULL); return prefix_quals(leftop, opfamily, prefix, pstatus); } break; @@ -2486,7 +2484,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily) { /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, - &prefix, &rest); + &prefix, NULL); return prefix_quals(leftop, opfamily, prefix, pstatus); } break; @@ -2498,7 +2496,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily) { /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, - &prefix, &rest); + &prefix, NULL); return prefix_quals(leftop, opfamily, prefix, pstatus); } break; @@ -2510,7 +2508,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily) { /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, - &prefix, &rest); + &prefix, NULL); return prefix_quals(leftop, opfamily, prefix, pstatus); } break; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 7fe5e74527..ce7f7c487d 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -164,7 +164,10 @@ static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); static Selectivity prefix_selectivity(VariableStatData *vardata, Oid vartype, Oid opfamily, Const *prefixcon); -static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype); +static Selectivity like_selectivity(const char *patt, int pattlen, + bool case_insensitive); +static Selectivity regex_selectivity(const char *patt, int pattlen, + bool case_insensitive); static Datum string_to_datum(const char *str, Oid datatype); static Const *string_to_const(const char *str, Oid datatype); static Const *string_to_bytea_const(const char *str, size_t str_len); @@ -1024,9 +1027,9 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) Oid vartype; Oid opfamily; Pattern_Prefix_Status pstatus; - Const *patt = NULL; + Const *patt; Const *prefix = NULL; - Const *rest = NULL; + Selectivity rest_selec = 0; double result; /* @@ -1116,13 +1119,15 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) return result; } - /* divide pattern into fixed prefix and remainder */ + /* + * Pull out any fixed prefix implied by the pattern, and estimate the + * fractional selectivity of the remainder of the pattern. + */ patt = (Const *) other; - pstatus = pattern_fixed_prefix(patt, ptype, &prefix, &rest); + pstatus = pattern_fixed_prefix(patt, ptype, &prefix, &rest_selec); /* - * If necessary, coerce the prefix constant to the right type. (The "rest" - * constant need not be changed.) + * If necessary, coerce the prefix constant to the right type. */ if (prefix && prefix->consttype != vartype) { @@ -1196,15 +1201,13 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) { Selectivity heursel; Selectivity prefixsel; - Selectivity restsel; if (pstatus == Pattern_Prefix_Partial) prefixsel = prefix_selectivity(&vardata, vartype, opfamily, prefix); else prefixsel = 1.0; - restsel = pattern_selectivity(rest, ptype); - heursel = prefixsel * restsel; + heursel = prefixsel * rest_selec; if (selec < 0) /* fewer than 10 histogram entries? */ selec = heursel; @@ -4533,9 +4536,9 @@ find_join_input_rel(PlannerInfo *root, Relids relids) * * *prefix is set to a palloc'd prefix string (in the form of a Const node), * or to NULL if no fixed prefix exists for the pattern. - * *rest is set to a palloc'd Const representing the remainder of the pattern - * after the portion describing the fixed prefix. - * Each of these has the same type (TEXT or BYTEA) as the given pattern Const. + * If rest_selec is not NULL, *rest_selec is set to an estimate of the + * selectivity of the remainder of the pattern (without any fixed prefix). + * The prefix Const has the same type (TEXT or BYTEA) as the input pattern. * * The return value distinguishes no fixed prefix, a partial prefix, * or an exact-match-only pattern. @@ -4543,12 +4546,11 @@ find_join_input_rel(PlannerInfo *root, Relids relids) static Pattern_Prefix_Status like_fixed_prefix(Const *patt_const, bool case_insensitive, - Const **prefix_const, Const **rest_const) + Const **prefix_const, Selectivity *rest_selec) { char *match; char *patt; int pattlen; - char *rest; Oid typeid = patt_const->consttype; int pos, match_pos; @@ -4616,18 +4618,15 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, } match[match_pos] = '\0'; - rest = &patt[pos]; if (typeid != BYTEAOID) - { *prefix_const = string_to_const(match, typeid); - *rest_const = string_to_const(rest, typeid); - } else - { *prefix_const = string_to_bytea_const(match, match_pos); - *rest_const = string_to_bytea_const(rest, pattlen - pos); - } + + if (rest_selec != NULL) + *rest_selec = like_selectivity(&patt[pos], pattlen - pos, + case_insensitive); pfree(patt); pfree(match); @@ -4644,7 +4643,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, static Pattern_Prefix_Status regex_fixed_prefix(Const *patt_const, bool case_insensitive, - Const **prefix_const, Const **rest_const) + Const **prefix_const, Selectivity *rest_selec) { char *match; int pos, @@ -4685,10 +4684,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, /* Pattern must be anchored left */ if (patt[pos] != '^') { - rest = patt; - *prefix_const = NULL; - *rest_const = string_to_const(rest, typeid); + + if (rest_selec != NULL) + *rest_selec = regex_selectivity(patt, strlen(patt), + case_insensitive); return Pattern_Prefix_None; } @@ -4702,10 +4702,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, */ if (strchr(patt + pos, '|') != NULL) { - rest = patt; - *prefix_const = NULL; - *rest_const = string_to_const(rest, typeid); + + if (rest_selec != NULL) + *rest_selec = regex_selectivity(patt, strlen(patt), + case_insensitive); return Pattern_Prefix_None; } @@ -4817,10 +4818,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, if (patt[pos] == '$' && patt[pos + 1] == '\0') { - rest = &patt[pos + 1]; - *prefix_const = string_to_const(match, typeid); - *rest_const = string_to_const(rest, typeid); + + if (rest_selec != NULL) + *rest_selec = 1.0; pfree(patt); pfree(match); @@ -4829,7 +4830,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, } *prefix_const = string_to_const(match, typeid); - *rest_const = string_to_const(rest, typeid); + + if (rest_selec != NULL) + *rest_selec = regex_selectivity(rest, strlen(rest), + case_insensitive); pfree(patt); pfree(match); @@ -4842,23 +4846,23 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Pattern_Type ptype, - Const **prefix, Const **rest) + Const **prefix, Selectivity *rest_selec) { Pattern_Prefix_Status result; switch (ptype) { case Pattern_Type_Like: - result = like_fixed_prefix(patt, false, prefix, rest); + result = like_fixed_prefix(patt, false, prefix, rest_selec); break; case Pattern_Type_Like_IC: - result = like_fixed_prefix(patt, true, prefix, rest); + result = like_fixed_prefix(patt, true, prefix, rest_selec); break; case Pattern_Type_Regex: - result = regex_fixed_prefix(patt, false, prefix, rest); + result = regex_fixed_prefix(patt, false, prefix, rest_selec); break; case Pattern_Type_Regex_IC: - result = regex_fixed_prefix(patt, true, prefix, rest); + result = regex_fixed_prefix(patt, true, prefix, rest_selec); break; default: elog(ERROR, "unrecognized ptype: %d", (int) ptype); @@ -4973,7 +4977,8 @@ prefix_selectivity(VariableStatData *vardata, /* * Estimate the selectivity of a pattern of the specified type. - * Note that any fixed prefix of the pattern will have been removed already. + * Note that any fixed prefix of the pattern will have been removed already, + * so actually we may be looking at just a fragment of the pattern. * * For now, we use a very simplistic approach: fixed characters reduce the * selectivity a good deal, character ranges reduce it a little, @@ -4987,37 +4992,10 @@ prefix_selectivity(VariableStatData *vardata, #define PARTIAL_WILDCARD_SEL 2.0 static Selectivity -like_selectivity(Const *patt_const, bool case_insensitive) +like_selectivity(const char *patt, int pattlen, bool case_insensitive) { Selectivity sel = 1.0; int pos; - Oid typeid = patt_const->consttype; - char *patt; - int pattlen; - - /* the right-hand const is type text or bytea */ - Assert(typeid == BYTEAOID || typeid == TEXTOID); - - if (typeid == BYTEAOID && case_insensitive) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("case insensitive matching not supported on type bytea"))); - - if (typeid != BYTEAOID) - { - patt = TextDatumGetCString(patt_const->constvalue); - pattlen = strlen(patt); - } - else - { - bytea *bstr = DatumGetByteaP(patt_const->constvalue); - - pattlen = VARSIZE(bstr) - VARHDRSZ; - patt = (char *) palloc(pattlen); - memcpy(patt, VARDATA(bstr), pattlen); - if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue)) - pfree(bstr); - } /* Skip any leading wildcard; it's already factored into initial sel */ for (pos = 0; pos < pattlen; pos++) @@ -5047,13 +5025,11 @@ like_selectivity(Const *patt_const, bool case_insensitive) /* Could get sel > 1 if multiple wildcards */ if (sel > 1.0) sel = 1.0; - - pfree(patt); return sel; } static Selectivity -regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive) +regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive) { Selectivity sel = 1.0; int paren_depth = 0; @@ -5146,26 +5122,9 @@ regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive) } static Selectivity -regex_selectivity(Const *patt_const, bool case_insensitive) +regex_selectivity(const char *patt, int pattlen, bool case_insensitive) { Selectivity sel; - char *patt; - int pattlen; - Oid typeid = patt_const->consttype; - - /* - * Should be unnecessary, there are no bytea regex operators defined. As - * such, it should be noted that the rest of this function has *not* been - * made safe for binary (possibly NULL containing) strings. - */ - if (typeid == BYTEAOID) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("regular-expression matching not supported on type bytea"))); - - /* the right-hand const is type text for all of these */ - patt = TextDatumGetCString(patt_const->constvalue); - pattlen = strlen(patt); /* If patt doesn't end with $, consider it to have a trailing wildcard */ if (pattlen > 0 && patt[pattlen - 1] == '$' && @@ -5185,33 +5144,6 @@ regex_selectivity(Const *patt_const, bool case_insensitive) return sel; } -static Selectivity -pattern_selectivity(Const *patt, Pattern_Type ptype) -{ - Selectivity result; - - switch (ptype) - { - case Pattern_Type_Like: - result = like_selectivity(patt, false); - break; - case Pattern_Type_Like_IC: - result = like_selectivity(patt, true); - break; - case Pattern_Type_Regex: - result = regex_selectivity(patt, false); - break; - case Pattern_Type_Regex_IC: - result = regex_selectivity(patt, true); - break; - default: - elog(ERROR, "unrecognized ptype: %d", (int) ptype); - result = 1.0; /* keep compiler quiet */ - break; - } - return result; -} - /* * Try to generate a string greater than the given string or any diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 534fd8ec61..38371ff89f 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -133,7 +133,7 @@ extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Const **prefix, - Const **rest); + Selectivity *rest_selec); extern Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc); extern Datum eqsel(PG_FUNCTION_ARGS);