Teach pattern_fixed_prefix() about collations.
This is necessary, not optional, now that ILIKE and regexes are collation-aware; otherwise we might derive a wrong comparison constant for index-optimized pattern matches.
This commit is contained in:
parent
dad1f46382
commit
3c381a55b0
@ -2446,6 +2446,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
|
||||
bool isIndexable = false;
|
||||
Node *rightop;
|
||||
Oid expr_op;
|
||||
Oid expr_coll;
|
||||
Const *patt;
|
||||
Const *prefix = NULL;
|
||||
Const *rest = NULL;
|
||||
@ -2462,6 +2463,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
|
||||
/* we know these will succeed */
|
||||
rightop = get_rightop(clause);
|
||||
expr_op = ((OpExpr *) clause)->opno;
|
||||
expr_coll = ((OpExpr *) clause)->inputcollid;
|
||||
|
||||
/* again, required for all current special ops: */
|
||||
if (!IsA(rightop, Const) ||
|
||||
@ -2475,13 +2477,13 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
|
||||
case OID_BPCHAR_LIKE_OP:
|
||||
case OID_NAME_LIKE_OP:
|
||||
/* the right-hand const is type text for all of these */
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll,
|
||||
&prefix, &rest);
|
||||
isIndexable = (pstatus != Pattern_Prefix_None);
|
||||
break;
|
||||
|
||||
case OID_BYTEA_LIKE_OP:
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll,
|
||||
&prefix, &rest);
|
||||
isIndexable = (pstatus != Pattern_Prefix_None);
|
||||
break;
|
||||
@ -2490,7 +2492,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
|
||||
case OID_BPCHAR_ICLIKE_OP:
|
||||
case OID_NAME_ICLIKE_OP:
|
||||
/* the right-hand const is type text for all of these */
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, expr_coll,
|
||||
&prefix, &rest);
|
||||
isIndexable = (pstatus != Pattern_Prefix_None);
|
||||
break;
|
||||
@ -2499,7 +2501,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
|
||||
case OID_BPCHAR_REGEXEQ_OP:
|
||||
case OID_NAME_REGEXEQ_OP:
|
||||
/* the right-hand const is type text for all of these */
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, expr_coll,
|
||||
&prefix, &rest);
|
||||
isIndexable = (pstatus != Pattern_Prefix_None);
|
||||
break;
|
||||
@ -2508,7 +2510,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
|
||||
case OID_BPCHAR_ICREGEXEQ_OP:
|
||||
case OID_NAME_ICREGEXEQ_OP:
|
||||
/* the right-hand const is type text for all of these */
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, expr_coll,
|
||||
&prefix, &rest);
|
||||
isIndexable = (pstatus != Pattern_Prefix_None);
|
||||
break;
|
||||
@ -2544,10 +2546,9 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
|
||||
*
|
||||
* The non-pattern opclasses will not sort the way we need in most non-C
|
||||
* locales. We can use such an index anyway for an exact match (simple
|
||||
* equality), but not for prefix-match cases. Note that we are looking at
|
||||
* the index's collation, not the expression's collation -- this test is
|
||||
* not dependent on the LIKE/regex operator's collation (which would only
|
||||
* affect case folding behavior of ILIKE, anyway).
|
||||
* equality), but not for prefix-match cases. Note that here we are
|
||||
* looking at the index's collation, not the expression's collation --
|
||||
* this test is *not* dependent on the LIKE/regex operator's collation.
|
||||
*/
|
||||
switch (expr_op)
|
||||
{
|
||||
@ -2558,7 +2559,8 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
|
||||
isIndexable =
|
||||
(opfamily == TEXT_PATTERN_BTREE_FAM_OID) ||
|
||||
(opfamily == TEXT_BTREE_FAM_OID &&
|
||||
(pstatus == Pattern_Prefix_Exact || lc_collate_is_c(idxcollation)));
|
||||
(pstatus == Pattern_Prefix_Exact ||
|
||||
lc_collate_is_c(idxcollation)));
|
||||
break;
|
||||
|
||||
case OID_BPCHAR_LIKE_OP:
|
||||
@ -2568,7 +2570,8 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
|
||||
isIndexable =
|
||||
(opfamily == BPCHAR_PATTERN_BTREE_FAM_OID) ||
|
||||
(opfamily == BPCHAR_BTREE_FAM_OID &&
|
||||
(pstatus == Pattern_Prefix_Exact || lc_collate_is_c(idxcollation)));
|
||||
(pstatus == Pattern_Prefix_Exact ||
|
||||
lc_collate_is_c(idxcollation)));
|
||||
break;
|
||||
|
||||
case OID_NAME_LIKE_OP:
|
||||
@ -2770,6 +2773,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
|
||||
Node *leftop = get_leftop(clause);
|
||||
Node *rightop = get_rightop(clause);
|
||||
Oid expr_op = ((OpExpr *) clause)->opno;
|
||||
Oid expr_coll = ((OpExpr *) clause)->inputcollid;
|
||||
Const *patt = (Const *) rightop;
|
||||
Const *prefix = NULL;
|
||||
Const *rest = NULL;
|
||||
@ -2791,7 +2795,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
|
||||
case OID_BYTEA_LIKE_OP:
|
||||
if (!op_in_opfamily(expr_op, opfamily))
|
||||
{
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll,
|
||||
&prefix, &rest);
|
||||
return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
|
||||
}
|
||||
@ -2803,7 +2807,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
|
||||
if (!op_in_opfamily(expr_op, opfamily))
|
||||
{
|
||||
/* the right-hand const is type text for all of these */
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, expr_coll,
|
||||
&prefix, &rest);
|
||||
return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
|
||||
}
|
||||
@ -2815,7 +2819,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
|
||||
if (!op_in_opfamily(expr_op, opfamily))
|
||||
{
|
||||
/* the right-hand const is type text for all of these */
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, expr_coll,
|
||||
&prefix, &rest);
|
||||
return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
|
||||
}
|
||||
@ -2827,7 +2831,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
|
||||
if (!op_in_opfamily(expr_op, opfamily))
|
||||
{
|
||||
/* the right-hand const is type text for all of these */
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
|
||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, expr_coll,
|
||||
&prefix, &rest);
|
||||
return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
|
||||
}
|
||||
|
@ -1181,9 +1181,14 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
|
||||
return result;
|
||||
}
|
||||
|
||||
/* divide pattern into fixed prefix and remainder */
|
||||
/*
|
||||
* Divide pattern into fixed prefix and remainder. XXX we have to assume
|
||||
* default collation here, because we don't have access to the actual
|
||||
* input collation for the operator. FIXME ...
|
||||
*/
|
||||
patt = (Const *) other;
|
||||
pstatus = pattern_fixed_prefix(patt, ptype, &prefix, &rest);
|
||||
pstatus = pattern_fixed_prefix(patt, ptype, DEFAULT_COLLATION_OID,
|
||||
&prefix, &rest);
|
||||
|
||||
/*
|
||||
* If necessary, coerce the prefix constant to the right type. (The "rest"
|
||||
@ -4755,6 +4760,29 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* Check whether char is a letter (and, hence, subject to case-folding)
|
||||
*
|
||||
* In multibyte character sets, we can't use isalpha, and it does not seem
|
||||
* worth trying to convert to wchar_t to use iswalpha. Instead, just assume
|
||||
* any multibyte char is potentially case-varying.
|
||||
*/
|
||||
static int
|
||||
pattern_char_isalpha(char c, bool is_multibyte,
|
||||
pg_locale_t locale, bool locale_is_c)
|
||||
{
|
||||
if (locale_is_c)
|
||||
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
|
||||
else if (is_multibyte && IS_HIGHBIT_SET(c))
|
||||
return true;
|
||||
#ifdef HAVE_LOCALE_T
|
||||
else if (locale)
|
||||
return isalpha_l((unsigned char) c, locale);
|
||||
#endif
|
||||
else
|
||||
return isalpha((unsigned char) c);
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract the fixed prefix, if any, for a pattern.
|
||||
*
|
||||
@ -4769,7 +4797,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
|
||||
*/
|
||||
|
||||
static Pattern_Prefix_Status
|
||||
like_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
|
||||
Const **prefix_const, Const **rest_const)
|
||||
{
|
||||
char *match;
|
||||
@ -4780,15 +4808,39 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
int pos,
|
||||
match_pos;
|
||||
bool is_multibyte = (pg_database_encoding_max_length() > 1);
|
||||
pg_locale_t locale = 0;
|
||||
bool locale_is_c = false;
|
||||
|
||||
/* the right-hand const is type text or bytea */
|
||||
Assert(typeid == BYTEAOID || typeid == TEXTOID);
|
||||
|
||||
if (typeid == BYTEAOID && case_insensitive)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
if (case_insensitive)
|
||||
{
|
||||
if (typeid == BYTEAOID)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("case insensitive matching not supported on type bytea")));
|
||||
|
||||
/* If case-insensitive, we need locale info */
|
||||
if (lc_ctype_is_c(collation))
|
||||
locale_is_c = true;
|
||||
else if (collation != DEFAULT_COLLATION_OID)
|
||||
{
|
||||
if (!OidIsValid(collation))
|
||||
{
|
||||
/*
|
||||
* This typically means that the parser could not resolve a
|
||||
* conflict of implicit collations, so report it that way.
|
||||
*/
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDETERMINATE_COLLATION),
|
||||
errmsg("could not determine which collation to use for ILIKE"),
|
||||
errhint("Use the COLLATE clause to set the collation explicitly.")));
|
||||
}
|
||||
locale = pg_newlocale_from_collation(collation);
|
||||
}
|
||||
}
|
||||
|
||||
if (typeid != BYTEAOID)
|
||||
{
|
||||
patt = TextDatumGetCString(patt_const->constvalue);
|
||||
@ -4822,23 +4874,11 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX In multibyte character sets, we can't trust isalpha, so assume
|
||||
* any multibyte char is potentially case-varying.
|
||||
*/
|
||||
if (case_insensitive)
|
||||
{
|
||||
if (is_multibyte && (unsigned char) patt[pos] >= 0x80)
|
||||
break;
|
||||
if (isalpha((unsigned char) patt[pos]))
|
||||
break;
|
||||
}
|
||||
/* Stop if case-varying character (it's sort of a wildcard) */
|
||||
if (case_insensitive &&
|
||||
pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
|
||||
break;
|
||||
|
||||
/*
|
||||
* NOTE: this code used to think that %% meant a literal %, but
|
||||
* textlike() itself does not think that, and the SQL92 spec doesn't
|
||||
* say any such thing either.
|
||||
*/
|
||||
match[match_pos++] = patt[pos];
|
||||
}
|
||||
|
||||
@ -4870,7 +4910,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
}
|
||||
|
||||
static Pattern_Prefix_Status
|
||||
regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
|
||||
Const **prefix_const, Const **rest_const)
|
||||
{
|
||||
char *match;
|
||||
@ -4883,6 +4923,8 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
char *rest;
|
||||
Oid typeid = patt_const->consttype;
|
||||
bool is_multibyte = (pg_database_encoding_max_length() > 1);
|
||||
pg_locale_t locale = 0;
|
||||
bool locale_is_c = false;
|
||||
|
||||
/*
|
||||
* Should be unnecessary, there are no bytea regex operators defined. As
|
||||
@ -4894,6 +4936,28 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("regular-expression matching not supported on type bytea")));
|
||||
|
||||
if (case_insensitive)
|
||||
{
|
||||
/* If case-insensitive, we need locale info */
|
||||
if (lc_ctype_is_c(collation))
|
||||
locale_is_c = true;
|
||||
else if (collation != DEFAULT_COLLATION_OID)
|
||||
{
|
||||
if (!OidIsValid(collation))
|
||||
{
|
||||
/*
|
||||
* This typically means that the parser could not resolve a
|
||||
* conflict of implicit collations, so report it that way.
|
||||
*/
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDETERMINATE_COLLATION),
|
||||
errmsg("could not determine which collation to use for regular expression"),
|
||||
errhint("Use the COLLATE clause to set the collation explicitly.")));
|
||||
}
|
||||
locale = pg_newlocale_from_collation(collation);
|
||||
}
|
||||
}
|
||||
|
||||
/* the right-hand const is type text for all of these */
|
||||
patt = TextDatumGetCString(patt_const->constvalue);
|
||||
|
||||
@ -4969,17 +5033,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
patt[pos] == '$')
|
||||
break;
|
||||
|
||||
/*
|
||||
* XXX In multibyte character sets, we can't trust isalpha, so assume
|
||||
* any multibyte char is potentially case-varying.
|
||||
*/
|
||||
if (case_insensitive)
|
||||
{
|
||||
if (is_multibyte && (unsigned char) patt[pos] >= 0x80)
|
||||
break;
|
||||
if (isalpha((unsigned char) patt[pos]))
|
||||
break;
|
||||
}
|
||||
/* Stop if case-varying character (it's sort of a wildcard) */
|
||||
if (case_insensitive &&
|
||||
pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
|
||||
break;
|
||||
|
||||
/*
|
||||
* Check for quantifiers. Except for +, this means the preceding
|
||||
@ -5004,7 +5061,7 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
* backslash followed by alphanumeric is an escape, not a quoted
|
||||
* character. Must treat it as having multiple possible matches.
|
||||
* Note: since only ASCII alphanumerics are escapes, we don't have to
|
||||
* be paranoid about multibyte here.
|
||||
* be paranoid about multibyte or collations here.
|
||||
*/
|
||||
if (patt[pos] == '\\')
|
||||
{
|
||||
@ -5056,7 +5113,7 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
}
|
||||
|
||||
Pattern_Prefix_Status
|
||||
pattern_fixed_prefix(Const *patt, Pattern_Type ptype,
|
||||
pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
|
||||
Const **prefix, Const **rest)
|
||||
{
|
||||
Pattern_Prefix_Status result;
|
||||
@ -5064,16 +5121,16 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype,
|
||||
switch (ptype)
|
||||
{
|
||||
case Pattern_Type_Like:
|
||||
result = like_fixed_prefix(patt, false, prefix, rest);
|
||||
result = like_fixed_prefix(patt, false, collation, prefix, rest);
|
||||
break;
|
||||
case Pattern_Type_Like_IC:
|
||||
result = like_fixed_prefix(patt, true, prefix, rest);
|
||||
result = like_fixed_prefix(patt, true, collation, prefix, rest);
|
||||
break;
|
||||
case Pattern_Type_Regex:
|
||||
result = regex_fixed_prefix(patt, false, prefix, rest);
|
||||
result = regex_fixed_prefix(patt, false, collation, prefix, rest);
|
||||
break;
|
||||
case Pattern_Type_Regex_IC:
|
||||
result = regex_fixed_prefix(patt, true, prefix, rest);
|
||||
result = regex_fixed_prefix(patt, true, collation, prefix, rest);
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "unrecognized ptype: %d", (int) ptype);
|
||||
|
@ -132,6 +132,7 @@ extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
|
||||
|
||||
extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
|
||||
Pattern_Type ptype,
|
||||
Oid collation,
|
||||
Const **prefix,
|
||||
Const **rest);
|
||||
extern Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc);
|
||||
|
Loading…
x
Reference in New Issue
Block a user