diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 35361c46a6..8f34948ad3 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -246,9 +246,13 @@ pg_set_regex_collation(Oid collation) errhint("Use the COLLATE clause to set the collation explicitly."))); } - if (lc_ctype_is_c(collation)) + if (collation == C_COLLATION_OID) { - /* C/POSIX collations use this path regardless of database encoding */ + /* + * Some callers expect regexes to work for C_COLLATION_OID before + * catalog access is available, so we can't call + * pg_newlocale_from_collation(). + */ strategy = PG_REGEX_STRATEGY_C; collation = C_COLLATION_OID; } @@ -261,7 +265,17 @@ pg_set_regex_collation(Oid collation) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("nondeterministic collations are not supported for regular expressions"))); - if (locale->provider == COLLPROVIDER_BUILTIN) + if (locale->ctype_is_c) + { + /* + * C/POSIX collations use this path regardless of database + * encoding + */ + strategy = PG_REGEX_STRATEGY_C; + locale = 0; + collation = C_COLLATION_OID; + } + else if (locale->provider == COLLPROVIDER_BUILTIN) { Assert(GetDatabaseEncoding() == PG_UTF8); strategy = PG_REGEX_STRATEGY_BUILTIN; @@ -274,6 +288,7 @@ pg_set_regex_collation(Oid collation) #endif else { + Assert(locale->provider == COLLPROVIDER_LIBC); if (GetDatabaseEncoding() == PG_UTF8) strategy = PG_REGEX_STRATEGY_LIBC_WIDE; else diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 33a810b008..68fa89418f 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1636,6 +1636,7 @@ char * str_tolower(const char *buff, size_t nbytes, Oid collid) { char *result; + pg_locale_t mylocale; if (!buff) return NULL; @@ -1653,17 +1654,15 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) errhint("Use the COLLATE clause to set the collation explicitly."))); } + mylocale = pg_newlocale_from_collation(collid); + /* C/POSIX collations use this path regardless of database encoding */ - if (lc_ctype_is_c(collid)) + if (mylocale->ctype_is_c) { result = asc_tolower(buff, nbytes); } else { - pg_locale_t mylocale; - - mylocale = pg_newlocale_from_collation(collid); - #ifdef USE_ICU if (mylocale->provider == COLLPROVIDER_ICU) { @@ -1774,6 +1773,7 @@ char * str_toupper(const char *buff, size_t nbytes, Oid collid) { char *result; + pg_locale_t mylocale; if (!buff) return NULL; @@ -1791,17 +1791,15 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) errhint("Use the COLLATE clause to set the collation explicitly."))); } + mylocale = pg_newlocale_from_collation(collid); + /* C/POSIX collations use this path regardless of database encoding */ - if (lc_ctype_is_c(collid)) + if (mylocale->ctype_is_c) { result = asc_toupper(buff, nbytes); } else { - pg_locale_t mylocale; - - mylocale = pg_newlocale_from_collation(collid); - #ifdef USE_ICU if (mylocale->provider == COLLPROVIDER_ICU) { @@ -1954,6 +1952,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) { char *result; int wasalnum = false; + pg_locale_t mylocale; if (!buff) return NULL; @@ -1971,17 +1970,15 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) errhint("Use the COLLATE clause to set the collation explicitly."))); } + mylocale = pg_newlocale_from_collation(collid); + /* C/POSIX collations use this path regardless of database encoding */ - if (lc_ctype_is_c(collid)) + if (mylocale->ctype_is_c) { result = asc_initcap(buff, nbytes); } else { - pg_locale_t mylocale; - - mylocale = pg_newlocale_from_collation(collid); - #ifdef USE_ICU if (mylocale->provider == COLLPROVIDER_ICU) { diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 131616fa6b..f87675d755 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -147,7 +147,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) static inline int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation) { - if (collation && !lc_ctype_is_c(collation)) + if (collation) { pg_locale_t locale = pg_newlocale_from_collation(collation); diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index fb9291441d..79c4ddc757 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -100,7 +100,7 @@ static Selectivity regex_selectivity(const char *patt, int pattlen, bool case_insensitive, int fixed_prefix_len); static int pattern_char_isalpha(char c, bool is_multibyte, - pg_locale_t locale, bool locale_is_c); + pg_locale_t locale); static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation); static Datum string_to_datum(const char *str, Oid datatype); @@ -1000,7 +1000,6 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, match_pos; bool is_multibyte = (pg_database_encoding_max_length() > 1); pg_locale_t locale = 0; - bool locale_is_c = false; /* the right-hand const is type text or bytea */ Assert(typeid == BYTEAOID || typeid == TEXTOID); @@ -1024,11 +1023,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, errhint("Use the COLLATE clause to set the collation explicitly."))); } - /* If case-insensitive, we need locale info */ - if (lc_ctype_is_c(collation)) - locale_is_c = true; - else - locale = pg_newlocale_from_collation(collation); + locale = pg_newlocale_from_collation(collation); } if (typeid != BYTEAOID) @@ -1065,7 +1060,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, /* Stop if case-varying character (it's sort of a wildcard) */ if (case_insensitive && - pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c)) + pattern_char_isalpha(patt[pos], is_multibyte, locale)) break; match[match_pos++] = patt[pos]; @@ -1499,16 +1494,16 @@ regex_selectivity(const char *patt, int pattlen, bool case_insensitive, */ static int pattern_char_isalpha(char c, bool is_multibyte, - pg_locale_t locale, bool locale_is_c) + pg_locale_t locale) { - if (locale_is_c) + if (locale->ctype_is_c) return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); else if (is_multibyte && IS_HIGHBIT_SET(c)) return true; - else if (locale && locale->provider == COLLPROVIDER_ICU) + else if (locale->provider == COLLPROVIDER_ICU) return IS_HIGHBIT_SET(c) || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); - else if (locale && locale->provider == COLLPROVIDER_LIBC) + else if (locale->provider == COLLPROVIDER_LIBC) return isalpha_l((unsigned char) c, locale->info.lt); else return isalpha((unsigned char) c); diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index cb9f1b4f78..a738da5674 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1266,32 +1266,6 @@ lookup_collation_cache(Oid collation) return cache_entry; } -/* - * Detect whether collation's LC_CTYPE property is C - */ -bool -lc_ctype_is_c(Oid collation) -{ - /* - * If we're asked about "collation 0", return false, so that the code will - * go into the non-C path and report that the collation is bogus. - */ - if (!OidIsValid(collation)) - return false; - - /* - * If we're asked about the built-in C/POSIX collations, we know that. - */ - if (collation == C_COLLATION_OID || - collation == POSIX_COLLATION_OID) - return true; - - /* - * Otherwise, we have to consult pg_collation, but we cache that. - */ - return pg_newlocale_from_collation(collation)->ctype_is_c; -} - /* simple subroutine for reporting errors from newlocale() */ static void report_newlocale_failure(const char *localename) diff --git a/src/include/catalog/pg_collation.dat b/src/include/catalog/pg_collation.dat index f126201276..af5c9aa582 100644 --- a/src/include/catalog/pg_collation.dat +++ b/src/include/catalog/pg_collation.dat @@ -19,8 +19,7 @@ descr => 'standard C collation', collname => 'C', collprovider => 'c', collencoding => '-1', collcollate => 'C', collctype => 'C' }, -{ oid => '951', oid_symbol => 'POSIX_COLLATION_OID', - descr => 'standard POSIX collation', +{ oid => '951', descr => 'standard POSIX collation', collname => 'POSIX', collprovider => 'c', collencoding => '-1', collcollate => 'POSIX', collctype => 'POSIX' }, { oid => '962', descr => 'sorts by Unicode code point, C character semantics', diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 8ec24437f4..ab1c37a44b 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -54,8 +54,6 @@ extern PGDLLIMPORT bool database_ctype_is_c; extern bool check_locale(int category, const char *locale, char **canonname); extern char *pg_perm_setlocale(int category, const char *locale); -extern bool lc_ctype_is_c(Oid collation); - /* * Return the POSIX lconv struct (contains number/money formatting * information) with locale information for all categories.