From f413941f41d370a7893caa3e6ed384b89a0577fd Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 17 Mar 2023 11:47:35 -0700 Subject: [PATCH] Fix t_isspace(), etc., when datlocprovider=i and datctype=C. Check whether the datctype is C to determine whether t_isspace() and related functions use isspace() or iswspace(). Previously, t_isspace() checked whether the database default collation was C; which is incorrect when the default collation uses the ICU provider. Discussion: https://postgr.es/m/79e4354d9eccfdb00483146a6b9f6295202e7890.camel@j-davis.com Reviewed-by: Peter Eisentraut Backpatch-through: 15 --- contrib/unaccent/expected/unaccent.out | 9 --------- contrib/unaccent/expected/unaccent_1.out | 8 -------- contrib/unaccent/sql/unaccent.sql | 11 ----------- src/backend/tsearch/ts_locale.c | 18 ++++++------------ src/backend/tsearch/wparser_def.c | 3 +-- src/backend/utils/adt/pg_locale.c | 3 +++ src/backend/utils/init/postinit.c | 4 ++++ src/include/utils/pg_locale.h | 2 ++ 8 files changed, 16 insertions(+), 42 deletions(-) delete mode 100644 contrib/unaccent/expected/unaccent_1.out diff --git a/contrib/unaccent/expected/unaccent.out b/contrib/unaccent/expected/unaccent.out index cef98ee60c..ee0ac71a1c 100644 --- a/contrib/unaccent/expected/unaccent.out +++ b/contrib/unaccent/expected/unaccent.out @@ -1,12 +1,3 @@ --- unaccent is broken if the default collation is provided by ICU and --- LC_CTYPE=C -SELECT current_setting('lc_ctype') = 'C' AND - (SELECT datlocprovider='i' FROM pg_database - WHERE datname=current_database()) - AS skip_test \gset -\if :skip_test -\quit -\endif CREATE EXTENSION unaccent; -- must have a UTF8 database SELECT getdatabaseencoding(); diff --git a/contrib/unaccent/expected/unaccent_1.out b/contrib/unaccent/expected/unaccent_1.out deleted file mode 100644 index 0a4a3838ab..0000000000 --- a/contrib/unaccent/expected/unaccent_1.out +++ /dev/null @@ -1,8 +0,0 @@ --- unaccent is broken if the default collation is provided by ICU and --- 
LC_CTYPE=C -SELECT current_setting('lc_ctype') = 'C' AND - (SELECT datlocprovider='i' FROM pg_database - WHERE datname=current_database()) - AS skip_test \gset -\if :skip_test -\quit diff --git a/contrib/unaccent/sql/unaccent.sql b/contrib/unaccent/sql/unaccent.sql index 027dfb964a..3fc0c706be 100644 --- a/contrib/unaccent/sql/unaccent.sql +++ b/contrib/unaccent/sql/unaccent.sql @@ -1,14 +1,3 @@ - --- unaccent is broken if the default collation is provided by ICU and --- LC_CTYPE=C -SELECT current_setting('lc_ctype') = 'C' AND - (SELECT datlocprovider='i' FROM pg_database - WHERE datname=current_database()) - AS skip_test \gset -\if :skip_test -\quit -\endif - CREATE EXTENSION unaccent; -- must have a UTF8 database diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c index 0c03170990..f1150d30b7 100644 --- a/src/backend/tsearch/ts_locale.c +++ b/src/backend/tsearch/ts_locale.c @@ -38,10 +38,9 @@ t_isdigit(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - Oid collation = DEFAULT_COLLATION_OID; /* TODO */ pg_locale_t mylocale = 0; /* TODO */ - if (clen == 1 || lc_ctype_is_c(collation)) + if (clen == 1 || database_ctype_is_c) return isdigit(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); @@ -54,10 +53,9 @@ t_isspace(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - Oid collation = DEFAULT_COLLATION_OID; /* TODO */ pg_locale_t mylocale = 0; /* TODO */ - if (clen == 1 || lc_ctype_is_c(collation)) + if (clen == 1 || database_ctype_is_c) return isspace(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); @@ -70,10 +68,9 @@ t_isalpha(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - Oid collation = DEFAULT_COLLATION_OID; /* TODO */ pg_locale_t mylocale = 0; /* TODO */ - if (clen == 1 || lc_ctype_is_c(collation)) + if (clen == 1 || database_ctype_is_c) return isalpha(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, 
mylocale); @@ -86,10 +83,9 @@ t_isalnum(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - Oid collation = DEFAULT_COLLATION_OID; /* TODO */ pg_locale_t mylocale = 0; /* TODO */ - if (clen == 1 || lc_ctype_is_c(collation)) + if (clen == 1 || database_ctype_is_c) return isalnum(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); @@ -102,10 +98,9 @@ t_isprint(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - Oid collation = DEFAULT_COLLATION_OID; /* TODO */ pg_locale_t mylocale = 0; /* TODO */ - if (clen == 1 || lc_ctype_is_c(collation)) + if (clen == 1 || database_ctype_is_c) return isprint(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); @@ -273,7 +268,6 @@ char * lowerstr_with_len(const char *str, int len) { char *out; - Oid collation = DEFAULT_COLLATION_OID; /* TODO */ pg_locale_t mylocale = 0; /* TODO */ if (len == 0) @@ -285,7 +279,7 @@ lowerstr_with_len(const char *str, int len) * Also, for a C locale there is no need to process as multibyte. 
From * backend/utils/adt/oracle_compat.c Teodor */ - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collation)) + if (pg_database_encoding_max_length() > 1 && !database_ctype_is_c) { wchar_t *wstr, *wptr; diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index cc3736454e..840a44ec00 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -297,11 +297,10 @@ TParserInit(char *str, int len) */ if (prs->charmaxlen > 1) { - Oid collation = DEFAULT_COLLATION_OID; /* TODO */ pg_locale_t mylocale = 0; /* TODO */ prs->usewide = true; - if (lc_ctype_is_c(collation)) + if (database_ctype_is_c) { /* * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 1d3d4d86d3..90ec773c02 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -107,6 +107,9 @@ char *localized_full_days[7 + 1]; char *localized_abbrev_months[12 + 1]; char *localized_full_months[12 + 1]; +/* is the database's LC_CTYPE the C locale? 
*/ +bool database_ctype_is_c = false; + /* indicates whether locale information cache is valid */ static bool CurrentLocaleConvValid = false; static bool CurrentLCTimeValid = false; diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 92bac8b63f..31d6a05426 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -419,6 +419,10 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect " which is not recognized by setlocale().", ctype), errhint("Recreate the database with another locale or install the missing locale."))); + if (strcmp(ctype, "C") == 0 || + strcmp(ctype, "POSIX") == 0) + database_ctype_is_c = true; + if (dbform->datlocprovider == COLLPROVIDER_ICU) { char *icurules; diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index f9ce428233..dd822a68be 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -47,6 +47,8 @@ extern PGDLLIMPORT char *localized_full_days[]; extern PGDLLIMPORT char *localized_abbrev_months[]; extern PGDLLIMPORT char *localized_full_months[]; +/* is the database's LC_CTYPE the C locale? */ +extern PGDLLIMPORT bool database_ctype_is_c; extern bool check_locale(int category, const char *locale, char **canonname); extern char *pg_perm_setlocale(int category, const char *locale);