diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 71730cc52f..bda0da2dc8 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -9804,6 +9804,32 @@ SET XML OPTION { DOCUMENT | CONTENT }; + + icu_validation_level (enum) + + icu_validation_level configuration parameter + + + + + When ICU locale validation problems are encountered, controls which + message level is + used to report the problem. Valid values are + DISABLED, DEBUG5, + DEBUG4, DEBUG3, + DEBUG2, DEBUG1, + INFO, NOTICE, + WARNING, ERROR, and + LOG. + + + If set to DISABLED, does not report validation + problems at all. Otherwise reports problems at the given message + level. The default is ERROR. + + + + default_text_search_config (string) diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 13ad7dff42..45de78352c 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -258,6 +258,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("parameter \"locale\" must be specified"))); + + icu_validate_locale(colliculocale); } /* diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 9408dd6cc7..24bcc5adfe 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -1058,7 +1058,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("ICU locale must be specified"))); - check_icu_locale(dbiculocale); + icu_validate_locale(dbiculocale); } else { diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 8345c4602f..9497c20d12 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -58,6 +58,7 @@ #include "catalog/pg_collation.h" #include "catalog/pg_control.h" #include "mb/pg_wchar.h" +#include "miscadmin.h" #include 
"utils/builtins.h"
 #include "utils/formatting.h"
 #include "utils/guc_hooks.h"
@@ -95,6 +96,8 @@ char *locale_monetary;
 char *locale_numeric;
 char *locale_time;
 
+int icu_validation_level = ERROR;
+
 /*
  * lc_time localization cache.
  *
@@ -2821,24 +2824,91 @@ icu_set_collation_attributes(UCollator *collator, const char *loc,
 	pfree(lower_str);
 }
 
-#endif /* USE_ICU */
+#endif
 
 /*
- * Check if the given locale ID is valid, and ereport(ERROR) if it isn't.
+ * Perform best-effort check that the locale is a valid one.
+ *
+ * Problems are reported at the level given by icu_validation_level (capped
+ * at WARNING during binary upgrade); a negative level skips validation.  The
+ * language must be extractable from loc_str and must be either a special
+ * name (empty, "root", "und", "c", "posix") or the language of a locale that
+ * ICU lists as available.  Unless language extraction failed, the locale is
+ * then opened with pg_ucol_open() and closed again.
+ *
+ * In a build without ICU, this always raises an error.
  */
 void
-check_icu_locale(const char *icu_locale)
+icu_validate_locale(const char *loc_str)
 {
 #ifdef USE_ICU
-	UCollator  *collator;
+	UCollator	*collator;
+	UErrorCode	status;
+	char		lang[ULOC_LANG_CAPACITY];
+	bool		found = false;
+	int			elevel = icu_validation_level;
 
-	collator = pg_ucol_open(icu_locale);
+	/* no validation */
+	if (elevel < 0)
+		return;
+
+	/* downgrade to WARNING during pg_upgrade */
+	if (IsBinaryUpgrade && elevel > WARNING)
+		elevel = WARNING;
+
+	/*
+	 * Validate that we can extract the language.  ICU reports a result that
+	 * exactly fills the buffer with U_STRING_NOT_TERMINATED_WARNING, which
+	 * U_FAILURE() does not catch; reject it too, because lang is used as a
+	 * NUL-terminated string below.
+	 */
+	status = U_ZERO_ERROR;
+	uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
+	if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
+	{
+		ereport(elevel,
+				(errmsg("could not get language from ICU locale \"%s\": %s",
+						loc_str, u_errorName(status)),
+				 errhint("To disable ICU locale validation, set parameter icu_validation_level to DISABLED.")));
+		return;
+	}
+
+	/* check for special language name */
+	if (strcmp(lang, "") == 0 ||
+		strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
+		strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
+		found = true;
+
+	/* search for matching language within ICU */
+	for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
+	{
+		const char *otherloc = uloc_getAvailable(i);
+		char		otherlang[ULOC_LANG_CAPACITY];
+
+		status = U_ZERO_ERROR;
+		uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
+		if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
+			continue;
+
+		if (strcmp(lang, otherlang) == 0)
+			found = true;
+	}
+
+	if (!found)
+		ereport(elevel,
+				(errmsg("ICU locale \"%s\" has unknown language \"%s\"",
+						loc_str, lang),
+				 errhint("To disable ICU locale validation, set parameter icu_validation_level to DISABLED.")));
+
+	/* check that it can be opened */
+	collator = pg_ucol_open(loc_str);
 	ucol_close(collator);
-#else
+#else /* not USE_ICU */
+	/* could get here if a collation was created by a build with ICU */
 	ereport(ERROR,
 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 			 errmsg("ICU is not supported in this build")));
-#endif
+#endif /* not USE_ICU */
 }
 
 /*
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index a60bd48499..8062589efd 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -166,6 +166,22 @@ static const struct config_enum_entry intervalstyle_options[] = {
 	{NULL, 0, false}
 };
 
+static const struct config_enum_entry icu_validation_level_options[] = {
+	{"disabled", -1, false},
+	{"debug5", DEBUG5, false},
+	{"debug4", DEBUG4, false},
+	{"debug3", DEBUG3, false},
+	{"debug2", DEBUG2, false},
+	{"debug1", DEBUG1, false},
+	{"debug", DEBUG2, true},
+	{"log", LOG, false},
+	{"info", INFO, true},
+	{"notice", NOTICE, false},
+	{"warning", WARNING, false},
+	{"error", ERROR, false},
+	{NULL, 0, false}
+};
+
 StaticAssertDecl(lengthof(intervalstyle_options) == (INTSTYLE_ISO_8601 + 2),
 				 "array length mismatch");
 
@@ -4643,6 +4659,16 @@ struct config_enum ConfigureNamesEnum[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"icu_validation_level", PGC_USERSET, CLIENT_CONN_LOCALE,
+			gettext_noop("Log level for reporting invalid ICU locale strings."),
+			NULL
+		},
+		&icu_validation_level,
+		ERROR, icu_validation_level_options,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"log_error_verbosity", PGC_SUSET, LOGGING_WHAT,
 			gettext_noop("Sets the verbosity of logged messages."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index fc831565d9..ee49ca3937 100644
---
a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -731,6 +731,9 @@
 #lc_numeric = 'C'			# locale for number formatting
 #lc_time = 'C'				# locale for time formatting
 
+#icu_validation_level = ERROR		# report ICU locale validation
+					# errors at the given level
+
 # default configuration for text search
 #default_text_search_config = 'pg_catalog.simple'
 
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 6b20a962ec..d5ac25904d 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2242,6 +2242,67 @@ check_icu_locale_encoding(int user_enc)
 	return true;
 }
 
+/*
+ * Perform best-effort check that the locale is a valid one. Should be
+ * consistent with pg_locale.c, except that it doesn't need to open the
+ * collator (that will happen during post-bootstrap initialization).
+ *
+ * Any problem is reported with pg_fatal(): a language that cannot be
+ * extracted from loc_str, a language that is neither a special name nor the
+ * language of a locale ICU lists as available, or a build without ICU.
+ */
+static void
+icu_validate_locale(const char *loc_str)
+{
+#ifdef USE_ICU
+	UErrorCode	status;
+	char		lang[ULOC_LANG_CAPACITY];
+	bool		found = false;
+
+	/*
+	 * Validate that we can extract the language.  ICU reports a result that
+	 * exactly fills the buffer with U_STRING_NOT_TERMINATED_WARNING, which
+	 * U_FAILURE() does not catch; reject it too, because lang is used as a
+	 * NUL-terminated string below.
+	 */
+	status = U_ZERO_ERROR;
+	uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
+	if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
+	{
+		pg_fatal("could not get language from locale \"%s\": %s",
+				 loc_str, u_errorName(status));
+		return;
+	}
+
+	/* check for special language name */
+	if (strcmp(lang, "") == 0 ||
+		strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
+		strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
+		found = true;
+
+	/* search for matching language within ICU */
+	for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
+	{
+		const char *otherloc = uloc_getAvailable(i);
+		char		otherlang[ULOC_LANG_CAPACITY];
+
+		status = U_ZERO_ERROR;
+		uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
+		if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
+			continue;
+
+		if (strcmp(lang, otherlang) == 0)
+			found = true;
+	}
+
+	if (!found)
+		pg_fatal("locale \"%s\" has unknown language \"%s\"",
+				 loc_str, lang);
+#else
+	pg_fatal("ICU is not supported in this build");
+#endif
+}
+
 /*
  * Determine default ICU locale by opening the default collator and reading
  * its locale.
@@ -2344,9 +2405,11 @@ setlocales(void)
 			printf(_("Using default ICU locale \"%s\".\n"), icu_locale);
 		}
 
+		icu_validate_locale(icu_locale);
+
 		/*
-		 * In supported builds, the ICU locale ID will be checked by the
-		 * backend during post-bootstrap initialization.
+		 * In supported builds, the ICU locale ID will be opened during
+		 * post-bootstrap initialization, which will perform extra checks.
 		 */
 #ifndef USE_ICU
 		pg_fatal("ICU is not supported in this build");
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index b97420f7e8..db7995fe28 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -128,6 +128,24 @@ if ($ENV{with_icu} eq 'yes')
 		],
 		qr/error: encoding mismatch/,
 		'fails for encoding not supported by ICU');
+
+	command_fails_like(
+		[
+			'initdb', '--no-sync',
+			'--locale-provider=icu',
+			'--icu-locale=nonsense-nowhere', "$tempdir/dataX"
+		],
+		qr/error: locale "nonsense-nowhere" has unknown language "nonsense"/,
+		'fails for nonsense language');
+
+	command_fails_like(
+		[
+			'initdb', '--no-sync',
+			'--locale-provider=icu',
+			'--icu-locale=@colNumeric=lower', "$tempdir/dataX"
+		],
+		qr/could not open collator for locale "\@colNumeric=lower": U_ILLEGAL_ARGUMENT_ERROR/,
+		'fails for invalid collation argument');
 }
 else
 {
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index dd822a68be..c275427976 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -40,6 +40,7 @@ extern PGDLLIMPORT char *locale_messages;
 extern PGDLLIMPORT char *locale_monetary;
 extern PGDLLIMPORT char *locale_numeric;
 extern PGDLLIMPORT char *locale_time;
+extern PGDLLIMPORT int icu_validation_level;
 
 /* lc_time localization cache */
 extern PGDLLIMPORT char *localized_abbrev_days[];
@@ -118,11 +119,12 @@ extern size_t pg_strxfrm_prefix(char *dest, const
char *src, size_t destsize, extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale); +extern void icu_validate_locale(const char *loc_str); + #ifdef USE_ICU extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes); extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar); #endif -extern void check_icu_locale(const char *icu_locale); /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */ extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen, diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index f135200c99..5e480d45cd 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1035,7 +1035,14 @@ END $$; CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale" ERROR: parameter "locale" must be specified -CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */ DROP COLLATION testx; +CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails +ERROR: ICU locale "nonsense-nowhere" has unknown language "nonsense" +HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED. +SET icu_validation_level = WARNING; +CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx; +WARNING: ICU locale "nonsense-nowhere" has unknown language "nonsense" +HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED. 
+RESET icu_validation_level; CREATE COLLATION test4 FROM nonsense; ERROR: collation "nonsense" for encoding "UTF8" does not exist CREATE COLLATION test5 FROM test0; diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 8105ebc8ae..95d96f2eb8 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -371,7 +371,10 @@ BEGIN END $$; CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale" -CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */ DROP COLLATION testx; +CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails +SET icu_validation_level = WARNING; +CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx; +RESET icu_validation_level; CREATE COLLATION test4 FROM nonsense; CREATE COLLATION test5 FROM test0;