Revert "ICU: do not convert locale 'C' to 'en-US-u-va-posix'."

This reverts commit f7faa9976cc0504c027a20ed66ceca9018041dd4.

Discussion: https://postgr.es/m/483826.1683582475@sss.pgh.pa.us
This commit is contained in:
Jeff Davis 2023-05-08 20:50:51 -07:00
parent 5698f07947
commit 455f948b0d
4 changed files with 34 additions and 14 deletions

View File

@@ -2782,10 +2782,26 @@ icu_language_tag(const char *loc_str, int elevel)
{
#ifdef USE_ICU
UErrorCode status;
char lang[ULOC_LANG_CAPACITY];
char *langtag;
size_t buflen = 32; /* arbitrary starting buffer size */
const bool strict = true;
status = U_ZERO_ERROR;
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
if (U_FAILURE(status))
{
if (elevel > 0)
ereport(elevel,
(errmsg("could not get language from locale \"%s\": %s",
loc_str, u_errorName(status))));
return NULL;
}
/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
return pstrdup("en-US-u-va-posix");
/*
* A BCP47 language tag doesn't have a clearly-defined upper limit
* (cf. RFC5646 section 4.4). Additionally, in older ICU versions,
@@ -2873,7 +2889,8 @@ icu_validate_locale(const char *loc_str)
/* check for special language name */
if (strcmp(lang, "") == 0 ||
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
found = true;
/* search for matching language within ICU */

View File

@@ -2238,10 +2238,24 @@ icu_language_tag(const char *loc_str)
{
#ifdef USE_ICU
UErrorCode status;
char lang[ULOC_LANG_CAPACITY];
char *langtag;
size_t buflen = 32; /* arbitrary starting buffer size */
const bool strict = true;
status = U_ZERO_ERROR;
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
if (U_FAILURE(status))
{
pg_fatal("could not get language from locale \"%s\": %s",
loc_str, u_errorName(status));
return NULL;
}
/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
return pstrdup("en-US-u-va-posix");
/*
* A BCP47 language tag doesn't have a clearly-defined upper limit
* (cf. RFC5646 section 4.4). Additionally, in older ICU versions,
@@ -2313,7 +2327,8 @@ icu_validate_locale(const char *loc_str)
/* check for special language name */
if (strcmp(lang, "") == 0 ||
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
found = true;
/* search for matching language within ICU */

View File

@@ -1020,7 +1020,6 @@ CREATE ROLE regress_test_role;
CREATE SCHEMA test_schema;
-- We need to do this this way to cope with varying names for encodings:
SET client_min_messages TO WARNING;
SET icu_validation_level = disabled;
do $$
BEGIN
EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
@@ -1035,24 +1034,17 @@ BEGIN
quote_literal(current_setting('lc_collate')) || ');';
END
$$;
RESET icu_validation_level;
RESET client_min_messages;
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
ERROR: parameter "locale" must be specified
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails
ERROR: ICU locale "nonsense-nowhere" has unknown language "nonsense"
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
CREATE COLLATION testx (provider = icu, locale = 'C'); -- fails
ERROR: could not convert locale name "C" to language tag: U_ILLEGAL_ARGUMENT_ERROR
CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); -- fails
ERROR: could not convert locale name "@colStrength=primary;nonsense=yes" to language tag: U_ILLEGAL_ARGUMENT_ERROR
SET icu_validation_level = WARNING;
CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); DROP COLLATION testx;
WARNING: could not convert locale name "@colStrength=primary;nonsense=yes" to language tag: U_ILLEGAL_ARGUMENT_ERROR
CREATE COLLATION testx (provider = icu, locale = 'C'); DROP COLLATION testx;
WARNING: could not convert locale name "C" to language tag: U_ILLEGAL_ARGUMENT_ERROR
WARNING: ICU locale "C" has unknown language "c"
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx;
WARNING: ICU locale "nonsense-nowhere" has unknown language "nonsense"
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.

View File

@@ -358,7 +358,6 @@ CREATE SCHEMA test_schema;
-- We need to do this this way to cope with varying names for encodings:
SET client_min_messages TO WARNING;
SET icu_validation_level = disabled;
do $$
BEGIN
@@ -374,16 +373,13 @@ BEGIN
END
$$;
RESET icu_validation_level;
RESET client_min_messages;
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails
CREATE COLLATION testx (provider = icu, locale = 'C'); -- fails
CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); -- fails
SET icu_validation_level = WARNING;
CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); DROP COLLATION testx;
CREATE COLLATION testx (provider = icu, locale = 'C'); DROP COLLATION testx;
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx;
RESET icu_validation_level;