Handle BCP 47 language tags when emulating ICU collation attributes.

What this patch does:

  * pg_import_system_collations(): drops the iculocstr variable and always
    uses the language tag, both as the locale passed to CollationCreate() and
    as the argument of get_collation_actual_version().

  * icu_set_collation_attributes(): runs the locale string through
    uloc_canonicalize() before lower-casing it and parsing the key-value
    list that follows '@', so that a language tag such as
    "und-u-kc-ks-level1" is handled like the equivalent ICU locale ID.
    The temporary buffers are released with pfree().

  * Adds a regression test that creates a collation from the language tag
    'en-u-ks-level1' and checks that it compares case-insensitively.

Review changes relative to the submitted patch:

  * icu_set_collation_attributes() now also frees lower_str on the early
    return taken when the canonicalized locale has no '@' part; previously
    only the fall-through path at the end of the function freed it.
  * Reworded one sentence of the function header comment.

Notes for whoever applies this:

  * The patch text was recovered from a copy whose line breaks and
    indentation had been collapsed.  Hunk line counts were checked against
    the hunk headers, but leading whitespace and declaration alignment were
    reconstructed from PostgreSQL's tab/pgindent conventions; if a hunk does
    not apply cleanly, retry with whitespace-insensitive matching.
  * The post-image blob id on the pg_locale.c "index" line is carried over
    from the submitted patch and does not reflect the three added lines.

diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 8949684afe..3d0aea0568 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -950,7 +950,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
 			const char *name;
 			char	   *langtag;
 			char	   *icucomment;
-			const char *iculocstr;
 			Oid			collid;
 
 			if (i == -1)
@@ -959,20 +958,19 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
 				name = uloc_getAvailable(i);
 
 			langtag = get_icu_language_tag(name);
-			iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
 
 			/*
 			 * Be paranoid about not allowing any non-ASCII strings into
 			 * pg_collation
 			 */
-			if (!pg_is_ascii(langtag) || !pg_is_ascii(iculocstr))
+			if (!pg_is_ascii(langtag))
 				continue;
 
 			collid = CollationCreate(psprintf("%s-x-icu", langtag),
 									 nspid, GetUserId(),
 									 COLLPROVIDER_ICU, true, -1,
-									 NULL, NULL, iculocstr, NULL,
-									 get_collation_actual_version(COLLPROVIDER_ICU, iculocstr),
+									 NULL, NULL, langtag, NULL,
+									 get_collation_actual_version(COLLPROVIDER_ICU, langtag),
 									 true, true);
 			if (OidIsValid(collid))
 			{
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 90ec773c02..c3ede994be 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2634,9 +2634,12 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
 }
 
 /*
- * Parse collation attributes and apply them to the open collator. This takes
- * a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
- * applies the key-value arguments.
+ * Parse collation attributes from the given locale string and apply them to
+ * the open collator.
+ *
+ * First, the locale string is canonicalized to an ICU format locale ID such
+ * as "und@colStrength=primary;colCaseLevel=yes". Then the key-value arguments
+ * are parsed and applied.
  *
  * Starting with ICU version 54, the attributes are processed automatically by
  * ucol_open(), so this is only necessary for emulating this behavior on older
@@ -2646,9 +2649,37 @@ pg_attribute_unused()
 static void
 icu_set_collation_attributes(UCollator *collator, const char *loc)
 {
-	char	   *str = asc_tolower(loc, strlen(loc));
+	UErrorCode	status;
+	int32_t		len;
+	char	   *icu_locale_id;
+	char	   *lower_str;
+	char	   *str;
 
-	str = strchr(str, '@');
+	/*
+	 * The input locale may be a BCP 47 language tag, e.g.
+	 * "und-u-kc-ks-level1", which expresses the same attributes in a
+	 * different form. It will be converted to the equivalent ICU format
+	 * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
+	 * uloc_canonicalize().
+	 */
+	status = U_ZERO_ERROR;
+	len = uloc_canonicalize(loc, NULL, 0, &status);
+	icu_locale_id = palloc(len + 1);
+	status = U_ZERO_ERROR;
+	len = uloc_canonicalize(loc, icu_locale_id, len + 1, &status);
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("canonicalization failed for locale string \"%s\": %s",
+						loc, u_errorName(status))));
+
+	lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
+
+	pfree(icu_locale_id);
+
+	str = strchr(lower_str, '@');
 	if (!str)
+	{
+		pfree(lower_str);
 		return;
+	}
 	str++;
@@ -2663,7 +2694,6 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
 			char	   *value;
 			UColAttribute uattr;
 			UColAttributeValue uvalue;
-			UErrorCode	status;
 
 			status = U_ZERO_ERROR;
 
@@ -2730,6 +2760,8 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
 								loc, u_errorName(status))));
 		}
 	}
+
+	pfree(lower_str);
 }
 
 #endif							/* USE_ICU */
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 9a3e12e42d..6225b575ce 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1304,6 +1304,14 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
  t        | t
 (1 row)
 
+-- test language tags
+CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
+SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
+ ?column? 
+----------
+ t
+(1 row)
+
 CREATE TABLE test1cs (x text COLLATE case_sensitive);
 CREATE TABLE test2cs (x text COLLATE case_sensitive);
 CREATE TABLE test3cs (x text COLLATE case_sensitive);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 0790068f31..64cbfd0a5b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -518,6 +518,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
 SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
 SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
 
+-- test language tags
+CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
+SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
+
 CREATE TABLE test1cs (x text COLLATE case_sensitive);
 CREATE TABLE test2cs (x text COLLATE case_sensitive);
 CREATE TABLE test3cs (x text COLLATE case_sensitive);