Support language tags in older ICU versions (53 and earlier).
By calling uloc_canonicalize() before parsing the attributes, the existing locale attribute parsing logic works on language tags as well.

Fix a small memory leak, too.

Discussion: http://postgr.es/m/60da0cecfb512a78b8666b31631a636215d8ce73.camel@j-davis.com
Reviewed-by: Peter Eisentraut
This commit is contained in:
parent
e8e1f96c49
commit
869650fa86
@ -950,7 +950,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
|
||||
const char *name;
|
||||
char *langtag;
|
||||
char *icucomment;
|
||||
const char *iculocstr;
|
||||
Oid collid;
|
||||
|
||||
if (i == -1)
|
||||
@ -959,20 +958,19 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
|
||||
name = uloc_getAvailable(i);
|
||||
|
||||
langtag = get_icu_language_tag(name);
|
||||
iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
|
||||
|
||||
/*
|
||||
* Be paranoid about not allowing any non-ASCII strings into
|
||||
* pg_collation
|
||||
*/
|
||||
if (!pg_is_ascii(langtag) || !pg_is_ascii(iculocstr))
|
||||
if (!pg_is_ascii(langtag))
|
||||
continue;
|
||||
|
||||
collid = CollationCreate(psprintf("%s-x-icu", langtag),
|
||||
nspid, GetUserId(),
|
||||
COLLPROVIDER_ICU, true, -1,
|
||||
NULL, NULL, iculocstr, NULL,
|
||||
get_collation_actual_version(COLLPROVIDER_ICU, iculocstr),
|
||||
NULL, NULL, langtag, NULL,
|
||||
get_collation_actual_version(COLLPROVIDER_ICU, langtag),
|
||||
true, true);
|
||||
if (OidIsValid(collid))
|
||||
{
|
||||
|
@ -2634,9 +2634,12 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse collation attributes and apply them to the open collator. This takes
|
||||
* a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
|
||||
* applies the key-value arguments.
|
||||
* Parse collation attributes from the given locale string and apply them to
|
||||
* the open collator.
|
||||
*
|
||||
* First, the locale string is canonicalized to an ICU format locale ID such
|
||||
* as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
|
||||
* the key-value arguments.
|
||||
*
|
||||
* Starting with ICU version 54, the attributes are processed automatically by
|
||||
* ucol_open(), so this is only necessary for emulating this behavior on older
|
||||
@ -2646,9 +2649,34 @@ pg_attribute_unused()
|
||||
static void
|
||||
icu_set_collation_attributes(UCollator *collator, const char *loc)
|
||||
{
|
||||
char *str = asc_tolower(loc, strlen(loc));
|
||||
UErrorCode status;
|
||||
int32_t len;
|
||||
char *icu_locale_id;
|
||||
char *lower_str;
|
||||
char *str;
|
||||
|
||||
str = strchr(str, '@');
|
||||
/*
|
||||
* The input locale may be a BCP 47 language tag, e.g.
|
||||
* "und-u-kc-ks-level1", which expresses the same attributes in a
|
||||
* different form. It will be converted to the equivalent ICU format
|
||||
* locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
|
||||
* uloc_canonicalize().
|
||||
*/
|
||||
status = U_ZERO_ERROR;
|
||||
len = uloc_canonicalize(loc, NULL, 0, &status);
|
||||
icu_locale_id = palloc(len + 1);
|
||||
status = U_ZERO_ERROR;
|
||||
len = uloc_canonicalize(loc, icu_locale_id, len + 1, &status);
|
||||
if (U_FAILURE(status))
|
||||
ereport(ERROR,
|
||||
(errmsg("canonicalization failed for locale string \"%s\": %s",
|
||||
loc, u_errorName(status))));
|
||||
|
||||
lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
|
||||
|
||||
pfree(icu_locale_id);
|
||||
|
||||
str = strchr(lower_str, '@');
|
||||
if (!str)
|
||||
return;
|
||||
str++;
|
||||
@ -2663,7 +2691,6 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
|
||||
char *value;
|
||||
UColAttribute uattr;
|
||||
UColAttributeValue uvalue;
|
||||
UErrorCode status;
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
|
||||
@ -2730,6 +2757,8 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
|
||||
loc, u_errorName(status))));
|
||||
}
|
||||
}
|
||||
|
||||
pfree(lower_str);
|
||||
}
|
||||
|
||||
#endif /* USE_ICU */
|
||||
|
@ -1304,6 +1304,14 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
|
||||
t | t
|
||||
(1 row)
|
||||
|
||||
-- test language tags
|
||||
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
|
||||
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
|
||||
?column?
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
CREATE TABLE test1cs (x text COLLATE case_sensitive);
|
||||
CREATE TABLE test2cs (x text COLLATE case_sensitive);
|
||||
CREATE TABLE test3cs (x text COLLATE case_sensitive);
|
||||
|
@ -518,6 +518,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
|
||||
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
|
||||
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
|
||||
|
||||
-- test language tags
|
||||
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
|
||||
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
|
||||
|
||||
CREATE TABLE test1cs (x text COLLATE case_sensitive);
|
||||
CREATE TABLE test2cs (x text COLLATE case_sensitive);
|
||||
CREATE TABLE test3cs (x text COLLATE case_sensitive);
|
||||
|
Loading…
x
Reference in New Issue
Block a user