Support language tags in older ICU versions (53 and earlier).
By calling uloc_canonicalize() before parsing the attributes, the existing locale attribute parsing logic works on language tags as well.

Also fix a small memory leak.

Discussion: http://postgr.es/m/60da0cecfb512a78b8666b31631a636215d8ce73.camel@j-davis.com
Reviewed-by: Peter Eisentraut
This commit is contained in:
parent
e8e1f96c49
commit
869650fa86
@ -950,7 +950,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
|
|||||||
const char *name;
|
const char *name;
|
||||||
char *langtag;
|
char *langtag;
|
||||||
char *icucomment;
|
char *icucomment;
|
||||||
const char *iculocstr;
|
|
||||||
Oid collid;
|
Oid collid;
|
||||||
|
|
||||||
if (i == -1)
|
if (i == -1)
|
||||||
@ -959,20 +958,19 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
|
|||||||
name = uloc_getAvailable(i);
|
name = uloc_getAvailable(i);
|
||||||
|
|
||||||
langtag = get_icu_language_tag(name);
|
langtag = get_icu_language_tag(name);
|
||||||
iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Be paranoid about not allowing any non-ASCII strings into
|
* Be paranoid about not allowing any non-ASCII strings into
|
||||||
* pg_collation
|
* pg_collation
|
||||||
*/
|
*/
|
||||||
if (!pg_is_ascii(langtag) || !pg_is_ascii(iculocstr))
|
if (!pg_is_ascii(langtag))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
collid = CollationCreate(psprintf("%s-x-icu", langtag),
|
collid = CollationCreate(psprintf("%s-x-icu", langtag),
|
||||||
nspid, GetUserId(),
|
nspid, GetUserId(),
|
||||||
COLLPROVIDER_ICU, true, -1,
|
COLLPROVIDER_ICU, true, -1,
|
||||||
NULL, NULL, iculocstr, NULL,
|
NULL, NULL, langtag, NULL,
|
||||||
get_collation_actual_version(COLLPROVIDER_ICU, iculocstr),
|
get_collation_actual_version(COLLPROVIDER_ICU, langtag),
|
||||||
true, true);
|
true, true);
|
||||||
if (OidIsValid(collid))
|
if (OidIsValid(collid))
|
||||||
{
|
{
|
||||||
|
@ -2634,9 +2634,12 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Parse collation attributes and apply them to the open collator. This takes
|
* Parse collation attributes from the given locale string and apply them to
|
||||||
* a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
|
* the open collator.
|
||||||
* applies the key-value arguments.
|
*
|
||||||
|
* First, the locale string is canonicalized to an ICU format locale ID such
|
||||||
|
* as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
|
||||||
|
* the key-value arguments.
|
||||||
*
|
*
|
||||||
* Starting with ICU version 54, the attributes are processed automatically by
|
* Starting with ICU version 54, the attributes are processed automatically by
|
||||||
* ucol_open(), so this is only necessary for emulating this behavior on older
|
* ucol_open(), so this is only necessary for emulating this behavior on older
|
||||||
@ -2646,9 +2649,34 @@ pg_attribute_unused()
|
|||||||
static void
|
static void
|
||||||
icu_set_collation_attributes(UCollator *collator, const char *loc)
|
icu_set_collation_attributes(UCollator *collator, const char *loc)
|
||||||
{
|
{
|
||||||
char *str = asc_tolower(loc, strlen(loc));
|
UErrorCode status;
|
||||||
|
int32_t len;
|
||||||
|
char *icu_locale_id;
|
||||||
|
char *lower_str;
|
||||||
|
char *str;
|
||||||
|
|
||||||
str = strchr(str, '@');
|
/*
|
||||||
|
* The input locale may be a BCP 47 language tag, e.g.
|
||||||
|
* "und-u-kc-ks-level1", which expresses the same attributes in a
|
||||||
|
* different form. It will be converted to the equivalent ICU format
|
||||||
|
* locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
|
||||||
|
* uloc_canonicalize().
|
||||||
|
*/
|
||||||
|
status = U_ZERO_ERROR;
|
||||||
|
len = uloc_canonicalize(loc, NULL, 0, &status);
|
||||||
|
icu_locale_id = palloc(len + 1);
|
||||||
|
status = U_ZERO_ERROR;
|
||||||
|
len = uloc_canonicalize(loc, icu_locale_id, len + 1, &status);
|
||||||
|
if (U_FAILURE(status))
|
||||||
|
ereport(ERROR,
|
||||||
|
(errmsg("canonicalization failed for locale string \"%s\": %s",
|
||||||
|
loc, u_errorName(status))));
|
||||||
|
|
||||||
|
lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
|
||||||
|
|
||||||
|
pfree(icu_locale_id);
|
||||||
|
|
||||||
|
str = strchr(lower_str, '@');
|
||||||
if (!str)
|
if (!str)
|
||||||
return;
|
return;
|
||||||
str++;
|
str++;
|
||||||
@ -2663,7 +2691,6 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
|
|||||||
char *value;
|
char *value;
|
||||||
UColAttribute uattr;
|
UColAttribute uattr;
|
||||||
UColAttributeValue uvalue;
|
UColAttributeValue uvalue;
|
||||||
UErrorCode status;
|
|
||||||
|
|
||||||
status = U_ZERO_ERROR;
|
status = U_ZERO_ERROR;
|
||||||
|
|
||||||
@ -2730,6 +2757,8 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
|
|||||||
loc, u_errorName(status))));
|
loc, u_errorName(status))));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pfree(lower_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* USE_ICU */
|
#endif /* USE_ICU */
|
||||||
|
@ -1304,6 +1304,14 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
|
|||||||
t | t
|
t | t
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- test language tags
|
||||||
|
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
|
||||||
|
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
|
||||||
|
?column?
|
||||||
|
----------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
CREATE TABLE test1cs (x text COLLATE case_sensitive);
|
CREATE TABLE test1cs (x text COLLATE case_sensitive);
|
||||||
CREATE TABLE test2cs (x text COLLATE case_sensitive);
|
CREATE TABLE test2cs (x text COLLATE case_sensitive);
|
||||||
CREATE TABLE test3cs (x text COLLATE case_sensitive);
|
CREATE TABLE test3cs (x text COLLATE case_sensitive);
|
||||||
|
@ -518,6 +518,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
|
|||||||
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
|
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
|
||||||
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
|
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
|
||||||
|
|
||||||
|
-- test language tags
|
||||||
|
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
|
||||||
|
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
|
||||||
|
|
||||||
CREATE TABLE test1cs (x text COLLATE case_sensitive);
|
CREATE TABLE test1cs (x text COLLATE case_sensitive);
|
||||||
CREATE TABLE test2cs (x text COLLATE case_sensitive);
|
CREATE TABLE test2cs (x text COLLATE case_sensitive);
|
||||||
CREATE TABLE test3cs (x text COLLATE case_sensitive);
|
CREATE TABLE test3cs (x text COLLATE case_sensitive);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user