Validate ICU locales.
For ICU collations, ensure that the locale's language exists in ICU, and that the locale can be opened.

Basic validation helps avoid minor mistakes and misspellings, which often fall back to the root locale instead of the intended locale. It's even more important to avoid such mistakes in ICU versions 54 and earlier, where the same (misspelled) locale string could fall back to different locales depending on the environment.

Discussion: https://postgr.es/m/11b1eeb7e7667fdd4178497aeb796c48d26e69b9.camel@j-davis.com
Discussion: https://postgr.es/m/df2efad0cae7c65180df8e5ebb709e5eb4f2a82b.camel@j-davis.com
Reviewed-by: Peter Eisentraut
This commit is contained in:
parent
b7cea58822
commit
1671f990dd
@ -9804,6 +9804,32 @@ SET XML OPTION { DOCUMENT | CONTENT };
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry id="guc-icu-validation-level" xreflabel="icu_validation_level">
|
||||||
|
<term><varname>icu_validation_level</varname> (<type>enum</type>)
|
||||||
|
<indexterm>
|
||||||
|
<primary><varname>icu_validation_level</varname> configuration parameter</primary>
|
||||||
|
</indexterm>
|
||||||
|
</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
When ICU locale validation problems are encountered, controls which
|
||||||
|
<link linkend="runtime-config-severity-levels">message level</link> is
|
||||||
|
used to report the problem. Valid values are
|
||||||
|
<literal>DISABLED</literal>, <literal>DEBUG5</literal>,
|
||||||
|
<literal>DEBUG4</literal>, <literal>DEBUG3</literal>,
|
||||||
|
<literal>DEBUG2</literal>, <literal>DEBUG1</literal>,
|
||||||
|
<literal>INFO</literal>, <literal>NOTICE</literal>,
|
||||||
|
<literal>WARNING</literal>, <literal>ERROR</literal>, and
|
||||||
|
<literal>LOG</literal>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If set to <literal>DISABLED</literal>, does not report validation
|
||||||
|
problems at all. Otherwise reports problems at the given message
|
||||||
|
level. The default is <literal>ERROR</literal>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry id="guc-default-text-search-config" xreflabel="default_text_search_config">
|
<varlistentry id="guc-default-text-search-config" xreflabel="default_text_search_config">
|
||||||
<term><varname>default_text_search_config</varname> (<type>string</type>)
|
<term><varname>default_text_search_config</varname> (<type>string</type>)
|
||||||
<indexterm>
|
<indexterm>
|
||||||
|
@ -258,6 +258,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
|
|||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||||
errmsg("parameter \"locale\" must be specified")));
|
errmsg("parameter \"locale\" must be specified")));
|
||||||
|
|
||||||
|
icu_validate_locale(colliculocale);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1058,7 +1058,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
|
|||||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
errmsg("ICU locale must be specified")));
|
errmsg("ICU locale must be specified")));
|
||||||
|
|
||||||
check_icu_locale(dbiculocale);
|
icu_validate_locale(dbiculocale);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -58,6 +58,7 @@
|
|||||||
#include "catalog/pg_collation.h"
|
#include "catalog/pg_collation.h"
|
||||||
#include "catalog/pg_control.h"
|
#include "catalog/pg_control.h"
|
||||||
#include "mb/pg_wchar.h"
|
#include "mb/pg_wchar.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
#include "utils/formatting.h"
|
#include "utils/formatting.h"
|
||||||
#include "utils/guc_hooks.h"
|
#include "utils/guc_hooks.h"
|
||||||
@ -95,6 +96,8 @@ char *locale_monetary;
|
|||||||
char *locale_numeric;
|
char *locale_numeric;
|
||||||
char *locale_time;
|
char *locale_time;
|
||||||
|
|
||||||
|
int icu_validation_level = ERROR;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* lc_time localization cache.
|
* lc_time localization cache.
|
||||||
*
|
*
|
||||||
@ -2821,24 +2824,77 @@ icu_set_collation_attributes(UCollator *collator, const char *loc,
|
|||||||
pfree(lower_str);
|
pfree(lower_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* USE_ICU */
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if the given locale ID is valid, and ereport(ERROR) if it isn't.
|
* Perform best-effort check that the locale is a valid one.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
check_icu_locale(const char *icu_locale)
|
icu_validate_locale(const char *loc_str)
|
||||||
{
|
{
|
||||||
#ifdef USE_ICU
|
#ifdef USE_ICU
|
||||||
UCollator *collator;
|
UCollator *collator;
|
||||||
|
UErrorCode status;
|
||||||
|
char lang[ULOC_LANG_CAPACITY];
|
||||||
|
bool found = false;
|
||||||
|
int elevel = icu_validation_level;
|
||||||
|
|
||||||
collator = pg_ucol_open(icu_locale);
|
/* no validation */
|
||||||
|
if (elevel < 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* downgrade to WARNING during pg_upgrade */
|
||||||
|
if (IsBinaryUpgrade && elevel > WARNING)
|
||||||
|
elevel = WARNING;
|
||||||
|
|
||||||
|
/* validate that we can extract the language */
|
||||||
|
status = U_ZERO_ERROR;
|
||||||
|
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
|
||||||
|
if (U_FAILURE(status))
|
||||||
|
{
|
||||||
|
ereport(elevel,
|
||||||
|
(errmsg("could not get language from ICU locale \"%s\": %s",
|
||||||
|
loc_str, u_errorName(status)),
|
||||||
|
errhint("To disable ICU locale validation, set parameter icu_validation_level to DISABLED.")));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check for special language name */
|
||||||
|
if (strcmp(lang, "") == 0 ||
|
||||||
|
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
|
||||||
|
strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
|
||||||
|
found = true;
|
||||||
|
|
||||||
|
/* search for matching language within ICU */
|
||||||
|
for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
|
||||||
|
{
|
||||||
|
const char *otherloc = uloc_getAvailable(i);
|
||||||
|
char otherlang[ULOC_LANG_CAPACITY];
|
||||||
|
|
||||||
|
status = U_ZERO_ERROR;
|
||||||
|
uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
|
||||||
|
if (U_FAILURE(status))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (strcmp(lang, otherlang) == 0)
|
||||||
|
found = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!found)
|
||||||
|
ereport(elevel,
|
||||||
|
(errmsg("ICU locale \"%s\" has unknown language \"%s\"",
|
||||||
|
loc_str, lang),
|
||||||
|
errhint("To disable ICU locale validation, set parameter icu_validation_level to DISABLED.")));
|
||||||
|
|
||||||
|
/* check that it can be opened */
|
||||||
|
collator = pg_ucol_open(loc_str);
|
||||||
ucol_close(collator);
|
ucol_close(collator);
|
||||||
#else
|
#else /* not USE_ICU */
|
||||||
|
/* could get here if a collation was created by a build with ICU */
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
errmsg("ICU is not supported in this build")));
|
errmsg("ICU is not supported in this build")));
|
||||||
#endif
|
#endif /* not USE_ICU */
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -166,6 +166,22 @@ static const struct config_enum_entry intervalstyle_options[] = {
|
|||||||
{NULL, 0, false}
|
{NULL, 0, false}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const struct config_enum_entry icu_validation_level_options[] = {
|
||||||
|
{"disabled", -1, false},
|
||||||
|
{"debug5", DEBUG5, false},
|
||||||
|
{"debug4", DEBUG4, false},
|
||||||
|
{"debug3", DEBUG3, false},
|
||||||
|
{"debug2", DEBUG2, false},
|
||||||
|
{"debug1", DEBUG1, false},
|
||||||
|
{"debug", DEBUG2, true},
|
||||||
|
{"log", LOG, false},
|
||||||
|
{"info", INFO, true},
|
||||||
|
{"notice", NOTICE, false},
|
||||||
|
{"warning", WARNING, false},
|
||||||
|
{"error", ERROR, false},
|
||||||
|
{NULL, 0, false}
|
||||||
|
};
|
||||||
|
|
||||||
StaticAssertDecl(lengthof(intervalstyle_options) == (INTSTYLE_ISO_8601 + 2),
|
StaticAssertDecl(lengthof(intervalstyle_options) == (INTSTYLE_ISO_8601 + 2),
|
||||||
"array length mismatch");
|
"array length mismatch");
|
||||||
|
|
||||||
@ -4643,6 +4659,16 @@ struct config_enum ConfigureNamesEnum[] =
|
|||||||
NULL, NULL, NULL
|
NULL, NULL, NULL
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
{"icu_validation_level", PGC_USERSET, CLIENT_CONN_LOCALE,
|
||||||
|
gettext_noop("Log level for reporting invalid ICU locale strings."),
|
||||||
|
NULL
|
||||||
|
},
|
||||||
|
&icu_validation_level,
|
||||||
|
ERROR, icu_validation_level_options,
|
||||||
|
NULL, NULL, NULL
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
{"log_error_verbosity", PGC_SUSET, LOGGING_WHAT,
|
{"log_error_verbosity", PGC_SUSET, LOGGING_WHAT,
|
||||||
gettext_noop("Sets the verbosity of logged messages."),
|
gettext_noop("Sets the verbosity of logged messages."),
|
||||||
|
@ -731,6 +731,9 @@
|
|||||||
#lc_numeric = 'C' # locale for number formatting
|
#lc_numeric = 'C' # locale for number formatting
|
||||||
#lc_time = 'C' # locale for time formatting
|
#lc_time = 'C' # locale for time formatting
|
||||||
|
|
||||||
|
#icu_validation_level = ERROR # report ICU locale validation
|
||||||
|
# errors at the given level
|
||||||
|
|
||||||
# default configuration for text search
|
# default configuration for text search
|
||||||
#default_text_search_config = 'pg_catalog.simple'
|
#default_text_search_config = 'pg_catalog.simple'
|
||||||
|
|
||||||
|
@ -2242,6 +2242,58 @@ check_icu_locale_encoding(int user_enc)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform best-effort check that the locale is a valid one. Should be
|
||||||
|
* consistent with pg_locale.c, except that it doesn't need to open the
|
||||||
|
* collator (that will happen during post-bootstrap initialization).
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
icu_validate_locale(const char *loc_str)
|
||||||
|
{
|
||||||
|
#ifdef USE_ICU
|
||||||
|
UErrorCode status;
|
||||||
|
char lang[ULOC_LANG_CAPACITY];
|
||||||
|
bool found = false;
|
||||||
|
|
||||||
|
/* validate that we can extract the language */
|
||||||
|
status = U_ZERO_ERROR;
|
||||||
|
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
|
||||||
|
if (U_FAILURE(status))
|
||||||
|
{
|
||||||
|
pg_fatal("could not get language from locale \"%s\": %s",
|
||||||
|
loc_str, u_errorName(status));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check for special language name */
|
||||||
|
if (strcmp(lang, "") == 0 ||
|
||||||
|
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
|
||||||
|
strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
|
||||||
|
found = true;
|
||||||
|
|
||||||
|
/* search for matching language within ICU */
|
||||||
|
for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
|
||||||
|
{
|
||||||
|
const char *otherloc = uloc_getAvailable(i);
|
||||||
|
char otherlang[ULOC_LANG_CAPACITY];
|
||||||
|
|
||||||
|
status = U_ZERO_ERROR;
|
||||||
|
uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
|
||||||
|
if (U_FAILURE(status))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (strcmp(lang, otherlang) == 0)
|
||||||
|
found = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!found)
|
||||||
|
pg_fatal("locale \"%s\" has unknown language \"%s\"",
|
||||||
|
loc_str, lang);
|
||||||
|
#else
|
||||||
|
pg_fatal("ICU is not supported in this build");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Determine default ICU locale by opening the default collator and reading
|
* Determine default ICU locale by opening the default collator and reading
|
||||||
* its locale.
|
* its locale.
|
||||||
@ -2344,9 +2396,11 @@ setlocales(void)
|
|||||||
printf(_("Using default ICU locale \"%s\".\n"), icu_locale);
|
printf(_("Using default ICU locale \"%s\".\n"), icu_locale);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
icu_validate_locale(icu_locale);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In supported builds, the ICU locale ID will be checked by the
|
* In supported builds, the ICU locale ID will be opened during
|
||||||
* backend during post-bootstrap initialization.
|
* post-bootstrap initialization, which will perform extra checks.
|
||||||
*/
|
*/
|
||||||
#ifndef USE_ICU
|
#ifndef USE_ICU
|
||||||
pg_fatal("ICU is not supported in this build");
|
pg_fatal("ICU is not supported in this build");
|
||||||
|
@ -128,6 +128,24 @@ if ($ENV{with_icu} eq 'yes')
|
|||||||
],
|
],
|
||||||
qr/error: encoding mismatch/,
|
qr/error: encoding mismatch/,
|
||||||
'fails for encoding not supported by ICU');
|
'fails for encoding not supported by ICU');
|
||||||
|
|
||||||
|
command_fails_like(
|
||||||
|
[
|
||||||
|
'initdb', '--no-sync',
|
||||||
|
'--locale-provider=icu',
|
||||||
|
'--icu-locale=nonsense-nowhere', "$tempdir/dataX"
|
||||||
|
],
|
||||||
|
qr/error: locale "nonsense-nowhere" has unknown language "nonsense"/,
|
||||||
|
'fails for nonsense language');
|
||||||
|
|
||||||
|
command_fails_like(
|
||||||
|
[
|
||||||
|
'initdb', '--no-sync',
|
||||||
|
'--locale-provider=icu',
|
||||||
|
'--icu-locale=@colNumeric=lower', "$tempdir/dataX"
|
||||||
|
],
|
||||||
|
qr/could not open collator for locale "\@colNumeric=lower": U_ILLEGAL_ARGUMENT_ERROR/,
|
||||||
|
'fails for invalid collation argument');
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -40,6 +40,7 @@ extern PGDLLIMPORT char *locale_messages;
|
|||||||
extern PGDLLIMPORT char *locale_monetary;
|
extern PGDLLIMPORT char *locale_monetary;
|
||||||
extern PGDLLIMPORT char *locale_numeric;
|
extern PGDLLIMPORT char *locale_numeric;
|
||||||
extern PGDLLIMPORT char *locale_time;
|
extern PGDLLIMPORT char *locale_time;
|
||||||
|
extern PGDLLIMPORT int icu_validation_level;
|
||||||
|
|
||||||
/* lc_time localization cache */
|
/* lc_time localization cache */
|
||||||
extern PGDLLIMPORT char *localized_abbrev_days[];
|
extern PGDLLIMPORT char *localized_abbrev_days[];
|
||||||
@ -118,11 +119,12 @@ extern size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
|
|||||||
extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
|
extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
|
||||||
size_t srclen, pg_locale_t locale);
|
size_t srclen, pg_locale_t locale);
|
||||||
|
|
||||||
|
extern void icu_validate_locale(const char *loc_str);
|
||||||
|
|
||||||
#ifdef USE_ICU
|
#ifdef USE_ICU
|
||||||
extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
|
extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
|
||||||
extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
|
extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
|
||||||
#endif
|
#endif
|
||||||
extern void check_icu_locale(const char *icu_locale);
|
|
||||||
|
|
||||||
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
|
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
|
||||||
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,
|
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,
|
||||||
|
@ -1035,7 +1035,14 @@ END
|
|||||||
$$;
|
$$;
|
||||||
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
|
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
|
||||||
ERROR: parameter "locale" must be specified
|
ERROR: parameter "locale" must be specified
|
||||||
CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */ DROP COLLATION testx;
|
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails
|
||||||
|
ERROR: ICU locale "nonsense-nowhere" has unknown language "nonsense"
|
||||||
|
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
|
||||||
|
SET icu_validation_level = WARNING;
|
||||||
|
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx;
|
||||||
|
WARNING: ICU locale "nonsense-nowhere" has unknown language "nonsense"
|
||||||
|
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
|
||||||
|
RESET icu_validation_level;
|
||||||
CREATE COLLATION test4 FROM nonsense;
|
CREATE COLLATION test4 FROM nonsense;
|
||||||
ERROR: collation "nonsense" for encoding "UTF8" does not exist
|
ERROR: collation "nonsense" for encoding "UTF8" does not exist
|
||||||
CREATE COLLATION test5 FROM test0;
|
CREATE COLLATION test5 FROM test0;
|
||||||
|
@ -371,7 +371,10 @@ BEGIN
|
|||||||
END
|
END
|
||||||
$$;
|
$$;
|
||||||
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
|
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
|
||||||
CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */ DROP COLLATION testx;
|
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails
|
||||||
|
SET icu_validation_level = WARNING;
|
||||||
|
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx;
|
||||||
|
RESET icu_validation_level;
|
||||||
|
|
||||||
CREATE COLLATION test4 FROM nonsense;
|
CREATE COLLATION test4 FROM nonsense;
|
||||||
CREATE COLLATION test5 FROM test0;
|
CREATE COLLATION test5 FROM test0;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user