Reject non-ASCII locale names.

Commit bf03cfd1 started scanning all available BCP 47 locale names on
Windows.  This caused an abort/crash in the Windows runtime library if
the default locale name contained non-ASCII characters, because of our
use of the setlocale() save/restore pattern with "char" strings.  After
switching to another locale with a different encoding, the saved name
could no longer be understood, and setlocale() would abort.

"Turkish_Türkiye.1254" is the example from recent reports, but there are
other examples of countries and languages with non-ASCII characters in
their names, and they appear in Windows' (old style) locale names.

To defend against this:

1.  In initdb, reject non-ASCII locale names given explicitly on the
command line, or returned by the operating system environment with
setlocale(..., ""), or "canonicalized" by the operating system when we
set it.

2.  In initdb only, perform the save-and-restore with Windows'
non-standard wchar_t variant of setlocale(), so that it is not subject
to round trip failures stemming from char string encoding confusion.

3.  In the backend, we don't have to worry about the save-and-restore
problem because we have already vetted the defaults, so we just have to
make sure that CREATE DATABASE also rejects non-ASCII names in any new
databases.  SET lc_XXX doesn't suffer from the problem, but the ban
applies to it too because it uses check_locale().  CREATE COLLATION
doesn't suffer from the problem either, but it doesn't use
check_locale() so it is not included in the new ban for now, to minimize
the change.

Anyone who encounters the new error message should either create a new
duplicated locale with an ASCII-only name using Windows Locale Builder,
or consider using BCP 47 names like "tr-TR".  Users already couldn't
initialize a cluster with "Turkish_Türkiye.1254" on PostgreSQL 16+, but
the new failure mode is an error message that explains why, instead of a
crash.

Back-patch to 16, where bf03cfd1 landed.  Older versions are affected
in theory too, but only 16 and later are causing crash reports.

Reviewed-by: Andrew Dunstan <andrew@dunslane.net> (the idea, not the patch)
Reported-by: Haifang Wang (Centific Technologies Inc) <v-haiwang@microsoft.com>
Discussion: https://postgr.es/m/PH8PR21MB3902F334A3174C54058F792CE5182%40PH8PR21MB3902.namprd21.prod.outlook.com
This commit is contained in:
Thomas Munro 2024-10-05 13:48:33 +13:00 committed by Muhammad Usama
parent e676bdac1a
commit b53817a861
2 changed files with 93 additions and 17 deletions

View File

@ -56,6 +56,7 @@
#include "access/htup_details.h"
#include "catalog/pg_collation.h"
#include "common/string.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
@ -317,6 +318,16 @@ check_locale(int category, const char *locale, char **canonname)
char *save;
char *res;
/* Don't let Windows' non-ASCII locale names in. */
if (!pg_is_ascii(locale))
{
ereport(WARNING,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("locale name \"%s\" contains non-ASCII characters",
locale)));
return false;
}
if (canonname)
*canonname = NULL; /* in case of failure */
@ -339,6 +350,18 @@ check_locale(int category, const char *locale, char **canonname)
elog(WARNING, "failed to restore old locale \"%s\"", save);
pfree(save);
/* Don't let Windows' non-ASCII locale names out. */
if (canonname && *canonname && !pg_is_ascii(*canonname))
{
ereport(WARNING,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("locale name \"%s\" contains non-ASCII characters",
*canonname)));
pfree(*canonname);
*canonname = NULL;
return false;
}
return (res != NULL);
}

View File

@ -340,6 +340,61 @@ do { \
output_failed = true, output_errno = errno; \
} while (0)
/*
 * Type of the saved locale name handed from save_global_locale() to
 * restore_global_locale(): a wide string on Windows, where the name is
 * obtained with _wsetlocale(), and a plain char string elsewhere.
 */
#ifdef WIN32
typedef wchar_t *save_locale_t;
#else
typedef char *save_locale_t;
#endif
/*
 * Return a freshly allocated copy of the name of the current global locale
 * for "category".  Hand the result to restore_global_locale(), which both
 * reinstates the locale and frees the copy.
 *
 * Locale names coming from the environment have not been checked for
 * non-ASCII characters.  If we stored such a name as a char string and then
 * switched to a different locale, plain setlocale() might decode the saved
 * name with a different encoding and fail to recognize it when we try to
 * switch back.  On Windows we therefore do the round trip with the wchar_t
 * variant of setlocale().  (Only initdb needs this; comparable backend code
 * deals with a global locale name that should already be known to be
 * ASCII-only.)
 *
 * Any failure is reported with pg_fatal().
 */
static save_locale_t
save_global_locale(int category)
{
#ifdef WIN32
	const wchar_t *current = _wsetlocale(category, NULL);
	wchar_t    *copy;

	if (current == NULL)
		pg_fatal("_wsetlocale() failed");

	/* The library may reuse its own buffer, so keep a private copy. */
	copy = wcsdup(current);
	if (copy == NULL)
		pg_fatal("out of memory");

	return copy;
#else
	const char *current = setlocale(category, NULL);

	if (current == NULL)
		pg_fatal("setlocale() failed");

	/* The library may reuse its own buffer, so keep a private copy. */
	return pg_strdup(current);
#endif
}
/*
 * Switch "category" of the global locale back to the name previously
 * obtained from save_global_locale(), then release that saved name.
 *
 * Failure to reinstate the locale is reported with pg_fatal().
 */
static void
restore_global_locale(int category, save_locale_t save)
{
#ifdef WIN32
	const wchar_t *applied = _wsetlocale(category, save);

	/* The wide name is not included in the message. */
	if (applied == NULL)
		pg_fatal("failed to restore old locale");
#else
	const char *applied = setlocale(category, save);

	if (applied == NULL)
		pg_fatal("failed to restore old locale \"%s\"", save);
#endif

	free(save);
}
/*
* Escape single quotes and backslashes, suitably for insertions into
* configuration files or SQL E'' strings.
@ -2074,16 +2129,13 @@ locale_date_order(const char *locale)
char *posD;
char *posM;
char *posY;
char *save;
save_locale_t save;
size_t res;
int result;
result = DATEORDER_MDY; /* default */
save = setlocale(LC_TIME, NULL);
if (!save)
return result;
save = pg_strdup(save);
save = save_global_locale(LC_TIME);
setlocale(LC_TIME, locale);
@ -2094,8 +2146,7 @@ locale_date_order(const char *locale)
res = my_strftime(buf, sizeof(buf), "%x", &testtime);
setlocale(LC_TIME, save);
free(save);
restore_global_locale(LC_TIME, save);
if (res == 0)
return result;
@ -2132,18 +2183,17 @@ locale_date_order(const char *locale)
static void
check_locale_name(int category, const char *locale, char **canonname)
{
char *save;
save_locale_t save;
char *res;
/* Don't let Windows' non-ASCII locale names in. */
if (locale && !pg_is_ascii(locale))
pg_fatal("locale name \"%s\" contains non-ASCII characters", locale);
if (canonname)
*canonname = NULL; /* in case of failure */
save = setlocale(category, NULL);
if (!save)
pg_fatal("setlocale() failed");
/* save may be pointing at a modifiable scratch variable, so copy it. */
save = pg_strdup(save);
save = save_global_locale(category);
/* for setlocale() call */
if (!locale)
@ -2157,9 +2207,7 @@ check_locale_name(int category, const char *locale, char **canonname)
*canonname = pg_strdup(res);
/* restore old value. */
if (!setlocale(category, save))
pg_fatal("failed to restore old locale \"%s\"", save);
free(save);
restore_global_locale(category, save);
/* complain if locale wasn't valid */
if (res == NULL)
@ -2183,6 +2231,11 @@ check_locale_name(int category, const char *locale, char **canonname)
pg_fatal("invalid locale settings; check LANG and LC_* environment variables");
}
}
/* Don't let Windows' non-ASCII locale names out. */
if (canonname && !pg_is_ascii(*canonname))
pg_fatal("locale name \"%s\" contains non-ASCII characters",
*canonname);
}
/*