diff --git a/contrib/citext/expected/citext_utf8.out b/contrib/citext/expected/citext_utf8.out index 666b07ccec..77b4586d8f 100644 --- a/contrib/citext/expected/citext_utf8.out +++ b/contrib/citext/expected/citext_utf8.out @@ -1,9 +1,16 @@ /* * This test must be run in a database with UTF-8 encoding * and a Unicode-aware locale. + * + * Also disable this file for ICU, because the test for the + * Turkish dotted I is not correct for many ICU locales. citext always + * uses the default collation, so it's not easy to restrict the test + * to the "tr-TR-x-icu" collation where it will succeed. */ SELECT getdatabaseencoding() <> 'UTF8' OR - current_setting('lc_ctype') = 'C' + current_setting('lc_ctype') = 'C' OR + (SELECT datlocprovider='i' FROM pg_database + WHERE datname=current_database()) AS skip_test \gset \if :skip_test \quit diff --git a/contrib/citext/expected/citext_utf8_1.out b/contrib/citext/expected/citext_utf8_1.out index 433e985349..d1e1fe1a9d 100644 --- a/contrib/citext/expected/citext_utf8_1.out +++ b/contrib/citext/expected/citext_utf8_1.out @@ -1,9 +1,16 @@ /* * This test must be run in a database with UTF-8 encoding * and a Unicode-aware locale. + * + * Also disable this file for ICU, because the test for the + * Turkish dotted I is not correct for many ICU locales. citext always + * uses the default collation, so it's not easy to restrict the test + * to the "tr-TR-x-icu" collation where it will succeed. 
*/ SELECT getdatabaseencoding() <> 'UTF8' OR - current_setting('lc_ctype') = 'C' + current_setting('lc_ctype') = 'C' OR + (SELECT datlocprovider='i' FROM pg_database + WHERE datname=current_database()) AS skip_test \gset \if :skip_test \quit diff --git a/contrib/citext/sql/citext_utf8.sql b/contrib/citext/sql/citext_utf8.sql index d068000b42..8530c68dd7 100644 --- a/contrib/citext/sql/citext_utf8.sql +++ b/contrib/citext/sql/citext_utf8.sql @@ -1,10 +1,17 @@ /* * This test must be run in a database with UTF-8 encoding * and a Unicode-aware locale. + * + * Also disable this file for ICU, because the test for the + * Turkish dotted I is not correct for many ICU locales. citext always + * uses the default collation, so it's not easy to restrict the test + * to the "tr-TR-x-icu" collation where it will succeed. */ SELECT getdatabaseencoding() <> 'UTF8' OR - current_setting('lc_ctype') = 'C' + current_setting('lc_ctype') = 'C' OR + (SELECT datlocprovider='i' FROM pg_database + WHERE datname=current_database()) AS skip_test \gset \if :skip_test \quit diff --git a/contrib/unaccent/expected/unaccent.out b/contrib/unaccent/expected/unaccent.out index ee0ac71a1c..cef98ee60c 100644 --- a/contrib/unaccent/expected/unaccent.out +++ b/contrib/unaccent/expected/unaccent.out @@ -1,3 +1,12 @@ +-- unaccent is broken if the default collation is provided by ICU and +-- LC_CTYPE=C +SELECT current_setting('lc_ctype') = 'C' AND + (SELECT datlocprovider='i' FROM pg_database + WHERE datname=current_database()) + AS skip_test \gset +\if :skip_test +\quit +\endif CREATE EXTENSION unaccent; -- must have a UTF8 database SELECT getdatabaseencoding(); diff --git a/contrib/unaccent/expected/unaccent_1.out b/contrib/unaccent/expected/unaccent_1.out new file mode 100644 index 0000000000..0a4a3838ab --- /dev/null +++ b/contrib/unaccent/expected/unaccent_1.out @@ -0,0 +1,8 @@ +-- unaccent is broken if the default collation is provided by ICU and +-- LC_CTYPE=C +SELECT current_setting('lc_ctype') 
= 'C' AND + (SELECT datlocprovider='i' FROM pg_database + WHERE datname=current_database()) + AS skip_test \gset +\if :skip_test +\quit diff --git a/contrib/unaccent/sql/unaccent.sql b/contrib/unaccent/sql/unaccent.sql index 3fc0c706be..027dfb964a 100644 --- a/contrib/unaccent/sql/unaccent.sql +++ b/contrib/unaccent/sql/unaccent.sql @@ -1,3 +1,14 @@ + +-- unaccent is broken if the default collation is provided by ICU and +-- LC_CTYPE=C +SELECT current_setting('lc_ctype') = 'C' AND + (SELECT datlocprovider='i' FROM pg_database + WHERE datname=current_database()) + AS skip_test \gset +\if :skip_test +\quit +\endif + CREATE EXTENSION unaccent; -- must have a UTF8 database diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index c96164195d..05a9c2cf58 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -89,10 +89,28 @@ PostgreSQL documentation and character set encoding. These can also be set separately for each database when it is created. <command>initdb</command> determines those settings for the template databases, which will serve as the default for - all other databases. By default, <command>initdb</command> uses the - locale provider <literal>libc</literal>, takes the locale settings from - the environment, and determines the encoding from the locale settings. - This is almost always sufficient, unless there are special requirements. + all other databases. + </para> + + <para> + By default, <command>initdb</command> uses the ICU library to provide + locale services if the server was built with ICU support; otherwise it uses + the <literal>libc</literal> locale provider (see <xref + linkend="locale-providers"/>). To choose the specific ICU locale ID to + apply, use the option <option>--icu-locale</option>. Note that for + implementation reasons and to support legacy code, + <command>initdb</command> will still select and initialize libc locale + settings when the ICU locale provider is used. 
+ </para> + + <para> + Alternatively, <command>initdb</command> can use the locale provider + <literal>libc</literal>. To select this option, specify + <literal>--locale-provider=libc</literal>, or build the server without ICU + support. The <literal>libc</literal> locale provider takes the locale + settings from the environment, and determines the encoding from the locale + settings. This is almost always sufficient, unless there are special + requirements. </para> <para> @@ -103,17 +121,6 @@ PostgreSQL documentation categories can give nonsensical results, so this should be used with care. </para> - <para> - Alternatively, the ICU library can be used to provide locale services. - (Again, this only sets the default for subsequently created databases.) To - select this option, specify <literal>--locale-provider=icu</literal>. - To choose the specific ICU locale ID to apply, use the option - <option>--icu-locale</option>. Note that - for implementation reasons and to support legacy code, - <command>initdb</command> will still select and initialize libc locale - settings when the ICU locale provider is used. - </para> - <para> When <command>initdb</command> runs, it will print out the locale settings it has chosen. If you have complex requirements or specified multiple @@ -234,7 +241,13 @@ PostgreSQL documentation <term><option>--icu-locale=<replaceable>locale</replaceable></option></term> <listitem> <para> - Specifies the ICU locale ID, if the ICU locale provider is used. + Specifies the ICU locale when the ICU provider is used. Locale support + is described in <xref linkend="locale"/>. + </para> + <para> + If this option is not specified, the locale is inherited from the + environment in which <command>initdb</command> runs. The environment's + locale is matched to a similar ICU locale name, if possible. 
</para> </listitem> </varlistentry> @@ -307,10 +320,12 @@ PostgreSQL documentation <term><option>--locale-provider={<literal>libc</literal>|<literal>icu</literal>}</option></term> <listitem> <para> - This option sets the locale provider for databases created in the - new cluster. It can be overridden in the <command>CREATE + This option sets the locale provider for databases created in the new + cluster. It can be overridden in the <command>CREATE DATABASE</command> command when new databases are subsequently - created. The default is <literal>libc</literal>. + created. The default is <literal>icu</literal> if the server was + built with ICU support; otherwise the default is + <literal>libc</literal> (see <xref linkend="locale-providers"/>). </para> </listitem> </varlistentry> diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile index eab89c5501..d69bd89572 100644 --- a/src/bin/initdb/Makefile +++ b/src/bin/initdb/Makefile @@ -16,7 +16,7 @@ subdir = src/bin/initdb top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS) +override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(ICU_CFLAGS) $(CPPFLAGS) # Note: it's important that we link to encnames.o from libpgcommon, not # from libpq, else we have risks of version skew if we run with a libpq @@ -24,7 +24,7 @@ override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS) # should ensure that that happens. # # We need libpq only because fe_utils does. -LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) +LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) $(ICU_LIBS) # use system timezone data? 
ifneq (,$(with_system_tzdata)) diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 5e3c6a27c4..bf88cd2439 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -53,6 +53,9 @@ #include <netdb.h> #include <sys/socket.h> #include <sys/stat.h> +#ifdef USE_ICU +#include <unicode/ucol.h> +#endif #include <unistd.h> #include <signal.h> #include <time.h> @@ -133,7 +136,11 @@ static char *lc_monetary = NULL; static char *lc_numeric = NULL; static char *lc_time = NULL; static char *lc_messages = NULL; +#ifdef USE_ICU +static char locale_provider = COLLPROVIDER_ICU; +#else static char locale_provider = COLLPROVIDER_LIBC; +#endif static char *icu_locale = NULL; static char *icu_rules = NULL; static const char *default_text_search_config = NULL; @@ -2028,6 +2035,50 @@ check_icu_locale_encoding(int user_enc) return true; } +/* + * Check that ICU accepts the locale name; or if not specified, retrieve the + * default ICU locale. + */ +static void +check_icu_locale(void) +{ +#ifdef USE_ICU + UCollator *collator; + UErrorCode status; + + status = U_ZERO_ERROR; + collator = ucol_open(icu_locale, &status); + if (U_FAILURE(status)) + { + if (icu_locale) + pg_fatal("could not open collator for locale \"%s\": %s", + icu_locale, u_errorName(status)); + else + pg_fatal("could not open collator for default locale: %s", + u_errorName(status)); + } + + /* if not specified, get locale from default collator */ + if (icu_locale == NULL) + { + const char *default_locale; + + status = U_ZERO_ERROR; + default_locale = ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, + &status); + if (U_FAILURE(status)) + { + ucol_close(collator); + pg_fatal("could not determine default ICU locale"); + } + + icu_locale = pg_strdup(default_locale); + } + + ucol_close(collator); +#endif +} + /* * set up the locale variables * @@ -2081,8 +2132,7 @@ setlocales(void) if (locale_provider == COLLPROVIDER_ICU) { - if (!icu_locale) - pg_fatal("ICU locale must be specified"); + 
check_icu_locale(); /* * In supported builds, the ICU locale ID will be checked by the diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index 772769acab..e5d214e09c 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ b/src/bin/initdb/t/001_initdb.pl @@ -97,11 +97,6 @@ SKIP: if ($ENV{with_icu} eq 'yes') { - command_fails_like( - [ 'initdb', '--no-sync', '--locale-provider=icu', "$tempdir/data2" ], - qr/initdb: error: ICU locale must be specified/, - 'locale provider ICU requires --icu-locale'); - command_ok( [ 'initdb', '--no-sync', @@ -116,7 +111,7 @@ if ($ENV{with_icu} eq 'yes') '--locale-provider=icu', '--icu-locale=@colNumeric=lower', "$tempdir/dataX" ], - qr/FATAL: could not open collator for locale/, + qr/error: could not open collator for locale/, 'fails for invalid ICU locale'); command_fails_like( diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 187e4b8d07..9c354213ce 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -1758,7 +1758,7 @@ my %tests = ( create_sql => "CREATE DATABASE dump_test2 LOCALE = 'C' TEMPLATE = template0;", regexp => qr/^ - \QCREATE DATABASE dump_test2 \E.*\QLOCALE = 'C';\E + \QCREATE DATABASE dump_test2 \E.*\QLOCALE = 'C'\E /xm, like => { pg_dumpall_dbprivs => 1, }, }, diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl index 3ad4fbb00c..8ec58cdd64 100644 --- a/src/bin/scripts/t/020_createdb.pl +++ b/src/bin/scripts/t/020_createdb.pl @@ -13,7 +13,7 @@ program_version_ok('createdb'); program_options_handling_ok('createdb'); my $node = PostgreSQL::Test::Cluster->new('main'); -$node->init; +$node->init(extra => ['--locale-provider=libc']); $node->start; $node->issues_sql_like( diff --git a/src/interfaces/ecpg/test/Makefile b/src/interfaces/ecpg/test/Makefile index d7a7d1d1ca..cf841a3a5b 100644 --- a/src/interfaces/ecpg/test/Makefile +++ b/src/interfaces/ecpg/test/Makefile @@ -14,9 +14,6 @@ override CPPFLAGS := 
\ '-DSHELLPROG="$(SHELL)"' \ $(CPPFLAGS) -# default encoding for regression tests -ENCODING = SQL_ASCII - ifneq ($(build_os),mingw32) abs_builddir := $(shell pwd) else diff --git a/src/interfaces/ecpg/test/connect/test5.pgc b/src/interfaces/ecpg/test/connect/test5.pgc index de29160089..d512553677 100644 --- a/src/interfaces/ecpg/test/connect/test5.pgc +++ b/src/interfaces/ecpg/test/connect/test5.pgc @@ -55,7 +55,7 @@ exec sql end declare section; exec sql connect to 'unix:postgresql://localhost/ecpg2_regression' as main user :user USING "connectpw"; exec sql disconnect main; - exec sql connect to unix:postgresql://localhost/ecpg2_regression?connect_timeout=180&client_encoding=latin1 as main user regress_ecpg_user1/connectpw; + exec sql connect to unix:postgresql://localhost/ecpg2_regression?connect_timeout=180&client_encoding=sql_ascii as main user regress_ecpg_user1/connectpw; exec sql disconnect main; exec sql connect to "unix:postgresql://200.46.204.71/ecpg2_regression" as main user regress_ecpg_user1/connectpw; diff --git a/src/interfaces/ecpg/test/expected/connect-test5.c b/src/interfaces/ecpg/test/expected/connect-test5.c index c1124c627f..ec1514ed9a 100644 --- a/src/interfaces/ecpg/test/expected/connect-test5.c +++ b/src/interfaces/ecpg/test/expected/connect-test5.c @@ -117,7 +117,7 @@ main(void) #line 56 "test5.pgc" - { ECPGconnect(__LINE__, 0, "unix:postgresql://localhost/ecpg2_regression?connect_timeout=180 & client_encoding=latin1" , "regress_ecpg_user1" , "connectpw" , "main", 0); } + { ECPGconnect(__LINE__, 0, "unix:postgresql://localhost/ecpg2_regression?connect_timeout=180 & client_encoding=sql_ascii" , "regress_ecpg_user1" , "connectpw" , "main", 0); } #line 58 "test5.pgc" { ECPGdisconnect(__LINE__, "main");} diff --git a/src/interfaces/ecpg/test/expected/connect-test5.stderr b/src/interfaces/ecpg/test/expected/connect-test5.stderr index 01a6a0a13b..51cc18916a 100644 --- a/src/interfaces/ecpg/test/expected/connect-test5.stderr +++ 
b/src/interfaces/ecpg/test/expected/connect-test5.stderr @@ -50,7 +50,7 @@ [NO_PID]: sqlca: code: 0, state: 00000 [NO_PID]: ecpg_finish: connection main closed [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ECPGconnect: opening database ecpg2_regression on <DEFAULT> port <DEFAULT> with options connect_timeout=180 & client_encoding=latin1 for user regress_ecpg_user1 +[NO_PID]: ECPGconnect: opening database ecpg2_regression on <DEFAULT> port <DEFAULT> with options connect_timeout=180 & client_encoding=sql_ascii for user regress_ecpg_user1 [NO_PID]: sqlca: code: 0, state: 00000 [NO_PID]: ecpg_finish: connection main closed [NO_PID]: sqlca: code: 0, state: 00000 diff --git a/src/interfaces/ecpg/test/meson.build b/src/interfaces/ecpg/test/meson.build index d0be73ccf9..04c6819a79 100644 --- a/src/interfaces/ecpg/test/meson.build +++ b/src/interfaces/ecpg/test/meson.build @@ -69,7 +69,6 @@ ecpg_test_files = files( ecpg_regress_args = [ '--dbname=ecpg1_regression,ecpg2_regression', '--create-role=regress_ecpg_user1,regress_ecpg_user2', - '--encoding=SQL_ASCII', ] tests += { diff --git a/src/test/icu/t/010_database.pl b/src/test/icu/t/010_database.pl index 80ab1c7789..45d77c319a 100644 --- a/src/test/icu/t/010_database.pl +++ b/src/test/icu/t/010_database.pl @@ -12,7 +12,7 @@ if ($ENV{with_icu} ne 'yes') } my $node1 = PostgreSQL::Test::Cluster->new('node1'); -$node1->init; +$node1->init(extra => ['--locale-provider=libc']); $node1->start; $node1->safe_psql('postgres',