From 8978b7f1d6e4d01a737559ebaf38df1810cfd622 Mon Sep 17 00:00:00 2001 From: "Yury V. Zaytsev" Date: Wed, 11 Sep 2024 19:32:43 +0200 Subject: [PATCH 01/13] Ticket #3972: fix test harness on Illumos by resolving shellcheck warnings Signed-off-by: Yury V. Zaytsev --- tests/src/vfs/extfs/helpers-list/test_all | 48 +++++++++++++++-------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/tests/src/vfs/extfs/helpers-list/test_all b/tests/src/vfs/extfs/helpers-list/test_all index f5f9be616..012d7603b 100755 --- a/tests/src/vfs/extfs/helpers-list/test_all +++ b/tests/src/vfs/extfs/helpers-list/test_all @@ -19,6 +19,9 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . +# +# Suppress warnings about `local` +# shellcheck disable=SC3043 help() { cat << EOS @@ -94,6 +97,11 @@ opt_run_mcdiff_on_error=no # "yes" if '--mcdiff' provided. ############################ Utility functions ############################# +# Support shells missing local, but having typeset like ksh93+ on Solaris +if type typeset > /dev/null 2>&1; then + alias local="typeset" +fi + # # Does $1 contain $2? # @@ -109,7 +117,8 @@ has_string() { # Given "/path/to/basename.and.some.ext", returns "basename" # basename_sans_extensions() { - local base="$(basename "$1")" + local base + base="$(basename "$1")" echo "${base%%.*}" } @@ -137,25 +146,26 @@ has_colors() { init_colors() { if has_colors; then - local esc="$(printf '\033')" # for portability - C_bold="$esc[1m" - C_green="$esc[1;32m" - C_red="$esc[1;31m" - C_magenta="$esc[1;35m" - C_norm="$esc[0m" + local esc + esc="$(printf '\033')" # for portability + C_bold="${esc}[1m" + C_green="${esc}[1;32m" + C_red="${esc}[1;31m" + C_magenta="${esc}[1;35m" + C_norm="${esc}[0m" fi } # # A few colorful alternatives to 'echo'. 
# -header() { echo $C_bold"$@"$C_norm; } -err() { echo $C_red"$@"$C_norm; } -notice() { echo $C_magenta"$@"$C_norm; } -success() { echo $C_green"$@"$C_norm; } +header() { echo "$C_bold$*$C_norm"; } +err() { echo "$C_red$*$C_norm"; } +notice() { echo "$C_magenta$*$C_norm"; } +success() { echo "$C_green$*$C_norm"; } die() { - err "Error: $@" + err "Error: $*" exit 1 } @@ -251,7 +261,9 @@ run() { # Set up variables: # - local helper_name="$(basename_sans_extensions "$input")" + local helper_name + helper_name="$(basename_sans_extensions "$input")" + local expected_parsed_output="${input%.input}.output" local env_vars_file="${input%.input}.env_vars" local args_file="${input%.input}.args" @@ -281,8 +293,10 @@ run() { local extra_parser_args="" [ -f "$args_file" ] && extra_parser_args="$(cat "$args_file")" - local actual_output="$(temp_file $helper_name.actual-output.XXXXXXXX)" - local actual_parsed_output="$(temp_file $helper_name.actual-parsed-output.XXXXXXXX)" + local actual_output + local actual_parsed_output + actual_output="$(temp_file $helper_name.actual-output.XXXXXXXX)" + actual_parsed_output="$(temp_file $helper_name.actual-parsed-output.XXXXXXXX)" # # Variables are all set. Now do the actual stuff: @@ -350,7 +364,7 @@ run() { if is_interactive; then if [ $opt_run_mcdiff_on_error = "yes" ]; then notice "Hit ENTER to launch mcdiff ..." - read dummy_var # dash needs this. + read -r _dummy_argument # dash needs an argument ${MCDIFF:-mcdiff} "$expected_parsed_output" "$actual_parsed_output" else notice "Tip: invoke this program with '--mcdiff' to automatically launch" @@ -377,7 +391,7 @@ run() { done - [ $pass_count = "0" -a $error_count = "0" ] && notice "Note: The data directory contains no *.input files." + [ $pass_count = "0" ] && [ $error_count = "0" ] && notice "Note: The data directory contains no *.input files." [ $error_count = "0" ] # exit status of function. } From f84099512e29a992d6138fda81d6d7c520653bb5 Mon Sep 17 00:00:00 2001 From: "Yury V. 
Zaytsev" Date: Wed, 11 Sep 2024 19:38:11 +0200 Subject: [PATCH 02/13] extfs helpers: remove usage of `local`, instead use subshell to isolate variables Signed-off-by: Yury V. Zaytsev --- src/vfs/extfs/helpers/iso9660.in | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/vfs/extfs/helpers/iso9660.in b/src/vfs/extfs/helpers/iso9660.in index 61b0b1357..1ee5cc8bf 100644 --- a/src/vfs/extfs/helpers/iso9660.in +++ b/src/vfs/extfs/helpers/iso9660.in @@ -80,17 +80,16 @@ awk_xorriso_unesc=$(cat <<'EOF' EOF ) -xorriso_list() { +xorriso_list() ( if test -z "$XORRISO"; then return 1 fi - local temp_ls temp_ls=$(mktemp "${MC_TMPDIR:-/tmp}"/mc-iso9660.XXXXXX) || return 1 # $XORRISO must be unquoted here to hook into the testing framework $XORRISO -abort_on FATAL -dev stdio:"$1" -find / -exec lsdl 2>/dev/null >"$temp_ls" - local r=$? + r=$? if [ "$r" != 0 ]; then rm -f "$temp_ls" @@ -105,7 +104,7 @@ xorriso_list() { @AWK@ "$awk_xorriso_unesc" rm -f "$temp_ls" -} +) xorriso_copyout() { if test -z "$XORRISO"; then @@ -175,9 +174,7 @@ test_iso () { fi } -mcisofs_list () { - local lsl r - +mcisofs_list () ( # left as a reminder to implement compressed image support =) case "$1" in *.lz) MYCAT="lzip -dc";; @@ -227,7 +224,7 @@ BEGIN { if (name == "..") next; printf "%s%s%s\n", attr, dir, name }' -} +) mcisofs_copyout () { if [ "x$SEMICOLON" = "xYES" ]; then From faea164957fdc888a0324eb24f6be3dd9a93354f Mon Sep 17 00:00:00 2001 From: "Yury V. Zaytsev" Date: Tue, 24 Sep 2024 11:34:14 +0200 Subject: [PATCH 03/13] buildsys: add warning for non-GNU gettext and fix shellcheck issues Signed-off-by: Yury V. 
Zaytsev --- autogen.sh | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/autogen.sh b/autogen.sh index 8c06da89c..f17c51a9d 100755 --- a/autogen.sh +++ b/autogen.sh @@ -11,16 +11,20 @@ cd "$srcdir" ${AUTORECONF:-autoreconf} --verbose --install --force -I m4 ${AUTORECONF_FLAGS} # Customize the INSTALL file -rm -f INSTALL && ln -s doc/INSTALL +rm -f INSTALL && ln -s doc/INSTALL . # Generate po/POTFILES.in -${XGETTEXT:-xgettext} --keyword=_ --keyword=N_ --keyword=Q_ --output=- \ +if ! ${XGETTEXT:-xgettext} -h 2>&1 | grep -e '--keyword=' >/dev/null ; then + echo "gettext is unable to extract translations, set XGETTEXT to GNU gettext!" >&2 +else + ${XGETTEXT:-xgettext} --keyword=_ --keyword=N_ --keyword=Q_ --output=- \ `find . -name '*.[ch]'` | ${SED-sed} -ne '/^#:/{s/#://;s/:[0-9]*/\ /g;s/ //g;p;}' | \ grep -v '^$' | sort | uniq >po/POTFILES.in - -$srcdir/version.sh "$srcdir" - -if test -x $srcdir/configure.mc; then - $srcdir/configure.mc "$@" +fi + +"$srcdir/version.sh" "$srcdir" + +if test -x "$srcdir/configure.mc"; then + "$srcdir/configure.mc" "$@" fi From ffd6fd11f7c7bf62918e2be74d49c11ebef027b6 Mon Sep 17 00:00:00 2001 From: "Yury V. Zaytsev" Date: Tue, 24 Sep 2024 11:27:39 +0200 Subject: [PATCH 04/13] ydiff: fix `-Wdiscarded-qualifiers` warning ``` ../../../src/diffviewer/ydiff.c:613:17: warning: assignment discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers] 613 | next_ch = g_utf8_next_char (str); ``` Signed-off-by: Yury V. 
Zaytsev --- src/diffviewer/ydiff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffviewer/ydiff.c b/src/diffviewer/ydiff.c index 2c6326b04..6f7caf239 100644 --- a/src/diffviewer/ydiff.c +++ b/src/diffviewer/ydiff.c @@ -607,7 +607,7 @@ dview_get_utf (const char *str, int *ch, int *ch_length) } else { - char *next_ch; + const char *next_ch; /* Calculate UTF-8 char length */ next_ch = g_utf8_next_char (str); From 07e24c0289d2043a9c85ea9cec37b889736c15f3 Mon Sep 17 00:00:00 2001 From: "Yury V. Zaytsev" Date: Fri, 13 Sep 2024 09:26:53 +0200 Subject: [PATCH 05/13] lib/path: clarify conditions for creating converters Signed-off-by: Yury V. Zaytsev --- lib/vfs/path.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/vfs/path.c b/lib/vfs/path.c index c7ecc63c0..d6cec2136 100644 --- a/lib/vfs/path.c +++ b/lib/vfs/path.c @@ -896,8 +896,8 @@ vfs_path_element_clone (const vfs_path_element_t *element) new_element->vfs_prefix = g_strdup (element->vfs_prefix); #ifdef HAVE_CHARSET new_element->encoding = g_strdup (element->encoding); - if (vfs_path_element_need_cleanup_converter (element) && new_element->encoding != NULL) - new_element->dir.converter = str_crt_conv_from (new_element->encoding); + if (vfs_path_element_need_cleanup_converter (element) && element->encoding != NULL) + new_element->dir.converter = str_crt_conv_from (element->encoding); else new_element->dir.converter = element->dir.converter; #endif From 319598df415d0b6d2e63b576b206ca70fd414f2f Mon Sep 17 00:00:00 2001 From: "Yury V. Zaytsev" Date: Wed, 11 Sep 2024 19:34:38 +0200 Subject: [PATCH 06/13] tests: path_len - fix assertions, now test should fail Signed-off-by: Yury V. 
Zaytsev --- tests/lib/vfs/path_len.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/lib/vfs/path_len.c b/tests/lib/vfs/path_len.c index 6bab6f551..b9a2717bb 100644 --- a/tests/lib/vfs/path_len.c +++ b/tests/lib/vfs/path_len.c @@ -74,25 +74,30 @@ teardown (void) static const struct test_path_length_ds { const char *input_path; - const size_t expected_length; + const size_t expected_length_element_encoding; + const size_t expected_length_terminal_encoding; } test_path_length_ds[] = { { /* 0. */ NULL, + 0, 0 }, { /* 1. */ "/", + 1, 1 }, { /* 2. */ "/тестовый/путь", + 26, 26 }, #ifdef HAVE_CHARSET { /* 3. */ "/#enc:KOI8-R/тестовый/путь", - 38 + 14, + 38, }, #endif /* HAVE_CHARSET */ }; @@ -105,15 +110,19 @@ START_PARAMETRIZED_TEST (test_path_length, test_path_length_ds) { /* given */ vfs_path_t *vpath; - size_t actual_length; + char *path; + size_t actual_length_terminal_encoding, actual_length_element_encoding; vpath = vfs_path_from_str (data->input_path); + path = vpath != NULL ? vfs_path_get_by_index (vpath, 0)->path : NULL; /* when */ - actual_length = vfs_path_len (vpath); + actual_length_terminal_encoding = vfs_path_len (vpath); + actual_length_element_encoding = path != NULL ? strlen (path) : 0; /* then */ - ck_assert_int_eq (actual_length, data->expected_length); + ck_assert_int_eq (actual_length_terminal_encoding, data->expected_length_terminal_encoding); + ck_assert_int_eq (actual_length_element_encoding, data->expected_length_element_encoding); vfs_path_free (vpath, TRUE); } From f34e2ae715079f5dd7064787e40ed9e7b9bb735f Mon Sep 17 00:00:00 2001 From: "Yury V. Zaytsev" Date: Thu, 12 Sep 2024 07:55:18 +0200 Subject: [PATCH 07/13] tests: use UTF-8 to prevent creation of invalid converters Signed-off-by: Yury V. 
Zaytsev --- tests/lib/vfs/path_cmp.c | 2 +- tests/lib/vfs/path_len.c | 2 +- tests/lib/vfs/path_manipulations.c | 2 +- tests/lib/vfs/path_serialize.c | 2 +- tests/lib/vfs/vfs_path_string_convert.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/lib/vfs/path_cmp.c b/tests/lib/vfs/path_cmp.c index b2db3d2a1..28af0374f 100644 --- a/tests/lib/vfs/path_cmp.c +++ b/tests/lib/vfs/path_cmp.c @@ -42,7 +42,7 @@ static void setup (void) { - str_init_strings (NULL); + str_init_strings ("UTF-8"); vfs_init (); vfs_init_localfs (); diff --git a/tests/lib/vfs/path_len.c b/tests/lib/vfs/path_len.c index b9a2717bb..3475c7c47 100644 --- a/tests/lib/vfs/path_len.c +++ b/tests/lib/vfs/path_len.c @@ -42,7 +42,7 @@ static void setup (void) { - str_init_strings (NULL); + str_init_strings ("UTF-8"); vfs_init (); vfs_init_localfs (); diff --git a/tests/lib/vfs/path_manipulations.c b/tests/lib/vfs/path_manipulations.c index f379d912f..36a17f4e6 100644 --- a/tests/lib/vfs/path_manipulations.c +++ b/tests/lib/vfs/path_manipulations.c @@ -60,7 +60,7 @@ init_test_classes (void) static void setup (void) { - str_init_strings (NULL); + str_init_strings ("UTF-8"); vfs_init (); vfs_init_localfs (); diff --git a/tests/lib/vfs/path_serialize.c b/tests/lib/vfs/path_serialize.c index 442a3b224..477088544 100644 --- a/tests/lib/vfs/path_serialize.c +++ b/tests/lib/vfs/path_serialize.c @@ -45,7 +45,7 @@ static struct vfs_class vfs_test_ops1, vfs_test_ops2, vfs_test_ops3; static void setup (void) { - str_init_strings (NULL); + str_init_strings ("UTF-8"); vfs_init (); vfs_init_localfs (); diff --git a/tests/lib/vfs/vfs_path_string_convert.c b/tests/lib/vfs/vfs_path_string_convert.c index b062a83e8..1dcf93c95 100644 --- a/tests/lib/vfs/vfs_path_string_convert.c +++ b/tests/lib/vfs/vfs_path_string_convert.c @@ -48,7 +48,7 @@ static struct vfs_class vfs_test_ops1, vfs_test_ops2, vfs_test_ops3; static void setup (void) { - str_init_strings (NULL); + str_init_strings ("UTF-8"); vfs_init 
(); vfs_init_localfs (); From 1e0e6b5e1e3236023eb20bb8ab33ab9fd7d984e3 Mon Sep 17 00:00:00 2001 From: "Yury V. Zaytsev" Date: Fri, 13 Sep 2024 10:18:11 +0200 Subject: [PATCH 08/13] charset: rename IBM866 to CP866 for iconv and adjust charset names for codeset ``` linux $ iconv -l | grep 866 866// 866NAV// CP866// CP866NAV// CSIBM866// IBM866// IBM866NAV// solaris $ iconv -l | grep 866 CP866 (CP866, CP-866, CP_866, 866), IBM-866, macos % iconv -l | grep 866 CP866 866 CSIBM866 IBM866 MSCP866 musl/src/locale/codepages.h: "cp866\0" ``` On glibc-based systems, codeset will be set to charmap name, on most other systems it seems to be taken from locale name. ## Linux ``` zaytsev@fedora:~$ locale -a | grep ru ru_RU ru_RU.cp866 ru_RU.ibm866 ru_RU.iso88595 ru_RU.koi8r ru_RU.utf8 russian ru_UA ru_UA.koi8u ru_UA.utf8 zaytsev@fedora:~/src$ LC_ALL=ru_RU.cp866 LANG=ru_RU.cp866 ./a.out IBM866 zaytsev@fedora:~/src$ LC_ALL=ru_RU.koi8r LANG=ru_RU.koi8r ./a.out KOI8-R zaytsev@fedora:~/src$ LC_ALL=ru_RU.iso88595 LANG=ru_RU.iso88595 ./a.out ISO-8859-5 ``` ## macOS ``` ru_RU.ISO8859-5 ru_RU.CP866 ru_RU.CP1251 ru_RU.UTF-8 ru_RU.KOI8-R ru_RU zaytsev@Yurys-MBP mc % LANG=ru_RU.CP866 LC_ALL=ru_RU.CP866 ./a.out CP866 zaytsev@Yurys-MBP mc % LANG=ru_RU.ISO8859-5 LC_ALL=ru_RU.ISO8859-5 ./a.out ISO8859-5 ``` ## FreeBSD ``` ru_RU.CP1251 ru_RU.CP866 ru_RU.ISO8859-5 ru_RU.KOI8-R ru_RU.UTF-8 zaytsev@cfarm240:~ $ LANG=ru_RU.ISO8859-5 LC_ALL=ru_RU.ISO8859-5 ./a.out ISO8859-5 zaytsev@cfarm240:~ $ LANG=ru_RU.CP866 LC_ALL=ru_RU.CP866 ./a.out CP866 ``` ## Solaris ``` ru ru.UTF-8 ru.koi8-r ru_RU ru_RU.ANSI1251 ru_RU.ISO8859-5 ru_RU.KOI8-R ru_RU.UTF-8 zaytsev@gcc-solaris10:~/src$ LANG=ru_RU.ISO8859-5 LC_ALL=ru_RU.ISO8859-5 ./a.o> ISO8859-5 zaytsev@gcc-solaris10:~/src$ LANG=ru.koi8-r LC_ALL=ru.koi8-r ./a.out KOI8-R ``` ## AIX ``` zaytsev@gcc111:[/home/zaytsev]locale -a C POSIX en_US.8859-15 en_US.IBM-858 en_US.ISO8859-1 en_US -bash-5.1$ LANG=en_US.ISO8859-1 LC_ALL=en_US.ISO8859-1 ./a.out ISO8859-1 ``` 
Signed-off-by: Yury V. Zaytsev --- configure.ac | 1 + lib/strutil/strutil.c | 10 ++++------ m4.include/mc-i18n.m4 | 17 ++++++++++++++--- misc/mc.charsets.in | 10 +++++----- tests/lib/vfs/Makefile.am | 8 ++++++-- tests/lib/vfs/{mc.charsets => mc.charsets.in} | 2 +- tests/lib/vfs/path_cmp.c | 2 +- tests/lib/vfs/path_len.c | 2 +- tests/lib/vfs/path_manipulations.c | 2 +- tests/lib/vfs/path_recode.c | 2 -- tests/lib/vfs/path_serialize.c | 2 +- tests/lib/vfs/relative_cd.c | 2 -- tests/lib/vfs/vfs_get_encoding.c | 4 ++-- tests/lib/vfs/vfs_path_string_convert.c | 14 +++++++------- 14 files changed, 44 insertions(+), 34 deletions(-) rename tests/lib/vfs/{mc.charsets => mc.charsets.in} (71%) diff --git a/configure.ac b/configure.ac index 5a9a4447f..8ee63b632 100644 --- a/configure.ac +++ b/configure.ac @@ -705,6 +705,7 @@ tests/lib/mcconfig/Makefile tests/lib/search/Makefile tests/lib/strutil/Makefile tests/lib/vfs/Makefile +tests/lib/vfs/mc.charsets tests/lib/widget/Makefile tests/src/Makefile tests/src/filemanager/Makefile diff --git a/lib/strutil/strutil.c b/lib/strutil/strutil.c index bc7f6ae4a..e7515efbd 100644 --- a/lib/strutil/strutil.c +++ b/lib/strutil/strutil.c @@ -57,18 +57,16 @@ static const char *const str_utf8_encodings[] = { /* standard 8bit encodings, no wide or multibytes characters */ static const char *const str_8bit_encodings[] = { - /* Solaris has different names of Windows 1251 encoding */ -#ifdef __sun - "ansi-1251", - "ansi1251", -#else "cp-1251", "cp1251", -#endif + /* solaris */ + "ansi-1251", + "ansi1251", "cp-1250", "cp1250", "cp-866", "cp866", + /* glibc */ "ibm-866", "ibm866", "cp-850", diff --git a/m4.include/mc-i18n.m4 b/m4.include/mc-i18n.m4 index 8f1474139..8e7237cc9 100644 --- a/m4.include/mc-i18n.m4 +++ b/m4.include/mc-i18n.m4 @@ -18,16 +18,27 @@ AC_DEFUN([mc_I18N],[ have_charset=yes charset_msg="yes" + AC_CHECK_HEADERS([gnu/libc-version.h]) + dnl Solaris has different name of Windows 1251 encoding case $host_os in solaris*) - 
CP1251="ANSI-1251" + ENCODING_CP1251="ANSI-1251" ;; *) - CP1251="CP1251" + ENCODING_CP1251="CP1251" ;; esac - AC_SUBST(CP1251) + if test "x$ac_cv_header_gnu_libc_version_h" != "xno"; then + ENCODING_CP866="IBM866" + ENCODING_ISO8859="ISO-8859" + else + ENCODING_CP866="CP866" + ENCODING_ISO8859="ISO8859" + fi + + AC_SUBST(ENCODING_CP1251) + AC_SUBST(ENCODING_CP866) fi ]) diff --git a/misc/mc.charsets.in b/misc/mc.charsets.in index 50921b033..68f1b03af 100644 --- a/misc/mc.charsets.in +++ b/misc/mc.charsets.in @@ -1,13 +1,13 @@ ASCII 7-bit ASCII -ISO-8859-1 ISO 8859-1 -ISO-8859-2 ISO 8859-2 -ISO-8859-5 ISO 8859-5 +@ENCODING_ISO8859@-1 ISO 8859-1 +@ENCODING_ISO8859@-2 ISO 8859-2 +@ENCODING_ISO8859@-5 ISO 8859-5 CP1250 Windows 1250 -@CP1251@ Windows 1251 +@ENCODING_CP1251@ Windows 1251 CP437 CP 437 CP850 CP 850 CP852 CP 852 -IBM866 CP 866 +@ENCODING_CP866@ CP 866 KOI8-R KOI8-R KOI8-U KOI8-U UTF-8 UTF-8 diff --git a/tests/lib/vfs/Makefile.am b/tests/lib/vfs/Makefile.am index e5e78f835..9e551fd73 100644 --- a/tests/lib/vfs/Makefile.am +++ b/tests/lib/vfs/Makefile.am @@ -1,7 +1,7 @@ PACKAGE_STRING = "/lib/vfs" AM_CPPFLAGS = \ - -DTEST_SHARE_DIR=\"$(abs_srcdir)\" \ + -DTEST_SHARE_DIR=\"$(abs_builddir)\" \ $(GLIB_CFLAGS) \ -I$(top_srcdir) \ -I$(top_srcdir)/lib/vfs \ @@ -9,7 +9,11 @@ AM_CPPFLAGS = \ AM_LDFLAGS = @TESTS_LDFLAGS@ -EXTRA_DIST = mc.charsets +EXTRA_DIST = mc.charsets.in + +if CHARSET +CLEANFILES = mc.charsets +endif LIBS = @CHECK_LIBS@ \ $(top_builddir)/lib/libmc.la diff --git a/tests/lib/vfs/mc.charsets b/tests/lib/vfs/mc.charsets.in similarity index 71% rename from tests/lib/vfs/mc.charsets rename to tests/lib/vfs/mc.charsets.in index f51ee2a79..92fcb9e29 100644 --- a/tests/lib/vfs/mc.charsets +++ b/tests/lib/vfs/mc.charsets.in @@ -1,5 +1,5 @@ ASCII 7-bit ASCII -IBM866 CP 866 +@ENCODING_CP866@ CP 866 KOI8-R KOI8-R KOI8-U KOI8-U UTF-8 UTF-8 diff --git a/tests/lib/vfs/path_cmp.c b/tests/lib/vfs/path_cmp.c index 28af0374f..4dc69da94 100644 --- 
a/tests/lib/vfs/path_cmp.c +++ b/tests/lib/vfs/path_cmp.c @@ -48,8 +48,8 @@ setup (void) vfs_init_localfs (); vfs_setup_work_dir (); - mc_global.sysconfig_dir = (char *) TEST_SHARE_DIR; #ifdef HAVE_CHARSET + mc_global.sysconfig_dir = (char *) TEST_SHARE_DIR; load_codepages_list (); #endif } diff --git a/tests/lib/vfs/path_len.c b/tests/lib/vfs/path_len.c index 3475c7c47..bfd87ae56 100644 --- a/tests/lib/vfs/path_len.c +++ b/tests/lib/vfs/path_len.c @@ -48,8 +48,8 @@ setup (void) vfs_init_localfs (); vfs_setup_work_dir (); - mc_global.sysconfig_dir = (char *) TEST_SHARE_DIR; #ifdef HAVE_CHARSET + mc_global.sysconfig_dir = (char *) TEST_SHARE_DIR; load_codepages_list (); #endif } diff --git a/tests/lib/vfs/path_manipulations.c b/tests/lib/vfs/path_manipulations.c index 36a17f4e6..0559b1912 100644 --- a/tests/lib/vfs/path_manipulations.c +++ b/tests/lib/vfs/path_manipulations.c @@ -68,8 +68,8 @@ setup (void) init_test_classes (); - mc_global.sysconfig_dir = (char *) TEST_SHARE_DIR; #ifdef HAVE_CHARSET + mc_global.sysconfig_dir = (char *) TEST_SHARE_DIR; load_codepages_list (); #endif } diff --git a/tests/lib/vfs/path_recode.c b/tests/lib/vfs/path_recode.c index a6f6abe65..d7461329d 100644 --- a/tests/lib/vfs/path_recode.c +++ b/tests/lib/vfs/path_recode.c @@ -71,8 +71,6 @@ test_init_vfs (const char *encoding) vfs_init_localfs (); vfs_setup_work_dir (); - mc_global.sysconfig_dir = (char *) TEST_SHARE_DIR; - mc_global.sysconfig_dir = (char *) TEST_SHARE_DIR; load_codepages_list (); } diff --git a/tests/lib/vfs/path_serialize.c b/tests/lib/vfs/path_serialize.c index 477088544..af8d47dd2 100644 --- a/tests/lib/vfs/path_serialize.c +++ b/tests/lib/vfs/path_serialize.c @@ -60,8 +60,8 @@ setup (void) vfs_init_class (&vfs_test_ops3, "testfs3", VFSF_UNKNOWN, "test3"); vfs_register_class (&vfs_test_ops3); - mc_global.sysconfig_dir = (char *) TEST_SHARE_DIR; #ifdef HAVE_CHARSET + mc_global.sysconfig_dir = (char *) TEST_SHARE_DIR; load_codepages_list (); #endif } diff --git 
a/tests/lib/vfs/relative_cd.c b/tests/lib/vfs/relative_cd.c index 1ba688590..93ec93322 100644 --- a/tests/lib/vfs/relative_cd.c +++ b/tests/lib/vfs/relative_cd.c @@ -84,8 +84,6 @@ setup (void) vfs_test_ops1->chdir = test_chdir; vfs_register_class (vfs_test_ops1); - mc_global.sysconfig_dir = (char *) TEST_SHARE_DIR; - vfs_local_ops->chdir = test_chdir; test_chdir__init (); diff --git a/tests/lib/vfs/vfs_get_encoding.c b/tests/lib/vfs/vfs_get_encoding.c index d599cdf93..1ce5a364a 100644 --- a/tests/lib/vfs/vfs_get_encoding.c +++ b/tests/lib/vfs/vfs_get_encoding.c @@ -112,8 +112,8 @@ static const struct test_vfs_get_encoding_ds NULL }, { /* 14 */ - "/aaaa/#enc:UTF-8/bbbb/#enc:KOI8-R#enc:IBM866/cccc", - "KOI8-R#enc:IBM866" + "/aaaa/#enc:UTF-8/bbbb/#enc:KOI8-R#enc:CP866/cccc", + "KOI8-R#enc:CP866" } }; /* *INDENT-ON* */ diff --git a/tests/lib/vfs/vfs_path_string_convert.c b/tests/lib/vfs/vfs_path_string_convert.c index 1dcf93c95..c9ff77549 100644 --- a/tests/lib/vfs/vfs_path_string_convert.c +++ b/tests/lib/vfs/vfs_path_string_convert.c @@ -125,36 +125,36 @@ static const struct test_from_to_string_ds &vfs_test_ops3 }, { /* 4. */ - "/#test1/bla-bla1/#enc:IBM866/some/path/#test2/bla-bla2/#enc:KOI8-R/some/path#test3/111/22/33", - "/test1://#enc:IBM866/bla-bla1/some/path/test2://#enc:KOI8-R/bla-bla2/some/path/test3://111/22/33", + "/#test1/bla-bla1/#enc:CP866/some/path/#test2/bla-bla2/#enc:KOI8-R/some/path#test3/111/22/33", + "/test1://#enc:CP866/bla-bla1/some/path/test2://#enc:KOI8-R/bla-bla2/some/path/test3://111/22/33", "111/22/33", 4, &vfs_test_ops3 }, { /* 5. */ - "/#test1/bla-bla1/some/path/#test2/bla-bla2/#enc:IBM866/#enc:KOI8-R/some/path#test3/111/22/33", + "/#test1/bla-bla1/some/path/#test2/bla-bla2/#enc:CP866/#enc:KOI8-R/some/path#test3/111/22/33", "/test1://bla-bla1/some/path/test2://#enc:KOI8-R/bla-bla2/some/path/test3://111/22/33", "111/22/33", 4, &vfs_test_ops3 }, { /* 6. 
*/ - "/#test1/bla-bla1/some/path/#test2/bla-bla2/#enc:IBM866/some/#enc:KOI8-R/path#test3/111/22/33", + "/#test1/bla-bla1/some/path/#test2/bla-bla2/#enc:CP866/some/#enc:KOI8-R/path#test3/111/22/33", "/test1://bla-bla1/some/path/test2://#enc:KOI8-R/bla-bla2/some/path/test3://111/22/33", "111/22/33", 4, &vfs_test_ops3 }, { /* 7. */ - "/#test1/bla-bla1/some/path/#test2/#enc:IBM866/bla-bla2/#enc:KOI8-R/some/path#test3/111/22/33", + "/#test1/bla-bla1/some/path/#test2/#enc:CP866/bla-bla2/#enc:KOI8-R/some/path#test3/111/22/33", "/test1://bla-bla1/some/path/test2://#enc:KOI8-R/bla-bla2/some/path/test3://111/22/33", "111/22/33", 4, &vfs_test_ops3 }, { /* 8. */ - "/#test1/bla-bla1/some/path/#enc:IBM866/#test2/bla-bla2/#enc:KOI8-R/some/path#test3/111/22/33", - "/test1://#enc:IBM866/bla-bla1/some/path/test2://#enc:KOI8-R/bla-bla2/some/path/test3://111/22/33", + "/#test1/bla-bla1/some/path/#enc:CP866/#test2/bla-bla2/#enc:KOI8-R/some/path#test3/111/22/33", + "/test1://#enc:CP866/bla-bla1/some/path/test2://#enc:KOI8-R/bla-bla2/some/path/test3://111/22/33", "111/22/33", 4, &vfs_test_ops3 From 3526dfcf34fb304476d6e9d63033f84c66f02992 Mon Sep 17 00:00:00 2001 From: Andrew Borodin Date: Fri, 13 Sep 2024 12:29:37 +0300 Subject: [PATCH 09/13] (str_nconvert_to_display, str_nconvert_to_input): handle invalid conversion. Signed-off-by: Andrew Borodin Signed-off-by: Yury V. 
Zaytsev --- lib/charsets.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/charsets.c b/lib/charsets.c index 0df5ecb21..ccaf4f6ae 100644 --- a/lib/charsets.c +++ b/lib/charsets.c @@ -364,6 +364,8 @@ str_nconvert_to_display (const char *str, int len) return g_string_new (str); conv = str_crt_conv_from (cp_source); + if (conv == INVALID_CONV) + return g_string_new (str); buff = g_string_new (""); str_nconvert (conv, str, len, buff); @@ -396,6 +398,8 @@ str_nconvert_to_input (const char *str, int len) return g_string_new (str); conv = str_crt_conv_to (cp_source); + if (conv == INVALID_CONV) + return g_string_new (str); buff = g_string_new (""); str_nconvert (conv, str, len, buff); From 35c103fa970bf825627a047f5d3ef2414548d6fb Mon Sep 17 00:00:00 2001 From: Andrew Borodin Date: Fri, 13 Sep 2024 12:42:30 +0300 Subject: [PATCH 10/13] (str_crt_conv_from): handle INVALID_CONV. Signed-off-by: Andrew Borodin Signed-off-by: Yury V. Zaytsev --- lib/strutil/strutil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/strutil/strutil.c b/lib/strutil/strutil.c index e7515efbd..c833f7926 100644 --- a/lib/strutil/strutil.c +++ b/lib/strutil/strutil.c @@ -272,7 +272,7 @@ str_crt_conv_from (const char *from_enc) void str_close_conv (GIConv conv) { - if (conv != str_cnv_not_convert) + if (conv != INVALID_CONV && conv != str_cnv_not_convert) g_iconv_close (conv); } From c079a09612c1a52811c277d0cf0c5cbc1f68b602 Mon Sep 17 00:00:00 2001 From: "Yury V. Zaytsev" Date: Fri, 13 Sep 2024 12:01:07 +0200 Subject: [PATCH 11/13] tests: edit_complete_word_cmd - fix `-Winvalid-source-encoding` to make it easier to debug Signed-off-by: Yury V. 
Zaytsev --- tests/src/editor/edit_complete_word_cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/src/editor/edit_complete_word_cmd.c b/tests/src/editor/edit_complete_word_cmd.c index 2415b1797..3924104a4 100644 --- a/tests/src/editor/edit_complete_word_cmd.c +++ b/tests/src/editor/edit_complete_word_cmd.c @@ -241,12 +241,12 @@ static const struct test_autocomplete_ds 1, "KOI8-R", 0, - "", + "\xDC\xDF\xCA\xC3\xD5\xCB\xC5\xCE", // эъйцукен 8, 2, 136, - "" + "\xDC\xDF\xCA\xC3\xD5\xCB\xC5\xCE" // эъйцукен }, }; /* *INDENT-ON* */ @@ -328,7 +328,7 @@ static const struct test_autocomplete_single_ds 0, 145, - "" + "\xC6\xD9\xD7\xC1" // фыва }, }; /* *INDENT-ON* */ From f4ef5c64a4dcf078b6dc1336ab6dbb121d83d485 Mon Sep 17 00:00:00 2001 From: Andrew Borodin Date: Tue, 24 Sep 2024 12:15:17 +0200 Subject: [PATCH 12/13] (edit_draw_this_line): fix printable character recognition in 8-bit locales when displaying UTF-8 Signed-off-by: Andrew Borodin Signed-off-by: Yury V. Zaytsev --- src/editor/editdraw.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/editor/editdraw.c b/src/editor/editdraw.c index bdaea7493..c1706d266 100644 --- a/src/editor/editdraw.c +++ b/src/editor/editdraw.c @@ -573,6 +573,7 @@ edit_draw_this_line (WEdit *edit, off_t b, long row, long start_col, long end_co unsigned int c; gboolean wide_width_char = FALSE; gboolean control_char = FALSE; + gboolean printable; p->ch = 0; p->style = q == edit->buffer.curs1 ? 
MOD_CURSOR : 0; @@ -759,34 +760,30 @@ edit_draw_this_line (WEdit *edit, off_t b, long row, long start_col, long end_co control_char = TRUE; break; } + #ifdef HAVE_CHARSET if (edit->utf8) { - if (g_unichar_isprint (c)) - p->ch = c; + if (mc_global.utf8_display) + /* c is gunichar */ + printable = g_unichar_isprint (c); else - { - p->ch = '.'; - p->style = abn_style; - } - p++; + /* c was gunichar; now c is 8-bit char converted from gunichar */ + printable = is_printable (c); } else #endif + /* c is 8-bit char */ + printable = is_printable (c); + + if (printable) + p->ch = c; + else { - if ((mc_global.utf8_display && g_unichar_isprint (c)) || - (!mc_global.utf8_display && is_printable (c))) - { - p->ch = c; - p++; - } - else - { - p->ch = '.'; - p->style = abn_style; - p++; - } + p->ch = '.'; + p->style = abn_style; } + p++; col++; break; } /* case */ From 8f723b8a7f93cf96c6b05fdd89cbf30a284eb546 Mon Sep 17 00:00:00 2001 From: "Yury V. Zaytsev" Date: Thu, 3 Oct 2024 13:29:28 +0200 Subject: [PATCH 13/13] charset: reimplement `is_supported_encoding` to use iconv instead of mc built-in charset table Signed-off-by: Yury V. 
Zaytsev --- lib/charsets.c | 17 ++++---- lib/utilunix.c | 32 ++++++++++++--- lib/vfs/path.c | 69 +++++++++++++++----------------- lib/vfs/path.h | 1 + tests/lib/vfs/vfs_get_encoding.c | 2 +- 5 files changed, 69 insertions(+), 52 deletions(-) diff --git a/lib/charsets.c b/lib/charsets.c index ccaf4f6ae..f57f8a577 100644 --- a/lib/charsets.c +++ b/lib/charsets.c @@ -267,17 +267,16 @@ get_codepage_index (const char *id) gboolean is_supported_encoding (const char *encoding) { - gboolean result = FALSE; - guint t; + GIConv coder; + gboolean result; - for (t = 0; t < codepages->len; t++) - { - const char *id; - - id = ((codepage_desc *) g_ptr_array_index (codepages, t))->id; - result |= (g_ascii_strncasecmp (encoding, id, strlen (id)) == 0); - } + if (encoding == NULL) + return FALSE; + coder = str_crt_conv_from (encoding); + result = coder != INVALID_CONV; + if (result) + str_close_conv (coder); return result; } diff --git a/lib/utilunix.c b/lib/utilunix.c index 97f8349d9..a2c22f9de 100644 --- a/lib/utilunix.c +++ b/lib/utilunix.c @@ -920,10 +920,20 @@ canonicalize_pathname_custom (char *path, canon_path_flags_t flags) { /* "token/../foo" -> "foo" */ #ifdef HAVE_CHARSET - if ((strncmp (s, VFS_ENCODING_PREFIX, enc_prefix_len) == 0) - && (is_supported_encoding (s + enc_prefix_len))) - /* special case: remove encoding */ - str_move (s, p + 1); + if (strncmp (s, VFS_ENCODING_PREFIX, enc_prefix_len) == 0) + { + char *enc; + + enc = vfs_get_encoding (s, -1); + + if (is_supported_encoding (enc)) + /* special case: remove encoding */ + str_move (s, p + 1); + else + str_move (s, p + 4); + + g_free (enc); + } else #endif /* HAVE_CHARSET */ str_move (s, p + 4); @@ -947,9 +957,18 @@ canonicalize_pathname_custom (char *path, canon_path_flags_t flags) if (s == lpath + 1) s[0] = '\0'; #ifdef HAVE_CHARSET - else if ((strncmp (s, VFS_ENCODING_PREFIX, enc_prefix_len) == 0) - && (is_supported_encoding (s + enc_prefix_len))) + else if (strncmp (s, VFS_ENCODING_PREFIX, enc_prefix_len) == 0) 
{ + char *enc; + gboolean ok; + + enc = vfs_get_encoding (s, -1); + ok = is_supported_encoding (enc); + g_free (enc); + + if (!ok) + goto last; + /* special case: remove encoding */ s[0] = '.'; s[1] = '.'; @@ -966,6 +985,7 @@ canonicalize_pathname_custom (char *path, canon_path_flags_t flags) #endif /* HAVE_CHARSET */ else { + last: if (s >= lpath + url_delim_len && strncmp (s - url_delim_len, VFS_PATH_URL_DELIMITER, url_delim_len) == 0) *s = '\0'; diff --git a/lib/vfs/path.c b/lib/vfs/path.c index d6cec2136..c66406342 100644 --- a/lib/vfs/path.c +++ b/lib/vfs/path.c @@ -182,42 +182,6 @@ vfs_canon (const char *path) return result; } -/* --------------------------------------------------------------------------------------------- */ - -#ifdef HAVE_CHARSET -/** get encoding after last #enc: or NULL, if part does not contain #enc: - * - * @param path null-terminated string - * @param len the maximum length of path, where #enc: should be searched - * - * @return newly allocated string. - */ - -static char * -vfs_get_encoding (const char *path, ssize_t len) -{ - char *semi; - - /* try found #enc: */ - semi = g_strrstr_len (path, len, VFS_ENCODING_PREFIX); - if (semi == NULL) - return NULL; - - if (semi == path || IS_PATH_SEP (semi[-1])) - { - char *slash; - - semi += strlen (VFS_ENCODING_PREFIX); /* skip "#enc:" */ - slash = strchr (semi, PATH_SEP); - if (slash != NULL) - return g_strndup (semi, slash - semi); - return g_strdup (semi); - } - - return vfs_get_encoding (path, semi - path); -} -#endif - /* --------------------------------------------------------------------------------------------- */ /** Extract the hostname and username from the path * @@ -1071,6 +1035,39 @@ vfs_prefix_to_class (const char *prefix) #ifdef HAVE_CHARSET +/** get encoding after last #enc: or NULL, if part does not contain #enc: + * + * @param path null-terminated string + * @param len the maximum length of path, where #enc: should be searched + * + * @return newly allocated string. 
+ */ + +char * +vfs_get_encoding (const char *path, ssize_t len) +{ + char *semi; + + /* try found #enc: */ + semi = g_strrstr_len (path, len, VFS_ENCODING_PREFIX); + if (semi == NULL) + return NULL; + + if (semi == path || IS_PATH_SEP (semi[-1])) + { + char *slash; + + semi += strlen (VFS_ENCODING_PREFIX); /* skip "#enc:" */ + slash = strchr (semi, PATH_SEP); + if (slash != NULL) + return g_strndup (semi, slash - semi); + return g_strdup (semi); + } + + return vfs_get_encoding (path, semi - path); +} + +/* --------------------------------------------------------------------------------------------- */ /** * Check if need cleanup charset converter for vfs_path_element_t * diff --git a/lib/vfs/path.h b/lib/vfs/path.h index 8ec440985..b508e5f0e 100644 --- a/lib/vfs/path.h +++ b/lib/vfs/path.h @@ -82,6 +82,7 @@ void vfs_path_element_free (vfs_path_element_t * element); struct vfs_class *vfs_prefix_to_class (const char *prefix); #ifdef HAVE_CHARSET +char *vfs_get_encoding(const char *path, ssize_t len); gboolean vfs_path_element_need_cleanup_converter (const vfs_path_element_t * element); vfs_path_t *vfs_path_change_encoding (vfs_path_t * vpath, const char *encoding); #endif diff --git a/tests/lib/vfs/vfs_get_encoding.c b/tests/lib/vfs/vfs_get_encoding.c index 1ce5a364a..13cd6763a 100644 --- a/tests/lib/vfs/vfs_get_encoding.c +++ b/tests/lib/vfs/vfs_get_encoding.c @@ -27,7 +27,7 @@ #include "tests/mctest.h" -#include "lib/vfs/path.c" /* for testing of static vfs_get_encoding() */ +#include "lib/vfs/path.h" /* --------------------------------------------------------------------------------------------- */