From 8f723b8a7f93cf96c6b05fdd89cbf30a284eb546 Mon Sep 17 00:00:00 2001 From: "Yury V. Zaytsev" Date: Thu, 3 Oct 2024 13:29:28 +0200 Subject: [PATCH] charset: reimplement `is_supported_encoding` to use iconv instead of mc built-in charset table Signed-off-by: Yury V. Zaytsev --- lib/charsets.c | 17 ++++---- lib/utilunix.c | 32 ++++++++++++--- lib/vfs/path.c | 69 +++++++++++++++----------------- lib/vfs/path.h | 1 + tests/lib/vfs/vfs_get_encoding.c | 2 +- 5 files changed, 69 insertions(+), 52 deletions(-) diff --git a/lib/charsets.c b/lib/charsets.c index ccaf4f6ae..f57f8a577 100644 --- a/lib/charsets.c +++ b/lib/charsets.c @@ -267,17 +267,16 @@ get_codepage_index (const char *id) gboolean is_supported_encoding (const char *encoding) { - gboolean result = FALSE; - guint t; + GIConv coder; + gboolean result; - for (t = 0; t < codepages->len; t++) - { - const char *id; - - id = ((codepage_desc *) g_ptr_array_index (codepages, t))->id; - result |= (g_ascii_strncasecmp (encoding, id, strlen (id)) == 0); - } + if (encoding == NULL) + return FALSE; + coder = str_crt_conv_from (encoding); + result = coder != INVALID_CONV; + if (result) + str_close_conv (coder); return result; } diff --git a/lib/utilunix.c b/lib/utilunix.c index 97f8349d9..a2c22f9de 100644 --- a/lib/utilunix.c +++ b/lib/utilunix.c @@ -920,10 +920,20 @@ canonicalize_pathname_custom (char *path, canon_path_flags_t flags) { /* "token/../foo" -> "foo" */ #ifdef HAVE_CHARSET - if ((strncmp (s, VFS_ENCODING_PREFIX, enc_prefix_len) == 0) - && (is_supported_encoding (s + enc_prefix_len))) - /* special case: remove encoding */ - str_move (s, p + 1); + if (strncmp (s, VFS_ENCODING_PREFIX, enc_prefix_len) == 0) + { + char *enc; + + enc = vfs_get_encoding (s, -1); + + if (is_supported_encoding (enc)) + /* special case: remove encoding */ + str_move (s, p + 1); + else + str_move (s, p + 4); + + g_free (enc); + } else #endif /* HAVE_CHARSET */ str_move (s, p + 4); @@ -947,9 +957,18 @@ canonicalize_pathname_custom (char 
*path, canon_path_flags_t flags) if (s == lpath + 1) s[0] = '\0'; #ifdef HAVE_CHARSET - else if ((strncmp (s, VFS_ENCODING_PREFIX, enc_prefix_len) == 0) - && (is_supported_encoding (s + enc_prefix_len))) + else if (strncmp (s, VFS_ENCODING_PREFIX, enc_prefix_len) == 0) { + char *enc; + gboolean ok; + + enc = vfs_get_encoding (s, -1); + ok = is_supported_encoding (enc); + g_free (enc); + + if (!ok) + goto last; + /* special case: remove encoding */ s[0] = '.'; s[1] = '.'; @@ -966,6 +985,7 @@ canonicalize_pathname_custom (char *path, canon_path_flags_t flags) #endif /* HAVE_CHARSET */ else { + last: if (s >= lpath + url_delim_len && strncmp (s - url_delim_len, VFS_PATH_URL_DELIMITER, url_delim_len) == 0) *s = '\0'; diff --git a/lib/vfs/path.c b/lib/vfs/path.c index d6cec2136..c66406342 100644 --- a/lib/vfs/path.c +++ b/lib/vfs/path.c @@ -182,42 +182,6 @@ vfs_canon (const char *path) return result; } -/* --------------------------------------------------------------------------------------------- */ - -#ifdef HAVE_CHARSET -/** get encoding after last #enc: or NULL, if part does not contain #enc: - * - * @param path null-terminated string - * @param len the maximum length of path, where #enc: should be searched - * - * @return newly allocated string. 
- */ - -static char * -vfs_get_encoding (const char *path, ssize_t len) -{ - char *semi; - - /* try found #enc: */ - semi = g_strrstr_len (path, len, VFS_ENCODING_PREFIX); - if (semi == NULL) - return NULL; - - if (semi == path || IS_PATH_SEP (semi[-1])) - { - char *slash; - - semi += strlen (VFS_ENCODING_PREFIX); /* skip "#enc:" */ - slash = strchr (semi, PATH_SEP); - if (slash != NULL) - return g_strndup (semi, slash - semi); - return g_strdup (semi); - } - - return vfs_get_encoding (path, semi - path); -} -#endif - /* --------------------------------------------------------------------------------------------- */ /** Extract the hostname and username from the path * @@ -1071,6 +1035,39 @@ vfs_prefix_to_class (const char *prefix) #ifdef HAVE_CHARSET +/** Get the encoding after the last #enc:, or NULL if the path does not contain #enc: + * + * @param path null-terminated string + * @param len the maximum length of path, where #enc: should be searched + * + * @return newly allocated string, or NULL if no #enc: element was found. + */ + +char * +vfs_get_encoding (const char *path, ssize_t len) +{ + char *semi; + + /* try to find #enc: */ + semi = g_strrstr_len (path, len, VFS_ENCODING_PREFIX); + if (semi == NULL) + return NULL; + + if (semi == path || IS_PATH_SEP (semi[-1])) + { + char *slash; + + semi += strlen (VFS_ENCODING_PREFIX); /* skip "#enc:" */ + slash = strchr (semi, PATH_SEP); + if (slash != NULL) + return g_strndup (semi, slash - semi); + return g_strdup (semi); + } + + return vfs_get_encoding (path, semi - path); +} + +/* --------------------------------------------------------------------------------------------- */ /** * Check if need cleanup charset converter for vfs_path_element_t * diff --git a/lib/vfs/path.h b/lib/vfs/path.h index 8ec440985..b508e5f0e 100644 --- a/lib/vfs/path.h +++ b/lib/vfs/path.h @@ -82,6 +82,7 @@ void vfs_path_element_free (vfs_path_element_t * element); struct vfs_class *vfs_prefix_to_class (const char *prefix); #ifdef HAVE_CHARSET +char *vfs_get_encoding(const char *path, 
ssize_t len); gboolean vfs_path_element_need_cleanup_converter (const vfs_path_element_t * element); vfs_path_t *vfs_path_change_encoding (vfs_path_t * vpath, const char *encoding); #endif diff --git a/tests/lib/vfs/vfs_get_encoding.c b/tests/lib/vfs/vfs_get_encoding.c index 1ce5a364a..13cd6763a 100644 --- a/tests/lib/vfs/vfs_get_encoding.c +++ b/tests/lib/vfs/vfs_get_encoding.c @@ -27,7 +27,7 @@ #include "tests/mctest.h" -#include "lib/vfs/path.c" /* for testing of static vfs_get_encoding() */ +#include "lib/vfs/path.h" /* --------------------------------------------------------------------------------------------- */