charset: reimplement is_supported_encoding to use iconv instead of mc built-in charset table

Signed-off-by: Yury V. Zaytsev <yury@shurup.com>
This commit is contained in:
Yury V. Zaytsev 2024-10-03 13:29:28 +02:00
parent f4ef5c64a4
commit 8f723b8a7f
5 changed files with 69 additions and 52 deletions

View File

@ -267,17 +267,16 @@ get_codepage_index (const char *id)
/*
 * is_supported_encoding(): tell whether the given encoding name is usable.
 *
 * NOTE: this hunk is a side-by-side rendering; on every line the removed code comes first
 * and the added code second. Lines present on one side only appear on their own.
 *
 * Removed version: walks the whole `codepages` array and ORs into `result` whether
 * `encoding` starts (ASCII case-insensitively, over strlen (id) bytes) with an entry's id.
 *
 * Added version: returns FALSE for a NULL `encoding`; otherwise asks str_crt_conv_from()
 * for a converter, reports whether the result differs from INVALID_CONV, and closes the
 * converter again when it was created.
 * NOTE(review): the commit message says this is backed by iconv -- confirm in
 * str_crt_conv_from().
 */
gboolean gboolean
is_supported_encoding (const char *encoding) is_supported_encoding (const char *encoding)
{ {
gboolean result = FALSE; GIConv coder;
guint t; gboolean result;
for (t = 0; t < codepages->len; t++) if (encoding == NULL)
{ return FALSE;
const char *id;
id = ((codepage_desc *) g_ptr_array_index (codepages, t))->id;
result |= (g_ascii_strncasecmp (encoding, id, strlen (id)) == 0);
}
coder = str_crt_conv_from (encoding);
result = coder != INVALID_CONV;
if (result)
str_close_conv (coder);
return result; return result;
} }

View File

@ -920,11 +920,21 @@ canonicalize_pathname_custom (char *path, canon_path_flags_t flags)
{ {
/* "token/../foo" -> "foo" */ /* "token/../foo" -> "foo" */
#ifdef HAVE_CHARSET #ifdef HAVE_CHARSET
if ((strncmp (s, VFS_ENCODING_PREFIX, enc_prefix_len) == 0) if (strncmp (s, VFS_ENCODING_PREFIX, enc_prefix_len) == 0)
&& (is_supported_encoding (s + enc_prefix_len))) {
char *enc;
enc = vfs_get_encoding (s, -1);
if (is_supported_encoding (enc))
/* special case: remove encoding */ /* special case: remove encoding */
str_move (s, p + 1); str_move (s, p + 1);
else else
str_move (s, p + 4);
g_free (enc);
}
else
#endif /* HAVE_CHARSET */ #endif /* HAVE_CHARSET */
str_move (s, p + 4); str_move (s, p + 4);
} }
@ -947,9 +957,18 @@ canonicalize_pathname_custom (char *path, canon_path_flags_t flags)
if (s == lpath + 1) if (s == lpath + 1)
s[0] = '\0'; s[0] = '\0';
#ifdef HAVE_CHARSET #ifdef HAVE_CHARSET
else if ((strncmp (s, VFS_ENCODING_PREFIX, enc_prefix_len) == 0) else if (strncmp (s, VFS_ENCODING_PREFIX, enc_prefix_len) == 0)
&& (is_supported_encoding (s + enc_prefix_len)))
{ {
char *enc;
gboolean ok;
enc = vfs_get_encoding (s, -1);
ok = is_supported_encoding (enc);
g_free (enc);
if (!ok)
goto last;
/* special case: remove encoding */ /* special case: remove encoding */
s[0] = '.'; s[0] = '.';
s[1] = '.'; s[1] = '.';
@ -966,6 +985,7 @@ canonicalize_pathname_custom (char *path, canon_path_flags_t flags)
#endif /* HAVE_CHARSET */ #endif /* HAVE_CHARSET */
else else
{ {
last:
if (s >= lpath + url_delim_len if (s >= lpath + url_delim_len
&& strncmp (s - url_delim_len, VFS_PATH_URL_DELIMITER, url_delim_len) == 0) && strncmp (s - url_delim_len, VFS_PATH_URL_DELIMITER, url_delim_len) == 0)
*s = '\0'; *s = '\0';

View File

@ -182,42 +182,6 @@ vfs_canon (const char *path)
return result; return result;
} }
/* --------------------------------------------------------------------------------------------- */
#ifdef HAVE_CHARSET
/**
 * Get the encoding name that follows the last "#enc:" marker of a path.
 *
 * A marker only counts when it sits at the very beginning of @path or directly after a
 * path separator; a marker found elsewhere is ignored and the search is repeated on the
 * part of @path that precedes it.
 *
 * @param path null-terminated string
 * @param len the maximum length of path in which "#enc:" is searched; handed to
 *            g_strrstr_len() unchanged
 *
 * @return newly allocated string holding the text between the marker and the next path
 *         separator (or the end of the string), or NULL if no suitable marker was found.
 */
static char *
vfs_get_encoding (const char *path, ssize_t len)
{
char *semi;
/* look for the last "#enc:" within the first len bytes of path */
semi = g_strrstr_len (path, len, VFS_ENCODING_PREFIX);
if (semi == NULL)
return NULL;
/* the marker must start a path element: beginning of path or right after a separator */
if (semi == path || IS_PATH_SEP (semi[-1]))
{
char *slash;
semi += strlen (VFS_ENCODING_PREFIX); /* skip "#enc:" */
/* the encoding name ends at the next separator, if there is one */
slash = strchr (semi, PATH_SEP);
if (slash != NULL)
return g_strndup (semi, slash - semi);
return g_strdup (semi);
}
/* marker was inside a path element: retry on the part of path before it */
return vfs_get_encoding (path, semi - path);
}
#endif
/* --------------------------------------------------------------------------------------------- */ /* --------------------------------------------------------------------------------------------- */
/** Extract the hostname and username from the path /** Extract the hostname and username from the path
* *
@ -1071,6 +1035,39 @@ vfs_prefix_to_class (const char *prefix)
#ifdef HAVE_CHARSET #ifdef HAVE_CHARSET
/**
 * Extract the encoding name that follows the last "#enc:" marker of a path.
 *
 * Only a marker located at the very beginning of @path or immediately after a path
 * separator is accepted. A marker found anywhere else is skipped and the search goes on
 * in the part of @path that precedes it.
 *
 * @param path null-terminated string
 * @param len the maximum length of path in which "#enc:" is searched; passed to
 *            g_strrstr_len() as the search limit
 *
 * @return newly allocated string with the text between the marker and the next path
 *         separator (or the end of the string); NULL if there is no suitable marker.
 */
char *
vfs_get_encoding (const char *path, ssize_t len)
{
    ssize_t limit = len;

    for (;;)
    {
        const char *name;
        const char *name_end;
        char *hit;

        /* last marker inside the current search window */
        hit = g_strrstr_len (path, limit, VFS_ENCODING_PREFIX);
        if (hit == NULL)
            return NULL;

        /* a marker in the middle of a path element does not count: shrink the window */
        if (hit != path && !IS_PATH_SEP (hit[-1]))
        {
            limit = hit - path;
            continue;
        }

        name = hit + strlen (VFS_ENCODING_PREFIX);      /* skip "#enc:" */
        name_end = strchr (name, PATH_SEP);

        /* the name runs up to the next separator, or to the end of the string */
        return name_end != NULL ? g_strndup (name, name_end - name) : g_strdup (name);
    }
}
/* --------------------------------------------------------------------------------------------- */
/** /**
* Check if need cleanup charset converter for vfs_path_element_t * Check if need cleanup charset converter for vfs_path_element_t
* *

View File

@ -82,6 +82,7 @@ void vfs_path_element_free (vfs_path_element_t * element);
struct vfs_class *vfs_prefix_to_class (const char *prefix); struct vfs_class *vfs_prefix_to_class (const char *prefix);
#ifdef HAVE_CHARSET #ifdef HAVE_CHARSET
/* Returns a newly allocated copy of the encoding name found after the last "#enc:" in path, or NULL */
char *vfs_get_encoding (const char *path, ssize_t len);
gboolean vfs_path_element_need_cleanup_converter (const vfs_path_element_t * element); gboolean vfs_path_element_need_cleanup_converter (const vfs_path_element_t * element);
vfs_path_t *vfs_path_change_encoding (vfs_path_t * vpath, const char *encoding); vfs_path_t *vfs_path_change_encoding (vfs_path_t * vpath, const char *encoding);
#endif #endif

View File

@ -27,7 +27,7 @@
#include "tests/mctest.h" #include "tests/mctest.h"
#include "lib/vfs/path.c" /* for testing of static vfs_get_encoding() */ #include "lib/vfs/path.h"
/* --------------------------------------------------------------------------------------------- */ /* --------------------------------------------------------------------------------------------- */