mirror of
https://github.com/MidnightCommander/mc
synced 2024-12-22 12:32:40 +03:00
Ticket #1838: source_codepage autodetect with enca program.
Added support for autodetecting codepages via the 'enca' program. New parameter in the user configuration file (~/.mc/ini): [Misc] autodetect_codeset=(one of `enca --list languages | cut -d : -f1`). Signed-off-by: Slava Zanko <slavazanko@gmail.com> Minor code cleanup. Signed-off-by: Ilia Maslakov <il.smind@gmail.com> Type accuracy. Added missing includes. Signed-off-by: Andrew Borodin <aborodin@vmail.ru>
This commit is contained in:
parent
110977b161
commit
56d955152c
@ -7,7 +7,7 @@ CP1251 Windows 1251
|
||||
CP437 CP 437
|
||||
CP850 CP 850
|
||||
CP852 CP 852
|
||||
CP866 CP 866
|
||||
IBM866 CP 866
|
||||
KOI8-R KOI8-R
|
||||
KOI8-U KOI8-U
|
||||
UTF-8 UTF-8
|
||||
|
@ -334,6 +334,9 @@ edit_cmd (void)
|
||||
void
|
||||
edit_cmd_new (void)
|
||||
{
|
||||
#if HAVE_CHARSET
|
||||
source_codepage = default_source_codepage;
|
||||
#endif
|
||||
do_edit (NULL);
|
||||
}
|
||||
|
||||
|
149
src/ext.c
149
src/ext.c
@ -45,10 +45,12 @@
|
||||
#include "user.h"
|
||||
#include "main.h"
|
||||
#include "wtools.h"
|
||||
#include "ext.h"
|
||||
#include "execute.h"
|
||||
#include "history.h"
|
||||
#include "layout.h"
|
||||
#include "charsets.h" /* get_codepage_index */
|
||||
#include "selcodepage.h" /* do_set_codepage */
|
||||
#include "ext.h"
|
||||
|
||||
/* If set, we execute the file command to check the file type */
|
||||
int use_file_to_check_type = 1;
|
||||
@ -305,6 +307,45 @@ exec_extension (const char *filename, const char *lc_data, int *move_dir,
|
||||
# define FILE_CMD "file "
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Run cmd_file with args, put result into buf.
|
||||
* If error, put '\0' into buf[0]
|
||||
* Return 1 if the data is valid, 0 otherwise, -1 for fatal errors.
|
||||
*
|
||||
* NOTES: buf is null-terminated string.
|
||||
*/
|
||||
static int
|
||||
get_popen_information (const char *cmd_file, const char *args, char *buf, int buflen)
|
||||
{
|
||||
gboolean read_bytes = FALSE;
|
||||
char *command;
|
||||
FILE *f;
|
||||
|
||||
command = g_strconcat (cmd_file, args, " 2>/dev/null", (char *) NULL);
|
||||
f = popen (command, "r");
|
||||
g_free (command);
|
||||
|
||||
if (f != NULL) {
|
||||
#ifdef __QNXNTO__
|
||||
if (setvbuf (f, NULL, _IOFBF, 0) != 0) {
|
||||
(void) pclose (f);
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
read_bytes = (fgets (buf, buflen, f) != NULL);
|
||||
if (!read_bytes)
|
||||
buf[0] = '\0'; /* Paranoid termination */
|
||||
pclose (f);
|
||||
} else {
|
||||
buf[0] = '\0'; /* Paranoid termination */
|
||||
return -1;
|
||||
}
|
||||
|
||||
buf[buflen - 1] = '\0';
|
||||
|
||||
return read_bytes ? 1 : 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Run the "file" command on the local file.
|
||||
* Return 1 if the data is valid, 0 otherwise, -1 for fatal errors.
|
||||
@ -312,33 +353,38 @@ exec_extension (const char *filename, const char *lc_data, int *move_dir,
|
||||
static int
|
||||
get_file_type_local (const char *filename, char *buf, int buflen)
|
||||
{
|
||||
int read_bytes = 0;
|
||||
|
||||
char *tmp = name_quote (filename, 0);
|
||||
char *command = g_strconcat (FILE_CMD, tmp, " 2>/dev/null", (char *) NULL);
|
||||
FILE *f = popen (command, "r");
|
||||
char *tmp;
|
||||
int ret;
|
||||
|
||||
tmp = name_quote (filename, 0);
|
||||
ret = get_popen_information (FILE_CMD, tmp, buf, buflen);
|
||||
g_free (tmp);
|
||||
g_free (command);
|
||||
if (f != NULL) {
|
||||
#ifdef __QNXNTO__
|
||||
if (setvbuf (f, NULL, _IOFBF, 0) != 0) {
|
||||
(void)pclose (f);
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
read_bytes = (fgets (buf, buflen, f)
|
||||
!= NULL);
|
||||
if (read_bytes == 0)
|
||||
buf[0] = 0;
|
||||
pclose (f);
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return (read_bytes > 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Run the "enca" command on the local file.
|
||||
* Return 1 if the data is valid, 0 otherwise, -1 for fatal errors.
|
||||
*/
|
||||
static int
|
||||
get_file_encoding_local (const char *filename, char *buf, int buflen)
|
||||
{
|
||||
char *tmp, *lang, *args;
|
||||
int ret;
|
||||
|
||||
tmp = name_quote (filename, 0);
|
||||
lang = name_quote (autodetect_codeset, 0);
|
||||
args= g_strconcat (" -L", lang, " -i ", tmp, (char *) NULL);
|
||||
|
||||
ret = get_popen_information ("enca", args, buf, buflen);
|
||||
|
||||
g_free (args);
|
||||
g_free (lang);
|
||||
g_free (tmp);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Invoke the "file" command on the file and match its output against PTR.
|
||||
@ -353,39 +399,58 @@ regex_check_type (const char *filename, const char *ptr, int *have_type)
|
||||
|
||||
/* Following variables are valid if *have_type is 1 */
|
||||
static char content_string[2048];
|
||||
static int content_shift = 0;
|
||||
static char encoding_id[21]; /* CSISO51INISCYRILLIC -- 20 */
|
||||
static size_t content_shift = 0;
|
||||
static int got_data = 0;
|
||||
|
||||
if (!use_file_to_check_type) {
|
||||
if (!use_file_to_check_type)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!*have_type) {
|
||||
if (*have_type == 0) {
|
||||
char *realname; /* name used with "file" */
|
||||
char *localfile;
|
||||
int got_encoding_data;
|
||||
|
||||
/* Don't repeat even unsuccessful checks */
|
||||
*have_type = 1;
|
||||
|
||||
localfile = mc_getlocalcopy (filename);
|
||||
if (!localfile)
|
||||
if (localfile == NULL)
|
||||
return -1;
|
||||
|
||||
realname = localfile;
|
||||
got_data =
|
||||
get_file_type_local (localfile, content_string,
|
||||
sizeof (content_string));
|
||||
|
||||
got_encoding_data = is_autodetect_codeset_enabled
|
||||
? get_file_encoding_local (localfile, encoding_id, sizeof (encoding_id))
|
||||
: 0;
|
||||
|
||||
mc_ungetlocalcopy (filename, localfile, 0);
|
||||
|
||||
if (got_encoding_data > 0) {
|
||||
char *pp;
|
||||
int cp_id;
|
||||
|
||||
pp = strchr (encoding_id, '\n');
|
||||
if (pp != NULL)
|
||||
*pp = '\0';
|
||||
|
||||
cp_id = get_codepage_index (encoding_id);
|
||||
if (cp_id == -1)
|
||||
cp_id = default_source_codepage;
|
||||
|
||||
do_set_codepage (cp_id);
|
||||
}
|
||||
|
||||
got_data = get_file_type_local (localfile, content_string, sizeof (content_string));
|
||||
|
||||
if (got_data > 0) {
|
||||
char *pp;
|
||||
|
||||
/* Paranoid termination */
|
||||
content_string[sizeof (content_string) - 1] = 0;
|
||||
pp = strchr (content_string, '\n');
|
||||
if (pp != NULL)
|
||||
*pp = '\0';
|
||||
|
||||
if ((pp = strchr (content_string, '\n')) != 0)
|
||||
*pp = 0;
|
||||
|
||||
if (!strncmp (content_string, realname, strlen (realname))) {
|
||||
if (strncmp (content_string, realname, strlen (realname)) == 0) {
|
||||
/* Skip "realname: " */
|
||||
content_shift = strlen (realname);
|
||||
if (content_string[content_shift] == ':') {
|
||||
@ -393,21 +458,21 @@ regex_check_type (const char *filename, const char *ptr, int *have_type)
|
||||
for (content_shift++;
|
||||
content_string[content_shift] == ' '
|
||||
|| content_string[content_shift] == '\t';
|
||||
content_shift++);
|
||||
content_shift++)
|
||||
;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* No data */
|
||||
content_string[0] = 0;
|
||||
content_string[0] = '\0';
|
||||
}
|
||||
g_free (realname);
|
||||
}
|
||||
|
||||
if (got_data == -1) {
|
||||
if (got_data == -1)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (content_string[0]
|
||||
if (content_string[0] != '\0'
|
||||
&& mc_search (ptr, content_string + content_shift, MC_SEARCH_T_REGEX)) {
|
||||
found = 1;
|
||||
}
|
||||
|
@ -2271,6 +2271,7 @@ main (int argc, char *argv[])
|
||||
done_key ();
|
||||
#ifdef HAVE_CHARSET
|
||||
free_codepages_list ();
|
||||
g_free (autodetect_codeset);
|
||||
#endif
|
||||
str_uninit_strings ();
|
||||
|
||||
|
@ -44,7 +44,10 @@ extern int option_tab_spacing;
|
||||
|
||||
#ifdef HAVE_CHARSET
|
||||
extern int source_codepage;
|
||||
extern int default_source_codepage;
|
||||
extern int display_codepage;
|
||||
extern char* autodetect_codeset;
|
||||
extern gboolean is_autodetect_codeset_enabled;
|
||||
#else
|
||||
extern int eight_bit_clean;
|
||||
extern int full_eight_bits;
|
||||
|
@ -2346,6 +2346,10 @@ do_enter_on_file_entry (file_entry *fe)
|
||||
g_free (cmd);
|
||||
}
|
||||
|
||||
#if HAVE_CHARSET
|
||||
source_codepage = default_source_codepage;
|
||||
#endif
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -3140,7 +3144,7 @@ set_panel_encoding (WPanel *panel)
|
||||
const char *errmsg;
|
||||
int r;
|
||||
|
||||
r = select_charset (-1, -1, source_codepage, FALSE);
|
||||
r = select_charset (-1, -1, default_source_codepage, FALSE);
|
||||
|
||||
if (r == SELECT_CHARSET_CANCEL)
|
||||
return; /* Cancel */
|
||||
|
@ -40,7 +40,10 @@
|
||||
|
||||
/* Numbers of (file I/O) and (input/display) codepages. -1 if not selected */
|
||||
int source_codepage = -1;
|
||||
int default_source_codepage = -1;
|
||||
int display_codepage = -1;
|
||||
char* autodetect_codeset = NULL;
|
||||
gboolean is_autodetect_codeset_enabled = FALSE;
|
||||
|
||||
static unsigned char
|
||||
get_hotkey (int n)
|
||||
@ -108,19 +111,14 @@ select_charset (int center_y, int center_x, int current_charset, gboolean seldis
|
||||
}
|
||||
}
|
||||
|
||||
/* Set codepage */
|
||||
gboolean
|
||||
do_select_codepage (void)
|
||||
do_set_codepage (int codepage)
|
||||
{
|
||||
const char *errmsg = NULL;
|
||||
int r;
|
||||
|
||||
r = select_charset (-1, -1, source_codepage, FALSE);
|
||||
if (r == SELECT_CHARSET_CANCEL)
|
||||
return FALSE;
|
||||
|
||||
source_codepage = r;
|
||||
|
||||
errmsg = init_translation_table (r == SELECT_CHARSET_NO_TRANSLATE ?
|
||||
source_codepage = codepage;
|
||||
errmsg = init_translation_table (codepage == SELECT_CHARSET_NO_TRANSLATE ?
|
||||
display_codepage : source_codepage,
|
||||
display_codepage);
|
||||
if (errmsg != NULL)
|
||||
@ -129,4 +127,19 @@ do_select_codepage (void)
|
||||
return (errmsg == NULL);
|
||||
}
|
||||
|
||||
/* Show menu selecting codepage */
|
||||
|
||||
gboolean
|
||||
do_select_codepage (void)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = select_charset (-1, -1, default_source_codepage, FALSE);
|
||||
if (r == SELECT_CHARSET_CANCEL)
|
||||
return FALSE;
|
||||
|
||||
default_source_codepage = r;
|
||||
return do_set_codepage (default_source_codepage);
|
||||
}
|
||||
|
||||
#endif /* HAVE_CHARSET */
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "lib/global.h"
|
||||
|
||||
int select_charset (int center_y, int center_x, int current_charset, gboolean seldisplay);
|
||||
gboolean do_set_codepage (int);
|
||||
gboolean do_select_codepage (void);
|
||||
|
||||
/* some results of select_charset() */
|
||||
|
54
src/setup.c
54
src/setup.c
@ -390,10 +390,12 @@ save_setup (void)
|
||||
#endif /* ENABLE_VFS && USE_NETCODE */
|
||||
|
||||
#ifdef HAVE_CHARSET
|
||||
mc_config_set_string(mc_main_config, "Misc" , "display_codepage",
|
||||
mc_config_set_string (mc_main_config, "Misc" , "display_codepage",
|
||||
get_codepage_id( display_codepage ));
|
||||
mc_config_set_string(mc_main_config, "Misc" , "source_codepage",
|
||||
get_codepage_id( source_codepage ));
|
||||
mc_config_set_string (mc_main_config, "Misc" , "source_codepage",
|
||||
get_codepage_id( default_source_codepage ));
|
||||
mc_config_set_string (mc_main_config, "Misc" , "autodetect_codeset",
|
||||
autodetect_codeset );
|
||||
#endif /* HAVE_CHARSET */
|
||||
tmp_profile = g_build_filename (home_dir, MC_USERCONF_DIR, MC_CONFIG_FILE, NULL);
|
||||
ret = mc_config_save_to_file (mc_main_config, tmp_profile, NULL);
|
||||
@ -739,19 +741,19 @@ load_setup (void)
|
||||
/* mc.lib is common for all users, but has priority lower than
|
||||
~/.mc/ini. FIXME: it's only used for keys and treestore now */
|
||||
global_profile_name = concat_dir_and_file (mc_home, MC_GLOBAL_CONFIG_FILE);
|
||||
if (!exist_file(global_profile_name)) {
|
||||
if (!exist_file (global_profile_name)) {
|
||||
g_free (global_profile_name);
|
||||
global_profile_name = concat_dir_and_file (mc_home_alt, MC_GLOBAL_CONFIG_FILE);
|
||||
}
|
||||
|
||||
panels_profile_name = g_build_filename (home_dir, MC_USERCONF_DIR, MC_PANELS_FILE, NULL);
|
||||
|
||||
mc_main_config = mc_config_init(profile);
|
||||
mc_main_config = mc_config_init (profile);
|
||||
|
||||
if (!exist_file(panels_profile_name))
|
||||
setup__move_panels_config_into_separate_file(profile);
|
||||
setup__move_panels_config_into_separate_file (profile);
|
||||
|
||||
mc_panels_config = mc_config_init(panels_profile_name);
|
||||
mc_panels_config = mc_config_init (panels_profile_name);
|
||||
|
||||
/* Load integer boolean options */
|
||||
for (i = 0; int_options[i].opt_name; i++)
|
||||
@ -775,7 +777,7 @@ load_setup (void)
|
||||
startup_left_mode = view_listing;
|
||||
|
||||
if (!other_dir){
|
||||
buffer = mc_config_get_string(mc_panels_config, "Dirs", "other_dir", ".");
|
||||
buffer = mc_config_get_string (mc_panels_config, "Dirs", "other_dir", ".");
|
||||
if (vfs_file_is_local (buffer))
|
||||
other_dir = buffer;
|
||||
else
|
||||
@ -783,16 +785,16 @@ load_setup (void)
|
||||
}
|
||||
|
||||
boot_current_is_left =
|
||||
mc_config_get_int(mc_panels_config, "Dirs", "current_is_left", 1);
|
||||
mc_config_get_int (mc_panels_config, "Dirs", "current_is_left", 1);
|
||||
|
||||
#ifdef USE_NETCODE
|
||||
ftpfs_proxy_host = mc_config_get_string(mc_main_config, "Misc", "ftp_proxy_host", "gate");
|
||||
ftpfs_proxy_host = mc_config_get_string (mc_main_config, "Misc", "ftp_proxy_host", "gate");
|
||||
#endif
|
||||
|
||||
/* The default color and the terminal dependent color */
|
||||
setup_color_string = mc_config_get_string(mc_main_config, "Colors", "base_color", "");
|
||||
term_color_string = mc_config_get_string(mc_main_config, "Colors", getenv ("TERM"), "");
|
||||
color_terminal_string = mc_config_get_string(mc_main_config, "Colors", "color_terminals", "");
|
||||
setup_color_string = mc_config_get_string (mc_main_config, "Colors", "base_color", "");
|
||||
term_color_string = mc_config_get_string (mc_main_config, "Colors", getenv ("TERM"), "");
|
||||
color_terminal_string = mc_config_get_string (mc_main_config, "Colors", "color_terminals", "");
|
||||
|
||||
/* Load the directory history */
|
||||
/* directory_history_load (); */
|
||||
@ -802,25 +804,29 @@ load_setup (void)
|
||||
#endif /* ENABLE_VFS && USE_NETCODE */
|
||||
|
||||
#ifdef HAVE_CHARSET
|
||||
if ( load_codepages_list() > 0 ) {
|
||||
buffer = mc_config_get_string(mc_main_config, "Misc", "display_codepage", "");
|
||||
if ( buffer[0] != '\0' )
|
||||
{
|
||||
display_codepage = get_codepage_index( buffer );
|
||||
if (load_codepages_list () > 0) {
|
||||
buffer = mc_config_get_string (mc_main_config, "Misc", "display_codepage", "");
|
||||
if (buffer[0] != '\0') {
|
||||
display_codepage = get_codepage_index (buffer);
|
||||
cp_display = get_codepage_id (display_codepage);
|
||||
}
|
||||
g_free(buffer);
|
||||
buffer = mc_config_get_string(mc_main_config, "Misc", "source_codepage", "");
|
||||
if ( buffer[0] != '\0' )
|
||||
{
|
||||
source_codepage = get_codepage_index( buffer );
|
||||
if (buffer[0] != '\0') {
|
||||
default_source_codepage = get_codepage_index (buffer);
|
||||
source_codepage = default_source_codepage; /* May be source_codepage don't needed this */
|
||||
cp_source = get_codepage_id (source_codepage);
|
||||
}
|
||||
g_free(buffer);
|
||||
}
|
||||
init_translation_table( source_codepage, display_codepage );
|
||||
if ( get_codepage_id( display_codepage ) )
|
||||
utf8_display = str_isutf8 (get_codepage_id( display_codepage ));
|
||||
|
||||
autodetect_codeset = mc_config_get_string (mc_main_config, "Misc", "autodetect_codeset", "");
|
||||
if ((autodetect_codeset[0] != '\0') && (strcmp(autodetect_codeset, "off")))
|
||||
is_autodetect_codeset_enabled=TRUE;
|
||||
|
||||
init_translation_table (source_codepage, display_codepage);
|
||||
if (get_codepage_id (display_codepage))
|
||||
utf8_display = str_isutf8 (get_codepage_id (display_codepage));
|
||||
#endif /* HAVE_CHARSET */
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user