Merge branch '1838_add_enca_support'

* 1838_add_enca_support:
  Changes into documentation.
  Ticket #1838: source_codepage autodetect with enca program.
This commit is contained in:
Slava Zanko 2010-02-08 15:58:32 +02:00
commit ac60804870
11 changed files with 211 additions and 91 deletions

View File

@ -7,7 +7,7 @@ CP1251 Windows 1251
CP437 CP 437
CP850 CP 850
CP852 CP 852
CP866 CP 866
IBM866 CP 866
KOI8-R KOI8-R
KOI8-U KOI8-U
UTF-8 UTF-8

View File

@ -3571,7 +3571,7 @@ thin vertical line
.\"NODE " Skins oldcolors"
.SH " Compability"
.SH " Compatibility"
Assignment of colors by skin\-files is fully compatible with
the assignment of colors described in
@ -3580,31 +3580,31 @@ Colors\&.
.\"Colors"
section.
.PP
In this case, reassignment of colors has priority over the skin files and is
In this case, reassignment of colors has priority over the skin file and is
complementary.
.\"NODE "Filenames Highlight"
.SH "Filenames Highlight"
Section [filehighlight] from current skin\-file contain key names as
Section [filehighlight] in current skin\-file contains key names as
highlight groups and values as color pairs. Color pairs are documented
into
in
.\"LINK2"
Skins
.\"Skins"
section.
.PP
Rules of filenames highlight placed in @prefix@/share/mc/filehighlight.ini file
Rules of filenames highlight are placed in @prefix@/share/mc/filehighlight.ini file
(~/.mc/filehighlight.ini).
Name of section in this file must be equal to parameters names into
[filehighlight] section (in current skin\-file)
Section names in this file must match the parameter names in the
[filehighlight] section (of the current skin\-file).
.PP
Keys in these groups:
Keys in these groups are:
.TP
.I type
file type. if present, all other option ignored
file type. If present, all other options are ignored.
.TP
.I regexp
regular expression. If present, 'extensions' option ignored
regular expression. If present, 'extensions' option is ignored.
.TP
.I extensions
list of extensions of files. Separated by ';' sign.
@ -3635,7 +3635,7 @@ rule case sentitive (true) or not (false).
.\"NODE "Special Settings"
.SH "Special Settings"
Most of the settings of the Midnight Commander can be changed from the
Most of the Midnight Commander settings can be changed from the
menus. However, there are a small number of settings which can only be
changed by editing the setup file.
.PP
@ -3736,6 +3736,18 @@ contents of the selected directory.
.I fish_directory_timeout
This variable holds the lifetime of a directory cache entry in seconds. The
default value is 900 seconds.
.TP
.I autodetect_codeset
This option allows use of the `enca' command to autodetect the codeset of text files
in the internal viewer and editor. The list of valid values can be obtained with the
`enca \-\-list languages | cut \-d : \-f1' command. The option must be located
in the [Misc] section.
.PP
For example:
.PP
.nf
autodetect_codeset=russian
.fi
.\"NODE "Terminal databases"
.SH "Terminal databases"
The Midnight Commander provides a way to fix your system terminal

View File

@ -4059,12 +4059,10 @@ Home и End в активной панели. Вместо того, чтобы
.PP
.I use_file_to_guess_type
.IP
.\"If this variable is on (the default) it will spawn the file command to
.\"match the file types listed on the
Когда эта опция включена (что сделано по умолчанию), mc будет вызывать
программу
.B file
для определения типа файла в соответствии с типами файлов, указанными в
для определения типа файла в соответствии с типами файлов, указанными в файле
.\"LINK2"
mc.ext\&.
.\"Extension File Edit"
@ -4074,6 +4072,20 @@ mc.ext\&.
Если эта переменная включена (по умолчанию она отключена), то при
просмотре в одной из панелей структуры дерева каталогов во второй панели
автоматически будет отображаться список файлов выбранного каталога.
.PP
.I autodetect_codeset
.IP
Эта опция позволяет использовать команду enca для автоматического
определения кодировки текстовых файлов во встроенных просмотрщике и редакторе.
Список допустимых значений может быть получен командой
"enca \-\-list languages | cut \-d : \-f1". Опция должна располагаться в секции
[Misc].
.PP
Например:
.PP
.nf
autodetect_codeset=russian
.fi
.\"NODE "Terminal databases"
.SH "Базы терминалов (Terminal databases)"
Midnight Commander обеспечивает возможность внесения исправлений в

View File

@ -334,6 +334,9 @@ edit_cmd (void)
/*
 * Start the editor without a file name (do_edit (NULL)).
 *
 * When charset support is compiled in, source_codepage is first reset to
 * default_source_codepage -- presumably so that a codepage set for a
 * previously opened file is not carried over; confirm against callers.
 */
void
edit_cmd_new (void)
{
/* #ifdef, not #if: matches the other HAVE_CHARSET guards and stays valid
   when the macro is undefined (-Wundef) or defined without a value */
#ifdef HAVE_CHARSET
    source_codepage = default_source_codepage;
#endif
    do_edit (NULL);
}

149
src/ext.c
View File

@ -45,10 +45,12 @@
#include "user.h"
#include "main.h"
#include "wtools.h"
#include "ext.h"
#include "execute.h"
#include "history.h"
#include "layout.h"
#include "charsets.h" /* get_codepage_index */
#include "selcodepage.h" /* do_set_codepage */
#include "ext.h"
/* If set, we execute the file command to check the file type */
int use_file_to_check_type = 1;
@ -305,6 +307,45 @@ exec_extension (const char *filename, const char *lc_data, int *move_dir,
# define FILE_CMD "file "
#endif
/*
 * Run cmd_file with args through popen() and put the first chunk of its
 * standard output (at most one line, at most buflen - 1 bytes) into buf.
 * Standard error of the command is redirected to /dev/null.
 *
 * cmd_file and args are concatenated as is: the caller supplies the
 * separating space and any shell quoting.
 *
 * Return 1 if the data is valid, 0 otherwise, -1 for fatal errors
 * (unusable buffer, popen() failure, setvbuf() failure on QNX).
 * On 0 and -1 with a usable buffer, buf[0] is '\0'.
 *
 * NOTES: buf is null-terminated string.
 */
static int
get_popen_information (const char *cmd_file, const char *args, char *buf, int buflen)
{
    gboolean read_bytes;
    char *command;
    FILE *f;

    /* Nothing can be stored; this also keeps buf[buflen - 1] below in bounds */
    if (buf == NULL || buflen <= 0)
        return -1;

    /* Terminate up front so that every error path leaves an empty string */
    buf[0] = '\0';

    command = g_strconcat (cmd_file, args, " 2>/dev/null", (char *) NULL);
    f = popen (command, "r");
    g_free (command);

    if (f == NULL)
        return -1;

#ifdef __QNXNTO__
    if (setvbuf (f, NULL, _IOFBF, 0) != 0) {
        (void) pclose (f);
        return -1;
    }
#endif

    read_bytes = (fgets (buf, buflen, f) != NULL);
    if (!read_bytes)
        buf[0] = '\0';          /* do not rely on buf contents after a failed fgets */

    /* Exit status of the command is deliberately not inspected */
    (void) pclose (f);

    buf[buflen - 1] = '\0';     /* Paranoid termination */
    return read_bytes ? 1 : 0;
}
/*
* Run the "file" command on the local file.
* Return 1 if the data is valid, 0 otherwise, -1 for fatal errors.
@ -312,33 +353,38 @@ exec_extension (const char *filename, const char *lc_data, int *move_dir,
static int
get_file_type_local (const char *filename, char *buf, int buflen)
{
int read_bytes = 0;
char *tmp = name_quote (filename, 0);
char *command = g_strconcat (FILE_CMD, tmp, " 2>/dev/null", (char *) NULL);
FILE *f = popen (command, "r");
char *tmp;
int ret;
tmp = name_quote (filename, 0);
ret = get_popen_information (FILE_CMD, tmp, buf, buflen);
g_free (tmp);
g_free (command);
if (f != NULL) {
#ifdef __QNXNTO__
if (setvbuf (f, NULL, _IOFBF, 0) != 0) {
(void)pclose (f);
return -1;
}
#endif
read_bytes = (fgets (buf, buflen, f)
!= NULL);
if (read_bytes == 0)
buf[0] = 0;
pclose (f);
} else {
return -1;
}
return (read_bytes > 0);
return ret;
}
/*
 * Run "enca" on the local file: the value of autodetect_codeset is passed
 * with -L and the file name follows -i, both quoted with name_quote().
 * The command output is stored in buf by get_popen_information().
 * Result: 1 when buf holds valid data, 0 when it does not, -1 on fatal error.
 */
static int
get_file_encoding_local (const char *filename, char *buf, int buflen)
{
    char *quoted_name = name_quote (filename, 0);
    char *quoted_lang = name_quote (autodetect_codeset, 0);
    char *enca_args = g_strconcat (" -L", quoted_lang, " -i ", quoted_name, (char *) NULL);
    const int status = get_popen_information ("enca", enca_args, buf, buflen);

    g_free (enca_args);
    g_free (quoted_lang);
    g_free (quoted_name);

    return status;
}
/*
* Invoke the "file" command on the file and match its output against PTR.
@ -353,39 +399,58 @@ regex_check_type (const char *filename, const char *ptr, int *have_type)
/* Following variables are valid if *have_type is 1 */
static char content_string[2048];
static int content_shift = 0;
static char encoding_id[21]; /* CSISO51INISCYRILLIC -- 20 */
static size_t content_shift = 0;
static int got_data = 0;
if (!use_file_to_check_type) {
if (!use_file_to_check_type)
return 0;
}
if (!*have_type) {
if (*have_type == 0) {
char *realname; /* name used with "file" */
char *localfile;
int got_encoding_data;
/* Don't repeat even unsuccessful checks */
*have_type = 1;
localfile = mc_getlocalcopy (filename);
if (!localfile)
if (localfile == NULL)
return -1;
realname = localfile;
got_data =
get_file_type_local (localfile, content_string,
sizeof (content_string));
got_encoding_data = is_autodetect_codeset_enabled
? get_file_encoding_local (localfile, encoding_id, sizeof (encoding_id))
: 0;
mc_ungetlocalcopy (filename, localfile, 0);
if (got_encoding_data > 0) {
char *pp;
int cp_id;
pp = strchr (encoding_id, '\n');
if (pp != NULL)
*pp = '\0';
cp_id = get_codepage_index (encoding_id);
if (cp_id == -1)
cp_id = default_source_codepage;
do_set_codepage (cp_id);
}
got_data = get_file_type_local (localfile, content_string, sizeof (content_string));
if (got_data > 0) {
char *pp;
/* Paranoid termination */
content_string[sizeof (content_string) - 1] = 0;
pp = strchr (content_string, '\n');
if (pp != NULL)
*pp = '\0';
if ((pp = strchr (content_string, '\n')) != 0)
*pp = 0;
if (!strncmp (content_string, realname, strlen (realname))) {
if (strncmp (content_string, realname, strlen (realname)) == 0) {
/* Skip "realname: " */
content_shift = strlen (realname);
if (content_string[content_shift] == ':') {
@ -393,21 +458,21 @@ regex_check_type (const char *filename, const char *ptr, int *have_type)
for (content_shift++;
content_string[content_shift] == ' '
|| content_string[content_shift] == '\t';
content_shift++);
content_shift++)
;
}
}
} else {
/* No data */
content_string[0] = 0;
content_string[0] = '\0';
}
g_free (realname);
}
if (got_data == -1) {
if (got_data == -1)
return -1;
}
if (content_string[0]
if (content_string[0] != '\0'
&& mc_search (ptr, content_string + content_shift, MC_SEARCH_T_REGEX)) {
found = 1;
}

View File

@ -2271,6 +2271,7 @@ main (int argc, char *argv[])
done_key ();
#ifdef HAVE_CHARSET
free_codepages_list ();
g_free (autodetect_codeset);
#endif
str_uninit_strings ();

View File

@ -44,7 +44,10 @@ extern int option_tab_spacing;
#ifdef HAVE_CHARSET
extern int source_codepage;
extern int default_source_codepage;
extern int display_codepage;
extern char* autodetect_codeset;
extern gboolean is_autodetect_codeset_enabled;
#else
extern int eight_bit_clean;
extern int full_eight_bits;

View File

@ -2346,6 +2346,10 @@ do_enter_on_file_entry (file_entry *fe)
g_free (cmd);
}
#if HAVE_CHARSET
source_codepage = default_source_codepage;
#endif
return 1;
}
@ -3140,7 +3144,7 @@ set_panel_encoding (WPanel *panel)
const char *errmsg;
int r;
r = select_charset (-1, -1, source_codepage, FALSE);
r = select_charset (-1, -1, default_source_codepage, FALSE);
if (r == SELECT_CHARSET_CANCEL)
return; /* Cancel */

View File

@ -40,7 +40,10 @@
/* Numbers of (file I/O) and (input/display) codepages. -1 if not selected */
int source_codepage = -1;
int default_source_codepage = -1;
int display_codepage = -1;
char* autodetect_codeset = NULL;
gboolean is_autodetect_codeset_enabled = FALSE;
static unsigned char
get_hotkey (int n)
@ -108,19 +111,14 @@ select_charset (int center_y, int center_x, int current_charset, gboolean seldis
}
}
/* Set codepage */
gboolean
do_select_codepage (void)
do_set_codepage (int codepage)
{
const char *errmsg = NULL;
int r;
r = select_charset (-1, -1, source_codepage, FALSE);
if (r == SELECT_CHARSET_CANCEL)
return FALSE;
source_codepage = r;
errmsg = init_translation_table (r == SELECT_CHARSET_NO_TRANSLATE ?
source_codepage = codepage;
errmsg = init_translation_table (codepage == SELECT_CHARSET_NO_TRANSLATE ?
display_codepage : source_codepage,
display_codepage);
if (errmsg != NULL)
@ -129,4 +127,19 @@ do_select_codepage (void)
return (errmsg == NULL);
}
/*
 * Show the codepage selection menu, preselecting default_source_codepage.
 * If the selection is cancelled, nothing is changed and FALSE is returned.
 * Otherwise the chosen value is stored in default_source_codepage and
 * applied with do_set_codepage(), whose result is returned.
 */
gboolean
do_select_codepage (void)
{
    const int choice = select_charset (-1, -1, default_source_codepage, FALSE);

    if (choice != SELECT_CHARSET_CANCEL) {
        default_source_codepage = choice;
        return do_set_codepage (choice);
    }

    return FALSE;
}
#endif /* HAVE_CHARSET */

View File

@ -11,6 +11,7 @@
#include "lib/global.h"
int select_charset (int center_y, int center_x, int current_charset, gboolean seldisplay);
gboolean do_set_codepage (int);
gboolean do_select_codepage (void);
/* some results of select_charset() */

View File

@ -390,10 +390,12 @@ save_setup (void)
#endif /* ENABLE_VFS && USE_NETCODE */
#ifdef HAVE_CHARSET
mc_config_set_string(mc_main_config, "Misc" , "display_codepage",
mc_config_set_string (mc_main_config, "Misc" , "display_codepage",
get_codepage_id( display_codepage ));
mc_config_set_string(mc_main_config, "Misc" , "source_codepage",
get_codepage_id( source_codepage ));
mc_config_set_string (mc_main_config, "Misc" , "source_codepage",
get_codepage_id( default_source_codepage ));
mc_config_set_string (mc_main_config, "Misc" , "autodetect_codeset",
autodetect_codeset );
#endif /* HAVE_CHARSET */
tmp_profile = g_build_filename (home_dir, MC_USERCONF_DIR, MC_CONFIG_FILE, NULL);
ret = mc_config_save_to_file (mc_main_config, tmp_profile, NULL);
@ -739,19 +741,19 @@ load_setup (void)
/* mc.lib is common for all users, but has priority lower than
~/.mc/ini. FIXME: it's only used for keys and treestore now */
global_profile_name = concat_dir_and_file (mc_home, MC_GLOBAL_CONFIG_FILE);
if (!exist_file(global_profile_name)) {
if (!exist_file (global_profile_name)) {
g_free (global_profile_name);
global_profile_name = concat_dir_and_file (mc_home_alt, MC_GLOBAL_CONFIG_FILE);
}
panels_profile_name = g_build_filename (home_dir, MC_USERCONF_DIR, MC_PANELS_FILE, NULL);
mc_main_config = mc_config_init(profile);
mc_main_config = mc_config_init (profile);
if (!exist_file(panels_profile_name))
setup__move_panels_config_into_separate_file(profile);
setup__move_panels_config_into_separate_file (profile);
mc_panels_config = mc_config_init(panels_profile_name);
mc_panels_config = mc_config_init (panels_profile_name);
/* Load integer boolean options */
for (i = 0; int_options[i].opt_name; i++)
@ -775,7 +777,7 @@ load_setup (void)
startup_left_mode = view_listing;
if (!other_dir){
buffer = mc_config_get_string(mc_panels_config, "Dirs", "other_dir", ".");
buffer = mc_config_get_string (mc_panels_config, "Dirs", "other_dir", ".");
if (vfs_file_is_local (buffer))
other_dir = buffer;
else
@ -783,16 +785,16 @@ load_setup (void)
}
boot_current_is_left =
mc_config_get_int(mc_panels_config, "Dirs", "current_is_left", 1);
mc_config_get_int (mc_panels_config, "Dirs", "current_is_left", 1);
#ifdef USE_NETCODE
ftpfs_proxy_host = mc_config_get_string(mc_main_config, "Misc", "ftp_proxy_host", "gate");
ftpfs_proxy_host = mc_config_get_string (mc_main_config, "Misc", "ftp_proxy_host", "gate");
#endif
/* The default color and the terminal dependent color */
setup_color_string = mc_config_get_string(mc_main_config, "Colors", "base_color", "");
term_color_string = mc_config_get_string(mc_main_config, "Colors", getenv ("TERM"), "");
color_terminal_string = mc_config_get_string(mc_main_config, "Colors", "color_terminals", "");
setup_color_string = mc_config_get_string (mc_main_config, "Colors", "base_color", "");
term_color_string = mc_config_get_string (mc_main_config, "Colors", getenv ("TERM"), "");
color_terminal_string = mc_config_get_string (mc_main_config, "Colors", "color_terminals", "");
/* Load the directory history */
/* directory_history_load (); */
@ -802,25 +804,29 @@ load_setup (void)
#endif /* ENABLE_VFS && USE_NETCODE */
#ifdef HAVE_CHARSET
if ( load_codepages_list() > 0 ) {
buffer = mc_config_get_string(mc_main_config, "Misc", "display_codepage", "");
if ( buffer[0] != '\0' )
{
display_codepage = get_codepage_index( buffer );
if (load_codepages_list () > 0) {
buffer = mc_config_get_string (mc_main_config, "Misc", "display_codepage", "");
if (buffer[0] != '\0') {
display_codepage = get_codepage_index (buffer);
cp_display = get_codepage_id (display_codepage);
}
g_free(buffer);
buffer = mc_config_get_string(mc_main_config, "Misc", "source_codepage", "");
if ( buffer[0] != '\0' )
{
source_codepage = get_codepage_index( buffer );
if (buffer[0] != '\0') {
default_source_codepage = get_codepage_index (buffer);
source_codepage = default_source_codepage; /* May be source_codepage don't needed this */
cp_source = get_codepage_id (source_codepage);
}
g_free(buffer);
}
init_translation_table( source_codepage, display_codepage );
if ( get_codepage_id( display_codepage ) )
utf8_display = str_isutf8 (get_codepage_id( display_codepage ));
autodetect_codeset = mc_config_get_string (mc_main_config, "Misc", "autodetect_codeset", "");
if ((autodetect_codeset[0] != '\0') && (strcmp(autodetect_codeset, "off")))
is_autodetect_codeset_enabled=TRUE;
init_translation_table (source_codepage, display_codepage);
if (get_codepage_id (display_codepage))
utf8_display = str_isutf8 (get_codepage_id (display_codepage));
#endif /* HAVE_CHARSET */
}