Merge branch 'm-utf-8' of ssh://angel_il@midnight-commander.org:2222/git/mc into m-utf-8

2025-01-01 00:54:24 +03:00 · 2009-04-16 16:38:05 +00:00 · 2009-04-16 16:38:05 +00:00 · 9cb41f7b43
commit 9cb41f7b43
parent dbce157307 2ce1042665
11 changed files with 192 additions and 145 deletions
--- a/edit/edit-widget.h
+++ b/edit/edit-widget.h
@ -46,7 +46,7 @@ struct WEdit {
    unsigned char *buffers2[MAXBUFF + 1];	/* all data from end of file down to curs2 */

    /* UTF8 */
-    unsigned char charbuf[MB_LEN_MAX];
+    unsigned char charbuf[4 + 1];
    int charpoint;
    /* search variables */
    long search_start;		/* First character to start searching from */
--- a/edit/editkeys.c
+++ b/edit/editkeys.c
@ -44,6 +44,7 @@
 #include "../src/tty.h"		/* keys */
 #include "../src/charsets.h"	/* convert_from_input_c() */
 #include "../src/selcodepage.h"	/* do_select_codepage() */
+#include "../src/main.h"	/* display_codepage */

 /*
 * Ordinary translations.  Note that the keys listed first take priority
@ -191,6 +192,8 @@ edit_translate_key (WEdit *edit, long x_key, int *cmd, int *ch)
    int char_for_insertion = -1;
    int i = 0;
    int extmod = 0;
+    int c;
+
    const edit_key_map_type *key_map = NULL;
    switch (edit_key_emulation) {
    case EDIT_KEY_EMULATION_NORMAL:
@ -243,23 +246,55 @@ edit_translate_key (WEdit *edit, long x_key, int *cmd, int *ch)

    /* an ordinary insertable character */
    if (x_key < 256 && !extmod) {
+
+        if ( edit->charpoint >= 4 ) {
+            edit->charpoint = 0;
+            edit->charbuf[edit->charpoint] = '\0';
+        }
+        if ( edit->charpoint < 4 ) {
+            edit->charbuf[edit->charpoint++] = x_key;
+            edit->charbuf[edit->charpoint] = '\0';
+        }
+
        if (!edit->utf8) {
-            int c = convert_from_input_c (x_key);
-            if (is_printable (c)) {
-                char_for_insertion = c;
-                goto fin;
-            }
-        } else {
-            if (edit->charpoint >= MB_LEN_MAX) {
-                goto fin;
-                edit->charpoint = 0;
+            /* input from 8-bit locale */
+            if ( str_isutf8 (get_codepage_id( display_codepage )) == 1 ) {
+                c = convert_from_input_c (x_key);
+                if (is_printable (c)) {
+                    char_for_insertion = c;
+                    goto fin;
+                }
+            } else {
+                edit->charbuf[edit->charpoint + 1] = '\0';
+                int res = str_is_valid_char (edit->charbuf, edit->charpoint);
+                if (res < 0) {
+                    if (res != -2) {
+                        edit->charpoint = 0; /* broken multibyte char, skip */
+                        goto fin;
+                    }
+                    /* unfinished multibyte input (in the middle of a multibyte utf-8 char) */
+                    goto fin;
+                } else {
+                    if ( g_unichar_isprint (g_utf8_get_char(edit->charbuf)) ) {
+                        c = convert_from_utf_to_current ( edit->charbuf );
+                        edit->charbuf[0] = '\0';
+                        edit->charpoint = 0;
+                        if (is_printable (c)) {
+                            char_for_insertion = c;
+                            goto fin;
+                        }
+                    }
+                    /* unprintable utf input, skip it */
+                    edit->charbuf[0] = '\0';
+                    edit->charpoint = 0;
+                    goto fin;
+                }
            }

-            edit->charbuf[edit->charpoint] = x_key;
-            edit->charpoint++;
+        } else {

            int res = str_is_valid_char (edit->charbuf, edit->charpoint);
-            mc_log("res:%i, edit->charpoint : %i\n",res, edit->charpoint);
+
            if (res < 0) {
                if (res != -2) {
                    edit->charpoint = 0; /* broken multibyte char, skip */
@ -271,6 +306,7 @@ edit_translate_key (WEdit *edit, long x_key, int *cmd, int *ch)
                edit->charbuf[edit->charpoint]='\0';
                edit->charpoint = 0;
                if ( g_unichar_isprint (g_utf8_get_char(edit->charbuf))) {
+                    mc_log("input:%s \n", edit->charbuf);
                    char_for_insertion = x_key;
                    goto fin;
                }
--- a/src/charsets.c
+++ b/src/charsets.c
@ -29,6 +29,8 @@

 #include "global.h"
 #include "charsets.h"
+#include "strutil.h"		/* utf-8 functions */
+#include "main.h"

 int n_codepages = 0;

@ -249,4 +251,34 @@ convert_from_input (char *str)
 	str++;
    }
 }
+
+/* Convert STR (expected to be UTF-8) to the codepage named by display_codepage
+ * and return the first byte of the result, or '.' when conversion is impossible. */
+unsigned char
+convert_from_utf_to_current (char *str)
+{
+    unsigned char ch = '.';    /* fallback; also covers a missing converter */
+    char *cp_to;
+    GIConv conv;
+    GString *translated_data;
+
+    if (!str)
+        return ch;
+
+    translated_data = g_string_new ("");
+    cp_to = g_strdup (get_codepage_id (display_codepage));
+    conv = str_crt_conv_to (cp_to);
+
+    if (conv != INVALID_CONV) {
+        /* on ESTR_FAILURE keep the '.' fallback */
+        if (str_convert (conv, str, translated_data) != ESTR_FAILURE)
+            ch = translated_data->str[0];
+        str_close_conv (conv);
+    }
+    g_free (cp_to);
+    g_string_free (translated_data, TRUE);
+
+    return ch;
+}
+
 #endif				/* HAVE_CHARSET */
--- a/src/charsets.h
+++ b/src/charsets.h
@ -6,7 +6,6 @@
 #define UNKNCHAR '\001'

 #define CHARSETS_INDEX "mc.charsets"
-
 extern int n_codepages;

 extern unsigned char conv_displ[256];
@ -27,6 +26,7 @@ const char *init_translation_table (int cpsource, int cpdisplay);
 void convert_to_display (char *str);
 void convert_from_input (char *str);
 void convert_string (unsigned char *str);
+unsigned char convert_from_utf_to_current (char *str);

 /* Convert single characters */
 static inline int
--- a/src/cmd.c
+++ b/src/cmd.c
@ -1395,9 +1395,9 @@ static char
    char *result;
    char *semi;
    char *slash;
-    
+
    semi = g_strrstr (path, "#enc:");
-    
+
    if (semi != NULL) {
        slash = strchr (semi, PATH_SEP);
        if (slash != NULL) {
@ -1410,23 +1410,26 @@ static char
    } else {
        result = g_strconcat (path, "/#enc:", encoding, NULL);
    }
-    
+
    return result;
 }

 static void
 set_panel_encoding (WPanel *panel)
 {
-    char *encoding;
+    char *encoding = NULL;
    char *cd_path;
-            
-    encoding = input_dialog ("Encoding", "Select encoding", NULL, "");
-    
+
+    do_select_codepage ();
+
+    encoding = g_strdup( get_codepage_id ( source_codepage ) );
+
    if (encoding) {
        cd_path = add_encoding_to_path (panel->cwd, encoding);
        if (!do_panel_cd (MENU_PANEL, cd_path, 0))
            message (1, MSG_ERROR, _(" Cannot chdir to %s "), cd_path);
        g_free (cd_path);
+        g_free (encoding);
    }
 }

--- a/src/strutil.c
+++ b/src/strutil.c
@ -37,9 +37,9 @@

 //names, that are used for utf-8 
 static const char *str_utf8_encodings[] = {
-    "utf-8",
-    "utf8",
-    NULL
+        "utf-8", 
+        "utf8", 
+        NULL
 };

 // standard 8bit encodings, no wide or multibytes characters
@ -284,17 +284,11 @@ str_translate_char (GIConv conv, char *keys, size_t ch_size,
    left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;

    cnv = g_iconv (conv, &keys, &left, &output, &out_size);
-    if (cnv == (size_t) (-1))
-    {
-	if (errno == EINVAL)
-	    return ESTR_PROBLEM;
-	else
-	    return ESTR_FAILURE;
-    }
-    else
-    {
-	output[0] = '\0';
-	return 0;
+    if (cnv == (size_t)(-1)) {
+        if (errno == EINVAL) return ESTR_PROBLEM; else return ESTR_FAILURE;
+    } else {
+        output[0] = '\0';
+        return 0;
    }
 }

@ -316,7 +310,6 @@ str_test_encoding_class (const char *encoding, const char **table)
 	result += (g_ascii_strncasecmp (encoding, table[t],
 					strlen (table[t])) == 0);
    }
-
    return result;
 }

@ -351,8 +344,9 @@ str_isutf8 (char *codeset_name)
 void
 str_init_strings (const char *termenc)
 {
-    codeset = g_strdup ((termenc != NULL)
-			? termenc : str_detect_termencoding ());
+    codeset = g_strdup ((termenc != NULL) 
+                        ? termenc 
+                        : str_detect_termencoding ());

    str_cnv_not_convert = g_iconv_open (codeset, codeset);
    if (str_cnv_not_convert == INVALID_CONV)
@ -425,7 +419,7 @@ str_get_next_char (char *text)
 const char *
 str_cget_next_char (const char *text)
 {
-    used_class.cnext_char (&text);
+    used_class.cnext_char(&text);
    return text;
 }

--- a/src/strutil8bit.c
+++ b/src/strutil8bit.c
@ -490,7 +490,7 @@ str_8bit_offset_to_pos (const char *text, size_t length)
 static int
 str_8bit_column_to_pos (const char *text, size_t pos)
 {
-    return (int) pos;
+    return (int)pos;
 }

 static char *
--- a/src/strutilascii.c
+++ b/src/strutilascii.c
@ -512,7 +512,7 @@ str_ascii_offset_to_pos (const char *text, size_t length)
 static int
 str_ascii_column_to_pos (const char *text, size_t pos)
 {
-    return (int) pos;
+    return (int)pos;
 }

 static char *
--- a/src/strutilutf8.c
+++ b/src/strutilutf8.c
@ -342,7 +342,6 @@ str_utf8_vfs_convert_to (GIConv coder, const char *string,
 	result = 0;
    }
    else
-//	result = _str_utf8_vfs_convert_to (coder, string, size, buffer);
 	result = str_nconvert (coder, (char *) string, size, buffer);

    return result;
@ -386,44 +385,31 @@ str_utf8_make_make_term_form (const char *text, size_t length)
 	    }
 	}
    }
-
-    while (length != 0 && text[0] != '\0')
-    {
-	uni = g_utf8_get_char_validated (text, -1);
-	if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2)))
-	{
-	    if (g_unichar_isprint (uni))
-	    {
-		left = g_unichar_to_utf8 (uni, actual);
-		actual += left;
-		if (!str_unichar_iscombiningmark (uni))
-		{
-		    result.width++;
-		    if (g_unichar_iswide (uni))
-			result.width++;
-		}
-		else
-		    result.compose = 1;
-	    }
-	    else
-	    {
-		actual[0] = '.';
-		actual++;
-		result.width++;
-	    }
-	    text = g_utf8_next_char (text);
-	}
-	else
-	{
-	    text++;
-	    //actual[0] = '?';
-	    memcpy (actual, replch, strlen (replch));
-	    actual += strlen (replch);
-	    result.width++;
-	}
-	if (length != (size_t) (-1))
-	    length--;
-    }
+    
+    while (length != 0 && text[0] != '\0') {
+        uni = g_utf8_get_char_validated (text, -1);
+        if ((uni != (gunichar)(-1)) && (uni != (gunichar)(-2))) {
+            if (g_unichar_isprint(uni)) {
+                left = g_unichar_to_utf8 (uni, actual);
+                actual+= left;
+                if (!str_unichar_iscombiningmark (uni)) {
+                    result.width++;
+                    if (g_unichar_iswide(uni)) result.width++;
+                } else result.compose = 1;
+            } else {
+                actual[0] = '.';
+                actual++;
+                result.width++;
+            }
+            text = g_utf8_next_char (text);
+        } else {
+            text++;
+            //actual[0] = '?';
+            memcpy (actual, replch, strlen (replch));
+            actual+= strlen (replch);
+            result.width++;
+        }
+        if (length != (size_t) (-1)) length--;    }
    actual[0] = '\0';

    return &result;
@ -1218,51 +1204,48 @@ str_utf8_create_key_gen (const char *text, int case_sen,
 			 gchar * (*keygen) (const gchar *, gssize size))
 {
    char *result;
+    
+    if (case_sen) {
+        result = str_utf8_normalize (text);
+    } else {
+        const char *start, *end;
+        char *fold, *key;
+        GString *fixed = g_string_new ("");

-    if (case_sen)
-    {
-	result = str_utf8_normalize (text);
-    }
-    else
-    {
-	const char *start, *end;
-	char *fold, *key;
-	GString *fixed = g_string_new ("");
+        start = text;
+        while (!g_utf8_validate (start, -1, &end) && start[0] != '\0')
+        {
+            if (start != end)
+            {
+                fold = g_utf8_casefold (start, end - start);
+                key = keygen (fold, -1);
+                g_string_append (fixed, key);
+                g_free (key);
+                g_free (fold);
+            }
+            g_string_append_c (fixed, end[0]);
+            start = end + 1;
+        }

-	start = text;
-	while (!g_utf8_validate (start, -1, &end) && start[0] != '\0')
-	{
-	    if (start != end)
-	    {
-		fold = g_utf8_casefold (start, end - start);
-		key = keygen (fold, -1);
-		g_string_append (fixed, key);
-		g_free (key);
-		g_free (fold);
-	    }
-	    g_string_append_c (fixed, end[0]);
-	    start = end + 1;
-	}
-
-	if (start == text)
-	{
-	    fold = g_utf8_casefold (text, -1);
-	    result = keygen (fold, -1);
-	    g_free (fold);
-	}
-	else
-	{
-	    if (start[0] != '\0' && start != end)
-	    {
-		fold = g_utf8_casefold (start, end - start);
-		key = keygen (fold, -1);
-		g_string_append (fixed, key);
-		g_free (key);
-		g_free (fold);
-	    }
-	    result = g_strdup (fixed->str);
-	}
-	g_string_free (fixed, TRUE);
+        if (start == text)
+        {
+            fold = g_utf8_casefold (text, -1);
+            result = keygen (fold, -1);
+            g_free (fold);
+        }
+        else
+        {
+            if (start[0] != '\0' && start != end)
+            {
+                fold = g_utf8_casefold (start, end - start);
+                key = keygen (fold, -1);
+                g_string_append (fixed, key);
+                g_free (key);
+                g_free (fold);
+            }
+            result = g_strdup (fixed->str);
+        }
+        g_string_free (fixed, TRUE);
    }
    return result;
 }
@ -1292,7 +1275,7 @@ str_utf8_release_key (char *key, int case_sen)
    g_free (key);
 }

-struct str_class
+struct str_class 
 str_utf8_init ()
 {
    struct str_class result;
--- a/src/view.c
+++ b/src/view.c
@ -2457,24 +2457,24 @@ view_display_text (WView * view)
                } else {
                    addch ('.');
 	}
-	    } else {
-		GString *comb = g_string_new ("");
-		if (str_isprint (info.cact)) {
-		    g_string_append(comb,info.cact);
-		} else {
-		    g_string_append(comb,".");
-		}
-		while (str_iscombiningmark (info.cnxt)) {
-		    view_read_continue (view, &info);
-		    g_string_append(comb,info.cact);
-		}
-		addstr (str_term_form (comb->str));
-		g_string_free (comb, TRUE);
-	    }
+            } else {
+                GString *comb = g_string_new ("");
+                if (str_isprint (info.cact)) {
+                    g_string_append(comb,info.cact);
+                } else {
+                    g_string_append(comb,".");
+                }
+                while (str_iscombiningmark (info.cnxt)) {
+                    view_read_continue (view, &info);
+                    g_string_append(comb,info.cact);
+                }
+                addstr (str_term_form (comb->str));
+                g_string_free (comb, TRUE);
+            }
 	} else {
-	    while (str_iscombiningmark (info.cnxt)) {
-		view_read_continue (view, &info);
-	    }
+            while (str_iscombiningmark (info.cnxt)) {
+                view_read_continue (view, &info);
+            }
 	}
        col+= w;

@ -2780,7 +2780,6 @@ view_get_line_at (WView *view, offset_type from, GString * buffer,
            continue;

        if (view_read_test_nroff_back (view, &info)) {
-// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 	    g_string_truncate (buffer, buffer->len-1);
            continue;
 	}
--- a/vfs/vfs.c
+++ b/vfs/vfs.c
@ -382,7 +382,7 @@ vfs_supported_enconding (const char *encoding) {
 * buffer - used to store result of translation
 */ 
 static int
-_vfs_translate_path (const char *path, int size,
+_vfs_translate_path (const char *path, int size, 
                     GIConv defcnv, GString *buffer)
 {
    const char *semi;
@ -459,7 +459,7 @@ char *
 vfs_translate_path (const char *path) 
 {
    int state;
-
+    
    g_string_set_size(vfs_str_buffer,0);
    state = _vfs_translate_path (path, -1, str_cnv_from_term, vfs_str_buffer);
    // strict version
@ -1218,7 +1218,7 @@ vfs_shut (void)
 	    (*vfs->done) (vfs);

    g_slist_free (vfs_openfiles);
-
+    
    g_string_free (vfs_str_buffer, TRUE);
 }