mc/lib/charsets.c
Andrew Borodin 7257f794d2 Update template for .c files.
Add section for forward declarations of local functions. This section is
located before file scope variables because functions can be used in
structures (see find.c for example):

/*** forward declarations (file scope functions) *************************************************/

/* button callbacks */
static int start_stop (WButton * button, int action);
static int find_do_view_file (WButton * button, int action);
static int find_do_edit_file (WButton * button, int action);

/*** file scope variables ************************************************************************/

static struct
{
    ...
    bcback_fn callback;
} fbuts[] =
{
    ...
    { B_STOP, NORMAL_BUTTON, N_("S&uspend"), 0, 0, NULL, start_stop },
    ...
    { B_VIEW, NORMAL_BUTTON, N_("&View - F3"), 0, 0, NULL, find_do_view_file },
    { B_VIEW, NORMAL_BUTTON, N_("&Edit - F4"), 0, 0, NULL, find_do_edit_file }
};

Signed-off-by: Andrew Borodin <aborodin@vmail.ru>
2023-03-19 20:34:24 +03:00

528 lines
14 KiB
C

/*
Text conversion from one charset to another.
Copyright (C) 2001-2023
Free Software Foundation, Inc.
Written by:
Walery Studennikov <despair@sama.ru>
This file is part of the Midnight Commander.
The Midnight Commander is free software: you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
The Midnight Commander is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/** \file charsets.c
* \brief Source: Text conversion from one charset to another
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lib/global.h"
#include "lib/strutil.h" /* utf-8 functions */
#include "lib/fileloc.h"
#include "lib/util.h" /* whitespace() */
#include "lib/charsets.h"
/*** global variables ****************************************************************************/
GPtrArray *codepages = NULL;
unsigned char conv_displ[256];
unsigned char conv_input[256];
const char *cp_display = NULL;
const char *cp_source = NULL;
/*** file scope macro definitions ****************************************************************/
#define UNKNCHAR '\001'
#define OTHER_8BIT "Other_8_bit"
/*** file scope type declarations ****************************************************************/
/*** forward declarations (file scope functions) *************************************************/
/*** file scope variables ************************************************************************/
/* --------------------------------------------------------------------------------------------- */
/*** file scope functions ************************************************************************/
/* --------------------------------------------------------------------------------------------- */
/**
 * Allocate a codepage descriptor that holds private copies of both strings.
 *
 * @param id   codepage identifier; duplicated with g_strdup()
 * @param name codepage description; duplicated with g_strdup()
 * @return newly allocated descriptor
 */
static codepage_desc *
new_codepage_desc (const char *id, const char *name)
{
    codepage_desc *cp = g_new (codepage_desc, 1);

    cp->name = g_strdup (name);
    cp->id = g_strdup (id);

    return cp;
}
/* --------------------------------------------------------------------------------------------- */
/**
 * Release a codepage descriptor together with the two strings it holds.
 *
 * @param data pointer to a codepage_desc, passed as a generic pointer
 */
static void
free_codepage_desc (gpointer data)
{
    codepage_desc *cp = data;

    g_free (cp->name);
    g_free (cp->id);
    g_free (cp);
}
/* --------------------------------------------------------------------------------------------- */
/**
 * Read codepage definitions from a charsets file and merge them into a list.
 *
 * Every line that is not empty and does not start with '#' is split at the
 * first whitespace character into an id and a description. The special id
 * "default" names the default display codepage; any other id is appended to
 * the list or, when the id is already present, replaces its description.
 *
 * Parsing stops at the first malformed line (no separator or an empty
 * description). Entries read before that line stay in the list, but the
 * "default" entry is not applied in that case.
 *
 * On success, a "default" entry is resolved with get_codepage_index() and
 * stored in mc_global.display_codepage.
 *
 * @param list  in/out pointer to the list; the array is created on demand
 *              when *list is NULL
 * @param fname path of the file to read; a file that cannot be opened is
 *              silently skipped
 */
static void
load_codepages_list_from_file (GPtrArray ** list, const char *fname)
{
    FILE *f;
    char buf[BUF_MEDIUM];
    char *default_codepage = NULL;

    f = fopen (fname, "r");
    if (f == NULL)
        return;

    while (fgets (buf, sizeof buf, f) != NULL)
    {
        /* split string into id and cpname */
        char *p = buf;
        size_t buflen;

        if (*p == '\n' || *p == '\0' || *p == '#')
            continue;

        buflen = strlen (buf);
        if (buflen != 0 && buf[buflen - 1] == '\n')
            buf[buflen - 1] = '\0';

        /* skip over the id */
        while (*p != '\0' && !whitespace (*p))
            ++p;
        if (*p == '\0')
            goto fail;

        /* terminate the id; the rest of the line is the description */
        *p++ = '\0';
        g_strstrip (p);
        if (*p == '\0')
            goto fail;

        if (strcmp (buf, "default") == 0)
        {
            /* a later "default" line overrides an earlier one; drop the old copy */
            g_free (default_codepage);
            default_codepage = g_strdup (p);
        }
        else
        {
            const char *id = buf;

            if (*list == NULL)
            {
                *list = g_ptr_array_sized_new (16);
                /* the array owns its descriptors, so g_ptr_array_free () releases them */
                g_ptr_array_set_free_func (*list, free_codepage_desc);
                g_ptr_array_add (*list, new_codepage_desc (id, p));
            }
            else
            {
                unsigned int i;

                /* whether id is already present in list */
                /* if yes, overwrite description */
                for (i = 0; i < (*list)->len; i++)
                {
                    codepage_desc *desc;

                    desc = (codepage_desc *) g_ptr_array_index (*list, i);
                    if (strcmp (id, desc->id) == 0)
                    {
                        /* found */
                        g_free (desc->name);
                        desc->name = g_strdup (p);
                        break;
                    }
                }

                /* not found */
                if (i == (*list)->len)
                    g_ptr_array_add (*list, new_codepage_desc (id, p));
            }
        }
    }

    if (default_codepage != NULL)
        mc_global.display_codepage = get_codepage_index (default_codepage);

  fail:
    /* reached on both the normal and the error path; g_free (NULL) is a no-op */
    g_free (default_codepage);
    fclose (f);
}
/* --------------------------------------------------------------------------------------------- */
/**
 * Convert a single byte with the given iconv descriptor.
 *
 * @param cd conversion descriptor
 * @param c  byte to convert
 * @return first byte of the converted output, or UNKNCHAR when the
 *         conversion produced no buffer
 */
static char
translate_character (GIConv cd, char c)
{
    gsize consumed, produced = 0;
    gchar *converted;
    char result;

    converted = g_convert_with_iconv (&c, 1, cd, &consumed, &produced, NULL);
    result = (converted != NULL) ? converted[0] : UNKNCHAR;
    g_free (converted);

    return result;
}
/* --------------------------------------------------------------------------------------------- */
/*** public functions ****************************************************************************/
/* --------------------------------------------------------------------------------------------- */
/**
 * Populate the global codepages list.
 *
 * The charsets file is looked up first in mc_global.share_data_dir and then
 * in mc_global.sysconfig_dir; entries from the second file are merged over
 * those from the first. If neither file yields a list, a warning is printed
 * to stderr and a list containing only DEFAULT_CHARSET is created.
 */
void
load_codepages_list (void)
{
    int pass;

    /* pass 0: e.g. /usr/share/mc/mc.charsets, pass 1: e.g. /etc/mc/mc.charsets */
    for (pass = 0; pass < 2; pass++)
    {
        const char *dir;
        char *path;

        dir = (pass == 0) ? mc_global.share_data_dir : mc_global.sysconfig_dir;
        path = g_build_filename (dir, CHARSETS_LIST, (char *) NULL);
        load_codepages_list_from_file (&codepages, path);
        g_free (path);
    }

    if (codepages != NULL)
        return;

    /* files are not found, add default codepage */
    fprintf (stderr, "%s\n", _("Warning: cannot load codepages list"));
    codepages = g_ptr_array_new_with_free_func (free_codepage_desc);
    g_ptr_array_add (codepages, new_codepage_desc (DEFAULT_CHARSET, _("7-bit ASCII")));
}
/* --------------------------------------------------------------------------------------------- */
/**
 * Destroy the global codepages list.
 *
 * Safe to call when the list was never loaded or has already been freed:
 * g_ptr_array_free () rejects a NULL array with a GLib critical warning, so
 * that case is filtered out here.
 */
void
free_codepages_list (void)
{
    if (codepages == NULL)
        return;

    g_ptr_array_free (codepages, TRUE);
    /* NULL-ize pointer to make unit tests happy */
    codepages = NULL;
}
/* --------------------------------------------------------------------------------------------- */
const char *
get_codepage_id (const int n)
{
return (n < 0) ? OTHER_8BIT : ((codepage_desc *) g_ptr_array_index (codepages, n))->id;
}
/* --------------------------------------------------------------------------------------------- */
int
get_codepage_index (const char *id)
{
size_t i;
if (codepages == NULL)
return -1;
if (strcmp (id, OTHER_8BIT) == 0)
return -1;
for (i = 0; i < codepages->len; i++)
if (strcmp (id, ((codepage_desc *) g_ptr_array_index (codepages, i))->id) == 0)
return i;
return -1;
}
/* --------------------------------------------------------------------------------------------- */
/** Check if specified encoding can be used in mc.
* @param encoding name of encoding
* @return TRUE if encoding is supported by mc, FALSE otherwise
*/
gboolean
is_supported_encoding (const char *encoding)
{
gboolean result = FALSE;
guint t;
for (t = 0; t < codepages->len; t++)
{
const char *id;
id = ((codepage_desc *) g_ptr_array_index (codepages, t))->id;
result |= (g_ascii_strncasecmp (encoding, id, strlen (id)) == 0);
}
return result;
}
/* --------------------------------------------------------------------------------------------- */
char *
init_translation_table (int cpsource, int cpdisplay)
{
int i;
GIConv cd;
/* Fill inpit <-> display tables */
if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay)
{
for (i = 0; i <= 255; ++i)
{
conv_displ[i] = i;
conv_input[i] = i;
}
cp_source = cp_display;
return NULL;
}
for (i = 0; i <= 127; ++i)
{
conv_displ[i] = i;
conv_input[i] = i;
}
cp_source = ((codepage_desc *) g_ptr_array_index (codepages, cpsource))->id;
cp_display = ((codepage_desc *) g_ptr_array_index (codepages, cpdisplay))->id;
/* display <- inpit table */
cd = g_iconv_open (cp_display, cp_source);
if (cd == INVALID_CONV)
return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source, cp_display);
for (i = 128; i <= 255; ++i)
conv_displ[i] = translate_character (cd, i);
g_iconv_close (cd);
/* inpit <- display table */
cd = g_iconv_open (cp_source, cp_display);
if (cd == INVALID_CONV)
return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display, cp_source);
for (i = 128; i <= 255; ++i)
{
unsigned char ch;
ch = translate_character (cd, i);
conv_input[i] = (ch == UNKNCHAR) ? i : ch;
}
g_iconv_close (cd);
return NULL;
}
/* --------------------------------------------------------------------------------------------- */
/**
 * Translate a NUL-terminated string in place through the conv_displ table.
 *
 * @param str string to rewrite byte by byte; NULL is ignored
 */
void
convert_to_display (char *str)
{
    char *p;

    if (str == NULL)
        return;

    p = str;
    while (*p != '\0')
    {
        const unsigned char byte = (unsigned char) *p;

        *p = conv_displ[byte];
        p++;
    }
}
/* --------------------------------------------------------------------------------------------- */
GString *
str_nconvert_to_display (const char *str, int len)
{
GString *buff;
GIConv conv;
if (str == NULL)
return g_string_new ("");
if (cp_display == cp_source)
return g_string_new (str);
conv = str_crt_conv_from (cp_source);
buff = g_string_new ("");
str_nconvert (conv, str, len, buff);
str_close_conv (conv);
return buff;
}
/* --------------------------------------------------------------------------------------------- */
/**
 * Translate a NUL-terminated string in place through the conv_input table.
 *
 * @param str string to rewrite byte by byte; NULL is ignored
 */
void
convert_from_input (char *str)
{
    char *p;

    if (str == NULL)
        return;

    p = str;
    while (*p != '\0')
    {
        const unsigned char byte = (unsigned char) *p;

        *p = conv_input[byte];
        p++;
    }
}
/* --------------------------------------------------------------------------------------------- */
GString *
str_nconvert_to_input (const char *str, int len)
{
GString *buff;
GIConv conv;
if (str == NULL)
return g_string_new ("");
if (cp_display == cp_source)
return g_string_new (str);
conv = str_crt_conv_to (cp_source);
buff = g_string_new ("");
str_nconvert (conv, str, len, buff);
str_close_conv (conv);
return buff;
}
/* --------------------------------------------------------------------------------------------- */
/**
 * Translate the first character of a string to a single byte, using a
 * converter created for the codepage id of mc_global.source_codepage.
 *
 * @param str input string; NULL yields '.'
 * @return first byte of the translated character, or '.' when the converter
 *         cannot be created or the translation does not succeed
 */
unsigned char
convert_from_utf_to_current (const char *str)
{
    unsigned char out[UTF8_CHAR_LEN + 1];
    unsigned char result = '.';
    GIConv conv;

    if (str == NULL)
        return '.';

    conv = str_crt_conv_to (get_codepage_id (mc_global.source_codepage));
    if (conv == INVALID_CONV)
        return result;

    /* every outcome other than success keeps the '.' placeholder */
    if (str_translate_char (conv, str, -1, (char *) out, sizeof (out)) == ESTR_SUCCESS)
        result = out[0];

    str_close_conv (conv);

    return result;
}
/* --------------------------------------------------------------------------------------------- */
/**
 * Translate one Unicode code point to a single byte with the given converter.
 *
 * The code point is first encoded as UTF-8 and then passed through
 * str_translate_char ().
 *
 * @param input_char code point to translate
 * @param conv       converter to use; not closed by this function
 * @return first byte of the translated character, or '.' when encoding to
 *         UTF-8 yields nothing or the translation does not succeed
 */
unsigned char
convert_from_utf_to_current_c (int input_char, GIConv conv)
{
    unsigned char utf8[UTF8_CHAR_LEN + 1];
    unsigned char out[UTF8_CHAR_LEN + 1];
    int utf8_len;

    utf8_len = g_unichar_to_utf8 (input_char, (char *) utf8);
    if (utf8_len == 0)
        return '.';

    utf8[utf8_len] = '\0';

    /* every outcome other than success maps to the '.' placeholder */
    if (str_translate_char (conv, (char *) utf8, -1, (char *) out, sizeof (out)) != ESTR_SUCCESS)
        return '.';

    return out[0];
}
/* --------------------------------------------------------------------------------------------- */
/**
 * Translate one 8-bit character with the given converter and decode the
 * result as a UTF-8 code point.
 *
 * @param input_char byte to translate
 * @param conv       converter to use; not closed by this function
 * @return the decoded code point; the first output byte when the output does
 *         not validate as UTF-8; '.' when the translation does not succeed
 */
int
convert_from_8bit_to_utf_c (char input_char, GIConv conv)
{
    unsigned char src[2];
    unsigned char out[UTF8_CHAR_LEN + 1];
    int decoded;

    src[0] = (unsigned char) input_char;
    src[1] = '\0';

    if (str_translate_char (conv, (char *) src, -1, (char *) out, sizeof (out)) != ESTR_SUCCESS)
        return '.';

    /* a negative value signals that the output is not a valid UTF-8 sequence */
    decoded = g_utf8_get_char_validated ((char *) out, -1);
    if (decoded >= 0)
        return decoded;

    return out[0];
}
/* --------------------------------------------------------------------------------------------- */
/**
 * Translate one 8-bit character to a code point, using a converter created
 * for the codepage id of mc_global.source_codepage.
 *
 * NOTE(review): the converter is obtained with str_crt_conv_to () although
 * the local variable is named cp_from -- confirm the intended direction.
 *
 * @param input_char byte to translate
 * @return result of convert_from_8bit_to_utf_c (), or '.' when the converter
 *         cannot be created
 */
int
convert_from_8bit_to_utf_c2 (char input_char)
{
    const char *cp_from = get_codepage_id (mc_global.source_codepage);
    GIConv conv = str_crt_conv_to (cp_from);
    int result;

    if (conv == INVALID_CONV)
        return '.';

    result = convert_from_8bit_to_utf_c (input_char, conv);
    str_close_conv (conv);

    return result;
}
/* --------------------------------------------------------------------------------------------- */