2002-08-27 09:27:11 +04:00
|
|
|
/* Text conversion from one charset to another.

   Copyright (C) 2001 Walery Studennikov <despair@sama.ru>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
|
|
|
|
|
2001-05-31 05:27:20 +04:00
|
|
|
#include <config.h>
|
2001-06-01 03:56:27 +04:00
|
|
|
|
|
|
|
#ifdef HAVE_CHARSET
|
2001-05-31 05:27:20 +04:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <iconv.h>
|
|
|
|
|
2001-08-12 07:27:27 +04:00
|
|
|
#include "global.h"
|
2001-05-31 05:27:20 +04:00
|
|
|
#include "charsets.h"
|
|
|
|
|
|
|
|
/* Number of entries stored in `codepages'; set by load_codepages_list(). */
int n_codepages = 0;

/*
 * Table of known codepages.  load_codepages_list() allocates it zeroed
 * with one spare entry, so the entry after the last one has a NULL id.
 */
struct codepage_desc *codepages;

/* Per-byte table applied by convert_to_display(). */
unsigned char conv_displ[256];

/* Per-byte table applied by convert_from_input(). */
unsigned char conv_input[256];
|
2001-05-31 05:27:20 +04:00
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
int
|
|
|
|
load_codepages_list (void)
|
2001-05-31 05:27:20 +04:00
|
|
|
{
|
|
|
|
int result = -1;
|
|
|
|
FILE *f;
|
2001-08-12 07:27:27 +04:00
|
|
|
char *fname;
|
2001-05-31 05:27:20 +04:00
|
|
|
char buf[256];
|
2002-10-31 02:14:26 +03:00
|
|
|
extern char *mc_home;
|
2001-06-05 05:00:26 +04:00
|
|
|
extern int display_codepage;
|
2002-10-31 02:14:26 +03:00
|
|
|
char *default_codepage = NULL;
|
2001-05-31 05:27:20 +04:00
|
|
|
|
2001-08-12 07:27:27 +04:00
|
|
|
fname = concat_dir_and_file (mc_home, CHARSETS_INDEX);
|
2002-10-31 02:14:26 +03:00
|
|
|
if (!(f = fopen (fname, "r"))) {
|
2001-08-17 01:01:12 +04:00
|
|
|
fprintf (stderr, _("Warning: file %s not found\n"), fname);
|
2001-08-12 07:27:27 +04:00
|
|
|
g_free (fname);
|
2001-05-31 05:27:20 +04:00
|
|
|
return -1;
|
2001-08-12 07:27:27 +04:00
|
|
|
}
|
|
|
|
g_free (fname);
|
2001-05-31 05:27:20 +04:00
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
for (n_codepages = 0; fgets (buf, sizeof (buf), f);)
|
|
|
|
if (buf[0] != '\n' && buf[0] != '\0' && buf[0] != '#')
|
2001-05-31 05:27:20 +04:00
|
|
|
++n_codepages;
|
2002-10-31 02:14:26 +03:00
|
|
|
rewind (f);
|
2001-05-31 05:27:20 +04:00
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
codepages = g_new0 (struct codepage_desc, n_codepages + 1);
|
2001-05-31 05:27:20 +04:00
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
for (n_codepages = 0; fgets (buf, sizeof buf, f);) {
|
2001-05-31 05:27:20 +04:00
|
|
|
/* split string into id and cpname */
|
|
|
|
char *p = buf;
|
2002-10-31 02:14:26 +03:00
|
|
|
int buflen = strlen (buf);
|
2001-06-05 05:00:26 +04:00
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
if (*p == '\n' || *p == '\0' || *p == '#')
|
2001-05-31 05:27:20 +04:00
|
|
|
continue;
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
if (buflen > 0 && buf[buflen - 1] == '\n')
|
|
|
|
buf[buflen - 1] = '\0';
|
|
|
|
while (*p != '\t' && *p != ' ' && *p != '\0')
|
2001-05-31 05:27:20 +04:00
|
|
|
++p;
|
2002-10-31 02:14:26 +03:00
|
|
|
if (*p == '\0')
|
2001-05-31 05:27:20 +04:00
|
|
|
goto fail;
|
|
|
|
|
2001-06-05 05:00:26 +04:00
|
|
|
*p++ = 0;
|
2001-05-31 05:27:20 +04:00
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
while (*p == '\t' || *p == ' ')
|
2001-05-31 05:27:20 +04:00
|
|
|
++p;
|
2002-10-31 02:14:26 +03:00
|
|
|
if (*p == '\0')
|
2001-05-31 05:27:20 +04:00
|
|
|
goto fail;
|
|
|
|
|
2001-06-05 05:00:26 +04:00
|
|
|
if (strcmp (buf, "default") == 0) {
|
2001-10-29 16:40:00 +03:00
|
|
|
default_codepage = g_strdup (p);
|
2001-06-05 05:00:26 +04:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
codepages[n_codepages].id = g_strdup (buf);
|
|
|
|
codepages[n_codepages].name = g_strdup (p);
|
2001-05-31 05:27:20 +04:00
|
|
|
++n_codepages;
|
|
|
|
}
|
|
|
|
|
2001-06-05 05:00:26 +04:00
|
|
|
if (default_codepage) {
|
|
|
|
display_codepage = get_codepage_index (default_codepage);
|
2001-10-29 16:40:00 +03:00
|
|
|
g_free (default_codepage);
|
2001-06-05 05:00:26 +04:00
|
|
|
}
|
|
|
|
|
2001-05-31 05:27:20 +04:00
|
|
|
result = n_codepages;
|
2002-10-31 02:14:26 +03:00
|
|
|
fail:
|
|
|
|
fclose (f);
|
2001-05-31 05:27:20 +04:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
void
|
|
|
|
free_codepages_list (void)
|
2001-10-29 16:40:00 +03:00
|
|
|
{
|
|
|
|
if (n_codepages > 0) {
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < n_codepages; i++) {
|
|
|
|
g_free (codepages[i].id);
|
|
|
|
g_free (codepages[i].name);
|
|
|
|
}
|
|
|
|
n_codepages = 0;
|
|
|
|
g_free (codepages);
|
|
|
|
codepages = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2001-05-31 05:27:20 +04:00
|
|
|
#define OTHER_8BIT "Other_8_bit"
|
|
|
|
|
2004-08-30 02:38:06 +04:00
|
|
|
const char *
|
2002-10-31 02:14:26 +03:00
|
|
|
get_codepage_id (int n)
|
2001-05-31 05:27:20 +04:00
|
|
|
{
|
2002-10-31 02:14:26 +03:00
|
|
|
return (n < 0) ? OTHER_8BIT : codepages[n].id;
|
2001-05-31 05:27:20 +04:00
|
|
|
}
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
int
|
|
|
|
get_codepage_index (const char *id)
|
2001-05-31 05:27:20 +04:00
|
|
|
{
|
|
|
|
int i;
|
2002-10-31 02:14:26 +03:00
|
|
|
if (strcmp (id, OTHER_8BIT) == 0)
|
2001-05-31 05:27:20 +04:00
|
|
|
return -1;
|
2002-10-31 02:14:26 +03:00
|
|
|
for (i = 0; codepages[i].id; ++i)
|
|
|
|
if (strcmp (id, codepages[i].id) == 0)
|
2001-05-31 05:27:20 +04:00
|
|
|
return i;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
static char
|
|
|
|
translate_character (iconv_t cd, char c)
|
2001-05-31 05:27:20 +04:00
|
|
|
{
|
|
|
|
char outbuf[4], *obuf;
|
|
|
|
size_t ibuflen, obuflen, count;
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
ICONV_CONST char *ibuf = &c;
|
2001-05-31 05:27:20 +04:00
|
|
|
obuf = outbuf;
|
2002-10-31 02:14:26 +03:00
|
|
|
ibuflen = 1;
|
|
|
|
obuflen = 4;
|
2001-05-31 05:27:20 +04:00
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
count = iconv (cd, &ibuf, &ibuflen, &obuf, &obuflen);
|
2001-05-31 05:27:20 +04:00
|
|
|
if (count >= 0 && ibuflen == 0)
|
|
|
|
return outbuf[0];
|
|
|
|
|
|
|
|
return UNKNCHAR;
|
|
|
|
}
|
|
|
|
|
|
|
|
char errbuf[255];
|
|
|
|
|
2001-06-08 00:04:03 +04:00
|
|
|
/*
|
|
|
|
* FIXME: This assumes that ASCII is always the first encoding
|
|
|
|
* in mc.charsets
|
|
|
|
*/
|
|
|
|
#define CP_ASCII 0
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
char *
|
|
|
|
init_translation_table (int cpsource, int cpdisplay)
|
2001-05-31 05:27:20 +04:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
iconv_t cd;
|
|
|
|
char *cpsour, *cpdisp;
|
|
|
|
|
|
|
|
/* Fill inpit <-> display tables */
|
|
|
|
|
|
|
|
if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay) {
|
2002-10-31 02:14:26 +03:00
|
|
|
for (i = 0; i <= 255; ++i) {
|
2001-05-31 05:27:20 +04:00
|
|
|
conv_displ[i] = i;
|
|
|
|
conv_input[i] = i;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
for (i = 0; i <= 127; ++i) {
|
2001-05-31 05:27:20 +04:00
|
|
|
conv_displ[i] = i;
|
|
|
|
conv_input[i] = i;
|
|
|
|
}
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
cpsour = codepages[cpsource].id;
|
|
|
|
cpdisp = codepages[cpdisplay].id;
|
2001-05-31 05:27:20 +04:00
|
|
|
|
|
|
|
/* display <- inpit table */
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
cd = iconv_open (cpdisp, cpsour);
|
|
|
|
if (cd == (iconv_t) - 1) {
|
|
|
|
g_snprintf (errbuf, sizeof (errbuf),
|
|
|
|
_("Cannot translate from %s to %s"), cpsour, cpdisp);
|
2001-05-31 05:27:20 +04:00
|
|
|
return errbuf;
|
|
|
|
}
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
for (i = 128; i <= 255; ++i)
|
|
|
|
conv_displ[i] = translate_character (cd, i);
|
2001-05-31 05:27:20 +04:00
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
iconv_close (cd);
|
2001-05-31 05:27:20 +04:00
|
|
|
|
|
|
|
/* inpit <- display table */
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
cd = iconv_open (cpsour, cpdisp);
|
|
|
|
if (cd == (iconv_t) - 1) {
|
|
|
|
g_snprintf (errbuf, sizeof (errbuf),
|
|
|
|
_("Cannot translate from %s to %s"), cpdisp, cpsour);
|
2001-05-31 05:27:20 +04:00
|
|
|
return errbuf;
|
|
|
|
}
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
for (i = 128; i <= 255; ++i) {
|
|
|
|
unsigned char ch;
|
|
|
|
ch = translate_character (cd, i);
|
2001-05-31 05:27:20 +04:00
|
|
|
conv_input[i] = (ch == UNKNCHAR) ? i : ch;
|
|
|
|
}
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
iconv_close (cd);
|
2001-05-31 05:27:20 +04:00
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
void
|
|
|
|
convert_to_display (char *str)
|
2001-05-31 05:27:20 +04:00
|
|
|
{
|
2002-10-31 02:14:26 +03:00
|
|
|
if (!str)
|
|
|
|
return;
|
|
|
|
|
2002-11-11 17:48:37 +03:00
|
|
|
while (*str) {
|
|
|
|
*str = conv_displ[(unsigned char) *str];
|
2001-06-05 05:00:26 +04:00
|
|
|
str++;
|
2002-11-11 17:48:37 +03:00
|
|
|
}
|
2001-05-31 05:27:20 +04:00
|
|
|
}
|
|
|
|
|
2002-10-31 02:14:26 +03:00
|
|
|
void
|
|
|
|
convert_from_input (char *str)
|
2001-05-31 05:27:20 +04:00
|
|
|
{
|
2002-10-31 02:14:26 +03:00
|
|
|
if (!str)
|
|
|
|
return;
|
|
|
|
|
2002-11-11 17:48:37 +03:00
|
|
|
while (*str) {
|
|
|
|
*str = conv_input[(unsigned char) *str];
|
2001-06-05 05:00:26 +04:00
|
|
|
str++;
|
2002-11-11 17:48:37 +03:00
|
|
|
}
|
2001-05-31 05:27:20 +04:00
|
|
|
}
|
2002-10-31 02:14:26 +03:00
|
|
|
#endif /* HAVE_CHARSET */
|