lib/strutil/strutilutf8.c: cosmetics and minor refactoring.

Signed-off-by: Andrew Borodin <aborodin@vmail.ru>
This commit is contained in:
Andrew Borodin 2013-07-12 21:20:19 +04:00
parent 7ee58f73ff
commit 17aef88637

View File

@ -64,9 +64,9 @@ str_utf8_is_valid_char (const char *ch, size_t size)
switch (g_utf8_get_char_validated (ch, size))
{
case (gunichar) (-2):
return -2;
return (-2);
case (gunichar) (-1):
return -1;
return (-1);
default:
return 1;
}
@ -96,8 +96,10 @@ str_utf8_cnext_char_safe (const char **text)
static void
str_utf8_cprev_char_safe (const char **text)
{
const char *result = g_utf8_prev_char (*text);
const char *t = result;
const char *result, *t;
result = g_utf8_prev_char (*text);
t = result;
str_utf8_cnext_char_safe (&t);
if (t == *text)
(*text) = result;
@ -108,15 +110,13 @@ str_utf8_cprev_char_safe (const char **text)
static void
str_utf8_fix_string (char *text)
{
gunichar uni;
while (text[0] != '\0')
{
gunichar uni;
uni = g_utf8_get_char_validated (text, -1);
if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2)))
{
text = g_utf8_next_char (text);
}
else
{
text[0] = '?';
@ -128,42 +128,54 @@ str_utf8_fix_string (char *text)
/**
 * Tell whether the UTF-8 character at the start of a string is whitespace.
 *
 * @param text string whose leading character is decoded with
 *             g_utf8_get_char_validated() (max_len -1)
 *
 * @return the result of g_unichar_isspace() for the decoded value
 */
static int
str_utf8_isspace (const char *text)
{
    gunichar uni;

    uni = g_utf8_get_char_validated (text, -1);
    return g_unichar_isspace (uni);
}
/**
 * Tell whether the UTF-8 character at the start of a string is punctuation.
 *
 * @param text string whose leading character is decoded with
 *             g_utf8_get_char_validated() (max_len -1)
 *
 * @return the result of g_unichar_ispunct() for the decoded value
 */
static int
str_utf8_ispunct (const char *text)
{
    gunichar uni;

    uni = g_utf8_get_char_validated (text, -1);
    return g_unichar_ispunct (uni);
}
/**
 * Tell whether the UTF-8 character at the start of a string is alphanumeric.
 *
 * @param text string whose leading character is decoded with
 *             g_utf8_get_char_validated() (max_len -1)
 *
 * @return the result of g_unichar_isalnum() for the decoded value
 */
static int
str_utf8_isalnum (const char *text)
{
    gunichar uni;

    uni = g_utf8_get_char_validated (text, -1);
    return g_unichar_isalnum (uni);
}
/**
 * Tell whether the UTF-8 character at the start of a string is a digit.
 *
 * @param text string whose leading character is decoded with
 *             g_utf8_get_char_validated() (max_len -1)
 *
 * @return the result of g_unichar_isdigit() for the decoded value
 */
static int
str_utf8_isdigit (const char *text)
{
    gunichar uni;

    uni = g_utf8_get_char_validated (text, -1);
    return g_unichar_isdigit (uni);
}
/**
 * Tell whether the UTF-8 character at the start of a string is printable.
 *
 * @param ch string whose leading character is decoded with
 *           g_utf8_get_char_validated() (max_len -1)
 *
 * @return the result of g_unichar_isprint() for the decoded value
 */
static int
str_utf8_isprint (const char *ch)
{
    gunichar uni;

    uni = g_utf8_get_char_validated (ch, -1);
    return g_unichar_isprint (uni);
}
/**
 * Tell whether the UTF-8 character at the start of a string is a combining
 * mark.
 *
 * @param ch string whose leading character is decoded with
 *           g_utf8_get_char_validated() (max_len -1)
 *
 * @return the result of str_unichar_iscombiningmark() for the decoded value
 */
static gboolean
str_utf8_iscombiningmark (const char *ch)
{
    gunichar uni;

    uni = g_utf8_get_char_validated (ch, -1);
    return str_unichar_iscombiningmark (uni);
}
@ -171,6 +183,7 @@ static int
str_utf8_cnext_noncomb_char (const char **text)
{
int count = 0;
while ((*text)[0] != '\0')
{
str_utf8_cnext_char_safe (text);
@ -178,6 +191,7 @@ str_utf8_cnext_noncomb_char (const char **text)
if (!str_utf8_iscombiningmark (*text))
break;
}
return count;
}
@ -185,6 +199,7 @@ static int
str_utf8_cprev_noncomb_char (const char **text, const char *begin)
{
int count = 0;
while ((*text) != begin)
{
str_utf8_cprev_char_safe (text);
@ -192,6 +207,7 @@ str_utf8_cprev_noncomb_char (const char **text, const char *begin)
if (!str_utf8_iscombiningmark (*text))
break;
}
return count;
}
@ -248,24 +264,16 @@ str_utf8_length (const char *text)
while (!g_utf8_validate (start, -1, &end) && start[0] != '\0')
{
if (start != end)
{
result += g_utf8_strlen (start, end - start);
}
result++;
start = end + 1;
}
if (start == text)
{
result = g_utf8_strlen (text, -1);
}
else
{
if (start[0] != '\0' && start != end)
{
result += g_utf8_strlen (start, end - start);
}
}
else if (start[0] != '\0' && start != end)
result += g_utf8_strlen (start, end - start);
return result;
}
@ -291,16 +299,9 @@ str_utf8_length2 (const char *text, int size)
}
if (start == text)
{
result = g_utf8_strlen (text, size);
}
else
{
if (start[0] != '\0' && start != end && size > 0)
{
result += g_utf8_strlen (start, min (end - start, size));
}
}
else if (start[0] != '\0' && start != end && size > 0)
result += g_utf8_strlen (start, min (end - start, size));
return result;
}
@ -334,7 +335,7 @@ str_utf8_length_noncomb (const char *text)
static gchar *
str_utf8_conv_gerror_message (GError * error, const char *def_msg)
{
if ((error != NULL) && (error->message != NULL))
if (error != NULL)
return g_strdup (error->message);
return g_strdup (def_msg != NULL ? def_msg : "");
@ -343,13 +344,10 @@ str_utf8_conv_gerror_message (GError * error, const char *def_msg)
static estr_t
str_utf8_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
{
estr_t result;
estr_t result = ESTR_SUCCESS;
if (coder == str_cnv_not_convert)
{
g_string_append_len (buffer, string, size);
result = ESTR_SUCCESS;
}
else
result = str_nconvert (coder, (char *) string, size, buffer);
@ -383,15 +381,13 @@ str_utf8_make_make_term_form (const char *text, size_t length)
if (length != 0 && text[0] != '\0')
{
uni = g_utf8_get_char_validated (text, -1);
if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2)))
if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2))
&& str_unichar_iscombiningmark (uni))
{
if (str_unichar_iscombiningmark (uni))
{
actual[0] = ' ';
actual++;
result.width++;
result.compose = TRUE;
}
actual[0] = ' ';
actual++;
result.width++;
result.compose = TRUE;
}
}
@ -429,6 +425,7 @@ str_utf8_make_make_term_form (const char *text, size_t length)
actual += strlen (replch);
result.width++;
}
if (length != (size_t) (-1))
length--;
}
@ -452,9 +449,8 @@ str_utf8_term_form (const char *text)
g_free (composed);
}
else
{
g_strlcpy (result, pre_form->text, sizeof (result));
}
return result;
}
@ -471,13 +467,13 @@ struct utf8_tool
static gboolean
utf8_tool_copy_chars_to_end (struct utf8_tool *tool)
{
size_t left;
gunichar uni;
tool->compose = FALSE;
while (tool->cheked[0] != '\0')
{
gunichar uni;
size_t left;
uni = g_utf8_get_char (tool->cheked);
tool->compose = tool->compose || str_unichar_iscombiningmark (uni);
left = g_unichar_to_utf8 (uni, NULL);
@ -488,6 +484,7 @@ utf8_tool_copy_chars_to_end (struct utf8_tool *tool)
tool->remain -= left;
tool->cheked = g_utf8_next_char (tool->cheked);
}
return TRUE;
}
@ -496,16 +493,18 @@ utf8_tool_copy_chars_to_end (struct utf8_tool *tool)
static gboolean
utf8_tool_copy_chars_to (struct utf8_tool *tool, int to_ident)
{
size_t left;
gunichar uni;
int w;
tool->compose = FALSE;
while (tool->cheked[0] != '\0')
{
gunichar uni;
size_t left;
int w = 0;
uni = g_utf8_get_char (tool->cheked);
if (!str_unichar_iscombiningmark (uni))
if (str_unichar_iscombiningmark (uni))
tool->compose = TRUE;
else
{
w = 1;
if (g_unichar_iswide (uni))
@ -513,11 +512,6 @@ utf8_tool_copy_chars_to (struct utf8_tool *tool, int to_ident)
if (tool->ident + w > to_ident)
return TRUE;
}
else
{
w = 0;
tool->compose = TRUE;
}
left = g_unichar_to_utf8 (uni, NULL);
if (tool->remain <= left)
@ -528,6 +522,7 @@ utf8_tool_copy_chars_to (struct utf8_tool *tool, int to_ident)
tool->cheked = g_utf8_next_char (tool->cheked);
tool->ident += w;
}
return TRUE;
}
@ -539,6 +534,7 @@ utf8_tool_insert_space (struct utf8_tool *tool, int count)
return 1;
if (tool->remain <= (gsize) count)
return 0;
memset (tool->actual, ' ', count);
tool->actual += count;
tool->remain -= count;
@ -551,6 +547,7 @@ utf8_tool_insert_char (struct utf8_tool *tool, char ch)
{
if (tool->remain <= 1)
return 0;
tool->actual[0] = ch;
tool->actual++;
tool->remain--;
@ -575,24 +572,27 @@ utf8_tool_skip_chars_to (struct utf8_tool *tool, int to_ident)
}
tool->cheked = g_utf8_next_char (tool->cheked);
}
uni = g_utf8_get_char (tool->cheked);
while (str_unichar_iscombiningmark (uni))
{
tool->cheked = g_utf8_next_char (tool->cheked);
uni = g_utf8_get_char (tool->cheked);
}
return TRUE;
}
/**
 * Overwrite a buffer with the composed normalization of its own contents.
 *
 * @param buffer NUL-terminated string that is normalized and rewritten in place
 * @param size   destination size handed to g_strlcpy()
 */
static void
utf8_tool_compose (char *buffer, size_t size)
{
    char *composed;

    composed = g_utf8_normalize (buffer, -1, G_NORMALIZE_DEFAULT_COMPOSE);

    /* g_utf8_normalize() yields NULL when the input is not valid UTF-8;
       leave the buffer untouched instead of handing NULL to g_strlcpy(). */
    if (composed == NULL)
        return;

    g_strlcpy (buffer, composed, size);
    g_free (composed);
}
static const char *
str_utf8_fit_to_term (const char *text, int width, align_crt_t just_mode)
{
@ -624,37 +624,34 @@ str_utf8_fit_to_term (const char *text, int width, align_crt_t just_mode)
utf8_tool_copy_chars_to_end (&tool);
utf8_tool_insert_space (&tool, width - pre_form->width - tool.ident);
}
else if (IS_FIT (just_mode))
{
tool.ident = 0;
utf8_tool_copy_chars_to (&tool, width / 2);
utf8_tool_insert_char (&tool, '~');
tool.ident = 0;
utf8_tool_skip_chars_to (&tool, pre_form->width - width + 1);
utf8_tool_copy_chars_to_end (&tool);
utf8_tool_insert_space (&tool, width - (pre_form->width - tool.ident + 1));
}
else
{
if (IS_FIT (just_mode))
tool.ident = 0;
switch (HIDE_FIT (just_mode))
{
tool.ident = 0;
utf8_tool_copy_chars_to (&tool, width / 2);
utf8_tool_insert_char (&tool, '~');
tool.ident = 0;
utf8_tool_skip_chars_to (&tool, pre_form->width - width + 1);
utf8_tool_copy_chars_to_end (&tool);
utf8_tool_insert_space (&tool, width - (pre_form->width - tool.ident + 1));
case J_CENTER:
tool.ident = (width - pre_form->width) / 2;
break;
case J_RIGHT:
tool.ident = width - pre_form->width;
break;
}
else
{
tool.ident = 0;
switch (HIDE_FIT (just_mode))
{
case J_CENTER:
tool.ident = (width - pre_form->width) / 2;
break;
case J_RIGHT:
tool.ident = width - pre_form->width;
break;
}
utf8_tool_skip_chars_to (&tool, 0);
utf8_tool_insert_space (&tool, tool.ident);
utf8_tool_copy_chars_to (&tool, width);
utf8_tool_insert_space (&tool, width - tool.ident);
}
utf8_tool_skip_chars_to (&tool, 0);
utf8_tool_insert_space (&tool, tool.ident);
utf8_tool_copy_chars_to (&tool, width);
utf8_tool_insert_space (&tool, width - tool.ident);
}
tool.actual[0] = '\0';
@ -683,27 +680,22 @@ str_utf8_term_trim (const char *text, int width)
tool.remain = sizeof (result);
tool.compose = FALSE;
if ((gsize) width < pre_form->width)
if ((gsize) width >= pre_form->width)
utf8_tool_copy_chars_to_end (&tool);
else if (width <= 3)
{
if (width <= 3)
{
memset (tool.actual, '.', width);
tool.actual += width;
tool.remain -= width;
}
else
{
memset (tool.actual, '.', 3);
tool.actual += 3;
tool.remain -= 3;
tool.ident = 0;
utf8_tool_skip_chars_to (&tool, pre_form->width - width + 3);
utf8_tool_copy_chars_to_end (&tool);
}
memset (tool.actual, '.', width);
tool.actual += width;
tool.remain -= width;
}
else
{
memset (tool.actual, '.', 3);
tool.actual += 3;
tool.remain -= 3;
tool.ident = 0;
utf8_tool_skip_chars_to (&tool, pre_form->width - width + 3);
utf8_tool_copy_chars_to_end (&tool);
}
@ -731,7 +723,9 @@ str_utf8_term_width1 (const char *text)
/**
 * Width, in terminal cells, of the UTF-8 character at the start of a string.
 *
 * @param text string whose leading character is decoded with
 *             g_utf8_get_char_validated() (max_len -1)
 *
 * @return 0 when str_unichar_iscombiningmark() reports a combining mark,
 *         2 when g_unichar_iswide() reports a wide character, 1 otherwise
 */
static int
str_utf8_term_char_width (const char *text)
{
    gunichar uni;

    uni = g_utf8_get_char_validated (text, -1);

    if (str_unichar_iscombiningmark (uni))
        return 0;

    return g_unichar_iswide (uni) ? 2 : 1;
}
@ -778,7 +772,9 @@ str_utf8_trunc (const char *text, int width)
tool.remain = sizeof (result);
tool.compose = FALSE;
if (pre_form->width > (gsize) width)
if (pre_form->width <= (gsize) width)
utf8_tool_copy_chars_to_end (&tool);
else
{
tool.ident = 0;
utf8_tool_copy_chars_to (&tool, width / 2);
@ -788,10 +784,6 @@ str_utf8_trunc (const char *text, int width)
utf8_tool_skip_chars_to (&tool, pre_form->width - width + 1);
utf8_tool_copy_chars_to_end (&tool);
}
else
{
utf8_tool_copy_chars_to_end (&tool);
}
tool.actual[0] = '\0';
if (tool.compose)
@ -807,8 +799,9 @@ str_utf8_offset_to_pos (const char *text, size_t length)
else
{
int result;
GString *buffer = g_string_new (text);
GString *buffer;
buffer = g_string_new (text);
str_utf8_fix_string (buffer->str);
result = g_utf8_offset_to_pointer (buffer->str, length) - buffer->str;
g_string_free (buffer, TRUE);
@ -819,15 +812,13 @@ str_utf8_offset_to_pos (const char *text, size_t length)
static int
str_utf8_column_to_pos (const char *text, size_t pos)
{
static int result;
gunichar uni;
int width;
width = 0;
result = 0;
int result = 0;
int width = 0;
while (text[0] != '\0')
{
gunichar uni;
uni = g_utf8_get_char_validated (text, 6);
if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2)))
{
@ -851,6 +842,7 @@ str_utf8_column_to_pos (const char *text, size_t pos)
text++;
width++;
}
if ((gsize) width > pos)
return result;
@ -863,30 +855,26 @@ str_utf8_column_to_pos (const char *text, size_t pos)
/**
 * Build the normalized form of a search needle.
 *
 * @param needle   string to search for; may be NULL
 * @param case_sen non-zero for a case-sensitive search
 *
 * @return NULL when needle is NULL; otherwise the result of
 *         g_utf8_normalize (..., G_NORMALIZE_ALL), applied to the needle
 *         itself (case-sensitive) or to its g_utf8_casefold() form
 *         (case-insensitive). The result is released with g_free().
 */
static char *
str_utf8_create_search_needle (const char *needle, int case_sen)
{
    char *fold, *result;

    if (needle == NULL)
        return NULL;

    if (case_sen)
        return g_utf8_normalize (needle, -1, G_NORMALIZE_ALL);

    /* case-insensitive: fold case first, then normalize the folded copy */
    fold = g_utf8_casefold (needle, -1);
    result = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL);
    g_free (fold);

    return result;
}
/**
 * Free a search needle.
 *
 * @param needle   string to free; may be NULL
 * @param case_sen unused
 */
static void
str_utf8_release_search_needle (char *needle, int case_sen)
{
    (void) case_sen;

    /* g_free() ignores NULL, so one unguarded call is enough; freeing the
       same pointer a second time would be a double free. */
    g_free (needle);
}
static const char *
@ -910,7 +898,6 @@ str_utf8_search_first (const char *text, const char *search, int case_sen)
if ((!str_utf8_iscombiningmark (match) || (match == deco_text)) &&
!str_utf8_iscombiningmark (match + strlen (search)))
{
result = text;
m = deco_text;
while (m < match)
@ -920,9 +907,7 @@ str_utf8_search_first (const char *text, const char *search, int case_sen)
}
}
else
{
str_utf8_cnext_char (&match);
}
}
}
while (match != NULL && result == NULL);
@ -954,7 +939,6 @@ str_utf8_search_last (const char *text, const char *search, int case_sen)
if ((!str_utf8_iscombiningmark (match) || (match == deco_text)) &&
!str_utf8_iscombiningmark (match + strlen (search)))
{
result = text;
m = deco_text;
while (m < match)
@ -964,9 +948,7 @@ str_utf8_search_last (const char *text, const char *search, int case_sen)
}
}
else
{
match[0] = '\0';
}
}
}
while (match != NULL && result == NULL);
@ -1091,12 +1073,15 @@ static int
str_utf8_ncompare (const char *t1, const char *t2)
{
char *n1, *n2;
size_t l1, l2;
int result;
n1 = str_utf8_normalize (t1);
n2 = str_utf8_normalize (t2);
result = strncmp (n1, n2, min (strlen (n1), strlen (n2)));
l1 = strlen (n1);
l2 = strlen (n2);
result = strncmp (n1, n2, min (l1, l2));
g_free (n1);
g_free (n2);
@ -1125,12 +1110,15 @@ static int
str_utf8_ncasecmp (const char *t1, const char *t2)
{
char *n1, *n2;
size_t l1, l2;
int result;
n1 = str_utf8_casefold_normalize (t1);
n2 = str_utf8_casefold_normalize (t2);
result = strncmp (n1, n2, min (strlen (n1), strlen (n2)));
l1 = strlen (n1);
l2 = strlen (n2);
result = strncmp (n1, n2, min (l1, l2));
g_free (n1);
g_free (n2);
@ -1141,14 +1129,18 @@ str_utf8_ncasecmp (const char *t1, const char *t2)
static int
str_utf8_prefix (const char *text, const char *prefix)
{
char *t = str_utf8_normalize (text);
char *p = str_utf8_normalize (prefix);
const char *nt = t;
const char *np = p;
const char *nnt = t;
const char *nnp = p;
char *t, *p;
const char *nt, *np;
const char *nnt, *nnp;
int result;
t = str_utf8_normalize (text);
p = str_utf8_normalize (prefix);
nt = t;
np = p;
nnt = t;
nnp = p;
while (nt[0] != '\0' && np[0] != '\0')
{
str_utf8_cnext_char_safe (&nnt);
@ -1172,14 +1164,18 @@ str_utf8_prefix (const char *text, const char *prefix)
static int
str_utf8_caseprefix (const char *text, const char *prefix)
{
char *t = str_utf8_casefold_normalize (text);
char *p = str_utf8_casefold_normalize (prefix);
const char *nt = t;
const char *np = p;
const char *nnt = t;
const char *nnp = p;
char *t, *p;
const char *nt, *np;
const char *nnt, *nnp;
int result;
t = str_utf8_casefold_normalize (text);
p = str_utf8_casefold_normalize (prefix);
nt = t;
np = p;
nnt = t;
nnp = p;
while (nt[0] != '\0' && np[0] != '\0')
{
str_utf8_cnext_char_safe (&nnt);
@ -1207,9 +1203,7 @@ str_utf8_create_key_gen (const char *text, int case_sen,
char *result;
if (case_sen)
{
result = str_utf8_normalize (text);
}
else
{
gboolean dot;