Merge branch '2396_find_whole_words'

* 2396_find_whole_words:
  Some optimization of loops in translation functions.
  Create strings with small preallocated sizes.
  mc_search__cond_struct_new_regex_ci_str(): get rid of extra string duplication.
  Minor optimization of translation function arguments.
  Avoid extra allocation of a string while preparing for a regexp search.
  Removed mc_search_cond_t->len (used mc_search_cond_t->str->len instead).
  Fixed bit operations in mc_search_regex__process_append_str()
  Code cleanup to avoid compiler warnings
  Ticket #2396 (Find File "Whole words" search bug)
This commit is contained in:
Slava Zanko 2010-11-01 15:48:55 +02:00
commit b60f00df0d
8 changed files with 128 additions and 120 deletions

View File

@ -48,56 +48,42 @@
/*** file scope functions ************************************************************************/
static GString *
mc_search__glob_translate_to_regex (gchar * str, gsize * len)
mc_search__glob_translate_to_regex (const GString * astr)
{
GString *buff = g_string_new ("");
gsize orig_len = *len;
gsize loop = 0;
const char *str = astr->str;
GString *buff;
gsize loop;
gboolean inside_group = FALSE;
while (loop < orig_len)
{
buff = g_string_sized_new (32);
for (loop = 0; loop < astr->len; loop++)
switch (str[loop])
{
case '*':
if (!strutils_is_char_escaped (str, &(str[loop])))
{
g_string_append (buff, (inside_group) ? ".*" : "(.*)");
loop++;
continue;
}
g_string_append (buff, inside_group ? ".*" : "(.*)");
break;
case '?':
if (!strutils_is_char_escaped (str, &(str[loop])))
{
g_string_append (buff, (inside_group) ? "." : "(.)");
loop++;
continue;
}
g_string_append (buff, inside_group ? "." : "(.)");
break;
case ',':
if (!strutils_is_char_escaped (str, &(str[loop])))
{
g_string_append (buff, "|");
loop++;
continue;
}
g_string_append_c (buff, '|');
break;
case '{':
if (!strutils_is_char_escaped (str, &(str[loop])))
{
g_string_append (buff, "(");
g_string_append_c (buff, '(');
inside_group = TRUE;
loop++;
continue;
}
break;
case '}':
if (!strutils_is_char_escaped (str, &(str[loop])))
{
g_string_append (buff, ")");
g_string_append_c (buff, ')');
inside_group = FALSE;
loop++;
continue;
}
break;
case '+':
@ -107,14 +93,12 @@ mc_search__glob_translate_to_regex (gchar * str, gsize * len)
case ')':
case '^':
g_string_append_c (buff, '\\');
/* fall through */
default:
g_string_append_c (buff, str[loop]);
loop++;
continue;
break;
}
g_string_append_c (buff, str[loop]);
loop++;
}
*len = buff->len;
return buff;
}
@ -123,9 +107,12 @@ mc_search__glob_translate_to_regex (gchar * str, gsize * len)
static GString *
mc_search__translate_replace_glob_to_regex (gchar * str)
{
GString *buff = g_string_sized_new (32);
GString *buff;
int cnt = '0';
gboolean escaped_mode = FALSE;
buff = g_string_sized_new (32);
while (*str)
{
char c = *str++;
@ -162,9 +149,9 @@ void
mc_search__cond_struct_new_init_glob (const char *charset, mc_search_t * lc_mc_search,
mc_search_cond_t * mc_search_cond)
{
GString *tmp =
mc_search__glob_translate_to_regex (mc_search_cond->str->str, &mc_search_cond->len);
GString *tmp;
tmp = mc_search__glob_translate_to_regex (mc_search_cond->str);
g_string_free (mc_search_cond->str, TRUE);
if (lc_mc_search->is_entire_line)
@ -175,7 +162,6 @@ mc_search__cond_struct_new_init_glob (const char *charset, mc_search_t * lc_mc_s
mc_search_cond->str = tmp;
mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
}
/* --------------------------------------------------------------------------------------------- */

View File

@ -49,48 +49,57 @@
/*** file scope functions ************************************************************************/
static GString *
mc_search__hex_translate_to_regex (gchar * str, gsize * len)
mc_search__hex_translate_to_regex (const GString * astr)
{
GString *buff = g_string_new ("");
gchar *tmp_str = g_strndup (str, *len);
gchar *tmp_str2;
const char *str = astr->str;
GString *buff;
gchar *tmp_str;
gsize loop = 0;
int val, ptr;
buff = g_string_sized_new (64);
tmp_str = g_strndup (str, astr->len);
g_strchug (tmp_str); /* trim leading whitespace */
while (loop < *len) {
if (sscanf (tmp_str + loop, "%i%n", &val, &ptr)) {
if (val < -128 || val > 255) {
while (loop < astr->len)
{
if (sscanf (tmp_str + loop, "%i%n", &val, &ptr))
{
gchar *tmp_str2;
if (val < -128 || val > 255)
{
loop++;
continue;
}
tmp_str2 = g_strdup_printf ("\\x%02X", (unsigned char) val);
g_string_append (buff, tmp_str2);
g_free (tmp_str2);
loop += ptr;
continue;
}
if (*(tmp_str + loop) == '"') {
else if (*(tmp_str + loop) == '"')
{
gsize loop2 = 0;
loop++;
while (loop + loop2 < *len) {
while (loop + loop2 < astr->len)
{
if (*(tmp_str + loop + loop2) == '"' &&
!strutils_is_char_escaped (tmp_str, tmp_str + loop + loop2))
break;
loop2++;
}
g_string_append_len (buff, tmp_str + loop, loop2 - 1);
loop += loop2;
continue;
}
loop++;
else
loop++;
}
g_free (tmp_str);
*len = buff->len;
return buff;
}
@ -100,14 +109,13 @@ void
mc_search__cond_struct_new_init_hex (const char *charset, mc_search_t * lc_mc_search,
mc_search_cond_t * mc_search_cond)
{
GString *tmp =
mc_search__hex_translate_to_regex (mc_search_cond->str->str, &mc_search_cond->len);
GString *tmp;
tmp = mc_search__hex_translate_to_regex (mc_search_cond->str);
g_string_free (mc_search_cond->str, TRUE);
mc_search_cond->str = tmp;
mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
}
/* --------------------------------------------------------------------------------------------- */

View File

@ -27,7 +27,6 @@ typedef struct mc_search_cond_struct {
GString *upper;
GString *lower;
mc_search_regex_t *regex_handle;
gsize len;
gchar *charset;
} mc_search_cond_t;

View File

@ -46,14 +46,17 @@
/*** file scope functions ************************************************************************/
static GString *
mc_search__normal_translate_to_regex (gchar * str, gsize * len)
mc_search__normal_translate_to_regex (const GString * astr)
{
GString *buff = g_string_new ("");
gsize orig_len = *len;
gsize loop = 0;
const char *str = astr->str;
GString *buff;
gsize loop;
while (loop < orig_len) {
switch (str[loop]) {
buff = g_string_sized_new (32);
for (loop = 0; loop < astr->len; loop++)
switch (str[loop])
{
case '*':
case '?':
case ',':
@ -71,14 +74,12 @@ mc_search__normal_translate_to_regex (gchar * str, gsize * len)
case '-':
case '|':
g_string_append_c (buff, '\\');
/* fall through */
default:
g_string_append_c (buff, str[loop]);
loop++;
continue;
break;
}
g_string_append_c (buff, str[loop]);
loop++;
}
*len = buff->len;
return buff;
}
@ -88,18 +89,21 @@ void
mc_search__cond_struct_new_init_normal (const char *charset, mc_search_t * lc_mc_search,
mc_search_cond_t * mc_search_cond)
{
GString *tmp =
mc_search__normal_translate_to_regex (mc_search_cond->str->str, &mc_search_cond->len);
GString *tmp;
tmp = mc_search__normal_translate_to_regex (mc_search_cond->str);
g_string_free (mc_search_cond->str, TRUE);
if (lc_mc_search->whole_words) {
g_string_prepend (tmp, "\\b");
g_string_append (tmp, "\\b");
if (lc_mc_search->whole_words)
{
/* NOTE: \b as a word boundary doesn't allow searching for
* whole words that contain non-ASCII symbols */
g_string_prepend (tmp, "(^|[^\\p{L}\\p{N}_])(");
g_string_append (tmp, ")([^\\p{L}\\p{N}_]|$)");
}
mc_search_cond->str = tmp;
mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
}
/* --------------------------------------------------------------------------------------------- */

View File

@ -58,7 +58,7 @@ typedef enum
/*** file scope functions ************************************************************************/
static gboolean
mc_search__regex_str_append_if_special (GString * copy_to, GString * regex_str, gsize * offset)
mc_search__regex_str_append_if_special (GString * copy_to, const GString * regex_str, gsize * offset)
{
char *tmp_regex_str;
gsize spec_chr_len;
@ -194,22 +194,19 @@ mc_search__cond_struct_new_regex_accum_append (const char *charset, GString * st
/* --------------------------------------------------------------------------------------------- */
static GString *
mc_search__cond_struct_new_regex_ci_str (const char *charset, const char *str, gsize str_len)
mc_search__cond_struct_new_regex_ci_str (const char *charset, const GString *astr)
{
GString *accumulator, *spec_char, *ret_str;
gsize loop;
GString *tmp;
tmp = g_string_new_len (str, str_len);
ret_str = g_string_new ("");
accumulator = g_string_new ("");
spec_char = g_string_new ("");
ret_str = g_string_sized_new (64);
accumulator = g_string_sized_new (64);
spec_char = g_string_sized_new (64);
loop = 0;
while (loop <= str_len)
while (loop <= astr->len)
{
if (mc_search__regex_str_append_if_special (spec_char, tmp, &loop))
if (mc_search__regex_str_append_if_special (spec_char, astr, &loop))
{
mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
g_string_append_len (ret_str, spec_char->str, spec_char->len);
@ -217,32 +214,32 @@ mc_search__cond_struct_new_regex_ci_str (const char *charset, const char *str, g
continue;
}
if (tmp->str[loop] == '[' && !strutils_is_char_escaped (tmp->str, &(tmp->str[loop])))
if (astr->str[loop] == '[' && !strutils_is_char_escaped (astr->str, &(astr->str[loop])))
{
mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
while (loop < str_len && !(tmp->str[loop] == ']'
&& !strutils_is_char_escaped (tmp->str, &(tmp->str[loop]))))
while (loop < astr->len && !(astr->str[loop] == ']'
&& !strutils_is_char_escaped (astr->str, &(astr->str[loop]))))
{
g_string_append_c (ret_str, tmp->str[loop]);
g_string_append_c (ret_str, astr->str[loop]);
loop++;
}
g_string_append_c (ret_str, tmp->str[loop]);
g_string_append_c (ret_str, astr->str[loop]);
loop++;
continue;
}
/*
TODO: handle [ and ]
*/
g_string_append_c (accumulator, tmp->str[loop]);
g_string_append_c (accumulator, astr->str[loop]);
loop++;
}
mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
g_string_free (accumulator, TRUE);
g_string_free (spec_char, TRUE);
g_string_free (tmp, TRUE);
return ret_str;
}
@ -478,7 +475,7 @@ mc_search_regex__process_append_str (GString * dest_str, const char *from, gsize
char_len = strlen (tmp_str);
if (*replace_flags & REPLACE_T_UPP_TRANSFORM_CHAR)
{
*replace_flags &= !REPLACE_T_UPP_TRANSFORM_CHAR;
*replace_flags &= ~REPLACE_T_UPP_TRANSFORM_CHAR;
tmp_string = mc_search__toupper_case_str (NULL, tmp_str, char_len);
g_string_append (dest_str, tmp_string->str);
g_string_free (tmp_string, TRUE);
@ -486,7 +483,7 @@ mc_search_regex__process_append_str (GString * dest_str, const char *from, gsize
}
else if (*replace_flags & REPLACE_T_LOW_TRANSFORM_CHAR)
{
*replace_flags &= !REPLACE_T_LOW_TRANSFORM_CHAR;
*replace_flags &= ~REPLACE_T_LOW_TRANSFORM_CHAR;
tmp_string = mc_search__toupper_case_str (NULL, tmp_str, char_len);
g_string_append (dest_str, tmp_string->str);
g_string_free (tmp_string, TRUE);
@ -522,7 +519,6 @@ void
mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_search,
mc_search_cond_t * mc_search_cond)
{
GString *tmp = NULL;
#ifdef SEARCH_TYPE_GLIB
GError *error = NULL;
#else /* SEARCH_TYPE_GLIB */
@ -532,9 +528,10 @@ mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_
if (!lc_mc_search->is_case_sensitive)
{
tmp = g_string_new_len (mc_search_cond->str->str, mc_search_cond->str->len);
g_string_free (mc_search_cond->str, TRUE);
mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp->str, tmp->len);
GString *tmp;
tmp = mc_search_cond->str;
mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp);
g_string_free (tmp, TRUE);
}
#ifdef SEARCH_TYPE_GLIB
@ -587,7 +584,7 @@ mc_search__run_regex (mc_search_t * lc_mc_search, const void *user_data,
if (lc_mc_search->regex_buffer != NULL)
g_string_free (lc_mc_search->regex_buffer, TRUE);
lc_mc_search->regex_buffer = g_string_new ("");
lc_mc_search->regex_buffer = g_string_sized_new (64);
virtual_pos = current_pos = start_search;
while (virtual_pos <= end_search)
@ -622,10 +619,25 @@ mc_search__run_regex (mc_search_t * lc_mc_search, const void *user_data,
{
case COND__FOUND_OK:
#ifdef SEARCH_TYPE_GLIB
g_match_info_fetch_pos (lc_mc_search->regex_match_info, 0, &start_pos, &end_pos);
if (lc_mc_search->whole_words)
{
g_match_info_fetch_pos (lc_mc_search->regex_match_info, 2, &start_pos, &end_pos);
}
else
{
g_match_info_fetch_pos (lc_mc_search->regex_match_info, 0, &start_pos, &end_pos);
}
#else /* SEARCH_TYPE_GLIB */
start_pos = lc_mc_search->iovector[0];
end_pos = lc_mc_search->iovector[1];
if (lc_mc_search->whole_words)
{
start_pos = lc_mc_search->iovector[4];
end_pos = lc_mc_search->iovector[5];
}
else
{
start_pos = lc_mc_search->iovector[0];
end_pos = lc_mc_search->iovector[1];
}
#endif /* SEARCH_TYPE_GLIB */
if (found_len)
*found_len = end_pos - start_pos;
@ -684,8 +696,9 @@ mc_search_regex_prepare_replace_str (mc_search_t * lc_mc_search, GString * repla
return NULL;
}
ret = g_string_new ("");
ret = g_string_sized_new (64);
prev_str = replace_str->str;
for (loop = 0; loop < replace_str->len - 1; loop++)
{
lc_index = mc_search_regex__process_replace_str (replace_str, loop, &len, &replace_flags);

View File

@ -64,7 +64,6 @@ mc_search__cond_struct_new (mc_search_t * lc_mc_search, const char *str,
mc_search_cond = g_malloc0 (sizeof (mc_search_cond_t));
mc_search_cond->str = g_string_new_len (str, str_len);
mc_search_cond->len = str_len;
mc_search_cond->charset = g_strdup (charset);
switch (lc_mc_search->search_type) {
@ -340,12 +339,12 @@ char *
mc_search_prepare_replace_str2 (mc_search_t * lc_mc_search, char *replace_str)
{
GString *ret;
GString *replace_str2 = g_string_new (replace_str);
GString *replace_str2;
replace_str2 = g_string_new (replace_str);
ret = mc_search_prepare_replace_str (lc_mc_search, replace_str2);
g_string_free (replace_str2, TRUE);
if (ret)
return g_string_free (ret, FALSE);
return NULL;
return (ret != NULL) ? g_string_free (ret, FALSE) : NULL;
}
/* --------------------------------------------------------------------------------------------- */

View File

@ -114,9 +114,6 @@ load_codepages_list_from_file (GPtrArray **list, const char *fname)
}
else
{
guint i;
codepage_desc *desc;
/* whether id is already present in list */
/* if yes, overwrite description */
for (i = 0; i < (*list)->len; i++)
@ -154,7 +151,6 @@ load_codepages_list_from_file (GPtrArray **list, const char *fname)
void
load_codepages_list (void)
{
int result = -1;
char *fname;
/* 1: try load /usr/share/mc/mc.charsets */
@ -195,7 +191,7 @@ get_codepage_id (const int n)
int
get_codepage_index (const char *id)
{
int i;
size_t i;
if (strcmp (id, OTHER_8BIT) == 0)
return -1;
if (codepages == NULL)

View File

@ -60,7 +60,8 @@ get_hotkey (int n)
int
select_charset (int center_y, int center_x, int current_charset, gboolean seldisplay)
{
int i;
size_t i;
int listbox_result;
char buffer[255];
/* Create listbox */
@ -88,24 +89,26 @@ select_charset (int center_y, int center_x, int current_charset, gboolean seldis
/* Select the default entry */
i = (seldisplay)
? ((current_charset < 0) ? codepages->len : current_charset)
: (current_charset + 1);
? ((current_charset < 0) ? codepages->len : (size_t) current_charset)
: ((size_t)current_charset + 1);
listbox_select_entry (listbox->list, i);
i = run_listbox (listbox);
listbox_result = run_listbox (listbox);
if (i < 0) {
if (listbox_result < 0) {
/* Cancel dialog */
return SELECT_CHARSET_CANCEL;
} else {
/* some charset has been selected */
if (seldisplay) {
/* charset list ends with the "Other 8 bit" item */
return ((guint) i >= codepages->len) ? SELECT_CHARSET_OTHER_8BIT : i;
return (listbox_result >= (int) codepages->len)
? SELECT_CHARSET_OTHER_8BIT
: listbox_result;
} else {
/* charset list begins with the "- < No translation >" item */
return (i - 1);
return (listbox_result - 1);
}
}
}