Ticket 3069: fix of case-sensitive search

...if file encoding and locale are different.

Example: locale is KOI8-R, file encoding is UTF-8. Note: those encodings
are not the same.

The file content is as follows (in Russian):

йцукен
Йцукен

The two lines differ only in their first character: й (lowercase letter) and Й
(uppercase letter).  Searching for й finds Й regardless of the
case-sensitivity setting; й itself isn't found.
If the "All charsets" option is switched on, nothing is found.

The main idea of the fix is to modify the search API so that the charset of
the search pattern can be set and then used within the search engine.
Old API:
mc_search_new (pattern, pattern_len);
New API:
mc_search_new (pattern, pattern_len, pattern_charset);

Signed-off-by: Andrew Borodin <aborodin@vmail.ru>
This commit is contained in:
Andrew Borodin 2013-09-06 12:59:54 +04:00
parent cc980f3c50
commit 66da276200
18 changed files with 120 additions and 51 deletions

View File

@ -115,7 +115,7 @@ mc_fhl_parse_get_regexp (mc_fhl_t * fhl, const gchar * group_name)
mc_filter = g_new0 (mc_fhl_filter_t, 1);
mc_filter->type = MC_FLHGH_T_FREGEXP;
mc_filter->search_condition = mc_search_new (regexp, -1);
mc_filter->search_condition = mc_search_new (regexp, -1, "ASCII");
mc_filter->search_condition->is_case_sensitive = TRUE;
mc_filter->search_condition->search_type = MC_SEARCH_T_REGEX;
@ -162,7 +162,7 @@ mc_fhl_parse_get_extensions (mc_fhl_t * fhl, const gchar * group_name)
mc_filter = g_new0 (mc_fhl_filter_t, 1);
mc_filter->type = MC_FLHGH_T_FREGEXP;
mc_filter->search_condition = mc_search_new (buf->str, buf->len);
mc_filter->search_condition = mc_search_new (buf->str, buf->len, "ASCII");
mc_filter->search_condition->is_case_sensitive =
mc_config_get_bool (fhl->config, group_name, "extensions_case", TRUE);
mc_filter->search_condition->search_type = MC_SEARCH_T_REGEX;

View File

@ -108,6 +108,9 @@ typedef struct mc_search_struct
/* original search string */
gchar *original;
gsize original_len;
#ifdef HAVE_CHARSET
gchar *original_charset;
#endif
/* error code after search */
mc_search_error_t error;
@ -124,7 +127,8 @@ typedef struct mc_search_type_str_struct
/*** declarations of public functions ************************************************************/
mc_search_t *mc_search_new (const gchar * original, gsize original_len);
mc_search_t *mc_search_new (const gchar * original, gsize original_len,
const gchar * original_charset);
void mc_search_free (mc_search_t * lc_mc_search);
@ -144,7 +148,8 @@ gboolean mc_search_is_fixed_search_str (mc_search_t *);
gchar **mc_search_get_types_strings_array (size_t * num);
gboolean mc_search (const gchar *, const gchar *, mc_search_type_t);
gboolean mc_search (const gchar * pattern, const gchar * pattern_charset, const gchar * str,
mc_search_type_t type);
int mc_search_getstart_result_by_num (mc_search_t *, int);
int mc_search_getend_result_by_num (mc_search_t *, int);

View File

@ -64,7 +64,8 @@ mc_search__recode_str (const char *str, gsize str_len,
gsize bytes_read;
GIConv conv;
if (charset_from == NULL || charset_to == NULL || !strcmp (charset_to, charset_from))
if (charset_from == NULL || charset_to == NULL
|| g_ascii_strcasecmp (charset_to, charset_from) == 0)
{
*bytes_written = str_len;
return g_strndup (str, str_len);

View File

@ -129,27 +129,43 @@ mc_search__conditions_free (GPtrArray * array)
}
/* --------------------------------------------------------------------------------------------- */
/*** public functions ****************************************************************************/
/* --------------------------------------------------------------------------------------------- */
/* Init search descriptor.
*
* @param original pattern to search
* @param original_len length of #original or -1 if #original is NUL-terminated
* @param original_charset charset of #original. If NULL or empty then cp_display will be used
*
* @return new mc_search_t object. Use #mc_search_free() to free it.
*/
mc_search_t *
mc_search_new (const gchar * original, gsize str_len)
mc_search_new (const gchar * original, gsize original_len, const gchar * original_charset)
{
mc_search_t *lc_mc_search;
if (original == NULL)
return NULL;
if ((gssize) str_len == -1)
if ((gssize) original_len == -1)
{
str_len = strlen (original);
if (str_len == 0)
original_len = strlen (original);
if (original_len == 0)
return NULL;
}
lc_mc_search = g_malloc0 (sizeof (mc_search_t));
lc_mc_search->original = g_strndup (original, str_len);
lc_mc_search->original_len = str_len;
lc_mc_search = g_new0 (mc_search_t, 1);
lc_mc_search->original = g_strndup (original, original_len);
lc_mc_search->original_len = original_len;
#ifdef HAVE_CHARSET
lc_mc_search->original_charset =
g_strdup (original_charset != NULL
&& *original_charset != '\0' ? original_charset : cp_display);
#else
(void) original_charset;
#endif
return lc_mc_search;
}
@ -162,6 +178,9 @@ mc_search_free (mc_search_t * lc_mc_search)
return;
g_free (lc_mc_search->original);
#ifdef HAVE_CHARSET
g_free (lc_mc_search->original_charset);
#endif
g_free (lc_mc_search->error_str);
if (lc_mc_search->conditions != NULL)
@ -191,25 +210,27 @@ mc_search_prepare (mc_search_t * lc_mc_search)
#ifdef HAVE_CHARSET
if (lc_mc_search->is_all_charsets)
{
gsize loop1, recoded_str_len;
gchar *buffer;
gsize loop1;
for (loop1 = 0; loop1 < codepages->len; loop1++)
{
const char *id = ((codepage_desc *) g_ptr_array_index (codepages, loop1))->id;
const char *id;
gsize recoded_str_len;
gchar *buffer;
if (!g_ascii_strcasecmp (id, cp_display))
id = ((codepage_desc *) g_ptr_array_index (codepages, loop1))->id;
if (g_ascii_strcasecmp (id, lc_mc_search->original_charset) == 0)
{
g_ptr_array_add (ret,
mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original,
lc_mc_search->original_len,
cp_display));
lc_mc_search->original_charset));
continue;
}
buffer =
mc_search__recode_str (lc_mc_search->original, lc_mc_search->original_len,
cp_display, id, &recoded_str_len);
lc_mc_search->original_charset, id, &recoded_str_len);
g_ptr_array_add (ret,
mc_search__cond_struct_new (lc_mc_search, buffer,
@ -220,9 +241,9 @@ mc_search_prepare (mc_search_t * lc_mc_search)
else
{
g_ptr_array_add (ret,
mc_search__cond_struct_new (lc_mc_search,
lc_mc_search->original,
lc_mc_search->original_len, cp_display));
mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original,
lc_mc_search->original_len,
lc_mc_search->original_charset));
}
#else
g_ptr_array_add (ret,
@ -382,9 +403,19 @@ mc_search_is_fixed_search_str (mc_search_t * lc_mc_search)
}
/* --------------------------------------------------------------------------------------------- */
/* Search specified pattern in specified string.
*
* @param pattern string to search
* @param pattern_charset charset of #pattern. If NULL then cp_display will be used
* @param str string in which to search for #pattern
* @param type search type (normal, regex, hex or glob)
*
* @return TRUE if #pattern is found in #str, FALSE otherwise.
*/
gboolean
mc_search (const gchar * pattern, const gchar * str, mc_search_type_t type)
mc_search (const gchar * pattern, const gchar * pattern_charset, const gchar * str,
mc_search_type_t type)
{
gboolean ret;
mc_search_t *search;
@ -392,7 +423,7 @@ mc_search (const gchar * pattern, const gchar * str, mc_search_type_t type)
if (str == NULL)
return FALSE;
search = mc_search_new (pattern, -1);
search = mc_search_new (pattern, -1, pattern_charset);
if (search == NULL)
return FALSE;

View File

@ -248,7 +248,11 @@ dview_search_cmd (WDiff * dview)
return;
mc_search_free (dview->search.handle);
dview->search.handle = mc_search_new (dview->search.last_string, -1);
#ifdef HAVE_CHARSET
dview->search.handle = mc_search_new (dview->search.last_string, -1, cp_source);
#else
dview->search.handle = mc_search_new (dview->search.last_string, -1, NULL);
#endif
if (dview->search.handle == NULL)
return;

View File

@ -1177,7 +1177,11 @@ edit_collect_completions (WEdit * edit, off_t word_start, gsize word_len,
off_t last_byte, start = -1;
char *current_word;
srch = mc_search_new (match_expr, -1);
#ifdef HAVE_CHARSET
srch = mc_search_new (match_expr, -1, cp_source);
#else
srch = mc_search_new (match_expr, -1, NULL);
#endif
if (srch == NULL)
return 0;
@ -2510,9 +2514,13 @@ edit_replace_cmd (WEdit * edit, int again)
input2_str = g_string_new (input2);
if (!edit->search)
if (edit->search == NULL)
{
edit->search = mc_search_new (input1, -1);
#ifdef HAVE_CHARSET
edit->search = mc_search_new (input1, -1, cp_source);
#else
edit->search = mc_search_new (input1, -1, NULL);
#endif
if (edit->search == NULL)
{
edit->search_start = edit->buffer.curs1;
@ -2706,7 +2714,11 @@ edit_search_cmd (WEdit * edit, gboolean again)
g_list_foreach (history, (GFunc) g_free, NULL);
g_list_free (history);
edit->search = mc_search_new (edit->last_search_string, -1);
#ifdef HAVE_CHARSET
edit->search = mc_search_new (edit->last_search_string, -1, cp_source);
#else
edit->search = mc_search_new (edit->last_search_string, -1, NULL);
#endif
if (edit->search == NULL)
{
/* if not... then ask for an expression */

View File

@ -162,7 +162,11 @@ editcmd_dialog_search_show (WEdit * edit)
edit->last_search_string = search_text;
mc_search_free (edit->search);
edit->search = mc_search_new (edit->last_search_string, -1);
#ifdef HAVE_CHARSET
edit->search = mc_search_new (edit->last_search_string, -1, cp_source);
#else
edit->search = mc_search_new (edit->last_search_string, -1, NULL);
#endif
if (edit->search != NULL)
{
edit->search->search_type = edit_search_options.type;

View File

@ -1314,12 +1314,12 @@ edit_read_syntax_file (WEdit * edit, char ***pnames, const char *syntax_file,
/* 3: auto-detect rule set from regular expressions */
int q;
q = mc_search (args[1], editor_file, MC_SEARCH_T_REGEX);
q = mc_search (args[1], "ASCII", editor_file, MC_SEARCH_T_REGEX);
/* does filename match arg 1 ? */
if (!q && args[3])
{
/* does first line match arg 3 ? */
q = mc_search (args[3], first_line, MC_SEARCH_T_REGEX);
q = mc_search (args[3], "ASCII", first_line, MC_SEARCH_T_REGEX);
}
if (q)
{

View File

@ -259,7 +259,7 @@ select_unselect_cmd (const char *title, const char *history_name, gboolean do_se
return;
}
search = mc_search_new (reg_exp, -1);
search = mc_search_new (reg_exp, -1, NULL);
search->search_type = (shell_patterns != 0) ? MC_SEARCH_T_GLOB : MC_SEARCH_T_REGEX;
search->is_entire_line = TRUE;
search->is_case_sensitive = case_sens != 0;

View File

@ -189,7 +189,7 @@ handle_dirent (struct dirent *dp, const char *fltr, struct stat *buf1, int *link
vfs_path_free (vpath);
return (S_ISDIR (buf1->st_mode) || *link_to_dir != 0 || fltr == NULL
|| mc_search (fltr, dp->d_name, MC_SEARCH_T_GLOB));
|| mc_search (fltr, NULL, dp->d_name, MC_SEARCH_T_GLOB));
}
/* --------------------------------------------------------------------------------------------- */

View File

@ -726,7 +726,7 @@ regex_check_type (const vfs_path_t * filename_vpath, const char *ptr, gboolean c
{
mc_search_t *search;
search = mc_search_new (ptr, -1);
search = mc_search_new (ptr, -1, "ASCII");
if (search != NULL)
{
search->search_type = MC_SEARCH_T_REGEX;
@ -917,7 +917,7 @@ regex_command_for (void *target, const vfs_path_t * filename_vpath, const char *
if (case_insense)
p += 2;
search = mc_search_new (p, -1);
search = mc_search_new (p, -1, "ASCII");
if (search != NULL)
{
search->search_type = MC_SEARCH_T_REGEX;
@ -930,7 +930,8 @@ regex_command_for (void *target, const vfs_path_t * filename_vpath, const char *
else if (strncmp (p, "directory/", 10) == 0)
{
if (S_ISDIR (mystat.st_mode)
&& mc_search (p + 10, vfs_path_as_str (filename_vpath), MC_SEARCH_T_REGEX))
&& mc_search (p + 10, "ASCII", vfs_path_as_str (filename_vpath),
MC_SEARCH_T_REGEX))
found = TRUE;
}
else if (strncmp (p, "shell/", 6) == 0)

View File

@ -1280,7 +1280,7 @@ file_mask_dialog (FileOpContext * ctx, FileOperation operation,
return dest_dir;
}
ctx->search_handle = mc_search_new (source_mask, -1);
ctx->search_handle = mc_search_new (source_mask, -1, NULL);
if (ctx->search_handle == NULL)
{

View File

@ -390,7 +390,7 @@ find_check_regexp (const char *r)
mc_search_t *search;
gboolean regexp_ok = FALSE;
search = mc_search_new (r, -1);
search = mc_search_new (r, -1, NULL);
if (search != NULL)
{
@ -1625,7 +1625,7 @@ run_process (void)
{
int ret;
search_content_handle = mc_search_new (content_pattern, -1);
search_content_handle = mc_search_new (content_pattern, -1, NULL);
if (search_content_handle)
{
search_content_handle->search_type =
@ -1634,7 +1634,7 @@ run_process (void)
search_content_handle->whole_words = options.content_whole_words;
search_content_handle->is_all_charsets = options.content_all_charsets;
}
search_file_handle = mc_search_new (find_pattern, -1);
search_file_handle = mc_search_new (find_pattern, -1, NULL);
search_file_handle->search_type = options.file_pattern ? MC_SEARCH_T_GLOB : MC_SEARCH_T_REGEX;
search_file_handle->is_case_sensitive = options.file_case_sens;
search_file_handle->is_all_charsets = options.file_all_charsets;

View File

@ -2450,7 +2450,7 @@ do_search (WPanel * panel, int c_code)
reg_exp = g_strdup_printf ("%s*", panel->search_buffer);
esc_str = strutils_escape (reg_exp, -1, ",|\\{}[]", TRUE);
search = mc_search_new (esc_str, -1);
search = mc_search_new (esc_str, -1, NULL);
search->search_type = MC_SEARCH_T_GLOB;
search->is_entire_line = TRUE;
switch (panels_options.qsearch_mode)

View File

@ -247,13 +247,14 @@ test_condition (WEdit * edit_widget, char *p, int *condition)
char *edit_filename;
edit_filename = edit_get_file_name (edit_widget);
*condition = mc_search (arg, edit_filename, search_type) ? 1 : 0;
*condition = mc_search (arg, "ASCII", edit_filename, search_type) ? 1 : 0;
g_free (edit_filename);
}
else
#endif
*condition = panel != NULL &&
mc_search (arg, panel->dir.list[panel->selected].fname, search_type) ? 1 : 0;
mc_search (arg, "ASCII", panel->dir.list[panel->selected].fname,
search_type) ? 1 : 0;
break;
case 'y': /* syntax pattern */
#ifdef USE_INTERNAL_EDIT
@ -263,7 +264,7 @@ test_condition (WEdit * edit_widget, char *p, int *condition)
if (syntax_type != NULL)
{
p = extract_arg (p, arg, sizeof (arg));
*condition = mc_search (arg, syntax_type, MC_SEARCH_T_NORMAL) ? 1 : 0;
*condition = mc_search (arg, "ASCII", syntax_type, MC_SEARCH_T_NORMAL) ? 1 : 0;
}
}
#endif
@ -271,7 +272,8 @@ test_condition (WEdit * edit_widget, char *p, int *condition)
case 'd':
p = extract_arg (p, arg, sizeof (arg));
*condition = panel != NULL
&& mc_search (arg, vfs_path_as_str (panel->cwd_vpath), search_type) ? 1 : 0;
&& mc_search (arg, "ASCII", vfs_path_as_str (panel->cwd_vpath),
search_type) ? 1 : 0;
break;
case 't':
p = extract_arg (p, arg, sizeof (arg));

View File

@ -210,7 +210,7 @@ sftpfs_fill_config_entity_from_config (FILE * ssh_config_handler,
gboolean pattern_block_hit = FALSE;
mc_search_t *host_regexp;
host_regexp = mc_search_new ("^\\s*host\\s+(.*)$", -1);
host_regexp = mc_search_new ("^\\s*host\\s+(.*)$", -1, "ASCII");
host_regexp->search_type = MC_SEARCH_T_REGEX;
host_regexp->is_case_sensitive = FALSE;
@ -254,7 +254,7 @@ sftpfs_fill_config_entity_from_config (FILE * ssh_config_handler,
mc_search_t *pattern_regexp;
pattern_block_hit = FALSE;
pattern_regexp = mc_search_new (host_pattern, -1);
pattern_regexp = mc_search_new (host_pattern, -1, "ASCII");
pattern_regexp->search_type = MC_SEARCH_T_GLOB;
pattern_regexp->is_case_sensitive = FALSE;
pattern_regexp->is_entire_line = TRUE;
@ -397,7 +397,8 @@ sftpfs_init_config_variables_patterns (void)
for (i = 0; config_variables[i].pattern != NULL; i++)
{
config_variables[i].pattern_regexp = mc_search_new (config_variables[i].pattern, -1);
config_variables[i].pattern_regexp =
mc_search_new (config_variables[i].pattern, -1, "ASCII");
config_variables[i].pattern_regexp->search_type = MC_SEARCH_T_REGEX;
config_variables[i].pattern_regexp->is_case_sensitive = FALSE;
config_variables[i].offset = structure_offsets[i];

View File

@ -146,7 +146,11 @@ mcview_continue_search_cmd (mcview_t * view)
g_list_foreach (history, (GFunc) g_free, NULL);
g_list_free (history);
view->search = mc_search_new (view->last_search_string, -1);
#ifdef HAVE_CHARSET
view->search = mc_search_new (view->last_search_string, -1, cp_source);
#else
view->search = mc_search_new (view->last_search_string, -1, NULL);
#endif
view->search_nroff_seq = mcview_nroff_seq_new (view);
if (view->search == NULL)

View File

@ -142,7 +142,11 @@ mcview_dialog_search (mcview_t * view)
mcview_nroff_seq_free (&view->search_nroff_seq);
mc_search_free (view->search);
view->search = mc_search_new (view->last_search_string, -1);
#ifdef HAVE_CHARSET
view->search = mc_search_new (view->last_search_string, -1, cp_source);
#else
view->search = mc_search_new (view->last_search_string, -1, NULL);
#endif
view->search_nroff_seq = mcview_nroff_seq_new (view);
if (view->search != NULL)
{