From 66da27620035f5472338436ff6848564409ff838 Mon Sep 17 00:00:00 2001 From: Andrew Borodin Date: Fri, 6 Sep 2013 12:59:54 +0400 Subject: [PATCH] Ticket 3069: fix of case-sensitive search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ...if file encoding and locale are different. Example: locale is KOI8-R, file encoding is UTF-8. Note: those encodings are not the same. File content is the following (in Russian): йцукен Йцукен The lines differ only in their first character: й (lowercase letter) and Й (uppercase letter). The search of й gives the result Й independently of case sensitivity. й isn't found. If "All charsets" is switched on, nothing is found. The main idea of the fix is to modify the search API so that the charset of the search pattern can be set and used within the search engine. Old API: mc_search_new (pattern, pattern_len); New API: mc_search_new (pattern, pattern_len, pattern_charset); Signed-off-by: Andrew Borodin --- lib/filehighlight/ini-file-read.c | 4 +- lib/search.h | 9 +++- lib/search/lib.c | 3 +- lib/search/search.c | 69 ++++++++++++++++++++++--------- src/diffviewer/search.c | 6 ++- src/editor/editcmd.c | 20 +++++++-- src/editor/editcmd_dialogs.c | 6 ++- src/editor/syntax.c | 4 +- src/filemanager/cmd.c | 2 +- src/filemanager/dir.c | 2 +- src/filemanager/ext.c | 7 ++-- src/filemanager/filegui.c | 2 +- src/filemanager/find.c | 6 +-- src/filemanager/panel.c | 2 +- src/filemanager/usermenu.c | 10 +++-- src/vfs/sftpfs/config_parcer.c | 7 ++-- src/viewer/actions_cmd.c | 6 ++- src/viewer/dialogs.c | 6 ++- 18 files changed, 120 insertions(+), 51 deletions(-) diff --git a/lib/filehighlight/ini-file-read.c b/lib/filehighlight/ini-file-read.c index 44faca42f..78d235089 100644 --- a/lib/filehighlight/ini-file-read.c +++ b/lib/filehighlight/ini-file-read.c @@ -115,7 +115,7 @@ mc_fhl_parse_get_regexp (mc_fhl_t * fhl, const gchar * group_name) mc_filter = g_new0 (mc_fhl_filter_t, 1); mc_filter->type = MC_FLHGH_T_FREGEXP; - 
mc_filter->search_condition = mc_search_new (regexp, -1); + mc_filter->search_condition = mc_search_new (regexp, -1, "ASCII"); mc_filter->search_condition->is_case_sensitive = TRUE; mc_filter->search_condition->search_type = MC_SEARCH_T_REGEX; @@ -162,7 +162,7 @@ mc_fhl_parse_get_extensions (mc_fhl_t * fhl, const gchar * group_name) mc_filter = g_new0 (mc_fhl_filter_t, 1); mc_filter->type = MC_FLHGH_T_FREGEXP; - mc_filter->search_condition = mc_search_new (buf->str, buf->len); + mc_filter->search_condition = mc_search_new (buf->str, buf->len, "ASCII"); mc_filter->search_condition->is_case_sensitive = mc_config_get_bool (fhl->config, group_name, "extensions_case", TRUE); mc_filter->search_condition->search_type = MC_SEARCH_T_REGEX; diff --git a/lib/search.h b/lib/search.h index b1fd8ebc3..9b7300612 100644 --- a/lib/search.h +++ b/lib/search.h @@ -108,6 +108,9 @@ typedef struct mc_search_struct /* original search string */ gchar *original; gsize original_len; +#ifdef HAVE_CHARSET + gchar *original_charset; +#endif /* error code after search */ mc_search_error_t error; @@ -124,7 +127,8 @@ typedef struct mc_search_type_str_struct /*** declarations of public functions ************************************************************/ -mc_search_t *mc_search_new (const gchar * original, gsize original_len); +mc_search_t *mc_search_new (const gchar * original, gsize original_len, + const gchar * original_charset); void mc_search_free (mc_search_t * lc_mc_search); @@ -144,7 +148,8 @@ gboolean mc_search_is_fixed_search_str (mc_search_t *); gchar **mc_search_get_types_strings_array (size_t * num); -gboolean mc_search (const gchar *, const gchar *, mc_search_type_t); +gboolean mc_search (const gchar * pattern, const gchar * pattern_charset, const gchar * str, + mc_search_type_t type); int mc_search_getstart_result_by_num (mc_search_t *, int); int mc_search_getend_result_by_num (mc_search_t *, int); diff --git a/lib/search/lib.c b/lib/search/lib.c index 0ecbff93e..2a1c38098 100644 
--- a/lib/search/lib.c +++ b/lib/search/lib.c @@ -64,7 +64,8 @@ mc_search__recode_str (const char *str, gsize str_len, gsize bytes_read; GIConv conv; - if (charset_from == NULL || charset_to == NULL || !strcmp (charset_to, charset_from)) + if (charset_from == NULL || charset_to == NULL + || g_ascii_strcasecmp (charset_to, charset_from) == 0) { *bytes_written = str_len; return g_strndup (str, str_len); diff --git a/lib/search/search.c b/lib/search/search.c index 1be600f22..f113cd6c1 100644 --- a/lib/search/search.c +++ b/lib/search/search.c @@ -129,27 +129,43 @@ mc_search__conditions_free (GPtrArray * array) } /* --------------------------------------------------------------------------------------------- */ - /*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ +/* Init search descriptor. + * + * @param original pattern to search + * @param original_len length of #original or -1 if #original is NULL-terminated + * @param original_charset charset of #original. If NULL then cp_display will be used + * + * @return new mc_search_t object. Use #mc_search_free() to free it. 
+ */ mc_search_t * -mc_search_new (const gchar * original, gsize str_len) +mc_search_new (const gchar * original, gsize original_len, const gchar * original_charset) { mc_search_t *lc_mc_search; if (original == NULL) return NULL; - if ((gssize) str_len == -1) + if ((gssize) original_len == -1) { - str_len = strlen (original); - if (str_len == 0) + original_len = strlen (original); + if (original_len == 0) return NULL; } - lc_mc_search = g_malloc0 (sizeof (mc_search_t)); - lc_mc_search->original = g_strndup (original, str_len); - lc_mc_search->original_len = str_len; + lc_mc_search = g_new0 (mc_search_t, 1); + lc_mc_search->original = g_strndup (original, original_len); + lc_mc_search->original_len = original_len; +#ifdef HAVE_CHARSET + lc_mc_search->original_charset = + g_strdup (original_charset != NULL + && *original_charset != '\0' ? original_charset : cp_display); +#else + (void) original_charset; +#endif + return lc_mc_search; } @@ -162,6 +178,9 @@ mc_search_free (mc_search_t * lc_mc_search) return; g_free (lc_mc_search->original); +#ifdef HAVE_CHARSET + g_free (lc_mc_search->original_charset); +#endif g_free (lc_mc_search->error_str); if (lc_mc_search->conditions != NULL) @@ -191,25 +210,27 @@ mc_search_prepare (mc_search_t * lc_mc_search) #ifdef HAVE_CHARSET if (lc_mc_search->is_all_charsets) { - gsize loop1, recoded_str_len; - gchar *buffer; + gsize loop1; for (loop1 = 0; loop1 < codepages->len; loop1++) { - const char *id = ((codepage_desc *) g_ptr_array_index (codepages, loop1))->id; + const char *id; + gsize recoded_str_len; + gchar *buffer; - if (!g_ascii_strcasecmp (id, cp_display)) + id = ((codepage_desc *) g_ptr_array_index (codepages, loop1))->id; + if (g_ascii_strcasecmp (id, lc_mc_search->original_charset) == 0) { g_ptr_array_add (ret, mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original, lc_mc_search->original_len, - cp_display)); + lc_mc_search->original_charset)); continue; } buffer = mc_search__recode_str (lc_mc_search->original, 
lc_mc_search->original_len, - cp_display, id, &recoded_str_len); + lc_mc_search->original_charset, id, &recoded_str_len); g_ptr_array_add (ret, mc_search__cond_struct_new (lc_mc_search, buffer, @@ -220,9 +241,9 @@ mc_search_prepare (mc_search_t * lc_mc_search) else { g_ptr_array_add (ret, - mc_search__cond_struct_new (lc_mc_search, - lc_mc_search->original, - lc_mc_search->original_len, cp_display)); + mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original, + lc_mc_search->original_len, + lc_mc_search->original_charset)); } #else g_ptr_array_add (ret, @@ -382,9 +403,19 @@ mc_search_is_fixed_search_str (mc_search_t * lc_mc_search) } /* --------------------------------------------------------------------------------------------- */ +/* Search for the specified pattern in the specified string. + * + * @param pattern string to search for + * @param pattern_charset charset of #pattern. If NULL then cp_display will be used + * @param str string in which to search for #pattern + * @param type search type (normal, regex, hex or glob) + * + * @return TRUE if the pattern is found, FALSE otherwise. 
+ */ gboolean -mc_search (const gchar * pattern, const gchar * str, mc_search_type_t type) +mc_search (const gchar * pattern, const gchar * pattern_charset, const gchar * str, + mc_search_type_t type) { gboolean ret; mc_search_t *search; @@ -392,7 +423,7 @@ mc_search (const gchar * pattern, const gchar * str, mc_search_type_t type) if (str == NULL) return FALSE; - search = mc_search_new (pattern, -1); + search = mc_search_new (pattern, -1, pattern_charset); if (search == NULL) return FALSE; diff --git a/src/diffviewer/search.c b/src/diffviewer/search.c index 19176af69..437753fef 100644 --- a/src/diffviewer/search.c +++ b/src/diffviewer/search.c @@ -248,7 +248,11 @@ dview_search_cmd (WDiff * dview) return; mc_search_free (dview->search.handle); - dview->search.handle = mc_search_new (dview->search.last_string, -1); +#ifdef HAVE_CHARSET + dview->search.handle = mc_search_new (dview->search.last_string, -1, cp_source); +#else + dview->search.handle = mc_search_new (dview->search.last_string, -1, NULL); +#endif if (dview->search.handle == NULL) return; diff --git a/src/editor/editcmd.c b/src/editor/editcmd.c index 4347fd721..4457976e1 100644 --- a/src/editor/editcmd.c +++ b/src/editor/editcmd.c @@ -1177,7 +1177,11 @@ edit_collect_completions (WEdit * edit, off_t word_start, gsize word_len, off_t last_byte, start = -1; char *current_word; - srch = mc_search_new (match_expr, -1); +#ifdef HAVE_CHARSET + srch = mc_search_new (match_expr, -1, cp_source); +#else + srch = mc_search_new (match_expr, -1, NULL); +#endif if (srch == NULL) return 0; @@ -2510,9 +2514,13 @@ edit_replace_cmd (WEdit * edit, int again) input2_str = g_string_new (input2); - if (!edit->search) + if (edit->search == NULL) { - edit->search = mc_search_new (input1, -1); +#ifdef HAVE_CHARSET + edit->search = mc_search_new (input1, -1, cp_source); +#else + edit->search = mc_search_new (input1, -1, NULL); +#endif if (edit->search == NULL) { edit->search_start = edit->buffer.curs1; @@ -2706,7 +2714,11 @@ 
edit_search_cmd (WEdit * edit, gboolean again) g_list_foreach (history, (GFunc) g_free, NULL); g_list_free (history); - edit->search = mc_search_new (edit->last_search_string, -1); +#ifdef HAVE_CHARSET + edit->search = mc_search_new (edit->last_search_string, -1, cp_source); +#else + edit->search = mc_search_new (edit->last_search_string, -1, NULL); +#endif if (edit->search == NULL) { /* if not... then ask for an expression */ diff --git a/src/editor/editcmd_dialogs.c b/src/editor/editcmd_dialogs.c index 865ef3d7f..3939e5b7f 100644 --- a/src/editor/editcmd_dialogs.c +++ b/src/editor/editcmd_dialogs.c @@ -162,7 +162,11 @@ editcmd_dialog_search_show (WEdit * edit) edit->last_search_string = search_text; mc_search_free (edit->search); - edit->search = mc_search_new (edit->last_search_string, -1); +#ifdef HAVE_CHARSET + edit->search = mc_search_new (edit->last_search_string, -1, cp_source); +#else + edit->search = mc_search_new (edit->last_search_string, -1, NULL); +#endif if (edit->search != NULL) { edit->search->search_type = edit_search_options.type; diff --git a/src/editor/syntax.c b/src/editor/syntax.c index b3db58563..e23095718 100644 --- a/src/editor/syntax.c +++ b/src/editor/syntax.c @@ -1314,12 +1314,12 @@ edit_read_syntax_file (WEdit * edit, char ***pnames, const char *syntax_file, /* 3: auto-detect rule set from regular expressions */ int q; - q = mc_search (args[1], editor_file, MC_SEARCH_T_REGEX); + q = mc_search (args[1], "ASCII", editor_file, MC_SEARCH_T_REGEX); /* does filename match arg 1 ? */ if (!q && args[3]) { /* does first line match arg 3 ? 
*/ - q = mc_search (args[3], first_line, MC_SEARCH_T_REGEX); + q = mc_search (args[3], "ASCII", first_line, MC_SEARCH_T_REGEX); } if (q) { diff --git a/src/filemanager/cmd.c b/src/filemanager/cmd.c index cba3c7508..8ac73c37c 100644 --- a/src/filemanager/cmd.c +++ b/src/filemanager/cmd.c @@ -259,7 +259,7 @@ select_unselect_cmd (const char *title, const char *history_name, gboolean do_se return; } - search = mc_search_new (reg_exp, -1); + search = mc_search_new (reg_exp, -1, NULL); search->search_type = (shell_patterns != 0) ? MC_SEARCH_T_GLOB : MC_SEARCH_T_REGEX; search->is_entire_line = TRUE; search->is_case_sensitive = case_sens != 0; diff --git a/src/filemanager/dir.c b/src/filemanager/dir.c index 79a99198b..05d06a448 100644 --- a/src/filemanager/dir.c +++ b/src/filemanager/dir.c @@ -189,7 +189,7 @@ handle_dirent (struct dirent *dp, const char *fltr, struct stat *buf1, int *link vfs_path_free (vpath); return (S_ISDIR (buf1->st_mode) || *link_to_dir != 0 || fltr == NULL - || mc_search (fltr, dp->d_name, MC_SEARCH_T_GLOB)); + || mc_search (fltr, NULL, dp->d_name, MC_SEARCH_T_GLOB)); } /* --------------------------------------------------------------------------------------------- */ diff --git a/src/filemanager/ext.c b/src/filemanager/ext.c index 15ce01d6d..428a628d0 100644 --- a/src/filemanager/ext.c +++ b/src/filemanager/ext.c @@ -726,7 +726,7 @@ regex_check_type (const vfs_path_t * filename_vpath, const char *ptr, gboolean c { mc_search_t *search; - search = mc_search_new (ptr, -1); + search = mc_search_new (ptr, -1, "ASCII"); if (search != NULL) { search->search_type = MC_SEARCH_T_REGEX; @@ -917,7 +917,7 @@ regex_command_for (void *target, const vfs_path_t * filename_vpath, const char * if (case_insense) p += 2; - search = mc_search_new (p, -1); + search = mc_search_new (p, -1, "ASCII"); if (search != NULL) { search->search_type = MC_SEARCH_T_REGEX; @@ -930,7 +930,8 @@ regex_command_for (void *target, const vfs_path_t * filename_vpath, const char * else if 
(strncmp (p, "directory/", 10) == 0) { if (S_ISDIR (mystat.st_mode) - && mc_search (p + 10, vfs_path_as_str (filename_vpath), MC_SEARCH_T_REGEX)) + && mc_search (p + 10, "ASCII", vfs_path_as_str (filename_vpath), + MC_SEARCH_T_REGEX)) found = TRUE; } else if (strncmp (p, "shell/", 6) == 0) diff --git a/src/filemanager/filegui.c b/src/filemanager/filegui.c index fcb602ec9..384f7490e 100644 --- a/src/filemanager/filegui.c +++ b/src/filemanager/filegui.c @@ -1280,7 +1280,7 @@ file_mask_dialog (FileOpContext * ctx, FileOperation operation, return dest_dir; } - ctx->search_handle = mc_search_new (source_mask, -1); + ctx->search_handle = mc_search_new (source_mask, -1, NULL); if (ctx->search_handle == NULL) { diff --git a/src/filemanager/find.c b/src/filemanager/find.c index f9fce8d5c..1887da83d 100644 --- a/src/filemanager/find.c +++ b/src/filemanager/find.c @@ -390,7 +390,7 @@ find_check_regexp (const char *r) mc_search_t *search; gboolean regexp_ok = FALSE; - search = mc_search_new (r, -1); + search = mc_search_new (r, -1, NULL); if (search != NULL) { @@ -1625,7 +1625,7 @@ run_process (void) { int ret; - search_content_handle = mc_search_new (content_pattern, -1); + search_content_handle = mc_search_new (content_pattern, -1, NULL); if (search_content_handle) { search_content_handle->search_type = @@ -1634,7 +1634,7 @@ run_process (void) search_content_handle->whole_words = options.content_whole_words; search_content_handle->is_all_charsets = options.content_all_charsets; } - search_file_handle = mc_search_new (find_pattern, -1); + search_file_handle = mc_search_new (find_pattern, -1, NULL); search_file_handle->search_type = options.file_pattern ? 
MC_SEARCH_T_GLOB : MC_SEARCH_T_REGEX; search_file_handle->is_case_sensitive = options.file_case_sens; search_file_handle->is_all_charsets = options.file_all_charsets; diff --git a/src/filemanager/panel.c b/src/filemanager/panel.c index a26964ca3..703a31a6c 100644 --- a/src/filemanager/panel.c +++ b/src/filemanager/panel.c @@ -2450,7 +2450,7 @@ do_search (WPanel * panel, int c_code) reg_exp = g_strdup_printf ("%s*", panel->search_buffer); esc_str = strutils_escape (reg_exp, -1, ",|\\{}[]", TRUE); - search = mc_search_new (esc_str, -1); + search = mc_search_new (esc_str, -1, NULL); search->search_type = MC_SEARCH_T_GLOB; search->is_entire_line = TRUE; switch (panels_options.qsearch_mode) diff --git a/src/filemanager/usermenu.c b/src/filemanager/usermenu.c index 867d51868..945cfc48f 100644 --- a/src/filemanager/usermenu.c +++ b/src/filemanager/usermenu.c @@ -247,13 +247,14 @@ test_condition (WEdit * edit_widget, char *p, int *condition) char *edit_filename; edit_filename = edit_get_file_name (edit_widget); - *condition = mc_search (arg, edit_filename, search_type) ? 1 : 0; + *condition = mc_search (arg, "ASCII", edit_filename, search_type) ? 1 : 0; g_free (edit_filename); } else #endif *condition = panel != NULL && - mc_search (arg, panel->dir.list[panel->selected].fname, search_type) ? 1 : 0; + mc_search (arg, "ASCII", panel->dir.list[panel->selected].fname, + search_type) ? 1 : 0; break; case 'y': /* syntax pattern */ #ifdef USE_INTERNAL_EDIT @@ -263,7 +264,7 @@ test_condition (WEdit * edit_widget, char *p, int *condition) if (syntax_type != NULL) { p = extract_arg (p, arg, sizeof (arg)); - *condition = mc_search (arg, syntax_type, MC_SEARCH_T_NORMAL) ? 1 : 0; + *condition = mc_search (arg, "ASCII", syntax_type, MC_SEARCH_T_NORMAL) ? 
1 : 0; } } #endif @@ -271,7 +272,8 @@ test_condition (WEdit * edit_widget, char *p, int *condition) case 'd': p = extract_arg (p, arg, sizeof (arg)); *condition = panel != NULL - && mc_search (arg, vfs_path_as_str (panel->cwd_vpath), search_type) ? 1 : 0; + && mc_search (arg, "ASCII", vfs_path_as_str (panel->cwd_vpath), + search_type) ? 1 : 0; break; case 't': p = extract_arg (p, arg, sizeof (arg)); diff --git a/src/vfs/sftpfs/config_parcer.c b/src/vfs/sftpfs/config_parcer.c index 577c00157..16efb373a 100644 --- a/src/vfs/sftpfs/config_parcer.c +++ b/src/vfs/sftpfs/config_parcer.c @@ -210,7 +210,7 @@ sftpfs_fill_config_entity_from_config (FILE * ssh_config_handler, gboolean pattern_block_hit = FALSE; mc_search_t *host_regexp; - host_regexp = mc_search_new ("^\\s*host\\s+(.*)$", -1); + host_regexp = mc_search_new ("^\\s*host\\s+(.*)$", -1, "ASCII"); host_regexp->search_type = MC_SEARCH_T_REGEX; host_regexp->is_case_sensitive = FALSE; @@ -254,7 +254,7 @@ sftpfs_fill_config_entity_from_config (FILE * ssh_config_handler, mc_search_t *pattern_regexp; pattern_block_hit = FALSE; - pattern_regexp = mc_search_new (host_pattern, -1); + pattern_regexp = mc_search_new (host_pattern, -1, "ASCII"); pattern_regexp->search_type = MC_SEARCH_T_GLOB; pattern_regexp->is_case_sensitive = FALSE; pattern_regexp->is_entire_line = TRUE; @@ -397,7 +397,8 @@ sftpfs_init_config_variables_patterns (void) for (i = 0; config_variables[i].pattern != NULL; i++) { - config_variables[i].pattern_regexp = mc_search_new (config_variables[i].pattern, -1); + config_variables[i].pattern_regexp = + mc_search_new (config_variables[i].pattern, -1, "ASCII"); config_variables[i].pattern_regexp->search_type = MC_SEARCH_T_REGEX; config_variables[i].pattern_regexp->is_case_sensitive = FALSE; config_variables[i].offset = structure_offsets[i]; diff --git a/src/viewer/actions_cmd.c b/src/viewer/actions_cmd.c index f638cce79..6289f04ef 100644 --- a/src/viewer/actions_cmd.c +++ b/src/viewer/actions_cmd.c @@ -146,7 
+146,11 @@ mcview_continue_search_cmd (mcview_t * view) g_list_foreach (history, (GFunc) g_free, NULL); g_list_free (history); - view->search = mc_search_new (view->last_search_string, -1); +#ifdef HAVE_CHARSET + view->search = mc_search_new (view->last_search_string, -1, cp_source); +#else + view->search = mc_search_new (view->last_search_string, -1, NULL); +#endif view->search_nroff_seq = mcview_nroff_seq_new (view); if (view->search == NULL) diff --git a/src/viewer/dialogs.c b/src/viewer/dialogs.c index 8fba696d2..ba3a1e3b7 100644 --- a/src/viewer/dialogs.c +++ b/src/viewer/dialogs.c @@ -142,7 +142,11 @@ mcview_dialog_search (mcview_t * view) mcview_nroff_seq_free (&view->search_nroff_seq); mc_search_free (view->search); - view->search = mc_search_new (view->last_search_string, -1); +#ifdef HAVE_CHARSET + view->search = mc_search_new (view->last_search_string, -1, cp_source); +#else + view->search = mc_search_new (view->last_search_string, -1, NULL); +#endif view->search_nroff_seq = mcview_nroff_seq_new (view); if (view->search != NULL) {