Merge branch '2743_search_unicode'

* 2743_search_unicode:
  Ticket #2743: File selection by patterns uses bytes instead of (unicode) characters.
This commit is contained in:
Slava Zanko 2015-02-25 13:00:16 +03:00
commit 482e96e720
3 changed files with 150 additions and 2 deletions

View File

@ -703,6 +703,25 @@ mc_search_regex__process_escape_sequence (GString * dest_str, const char *from,
} }
} }
/* --------------------------------------------------------------------------------------------- */
/**
 * Build the GRegex compile flags used when compiling a search expression.
 *
 * G_REGEX_OPTIMIZE and G_REGEX_DOTALL are always requested. G_REGEX_RAW is added
 * unless the display is in UTF-8 mode (mc_global.utf8_display) and str_isutf8()
 * accepts the given charset.
 *
 * @param charset name of the charset of the text to be searched
 *
 * @return flags to pass to g_regex_new()
 */
static GRegexCompileFlags
mc_search__regex_get_compile_flags (const char *charset)
{
    const GRegexCompileFlags common_flags = G_REGEX_OPTIMIZE | G_REGEX_DOTALL;

    /* the charset is examined only when the display itself is UTF-8 */
    if (mc_global.utf8_display && str_isutf8 (charset))
        return common_flags;

    return common_flags | G_REGEX_RAW;
}
/* --------------------------------------------------------------------------------------------- */ /* --------------------------------------------------------------------------------------------- */
/*** public functions ****************************************************************************/ /*** public functions ****************************************************************************/
/* --------------------------------------------------------------------------------------------- */ /* --------------------------------------------------------------------------------------------- */
@ -723,8 +742,8 @@ mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_
g_string_free (tmp, TRUE); g_string_free (tmp, TRUE);
} }
mc_search_cond->regex_handle = mc_search_cond->regex_handle =
g_regex_new (mc_search_cond->str->str, G_REGEX_OPTIMIZE | G_REGEX_RAW | G_REGEX_DOTALL, g_regex_new (mc_search_cond->str->str, mc_search__regex_get_compile_flags (charset), 0,
0, &mcerror); &mcerror);
if (mcerror != NULL) if (mcerror != NULL)
{ {

View File

@ -10,6 +10,7 @@ LIBS = @CHECK_LIBS@ $(top_builddir)/lib/libmc.la
TESTS = \ TESTS = \
glob_prepare_replace_str \ glob_prepare_replace_str \
glob_translate_to_regex \ glob_translate_to_regex \
regex_get_compile_flags \
regex_replace_esc_seq \ regex_replace_esc_seq \
regex_process_escape_sequence \ regex_process_escape_sequence \
translate_replace_glob_to_regex translate_replace_glob_to_regex
@ -30,3 +31,6 @@ translate_replace_glob_to_regex_SOURCES = \
glob_translate_to_regex_SOURCES = \ glob_translate_to_regex_SOURCES = \
glob_translate_to_regex.c glob_translate_to_regex.c
# Sources of the regex_get_compile_flags test program listed in TESTS.
regex_get_compile_flags_SOURCES = \
	regex_get_compile_flags.c

View File

@ -0,0 +1,125 @@
/*
libmc - checks for producing compile flags
Copyright (C) 2011-2015
Free Software Foundation, Inc.
Written by:
Slava Zanko <slavazanko@gmail.com>, 2015
This file is part of the Midnight Commander.
The Midnight Commander is free software: you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
The Midnight Commander is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define TEST_SUITE_NAME "lib/search/glob"
#include "tests/mctest.h"
#include "regex.c" /* for testing static functions */
/* --------------------------------------------------------------------------------------------- */
/* @DataSource("test_regex_get_compile_flags_ds") */
/* *INDENT-OFF* */
static const struct test_regex_get_compile_flags_ds
{
    /* charset name handed to mc_search__regex_get_compile_flags() */
    const char *charset;
    /* value stored into mc_global.utf8_display before the call */
    const gboolean utf_flag;
    /* compile flags the call is expected to return */
    const GRegexCompileFlags expected_result;
} test_regex_get_compile_flags_ds[] =
{
    { /* 0. */ "utf8",      TRUE,  G_REGEX_OPTIMIZE | G_REGEX_DOTALL },
    { /* 1. */ "utf8",      FALSE, G_REGEX_OPTIMIZE | G_REGEX_DOTALL | G_REGEX_RAW },
    { /* 2. */ "utf-8",     TRUE,  G_REGEX_OPTIMIZE | G_REGEX_DOTALL },
    { /* 3. */ "utf-8",     FALSE, G_REGEX_OPTIMIZE | G_REGEX_DOTALL | G_REGEX_RAW },
    { /* 4. */ "latin1",    TRUE,  G_REGEX_OPTIMIZE | G_REGEX_DOTALL | G_REGEX_RAW },
    { /* 5. */ "latin1",    FALSE, G_REGEX_OPTIMIZE | G_REGEX_DOTALL | G_REGEX_RAW },
    { /* 6. */ "blablabla", TRUE,  G_REGEX_OPTIMIZE | G_REGEX_DOTALL | G_REGEX_RAW },
};
/* *INDENT-ON* */
/*
 * Check that mc_search__regex_get_compile_flags() returns the expected flags for a
 * given combination of charset name and mc_global.utf8_display value.
 * NOTE(review): 'data' is presumably supplied by START_PARAMETRIZED_TEST and points
 * at the current row of test_regex_get_compile_flags_ds -- confirm in tests/mctest.h.
 */
/* @Test(dataSource = "test_regex_get_compile_flags_ds") */
/* *INDENT-OFF* */
START_PARAMETRIZED_TEST (test_regex_get_compile_flags, test_regex_get_compile_flags_ds)
/* *INDENT-ON* */
{
GRegexCompileFlags actual_result;
/* given: the global UTF-8 display mode is taken from the data row */
mc_global.utf8_display = data->utf_flag;
/* when: flags are requested for the row's charset */
actual_result = mc_search__regex_get_compile_flags (data->charset);
/* then: the returned flags must equal the row's expected flags */
mctest_assert_int_eq (actual_result, data->expected_result);
}
/* *INDENT-OFF* */
END_PARAMETRIZED_TEST
/* *INDENT-ON* */
/* --------------------------------------------------------------------------------------------- */
/* Test runner entry point: returns 0 when every test passed, 1 otherwise. */
int
main (void)
{
    Suite *suite;
    TCase *core_case;
    SRunner *runner;
    int failures;

    suite = suite_create (TEST_SUITE_NAME);
    core_case = tcase_create ("Core");

    /* Add new tests here: *************** */
    mctest_add_parameterized_test (core_case, test_regex_get_compile_flags,
                                   test_regex_get_compile_flags_ds);
    /* *********************************** */

    suite_add_tcase (suite, core_case);

    runner = srunner_create (suite);
    srunner_run_all (runner, CK_NORMAL);
    failures = srunner_ntests_failed (runner);
    srunner_free (runner);

    /* collapse the failure count into a process exit status */
    return (failures != 0) ? 1 : 0;
}
/* --------------------------------------------------------------------------------------------- */