Merge branch '3694_hex_pattern_parsing'

* 3694_hex_pattern_parsing:
  (mc_search__hex_translate_to_regex): remove explicit handling of 0x prefixes.
  (mc_search__hex_translate_to_regex): rename variables.
  (mc_search__hex_translate_to_regex): avoid string duplication.
  (mc_search__hex_translate_to_regex): clean up handling of 0x prefixes.
  Fix quotes handling.
  Handle invalid characters.
  Fix trailing whitespace problem.
  Report errors to the user.
  Ticket #3694: fix hex pattern parsing.
This commit is contained in:
Andrew Borodin 2016-12-04 09:56:20 +03:00
commit 92b7acf7bf
3 changed files with 288 additions and 37 deletions

View File

@ -39,6 +39,14 @@
/*** file scope macro definitions ****************************************************************/
/* Result codes produced while parsing a hex search pattern. */
typedef enum
{
/* Pattern parsed without error. */
MC_SEARCH_HEX_E_OK,
/* A parsed hex number is greater than 255, i.e. does not fit in one byte. */
MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE,
/* Input at the current position is neither whitespace, a hex number nor a double quote. */
MC_SEARCH_HEX_E_INVALID_CHARACTER,
/* A double-quoted string was opened but its closing quote was not found. */
MC_SEARCH_HEX_E_UNMATCHED_QUOTES
} mc_search_hex_parse_error_t;
/*** file scope type declarations ****************************************************************/
/*** file scope variables ************************************************************************/
@ -46,68 +54,75 @@
/*** file scope functions ************************************************************************/
/**
 * Translate a hex search pattern into a regex string.
 *
 * NOTE(review): this listing appears to be a diff rendering with the +/- markers
 * stripped, so removed and added lines are interleaved (for example both the
 * one-argument and the three-argument signature, and two headers for the main
 * loop). The description below follows the three-argument variant -- confirm
 * against the actual source file before relying on it.
 *
 * Whitespace between tokens is skipped. A hex number (read with sscanf "%x") that
 * is not greater than 255 is appended to the result as "\xNN". Text enclosed in
 * double quotes is appended character by character; inside quotes a backslash
 * makes the following character literal.
 *
 * @param astr pattern to translate
 * @param error_ptr if not NULL, receives the error code when parsing fails
 * @param error_pos_ptr if not NULL, receives the value of the scan index at the
 *                      moment parsing failed
 *
 * @return newly allocated GString with the translated pattern, or NULL on failure
 */
static GString *
mc_search__hex_translate_to_regex (const GString * astr)
mc_search__hex_translate_to_regex (const GString * astr, mc_search_hex_parse_error_t * error_ptr,
int *error_pos_ptr)
{
GString *buff;
gchar *tmp_str, *tmp_str2;
gsize tmp_str_len;
const char *str;
gsize str_len;
gsize loop = 0;
mc_search_hex_parse_error_t error = MC_SEARCH_HEX_E_OK;
buff = g_string_sized_new (64);
tmp_str = g_strndup (astr->str, astr->len);
tmp_str2 = tmp_str;
str = astr->str;
str_len = astr->len;
/* remove 0x prefixes */
while (TRUE)
{
tmp_str2 = strstr (tmp_str2, "0x");
if (tmp_str2 == NULL)
break;
*tmp_str2++ = ' ';
*tmp_str2++ = ' ';
}
g_strchug (tmp_str); /* trim leading whitespace */
tmp_str_len = strlen (tmp_str);
while (loop < tmp_str_len)
while (loop < str_len && error == MC_SEARCH_HEX_E_OK)
{
unsigned int val;
int ptr;
if (g_ascii_isspace (str[loop]))
{
/* Eat-up whitespace between tokens. */
while (g_ascii_isspace (str[loop]))
loop++;
}
/* cppcheck-suppress invalidscanf */
if (sscanf (tmp_str + loop, "%x%n", &val, &ptr))
else if (sscanf (str + loop, "%x%n", &val, &ptr) == 1)
{
if (val > 255)
loop++;
error = MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE;
else
{
g_string_append_printf (buff, "\\x%02X", val);
loop += ptr;
}
}
else if (*(tmp_str + loop) == '"')
else if (str[loop] == '"')
{
gsize loop2 = 0;
gsize loop2;
loop++;
while (loop + loop2 < tmp_str_len)
loop2 = loop + 1;
while (loop2 < str_len)
{
if (*(tmp_str + loop + loop2) == '"' &&
!strutils_is_char_escaped (tmp_str, tmp_str + loop + loop2))
if (str[loop2] == '"')
break;
/* A backslash makes the next character literal: step over the backslash
   so that only the escaped character is appended. */
if (str[loop2] == '\\' && loop2 + 1 < str_len)
loop2++;
g_string_append_c (buff, str[loop2]);
loop2++;
}
g_string_append_len (buff, tmp_str + loop, loop2 - 1);
loop += loop2;
/* NOTE(review): a missing closing quote is detected by finding a NUL at
   str[loop2]; presumably this relies on the GString data being
   NUL-terminated at str[str_len] -- confirm. */
if (str[loop2] == '\0')
error = MC_SEARCH_HEX_E_UNMATCHED_QUOTES;
else
loop = loop2 + 1;
}
else
loop++;
error = MC_SEARCH_HEX_E_INVALID_CHARACTER;
}
g_free (tmp_str);
/* On failure discard the partial output and report the error code and the
   scan index through the optional out-parameters. */
if (error != MC_SEARCH_HEX_E_OK)
{
g_string_free (buff, TRUE);
if (error_ptr != NULL)
*error_ptr = error;
if (error_pos_ptr != NULL)
*error_pos_ptr = loop;
return NULL;
}
return buff;
}
@ -119,13 +134,41 @@ mc_search__cond_struct_new_init_hex (const char *charset, mc_search_t * lc_mc_se
mc_search_cond_t * mc_search_cond)
{
GString *tmp;
mc_search_hex_parse_error_t error = MC_SEARCH_HEX_E_OK;
int error_pos = 0;
g_string_ascii_down (mc_search_cond->str);
tmp = mc_search__hex_translate_to_regex (mc_search_cond->str);
g_string_free (mc_search_cond->str, TRUE);
mc_search_cond->str = tmp;
tmp = mc_search__hex_translate_to_regex (mc_search_cond->str, &error, &error_pos);
if (tmp != NULL)
{
g_string_free (mc_search_cond->str, TRUE);
mc_search_cond->str = tmp;
mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
}
else
{
const char *desc;
mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
switch (error)
{
case MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE:
desc =
_
("Number out of range (should be in byte range, 0 <= n <= 0xFF, expressed in hex)");
break;
case MC_SEARCH_HEX_E_INVALID_CHARACTER:
desc = _("Invalid character");
break;
case MC_SEARCH_HEX_E_UNMATCHED_QUOTES:
desc = _("Unmatched quotes character");
break;
default:
desc = "";
}
lc_mc_search->error = MC_SEARCH_E_INPUT;
lc_mc_search->error_str =
g_strdup_printf (_("Hex pattern error at position %d:\n%s."), error_pos + 1, desc);
}
}
/* --------------------------------------------------------------------------------------------- */

View File

@ -12,6 +12,7 @@ LIBS = @CHECK_LIBS@ $(top_builddir)/lib/libmc.la @PCRE_LIBS@
TESTS = \
glob_prepare_replace_str \
glob_translate_to_regex \
hex_translate_to_regex \
regex_replace_esc_seq \
regex_process_escape_sequence \
translate_replace_glob_to_regex
@ -32,3 +33,6 @@ translate_replace_glob_to_regex_SOURCES = \
glob_translate_to_regex_SOURCES = \
glob_translate_to_regex.c
hex_translate_to_regex_SOURCES = \
hex_translate_to_regex.c

View File

@ -0,0 +1,204 @@
/*
libmc - checks for hex pattern parsing
Copyright (C) 2016
Free Software Foundation, Inc.
This file is part of the Midnight Commander.
The Midnight Commander is free software: you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
The Midnight Commander is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define TEST_SUITE_NAME "lib/search/hex"
#include "tests/mctest.h"
#include "hex.c" /* for testing static functions */
/* --------------------------------------------------------------------------------------------- */
/* @DataSource("test_hex_translate_to_regex_ds") */
/* *INDENT-OFF* */
/* Test vectors: each entry pairs an input hex pattern with the expected translated
   string (NULL for the error cases) and the expected parse error code. */
static const struct test_hex_translate_to_regex_ds
{
/* Hex pattern handed to the parser. */
const char *input_value;
/* Expected translated string; NULL in the entries that expect an error. */
const char *expected_result;
/* Expected error code; MC_SEARCH_HEX_E_OK in the entries that expect a result. */
mc_search_hex_parse_error_t expected_error;
} test_hex_translate_to_regex_ds[] =
{
{
/* Simplest case */
"12 34",
"\\x12\\x34",
MC_SEARCH_HEX_E_OK
},
{
/* Prefixes (0x, 0X) */
"0x12 0X34",
"\\x12\\x34",
MC_SEARCH_HEX_E_OK
},
{
/* Prefix "0" doesn't signify octal! Numbers are always interpreted in hex. */
"012",
"\\x12",
MC_SEARCH_HEX_E_OK
},
{
/* Extra whitespace */
" 12 34 ",
"\\x12\\x34",
MC_SEARCH_HEX_E_OK
},
{
/* Min/max values */
"0 ff",
"\\x00\\xFF",
MC_SEARCH_HEX_E_OK
},
{
/* Error: Number out of range */
"100",
NULL,
MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE
},
{
/* Error: Number out of range (negative) */
"-1",
NULL,
MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE
},
{
/* Error: Invalid characters */
"1 z 2",
NULL,
MC_SEARCH_HEX_E_INVALID_CHARACTER
},
/*
* Quotes.
*/
{
/* Quoted text surrounded by whitespace */
" \"abc\" ",
"abc",
MC_SEARCH_HEX_E_OK
},
{
/* Preserve upper/lower case */
"\"aBc\"",
"aBc",
MC_SEARCH_HEX_E_OK
},
{
/* Quoted text directly adjacent to hex numbers, no separating whitespace */
" 12\"abc\"34 ",
"\\x12abc\\x34",
MC_SEARCH_HEX_E_OK
},
{
/* Two adjacent quoted strings are concatenated */
"\"a\"\"b\"",
"ab",
MC_SEARCH_HEX_E_OK
},
/* Empty quotes */
{
/* Empty quotes alone give an empty result */
"\"\"",
"",
MC_SEARCH_HEX_E_OK
},
{
/* Empty quotes after a number add nothing to the result */
"12 \"\"",
"\\x12",
MC_SEARCH_HEX_E_OK
},
/* Error: Unmatched quotes */
{
/* Opening quote followed by text, no closing quote */
"\"a",
NULL,
MC_SEARCH_HEX_E_UNMATCHED_QUOTES
},
{
/* Lone opening quote */
"\"",
NULL,
MC_SEARCH_HEX_E_UNMATCHED_QUOTES
},
/* Escaped quotes */
{
/* Backslash-escaped quote inside quoted text */
"\"a\\\"b\"",
"a\"b",
MC_SEARCH_HEX_E_OK
},
{
/* Backslash-escaped backslash inside quoted text */
"\"a\\\\b\"",
"a\\b",
MC_SEARCH_HEX_E_OK
},
};
/* *INDENT-ON* */
/* @Test(dataSource = "test_hex_translate_to_regex_ds") */
/* *INDENT-OFF* */
START_PARAMETRIZED_TEST (test_hex_translate_to_regex, test_hex_translate_to_regex_ds)
/* *INDENT-ON* */
{
GString *tmp, *dest_str;
mc_search_hex_parse_error_t error;
/* given */
tmp = g_string_new (data->input_value);
/* when */
dest_str = mc_search__hex_translate_to_regex (tmp, &error, NULL);
g_string_free (tmp, TRUE);
/* then */
if (dest_str != NULL)
{
mctest_assert_str_eq (dest_str->str, data->expected_result);
g_string_free (dest_str, TRUE);
}
else
{
mctest_assert_int_eq (error, data->expected_error);
}
}
/* *INDENT-OFF* */
END_PARAMETRIZED_TEST
/* *INDENT-ON* */
/* --------------------------------------------------------------------------------------------- */
/* Test runner entry point: builds the suite with a single "Core" test case, runs it
   and maps the number of failed tests to the process exit status. */
int
main (void)
{
    Suite *suite;
    TCase *core_case;
    SRunner *runner;
    int failures;

    suite = suite_create (TEST_SUITE_NAME);
    core_case = tcase_create ("Core");

    /* Add new tests here: *************** */
    mctest_add_parameterized_test (core_case, test_hex_translate_to_regex,
                                   test_hex_translate_to_regex_ds);
    /* *********************************** */

    suite_add_tcase (suite, core_case);

    runner = srunner_create (suite);
    srunner_set_log (runner, "hex_translate_to_regex.log");
    srunner_run_all (runner, CK_ENV);
    failures = srunner_ntests_failed (runner);
    srunner_free (runner);

    if (failures != 0)
        return EXIT_FAILURE;

    return EXIT_SUCCESS;
}
/* --------------------------------------------------------------------------------------------- */