mc/lib/search/hex.c
Andrew Borodin 7257f794d2 Update template for .c files.
Add section for forward declarations of local functions. This section is
located before file scope variables because functions can be used in
strucutres (see find.c for example):

/*** forward declarations (file scope functions) *************************************************/

/* button callbacks */
static int start_stop (WButton * button, int action);
static int find_do_view_file (WButton * button, int action);
static int find_do_edit_file (WButton * button, int action);

/*** file scope variables ************************************************************************/

static struct
{
    ...
    bcback_fn callback;
} fbuts[] =
{
    ...
    { B_STOP, NORMAL_BUTTON, N_("S&uspend"), 0, 0, NULL, start_stop },
    ...
    { B_VIEW, NORMAL_BUTTON, N_("&View - F3"), 0, 0, NULL, find_do_view_file },
    { B_VIEW, NORMAL_BUTTON, N_("&Edit - F4"), 0, 0, NULL, find_do_edit_file }
};

Signed-off-by: Andrew Borodin <aborodin@vmail.ru>
2023-03-19 20:34:24 +03:00

236 lines
7.8 KiB
C

/*
Search text engine.
HEX-style pattern matching
Copyright (C) 2009-2023
Free Software Foundation, Inc.
Written by:
Slava Zanko <slavazanko@gmail.com>, 2009.
This file is part of the Midnight Commander.
The Midnight Commander is free software: you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
The Midnight Commander is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <stdio.h>
#include "lib/global.h"
#include "lib/strutil.h"
#include "lib/search.h"
#include "lib/strescape.h"
#include "internal.h"
/*** global variables ****************************************************************************/
/*** file scope macro definitions ****************************************************************/
typedef enum
{
MC_SEARCH_HEX_E_OK,
MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE,
MC_SEARCH_HEX_E_INVALID_CHARACTER,
MC_SEARCH_HEX_E_UNMATCHED_QUOTES
} mc_search_hex_parse_error_t;
/*** file scope type declarations ****************************************************************/
/*** forward declarations (file scope functions) *************************************************/
/*** file scope variables ************************************************************************/
/* --------------------------------------------------------------------------------------------- */
/*** file scope functions ************************************************************************/
/* --------------------------------------------------------------------------------------------- */
static GString *
mc_search__hex_translate_to_regex (const GString * astr, mc_search_hex_parse_error_t * error_ptr,
int *error_pos_ptr)
{
GString *buff;
const char *str;
gsize str_len;
gsize loop = 0;
mc_search_hex_parse_error_t error = MC_SEARCH_HEX_E_OK;
buff = g_string_sized_new (64);
str = astr->str;
str_len = astr->len;
while (loop < str_len && error == MC_SEARCH_HEX_E_OK)
{
unsigned int val;
int ptr;
if (g_ascii_isspace (str[loop]))
{
/* Eat-up whitespace between tokens. */
while (g_ascii_isspace (str[loop]))
loop++;
}
/* cppcheck-suppress invalidscanf */
else if (sscanf (str + loop, "%x%n", &val, &ptr) == 1)
{
if (val > 255)
error = MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE;
else
{
g_string_append_printf (buff, "\\x%02X", val);
loop += ptr;
}
}
else if (str[loop] == '"')
{
gsize loop2;
loop2 = loop + 1;
while (loop2 < str_len)
{
if (str[loop2] == '"')
break;
if (str[loop2] == '\\' && loop2 + 1 < str_len)
loop2++;
g_string_append_c (buff, str[loop2]);
loop2++;
}
if (str[loop2] == '\0')
error = MC_SEARCH_HEX_E_UNMATCHED_QUOTES;
else
loop = loop2 + 1;
}
else
error = MC_SEARCH_HEX_E_INVALID_CHARACTER;
}
if (error != MC_SEARCH_HEX_E_OK)
{
g_string_free (buff, TRUE);
if (error_ptr != NULL)
*error_ptr = error;
if (error_pos_ptr != NULL)
*error_pos_ptr = loop;
return NULL;
}
return buff;
}
/* --------------------------------------------------------------------------------------------- */
/*** public functions ****************************************************************************/
/* --------------------------------------------------------------------------------------------- */
void
mc_search__cond_struct_new_init_hex (const char *charset, mc_search_t * lc_mc_search,
mc_search_cond_t * mc_search_cond)
{
GString *tmp;
mc_search_hex_parse_error_t error = MC_SEARCH_HEX_E_OK;
int error_pos = 0;
/*
* We may be searching in binary data, which is often invalid UTF-8.
*
* We have to create a non UTF-8 regex (that is, G_REGEX_RAW) or else, as
* the data is invalid UTF-8, both GLib's PCRE and our
* mc_search__g_regex_match_full_safe() are going to fail us. The former by
* not finding all bytes, the latter by overwriting the supposedly invalid
* UTF-8 with NULs.
*
* To do this, we specify "ASCII" as the charset.
*
* In fact, we can specify any charset other than "UTF-8": any such charset
* will trigger G_REGEX_RAW (see [1]). The output of [2] will be the same
* for all charsets because it skips the \xXX symbols
* mc_search__hex_translate_to_regex() outputs.
*
* But "ASCII" is the best choice because a hex pattern may contain a
* quoted string: this way we know [2] will ignore any characters outside
* ASCII letters range (these ignored chars will be copied verbatim to the
* output and will match as-is; in other words, in a case-sensitive manner;
* If the user is interested in case-insensitive searches of international
* text, he shouldn't be using hex search in the first place.)
*
* Switching out of UTF-8 has another advantage:
*
* When doing case-insensitive searches, GLib treats \xXX symbols as normal
* letters and therefore matches both "a" and "A" for the hex pattern
* "0x61". When we switch out of UTF-8, we're switching to using [2], which
* doesn't have this issue.
*
* [1] mc_search__cond_struct_new_init_regex
* [2] mc_search__cond_struct_new_regex_ci_str
*/
if (str_isutf8 (charset))
charset = "ASCII";
tmp = mc_search__hex_translate_to_regex (mc_search_cond->str, &error, &error_pos);
if (tmp != NULL)
{
g_string_free (mc_search_cond->str, TRUE);
mc_search_cond->str = tmp;
mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
}
else
{
const char *desc;
switch (error)
{
case MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE:
desc =
_
("Number out of range (should be in byte range, 0 <= n <= 0xFF, expressed in hex)");
break;
case MC_SEARCH_HEX_E_INVALID_CHARACTER:
desc = _("Invalid character");
break;
case MC_SEARCH_HEX_E_UNMATCHED_QUOTES:
desc = _("Unmatched quotes character");
break;
default:
desc = "";
}
lc_mc_search->error = MC_SEARCH_E_INPUT;
lc_mc_search->error_str =
g_strdup_printf (_("Hex pattern error at position %d:\n%s."), error_pos + 1, desc);
}
}
/* --------------------------------------------------------------------------------------------- */
gboolean
mc_search__run_hex (mc_search_t * lc_mc_search, const void *user_data,
gsize start_search, gsize end_search, gsize * found_len)
{
return mc_search__run_regex (lc_mc_search, user_data, start_search, end_search, found_len);
}
/* --------------------------------------------------------------------------------------------- */
GString *
mc_search_hex_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str)
{
(void) lc_mc_search;
return mc_g_string_dup (replace_str);
}
/* --------------------------------------------------------------------------------------------- */