mirror of
https://github.com/MidnightCommander/mc
synced 2024-12-23 21:06:52 +03:00
66da276200
...if file encoding and locale are different. Example: locale is KOI8-R, file encoding is UTF-8. Note: those encodings are not same. File content is following (in Russian): йцукен Йцукен The difference is in first line char only: Й (lowercase letter) and й (uppercase letter). The search of Ê gives the result Й independently of case sensitivity. й isn't found. If switch "All charsets" on, nothing is found. The main idea of fix is modification of search API to allow set of search pattern charset and use if within search engine. Old API: mc_search_new (pattern, pattern_len); New API: mc_search_new (pattern, pattern_len, pattern_charset); Signed-off-by: Andrew Borodin <aborodin@vmail.ru>
486 lines
15 KiB
C
486 lines
15 KiB
C
/*
|
|
Search text engine.
|
|
Interface functions
|
|
|
|
Copyright (C) 2009, 2011, 2013
|
|
The Free Software Foundation, Inc.
|
|
|
|
Written by:
|
|
Slava Zanko <slavazanko@gmail.com>, 2009
|
|
Andrew Borodin <aborodin@vmail.ru>, 2013
|
|
|
|
This file is part of the Midnight Commander.
|
|
|
|
The Midnight Commander is free software: you can redistribute it
|
|
and/or modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation, either version 3 of the License,
|
|
or (at your option) any later version.
|
|
|
|
The Midnight Commander is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <config.h>
|
|
|
|
#include <stdlib.h>
|
|
#include <sys/types.h>
|
|
|
|
#include "lib/global.h"
|
|
#include "lib/strutil.h"
|
|
#include "lib/search.h"
|
|
#ifdef HAVE_CHARSET
|
|
#include "lib/charsets.h"
|
|
#endif
|
|
|
|
#include "internal.h"
|
|
|
|
/*** global variables ****************************************************************************/
|
|
|
|
/*** file scope macro definitions ****************************************************************/
|
|
|
|
/*** file scope type declarations ****************************************************************/
|
|
|
|
/*** file scope variables ************************************************************************/
|
|
|
|
static const mc_search_type_str_t mc_search__list_types[] = {
|
|
{N_("No&rmal"), MC_SEARCH_T_NORMAL},
|
|
{N_("Re&gular expression"), MC_SEARCH_T_REGEX},
|
|
{N_("He&xadecimal"), MC_SEARCH_T_HEX},
|
|
{N_("Wil&dcard search"), MC_SEARCH_T_GLOB},
|
|
{NULL, -1}
|
|
};
|
|
|
|
/*** file scope functions ************************************************************************/
|
|
|
|
static mc_search_cond_t *
|
|
mc_search__cond_struct_new (mc_search_t * lc_mc_search, const char *str,
|
|
gsize str_len, const char *charset)
|
|
{
|
|
mc_search_cond_t *mc_search_cond;
|
|
|
|
mc_search_cond = g_malloc0 (sizeof (mc_search_cond_t));
|
|
mc_search_cond->str = g_string_new_len (str, str_len);
|
|
mc_search_cond->charset = g_strdup (charset);
|
|
|
|
switch (lc_mc_search->search_type)
|
|
{
|
|
case MC_SEARCH_T_GLOB:
|
|
mc_search__cond_struct_new_init_glob (charset, lc_mc_search, mc_search_cond);
|
|
break;
|
|
case MC_SEARCH_T_NORMAL:
|
|
mc_search__cond_struct_new_init_normal (charset, lc_mc_search, mc_search_cond);
|
|
break;
|
|
case MC_SEARCH_T_REGEX:
|
|
mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
|
|
break;
|
|
case MC_SEARCH_T_HEX:
|
|
mc_search__cond_struct_new_init_hex (charset, lc_mc_search, mc_search_cond);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
return mc_search_cond;
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
|
|
static void
|
|
mc_search__cond_struct_free (mc_search_cond_t * mc_search_cond)
|
|
{
|
|
if (mc_search_cond->upper)
|
|
g_string_free (mc_search_cond->upper, TRUE);
|
|
|
|
if (mc_search_cond->lower)
|
|
g_string_free (mc_search_cond->lower, TRUE);
|
|
|
|
g_string_free (mc_search_cond->str, TRUE);
|
|
g_free (mc_search_cond->charset);
|
|
|
|
#ifdef SEARCH_TYPE_GLIB
|
|
if (mc_search_cond->regex_handle)
|
|
g_regex_unref (mc_search_cond->regex_handle);
|
|
#else /* SEARCH_TYPE_GLIB */
|
|
g_free (mc_search_cond->regex_handle);
|
|
#endif /* SEARCH_TYPE_GLIB */
|
|
|
|
g_free (mc_search_cond);
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
|
|
static void
|
|
mc_search__conditions_free (GPtrArray * array)
|
|
{
|
|
gsize loop1;
|
|
|
|
for (loop1 = 0; loop1 < array->len; loop1++)
|
|
{
|
|
mc_search_cond_t *lc_mc_search;
|
|
|
|
lc_mc_search = (mc_search_cond_t *) g_ptr_array_index (array, loop1);
|
|
mc_search__cond_struct_free (lc_mc_search);
|
|
}
|
|
g_ptr_array_free (array, TRUE);
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
/*** public functions ****************************************************************************/
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
/* Init search descriptor.
|
|
*
|
|
* @param original pattern to search
|
|
* @param original_len length of #original or -1 if #original is NULL-terminated
|
|
* @param original_charset charset of #original. If NULL then cp_display will be used
|
|
*
|
|
* @return new mc_search_t object. Use #mc_search_free() to free it.
|
|
*/
|
|
|
|
mc_search_t *
|
|
mc_search_new (const gchar * original, gsize original_len, const gchar * original_charset)
|
|
{
|
|
mc_search_t *lc_mc_search;
|
|
|
|
if (original == NULL)
|
|
return NULL;
|
|
|
|
if ((gssize) original_len == -1)
|
|
{
|
|
original_len = strlen (original);
|
|
if (original_len == 0)
|
|
return NULL;
|
|
}
|
|
|
|
lc_mc_search = g_new0 (mc_search_t, 1);
|
|
lc_mc_search->original = g_strndup (original, original_len);
|
|
lc_mc_search->original_len = original_len;
|
|
#ifdef HAVE_CHARSET
|
|
lc_mc_search->original_charset =
|
|
g_strdup (original_charset != NULL
|
|
&& *original_charset != '\0' ? original_charset : cp_display);
|
|
#else
|
|
(void) original_charset;
|
|
#endif
|
|
|
|
return lc_mc_search;
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
|
|
void
|
|
mc_search_free (mc_search_t * lc_mc_search)
|
|
{
|
|
if (lc_mc_search == NULL)
|
|
return;
|
|
|
|
g_free (lc_mc_search->original);
|
|
#ifdef HAVE_CHARSET
|
|
g_free (lc_mc_search->original_charset);
|
|
#endif
|
|
g_free (lc_mc_search->error_str);
|
|
|
|
if (lc_mc_search->conditions != NULL)
|
|
mc_search__conditions_free (lc_mc_search->conditions);
|
|
|
|
#ifdef SEARCH_TYPE_GLIB
|
|
if (lc_mc_search->regex_match_info != NULL)
|
|
g_match_info_free (lc_mc_search->regex_match_info);
|
|
#else /* SEARCH_TYPE_GLIB */
|
|
g_free (lc_mc_search->regex_match_info);
|
|
#endif /* SEARCH_TYPE_GLIB */
|
|
|
|
if (lc_mc_search->regex_buffer != NULL)
|
|
g_string_free (lc_mc_search->regex_buffer, TRUE);
|
|
|
|
g_free (lc_mc_search);
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
|
|
gboolean
|
|
mc_search_prepare (mc_search_t * lc_mc_search)
|
|
{
|
|
GPtrArray *ret;
|
|
|
|
ret = g_ptr_array_new ();
|
|
#ifdef HAVE_CHARSET
|
|
if (lc_mc_search->is_all_charsets)
|
|
{
|
|
gsize loop1;
|
|
|
|
for (loop1 = 0; loop1 < codepages->len; loop1++)
|
|
{
|
|
const char *id;
|
|
gsize recoded_str_len;
|
|
gchar *buffer;
|
|
|
|
id = ((codepage_desc *) g_ptr_array_index (codepages, loop1))->id;
|
|
if (g_ascii_strcasecmp (id, lc_mc_search->original_charset) == 0)
|
|
{
|
|
g_ptr_array_add (ret,
|
|
mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original,
|
|
lc_mc_search->original_len,
|
|
lc_mc_search->original_charset));
|
|
continue;
|
|
}
|
|
|
|
buffer =
|
|
mc_search__recode_str (lc_mc_search->original, lc_mc_search->original_len,
|
|
lc_mc_search->original_charset, id, &recoded_str_len);
|
|
|
|
g_ptr_array_add (ret,
|
|
mc_search__cond_struct_new (lc_mc_search, buffer,
|
|
recoded_str_len, id));
|
|
g_free (buffer);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
g_ptr_array_add (ret,
|
|
mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original,
|
|
lc_mc_search->original_len,
|
|
lc_mc_search->original_charset));
|
|
}
|
|
#else
|
|
g_ptr_array_add (ret,
|
|
mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original,
|
|
lc_mc_search->original_len,
|
|
str_detect_termencoding ()));
|
|
#endif
|
|
lc_mc_search->conditions = ret;
|
|
|
|
return (lc_mc_search->error == MC_SEARCH_E_OK);
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
|
|
gboolean
|
|
mc_search_run (mc_search_t * lc_mc_search, const void *user_data,
|
|
gsize start_search, gsize end_search, gsize * found_len)
|
|
{
|
|
gboolean ret = FALSE;
|
|
|
|
if (lc_mc_search == NULL || user_data == NULL)
|
|
return FALSE;
|
|
if (!mc_search_is_type_avail (lc_mc_search->search_type))
|
|
{
|
|
lc_mc_search->error = MC_SEARCH_E_INPUT;
|
|
lc_mc_search->error_str = g_strdup (_(STR_E_UNKNOWN_TYPE));
|
|
return FALSE;
|
|
}
|
|
#ifdef SEARCH_TYPE_GLIB
|
|
if (lc_mc_search->regex_match_info != NULL)
|
|
{
|
|
g_match_info_free (lc_mc_search->regex_match_info);
|
|
lc_mc_search->regex_match_info = NULL;
|
|
}
|
|
#endif /* SEARCH_TYPE_GLIB */
|
|
|
|
lc_mc_search->error = MC_SEARCH_E_OK;
|
|
g_free (lc_mc_search->error_str);
|
|
lc_mc_search->error_str = NULL;
|
|
|
|
if ((lc_mc_search->conditions == NULL) && !mc_search_prepare (lc_mc_search))
|
|
return FALSE;
|
|
|
|
switch (lc_mc_search->search_type)
|
|
{
|
|
case MC_SEARCH_T_NORMAL:
|
|
ret = mc_search__run_normal (lc_mc_search, user_data, start_search, end_search, found_len);
|
|
break;
|
|
case MC_SEARCH_T_REGEX:
|
|
ret = mc_search__run_regex (lc_mc_search, user_data, start_search, end_search, found_len);
|
|
break;
|
|
case MC_SEARCH_T_GLOB:
|
|
ret = mc_search__run_glob (lc_mc_search, user_data, start_search, end_search, found_len);
|
|
break;
|
|
case MC_SEARCH_T_HEX:
|
|
ret = mc_search__run_hex (lc_mc_search, user_data, start_search, end_search, found_len);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
|
|
gboolean
|
|
mc_search_is_type_avail (mc_search_type_t search_type)
|
|
{
|
|
switch (search_type)
|
|
{
|
|
case MC_SEARCH_T_GLOB:
|
|
case MC_SEARCH_T_NORMAL:
|
|
case MC_SEARCH_T_REGEX:
|
|
case MC_SEARCH_T_HEX:
|
|
return TRUE;
|
|
default:
|
|
break;
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
|
|
const mc_search_type_str_t *
|
|
mc_search_types_list_get (size_t * num)
|
|
{
|
|
/* don't count last NULL item */
|
|
if (num != NULL)
|
|
*num = G_N_ELEMENTS (mc_search__list_types) - 1;
|
|
|
|
return mc_search__list_types;
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
|
|
GString *
|
|
mc_search_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str)
|
|
{
|
|
GString *ret;
|
|
|
|
if (lc_mc_search == NULL)
|
|
return g_string_new_len (replace_str->str, replace_str->len);
|
|
|
|
if (replace_str == NULL || replace_str->str == NULL || replace_str->len == 0)
|
|
return g_string_new ("");
|
|
|
|
switch (lc_mc_search->search_type)
|
|
{
|
|
case MC_SEARCH_T_REGEX:
|
|
ret = mc_search_regex_prepare_replace_str (lc_mc_search, replace_str);
|
|
break;
|
|
case MC_SEARCH_T_GLOB:
|
|
ret = mc_search_glob_prepare_replace_str (lc_mc_search, replace_str);
|
|
break;
|
|
case MC_SEARCH_T_NORMAL:
|
|
ret = mc_search_normal_prepare_replace_str (lc_mc_search, replace_str);
|
|
break;
|
|
case MC_SEARCH_T_HEX:
|
|
ret = mc_search_hex_prepare_replace_str (lc_mc_search, replace_str);
|
|
break;
|
|
default:
|
|
ret = g_string_new_len (replace_str->str, replace_str->len);
|
|
break;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
|
|
char *
|
|
mc_search_prepare_replace_str2 (mc_search_t * lc_mc_search, char *replace_str)
|
|
{
|
|
GString *ret;
|
|
GString *replace_str2;
|
|
|
|
replace_str2 = g_string_new (replace_str);
|
|
ret = mc_search_prepare_replace_str (lc_mc_search, replace_str2);
|
|
g_string_free (replace_str2, TRUE);
|
|
return (ret != NULL) ? g_string_free (ret, FALSE) : NULL;
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
|
|
gboolean
|
|
mc_search_is_fixed_search_str (mc_search_t * lc_mc_search)
|
|
{
|
|
if (lc_mc_search == NULL)
|
|
return FALSE;
|
|
switch (lc_mc_search->search_type)
|
|
{
|
|
case MC_SEARCH_T_REGEX:
|
|
case MC_SEARCH_T_GLOB:
|
|
return FALSE;
|
|
default:
|
|
return TRUE;
|
|
}
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
/* Search specified pattern in specified string.
|
|
*
|
|
* @param pattern string to search
|
|
* @param pattern_charset charset of #pattern. If NULL then cp_display will be used
|
|
* @param str string where search #pattern
|
|
* @param search type (normal, regex, hex or glob)
|
|
*
|
|
* @return TRUE if found is successful, FALSE otherwise.
|
|
*/
|
|
|
|
gboolean
|
|
mc_search (const gchar * pattern, const gchar * pattern_charset, const gchar * str,
|
|
mc_search_type_t type)
|
|
{
|
|
gboolean ret;
|
|
mc_search_t *search;
|
|
|
|
if (str == NULL)
|
|
return FALSE;
|
|
|
|
search = mc_search_new (pattern, -1, pattern_charset);
|
|
if (search == NULL)
|
|
return FALSE;
|
|
|
|
search->search_type = type;
|
|
search->is_case_sensitive = TRUE;
|
|
|
|
if (type == MC_SEARCH_T_GLOB)
|
|
search->is_entire_line = TRUE;
|
|
|
|
ret = mc_search_run (search, str, 0, strlen (str), NULL);
|
|
mc_search_free (search);
|
|
return ret;
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
|
|
int
|
|
mc_search_getstart_result_by_num (mc_search_t * lc_mc_search, int lc_index)
|
|
{
|
|
if (lc_mc_search == NULL)
|
|
return 0;
|
|
if (lc_mc_search->search_type == MC_SEARCH_T_NORMAL)
|
|
return 0;
|
|
#ifdef SEARCH_TYPE_GLIB
|
|
{
|
|
gint start_pos;
|
|
gint end_pos;
|
|
|
|
g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &start_pos, &end_pos);
|
|
return (int) start_pos;
|
|
}
|
|
#else /* SEARCH_TYPE_GLIB */
|
|
return lc_mc_search->iovector[lc_index * 2];
|
|
#endif /* SEARCH_TYPE_GLIB */
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|
|
|
|
int
|
|
mc_search_getend_result_by_num (mc_search_t * lc_mc_search, int lc_index)
|
|
{
|
|
if (lc_mc_search == NULL)
|
|
return 0;
|
|
if (lc_mc_search->search_type == MC_SEARCH_T_NORMAL)
|
|
return 0;
|
|
#ifdef SEARCH_TYPE_GLIB
|
|
{
|
|
gint start_pos;
|
|
gint end_pos;
|
|
|
|
g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &start_pos, &end_pos);
|
|
return (int) end_pos;
|
|
}
|
|
#else /* SEARCH_TYPE_GLIB */
|
|
return lc_mc_search->iovector[lc_index * 2 + 1];
|
|
#endif /* SEARCH_TYPE_GLIB */
|
|
}
|
|
|
|
/* --------------------------------------------------------------------------------------------- */
|