mirror of
https://github.com/MidnightCommander/mc
synced 2025-01-22 03:02:06 +03:00
src/search.[ch]: add experimental search engine. Needs review and rework.
* added multi-charset search feature * added templates for HEX and GLOB searches (for the viewer and for the file search dialog) * implemented normal search with and without case sensitivity. WARNING! UNSTABLE! The code has not been checked to work; it has only been written and compiled.
This commit is contained in:
parent
b0a0a8f17b
commit
ab729f822a
@ -60,7 +60,8 @@ SRCS = achown.c achown.h background.c background.h boxes.c boxes.h \
|
||||
user.h util.c util.h utilunix.c view.c view.h vfsdummy.h widget.c \
|
||||
widget.h win.c win.h wtools.c wtools.h unixcompat.h \
|
||||
x11conn.h x11conn.c ecs.h ecs.c \
|
||||
strutil.h strutil.c strutilascii.c strutil8bit.c strutilutf8.c
|
||||
strutil.h strutil.c strutilascii.c strutil8bit.c strutilutf8.c \
|
||||
search.c search.h
|
||||
|
||||
if CHARSET
|
||||
mc_SOURCES = $(SRCS) $(CHARSET_SRC)
|
||||
|
428
src/search.c
Normal file
428
src/search.c
Normal file
@ -0,0 +1,428 @@
|
||||
/*
|
||||
Search text engine.
|
||||
|
||||
Copyright (C) 2009 The Free Software Foundation, Inc.
|
||||
|
||||
Written by:
|
||||
Slava Zanko <slavazanko@gmail.com>, 2009.
|
||||
|
||||
This file is part of the Midnight Commander.
|
||||
|
||||
The Midnight Commander is free software; you can redistribute it
|
||||
and/or modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The Midnight Commander is distributed in the hope that it will be
|
||||
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
|
||||
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <config.h>
|
||||
|
||||
|
||||
#include "../src/global.h"
|
||||
#include "../src/search.h"
|
||||
#include "../src/strutil.h"
|
||||
#include "../src/charsets.h"
|
||||
|
||||
/*** global variables ****************************************************************************/
|
||||
|
||||
/*** file scope macro definitions ****************************************************************/
|
||||
|
||||
#define STR_E_NOTFOUND " Search string not found "
|
||||
#define STR_E_UNKNOWN_TYPE " Unknown search type "
|
||||
|
||||
/*** file scope type declarations ****************************************************************/
|
||||
|
||||
typedef struct mc_search_cond_struct {
|
||||
GString *str;
|
||||
GString *upper;
|
||||
GString *lower;
|
||||
gsize len;
|
||||
gchar *charset;
|
||||
} mc_search_cond_t;
|
||||
|
||||
/* Outcome of testing one input character against the prepared conditions. */
typedef enum {
    COND__NOT_ALL_FOUND,        /* no condition accepts the character at this offset */
    COND__FOUND_CHAR,           /* character accepted; more pattern characters remain */
    COND__FOUND_CHAR_LAST       /* character accepted and it completes a pattern */
} mc_search__found_cond_t;
|
||||
|
||||
/*** file scope variables ************************************************************************/
|
||||
|
||||
extern const char *cp_source;
|
||||
extern const char *cp_display;
|
||||
|
||||
/*** file scope functions ************************************************************************/
|
||||
|
||||
static gchar *
|
||||
mc_search__recode_str (const char *str, guint str_len, const char *charset_from,
|
||||
const char *charset_to, guint * bytes_written)
|
||||
{
|
||||
gchar *ret;
|
||||
gsize bytes_read;
|
||||
GIConv conv;
|
||||
conv = g_iconv_open (charset_to, charset_from);
|
||||
if (conv == (GIConv) - 1)
|
||||
return NULL;
|
||||
|
||||
ret = g_convert_with_iconv (str, str_len, conv, &bytes_read, bytes_written, NULL);
|
||||
g_iconv_close (conv);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------------- */
|
||||
static GString *
|
||||
mc_search__tolower_case_str (const char *charset, const char *str, guint str_len)
|
||||
{
|
||||
gchar *converted_str, *tmp_str1, *tmp_str2, *tmp_str3;
|
||||
guint converted_str_len;
|
||||
guint tmp_len;
|
||||
|
||||
tmp_str2 = converted_str =
|
||||
mc_search__recode_str (str, str_len, charset, cp_display, &converted_str_len);
|
||||
if (converted_str == NULL)
|
||||
return NULL;
|
||||
|
||||
tmp_len = converted_str_len;
|
||||
|
||||
tmp_str3 = tmp_str1 = g_strdup (converted_str);
|
||||
|
||||
while (str_tolower (tmp_str1, &tmp_str2, &tmp_len))
|
||||
tmp_str1 += str_length_char (tmp_str1);
|
||||
|
||||
g_free (tmp_str3);
|
||||
|
||||
tmp_str2 =
|
||||
mc_search__recode_str (converted_str, converted_str_len, cp_display, charset, &tmp_len);
|
||||
g_free (converted_str);
|
||||
if (tmp_str2 == NULL)
|
||||
return NULL;
|
||||
|
||||
return g_string_new_len (tmp_str2, tmp_len);
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------------- */
|
||||
|
||||
static GString *
|
||||
mc_search__toupper_case_str (const char *charset, const char *str, guint str_len)
|
||||
{
|
||||
gchar *converted_str, *tmp_str1, *tmp_str2, *tmp_str3;
|
||||
guint converted_str_len;
|
||||
guint tmp_len;
|
||||
|
||||
tmp_str2 = converted_str =
|
||||
mc_search__recode_str (str, str_len, charset, cp_display, &converted_str_len);
|
||||
if (converted_str == NULL)
|
||||
return NULL;
|
||||
|
||||
tmp_len = converted_str_len;
|
||||
|
||||
tmp_str3 = tmp_str1 = g_strdup (converted_str);
|
||||
|
||||
while (str_toupper (tmp_str1, &tmp_str2, &tmp_len))
|
||||
tmp_str1 += str_length_char (tmp_str1);
|
||||
|
||||
g_free (tmp_str3);
|
||||
|
||||
tmp_str2 =
|
||||
mc_search__recode_str (converted_str, converted_str_len, cp_display, charset, &tmp_len);
|
||||
g_free (converted_str);
|
||||
if (tmp_str2 == NULL)
|
||||
return NULL;
|
||||
|
||||
return g_string_new_len (tmp_str2, tmp_len);
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------------- */
|
||||
static mc_search_cond_t *
|
||||
mc_search__cond_struct_new (const char *str, guint str_len, const char *charset, int case_sentitive)
|
||||
{
|
||||
mc_search_cond_t *mc_search_cond;
|
||||
mc_search_cond = g_malloc0 (sizeof (mc_search_cond_t));
|
||||
mc_search_cond->str = g_string_new (str);
|
||||
mc_search_cond->len = str_len;
|
||||
mc_search_cond->charset = g_strdup (charset);
|
||||
if (case_sentitive) {
|
||||
mc_search_cond->upper = mc_search__toupper_case_str (charset, str, str_len);
|
||||
mc_search_cond->lower = mc_search__tolower_case_str (charset, str, str_len);
|
||||
}
|
||||
return mc_search_cond;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------------- */
|
||||
|
||||
static GPtrArray *
|
||||
mc_search__conditions_new (const char *str, guint str_len, int all_charsets, int case_sentitive)
|
||||
{
|
||||
GPtrArray *ret;
|
||||
|
||||
ret = g_ptr_array_new ();
|
||||
|
||||
if (all_charsets) {
|
||||
guint loop1, recoded_str_len;
|
||||
|
||||
gchar *buffer;
|
||||
|
||||
for (loop1 = 0; loop1 < n_codepages; loop1++) {
|
||||
if (g_ascii_strcasecmp (codepages[loop1].id, cp_source)) {
|
||||
g_ptr_array_add (ret,
|
||||
mc_search__cond_struct_new (str, str_len, cp_source,
|
||||
case_sentitive));
|
||||
continue;
|
||||
}
|
||||
|
||||
buffer =
|
||||
mc_search__recode_str (str, str_len, cp_source, codepages[loop1].id,
|
||||
&recoded_str_len);
|
||||
if (buffer == NULL)
|
||||
continue;
|
||||
|
||||
g_ptr_array_add (ret,
|
||||
mc_search__cond_struct_new (buffer, recoded_str_len,
|
||||
codepages[loop1].id, case_sentitive));
|
||||
g_free (buffer);
|
||||
}
|
||||
} else {
|
||||
g_ptr_array_add (ret,
|
||||
(gpointer) mc_search__cond_struct_new (str, str_len, cp_source,
|
||||
case_sentitive));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------------- */
|
||||
|
||||
static void
|
||||
mc_search__cond_struct_free (mc_search_cond_t * mc_search_cond)
|
||||
{
|
||||
if (mc_search_cond->upper)
|
||||
g_string_free (mc_search_cond->upper, TRUE);
|
||||
|
||||
if (mc_search_cond->lower)
|
||||
g_string_free (mc_search_cond->lower, TRUE);
|
||||
|
||||
g_string_free (mc_search_cond->str, TRUE);
|
||||
g_free (mc_search_cond->charset);
|
||||
g_free (mc_search_cond);
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------------- */
|
||||
|
||||
static void
|
||||
mc_search__conditions_free (GPtrArray * array)
|
||||
{
|
||||
guint loop1;
|
||||
mc_search_cond_t *mc_search;
|
||||
|
||||
for (loop1 = 0; loop1 < array->len; loop1++) {
|
||||
mc_search = (mc_search_cond_t *) g_ptr_array_index (array, loop1);
|
||||
mc_search__cond_struct_free (mc_search);
|
||||
}
|
||||
g_ptr_array_free (array, TRUE);
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------------- */
|
||||
|
||||
static int
|
||||
mc_search__get_char (mc_search_t * mc_search, const void *user_data, gsize current_pos)
|
||||
{
|
||||
char *data;
|
||||
if (mc_search->search_fn)
|
||||
return (mc_search->search_fn) (user_data, current_pos);
|
||||
|
||||
data = (char *) user_data;
|
||||
return (int) data[current_pos];
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------------- */
|
||||
|
||||
static mc_search__found_cond_t
|
||||
mc_search__normal_found_cond (mc_search_t * mc_search, int current_chr, gsize search_pos)
|
||||
{
|
||||
int loop1;
|
||||
mc_search_cond_t *mc_search_cond;
|
||||
|
||||
for (loop1 = 0; loop1 < mc_search->conditions->len; loop1++) {
|
||||
mc_search_cond = (mc_search_cond_t *) g_ptr_array_index (mc_search->conditions, loop1);
|
||||
|
||||
if (search_pos > mc_search_cond->len)
|
||||
continue;
|
||||
|
||||
if (mc_search->is_case_sentitive) {
|
||||
if ((char) current_chr == mc_search_cond->str->str[search_pos])
|
||||
return (search_pos ==
|
||||
mc_search_cond->len) ? COND__FOUND_CHAR_LAST : COND__FOUND_CHAR;
|
||||
} else {
|
||||
GString *upp, *low;
|
||||
upp = (mc_search_cond->upper) ? mc_search_cond->upper : mc_search_cond->str;
|
||||
low = (mc_search_cond->lower) ? mc_search_cond->lower : mc_search_cond->str;
|
||||
|
||||
if (((char) current_chr == upp->str[search_pos])
|
||||
|| ((char) current_chr == low->str[search_pos]))
|
||||
return (search_pos ==
|
||||
mc_search_cond->len) ? COND__FOUND_CHAR_LAST : COND__FOUND_CHAR;
|
||||
}
|
||||
}
|
||||
return COND__NOT_ALL_FOUND;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------------- */
|
||||
|
||||
static gboolean
|
||||
mc_search__run_normal (mc_search_t * mc_search, const void *user_data,
|
||||
gsize start_search, gsize end_search, gsize * founded_len)
|
||||
{
|
||||
gsize current_pos, search_pos;
|
||||
int current_chr = 0;
|
||||
gboolean founded;
|
||||
|
||||
if (mc_search->is_backward) {
|
||||
current_pos = end_search;
|
||||
} else {
|
||||
current_pos = start_search;
|
||||
}
|
||||
while (1) {
|
||||
search_pos = 0;
|
||||
founded = TRUE;
|
||||
|
||||
while (1) {
|
||||
if (current_pos + search_pos > end_search)
|
||||
break;
|
||||
|
||||
current_chr = mc_search__get_char (mc_search, user_data, current_pos + search_pos);
|
||||
if (current_chr == -1)
|
||||
break;
|
||||
|
||||
|
||||
switch (mc_search__normal_found_cond (mc_search, current_chr, search_pos)) {
|
||||
|
||||
case COND__NOT_ALL_FOUND:
|
||||
founded = FALSE;
|
||||
break;
|
||||
|
||||
case COND__FOUND_CHAR_LAST:
|
||||
mc_search->normal_offset = current_pos;
|
||||
*founded_len = search_pos;
|
||||
return TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (!founded)
|
||||
break;
|
||||
|
||||
search_pos++;
|
||||
}
|
||||
if (current_chr == -1)
|
||||
break;
|
||||
|
||||
if (mc_search->is_backward) {
|
||||
current_pos--;
|
||||
if (current_pos == start_search - 1)
|
||||
break;
|
||||
} else {
|
||||
current_pos++;
|
||||
if (current_pos == end_search + 1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
mc_search->error = MC_SEARCH_E_NOTFOUND;
|
||||
mc_search->error_str = g_strdup (_(STR_E_NOTFOUND));
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------------- */
|
||||
|
||||
static gboolean
|
||||
mc_search__run_regex (mc_search_t * mc_search, const void *user_data,
|
||||
gsize start_search, gsize end_search, gsize * founded_len)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/*** public functions ****************************************************************************/
|
||||
|
||||
mc_search_t *
|
||||
mc_search_new (const gchar * original, guint str_len)
|
||||
{
|
||||
mc_search_t *mc_search;
|
||||
if (!original)
|
||||
return NULL;
|
||||
|
||||
if (str_len == -1) {
|
||||
str_len = strlen (original);
|
||||
if (str_len == 0)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mc_search = g_malloc0 (sizeof (mc_search_t));
|
||||
mc_search->original = g_strndup (original, str_len);
|
||||
mc_search->original_len = str_len;
|
||||
return mc_search;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------------- */
|
||||
void
|
||||
mc_search_free (mc_search_t * mc_search)
|
||||
{
|
||||
if (!mc_search)
|
||||
return;
|
||||
|
||||
g_free (mc_search->original);
|
||||
|
||||
if (mc_search->error_str)
|
||||
g_free (mc_search->error_str);
|
||||
|
||||
mc_search__conditions_free (mc_search->conditions);
|
||||
g_free (mc_search);
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------------- */
|
||||
gboolean
|
||||
mc_search_run (mc_search_t * mc_search, const void *user_data, gsize start_search, gsize end_search,
|
||||
gsize * founded_len)
|
||||
{
|
||||
gboolean ret;
|
||||
if (!mc_search)
|
||||
return FALSE;
|
||||
|
||||
mc_search->conditions = mc_search__conditions_new (mc_search->original, mc_search->original_len,
|
||||
mc_search->is_all_charsets,
|
||||
mc_search->is_case_sentitive);
|
||||
|
||||
mc_search->error = MC_SEARCH_E_OK;
|
||||
if (mc_search->error_str) {
|
||||
g_free (mc_search->error_str);
|
||||
mc_search->error_str = NULL;
|
||||
}
|
||||
|
||||
switch (mc_search->search_type) {
|
||||
case MC_SEARCH_T_NORMAL:
|
||||
ret = mc_search__run_normal (mc_search, user_data, start_search, end_search, founded_len);
|
||||
break;
|
||||
case MC_SEARCH_T_REGEX:
|
||||
// ret = mc_search__run_regex (mc_search, user_data, start_search, end_search, founded_len);
|
||||
break;
|
||||
case MC_SEARCH_T_HEX:
|
||||
case MC_SEARCH_T_SCANF:
|
||||
case MC_SEARCH_T_GLOB:
|
||||
default:
|
||||
mc_search->error = MC_SEARCH_E_INPUT;
|
||||
mc_search->error_str = g_strdup (_(STR_E_UNKNOWN_TYPE));
|
||||
ret = FALSE;
|
||||
break;
|
||||
}
|
||||
mc_search__conditions_free(mc_search->conditions);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------------- */
|
91
src/search.h
Normal file
91
src/search.h
Normal file
@ -0,0 +1,91 @@
|
||||
#ifndef MC__SEARCH_H
|
||||
#define MC__SEARCH_H
|
||||
|
||||
/*** typedefs(not structures) and defined constants **********************************************/
|
||||
|
||||
/* Callback used to read the searched data: returns the value found at
   char_offset of user_data. */
typedef int (*mc_search_fn) (const void *user_data, gsize char_offset);

/* Limits reserved for replacement handling. */
#define MC_SEARCH__NUM_REPL_ARGS 64
#define MC_SEARCH__MAX_REPL_LEN 1024
|
||||
|
||||
/*** enums ***************************************************************************************/
|
||||
|
||||
/* Error code stored in mc_search_t.error after a search. */
typedef enum {
    MC_SEARCH_E_OK,             /* no error */
    MC_SEARCH_E_INPUT,          /* bad input, e.g. unknown search type */
    MC_SEARCH_E_REGEX,          /* regular expression error */
    MC_SEARCH_E_NOTFOUND        /* search string not found */
} mc_search_error_t;
|
||||
|
||||
/* Kind of search selected through mc_search_t.search_type. */
typedef enum {
    MC_SEARCH_T_NORMAL,         /* plain text */
    MC_SEARCH_T_REGEX,          /* regular expression */
    MC_SEARCH_T_SCANF,          /* scanf-style pattern */
    MC_SEARCH_T_HEX,            /* hexadecimal pattern */
    MC_SEARCH_T_GLOB            /* glob pattern */
} mc_search_type_t;
|
||||
|
||||
|
||||
/*** structures declarations (and typedefs of structures)*****************************************/
|
||||
|
||||
typedef struct mc_search_struct {
|
||||
|
||||
/* public input data */
|
||||
|
||||
/* search in all charsets */
|
||||
gboolean is_all_charsets;
|
||||
|
||||
/* case sentitive search */
|
||||
gboolean is_case_sentitive;
|
||||
|
||||
/* backward search */
|
||||
gboolean is_backward;
|
||||
|
||||
/* search only once. Is this for replace? */
|
||||
gboolean is_once_only;
|
||||
|
||||
/* function, used for getting data. NULL if not used */
|
||||
mc_search_fn search_fn;
|
||||
|
||||
/* type of search */
|
||||
mc_search_type_t search_type;
|
||||
|
||||
|
||||
/* public output data */
|
||||
|
||||
/* some data for normal */
|
||||
gsize normal_offset;
|
||||
|
||||
/* some data for regexp */
|
||||
GRegex *regex_handle;
|
||||
GMatchInfo *regex_match_info;
|
||||
|
||||
/* some data for sscanf */
|
||||
|
||||
/* some data for glob */
|
||||
GPatternSpec *glob_handle;
|
||||
|
||||
/* private data */
|
||||
|
||||
/* prepared conditions */
|
||||
GPtrArray *conditions;
|
||||
|
||||
/* original search string */
|
||||
gchar *original;
|
||||
guint original_len;
|
||||
|
||||
/* error code after search */
|
||||
mc_search_error_t error;
|
||||
gchar *error_str;
|
||||
|
||||
} mc_search_t;
|
||||
|
||||
/*** global variables defined in .c file *********************************************************/
|
||||
|
||||
/*** declarations of public functions ************************************************************/
|
||||
|
||||
mc_search_t *mc_search_new (const gchar * original, guint original_len);
|
||||
|
||||
void mc_search_free (mc_search_t * mc_search);
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user