From ab729f822a9e4a29f8a08b4a40a9a12344e823b1 Mon Sep 17 00:00:00 2001 From: Slava Zanko Date: Wed, 22 Apr 2009 18:52:49 +0300 Subject: [PATCH] src/search.[ch]: add experimental search engine. Need to review and revork. * added multicharset search feature * added template for HEX and GLOB searches (for viewer and for search files dialog) * implemented normal search with/without characters cases WARNING! UNSTABLE! Code not checked for work! I'm just wrote and compiled. --- src/Makefile.am | 3 +- src/search.c | 428 ++++++++++++++++++++++++++++++++++++++++++++++++ src/search.h | 91 ++++++++++ 3 files changed, 521 insertions(+), 1 deletion(-) create mode 100644 src/search.c create mode 100644 src/search.h diff --git a/src/Makefile.am b/src/Makefile.am index 9ed8eec06..3665cdad2 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -60,7 +60,8 @@ SRCS = achown.c achown.h background.c background.h boxes.c boxes.h \ user.h util.c util.h utilunix.c view.c view.h vfsdummy.h widget.c \ widget.h win.c win.h wtools.c wtools.h unixcompat.h \ x11conn.h x11conn.c ecs.h ecs.c \ - strutil.h strutil.c strutilascii.c strutil8bit.c strutilutf8.c + strutil.h strutil.c strutilascii.c strutil8bit.c strutilutf8.c \ + search.c search.h if CHARSET mc_SOURCES = $(SRCS) $(CHARSET_SRC) diff --git a/src/search.c b/src/search.c new file mode 100644 index 000000000..735abd039 --- /dev/null +++ b/src/search.c @@ -0,0 +1,428 @@ +/* + Search text engine. + + Copyright (C) 2009 The Free Software Foundation, Inc. + + Written by: + Slava Zanko , 2009. + + This file is part of the Midnight Commander. + + The Midnight Commander is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. + */ + +#include + + +#include "../src/global.h" +#include "../src/search.h" +#include "../src/strutil.h" +#include "../src/charsets.h" + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +#define STR_E_NOTFOUND " Search string not found " +#define STR_E_UNKNOWN_TYPE " Unknown search type " + +/*** file scope type declarations ****************************************************************/ + +typedef struct mc_search_cond_struct { + GString *str; + GString *upper; + GString *lower; + gsize len; + gchar *charset; +} mc_search_cond_t; + +typedef enum { + COND__NOT_ALL_FOUND, + COND__FOUND_CHAR, + COND__FOUND_CHAR_LAST, +} mc_search__found_cond_t; + +/*** file scope variables ************************************************************************/ + +extern const char *cp_source; +extern const char *cp_display; + +/*** file scope functions ************************************************************************/ + +static gchar * +mc_search__recode_str (const char *str, guint str_len, const char *charset_from, + const char *charset_to, guint * bytes_written) +{ + gchar *ret; + gsize bytes_read; + GIConv conv; + conv = g_iconv_open (charset_to, charset_from); + if (conv == (GIConv) - 1) + return NULL; + + ret = g_convert_with_iconv (str, str_len, conv, &bytes_read, bytes_written, NULL); + g_iconv_close (conv); + return ret; +} + +/* --------------------------------------------------------------------------------------------- */ +static GString * +mc_search__tolower_case_str (const char *charset, const char *str, guint str_len) +{ + gchar *converted_str, *tmp_str1, *tmp_str2, *tmp_str3; + guint converted_str_len; + guint tmp_len; + + tmp_str2 = converted_str = + mc_search__recode_str (str, str_len, charset, cp_display, &converted_str_len); + if (converted_str == NULL) + return NULL; + + tmp_len = converted_str_len; + + tmp_str3 = tmp_str1 = g_strdup (converted_str); + + while (str_tolower (tmp_str1, &tmp_str2, &tmp_len)) + tmp_str1 += str_length_char (tmp_str1); + + g_free (tmp_str3); + + tmp_str2 = + mc_search__recode_str (converted_str, converted_str_len, cp_display, charset, &tmp_len); + g_free (converted_str); + if (tmp_str2 == NULL) + return NULL; + + return g_string_new_len (tmp_str2, tmp_len); +} + +/* --------------------------------------------------------------------------------------------- */ + +static GString * +mc_search__toupper_case_str (const char *charset, const char *str, guint str_len) +{ + gchar *converted_str, *tmp_str1, *tmp_str2, *tmp_str3; + guint converted_str_len; + guint tmp_len; + + tmp_str2 = converted_str = + mc_search__recode_str (str, str_len, charset, cp_display, &converted_str_len); + if (converted_str == NULL) + return NULL; + + tmp_len = converted_str_len; + + tmp_str3 = tmp_str1 = g_strdup (converted_str); + + while (str_toupper (tmp_str1, &tmp_str2, &tmp_len)) + tmp_str1 += str_length_char (tmp_str1); + + g_free (tmp_str3); + + tmp_str2 = + mc_search__recode_str (converted_str, converted_str_len, cp_display, charset, &tmp_len); + g_free (converted_str); + if (tmp_str2 == NULL) + return NULL; + + return g_string_new_len (tmp_str2, tmp_len); +} + +/* --------------------------------------------------------------------------------------------- */ +static mc_search_cond_t * +mc_search__cond_struct_new (const char *str, guint str_len, const char *charset, int case_sentitive) +{ + mc_search_cond_t *mc_search_cond; + mc_search_cond = g_malloc0 (sizeof (mc_search_cond_t)); + mc_search_cond->str = g_string_new (str); + mc_search_cond->len = str_len; + mc_search_cond->charset = g_strdup (charset); + if (case_sentitive) { + mc_search_cond->upper = mc_search__toupper_case_str (charset, str, str_len); + mc_search_cond->lower = mc_search__tolower_case_str (charset, str, str_len); + } + return mc_search_cond; +} + +/* --------------------------------------------------------------------------------------------- */ + +static GPtrArray * +mc_search__conditions_new (const char *str, guint str_len, int all_charsets, int case_sentitive) +{ + GPtrArray *ret; + + ret = g_ptr_array_new (); + + if (all_charsets) { + guint loop1, recoded_str_len; + + gchar *buffer; + + for (loop1 = 0; loop1 < n_codepages; loop1++) { + if (g_ascii_strcasecmp (codepages[loop1].id, cp_source)) { + g_ptr_array_add (ret, + mc_search__cond_struct_new (str, str_len, cp_source, + case_sentitive)); + continue; + } + + buffer = + mc_search__recode_str (str, str_len, cp_source, codepages[loop1].id, + &recoded_str_len); + if (buffer == NULL) + continue; + + g_ptr_array_add (ret, + mc_search__cond_struct_new (buffer, recoded_str_len, + codepages[loop1].id, case_sentitive)); + g_free (buffer); + } + } else { + g_ptr_array_add (ret, + (gpointer) mc_search__cond_struct_new (str, str_len, cp_source, + case_sentitive)); + } + return ret; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +mc_search__cond_struct_free (mc_search_cond_t * mc_search_cond) +{ + if (mc_search_cond->upper) + g_string_free (mc_search_cond->upper, TRUE); + + if (mc_search_cond->lower) + g_string_free (mc_search_cond->lower, TRUE); + + g_string_free (mc_search_cond->str, TRUE); + g_free (mc_search_cond->charset); + g_free (mc_search_cond); +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +mc_search__conditions_free (GPtrArray * array) +{ + guint loop1; + mc_search_cond_t *mc_search; + + for (loop1 = 0; loop1 < array->len; loop1++) { + mc_search = (mc_search_cond_t *) g_ptr_array_index (array, loop1); + mc_search__cond_struct_free (mc_search); + } + g_ptr_array_free (array, TRUE); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +mc_search__get_char (mc_search_t * mc_search, const void *user_data, gsize current_pos) +{ + char *data; + if (mc_search->search_fn) + return (mc_search->search_fn) (user_data, current_pos); + + data = (char *) user_data; + return (int) data[current_pos]; +} + +/* --------------------------------------------------------------------------------------------- */ + +static mc_search__found_cond_t +mc_search__normal_found_cond (mc_search_t * mc_search, int current_chr, gsize search_pos) +{ + int loop1; + mc_search_cond_t *mc_search_cond; + + for (loop1 = 0; loop1 < mc_search->conditions->len; loop1++) { + mc_search_cond = (mc_search_cond_t *) g_ptr_array_index (mc_search->conditions, loop1); + + if (search_pos > mc_search_cond->len) + continue; + + if (mc_search->is_case_sentitive) { + if ((char) current_chr == mc_search_cond->str->str[search_pos]) + return (search_pos == + mc_search_cond->len) ? COND__FOUND_CHAR_LAST : COND__FOUND_CHAR; + } else { + GString *upp, *low; + upp = (mc_search_cond->upper) ? mc_search_cond->upper : mc_search_cond->str; + low = (mc_search_cond->lower) ? mc_search_cond->lower : mc_search_cond->str; + + if (((char) current_chr == upp->str[search_pos]) + || ((char) current_chr == low->str[search_pos])) + return (search_pos == + mc_search_cond->len) ? COND__FOUND_CHAR_LAST : COND__FOUND_CHAR; + } + } + return COND__NOT_ALL_FOUND; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +mc_search__run_normal (mc_search_t * mc_search, const void *user_data, + gsize start_search, gsize end_search, gsize * founded_len) +{ + gsize current_pos, search_pos; + int current_chr = 0; + gboolean founded; + + if (mc_search->is_backward) { + current_pos = end_search; + } else { + current_pos = start_search; + } + while (1) { + search_pos = 0; + founded = TRUE; + + while (1) { + if (current_pos + search_pos > end_search) + break; + + current_chr = mc_search__get_char (mc_search, user_data, current_pos + search_pos); + if (current_chr == -1) + break; + + + switch (mc_search__normal_found_cond (mc_search, current_chr, search_pos)) { + + case COND__NOT_ALL_FOUND: + founded = FALSE; + break; + + case COND__FOUND_CHAR_LAST: + mc_search->normal_offset = current_pos; + *founded_len = search_pos; + return TRUE; + break; + + default: + break; + } + if (!founded) + break; + + search_pos++; + } + if (current_chr == -1) + break; + + if (mc_search->is_backward) { + current_pos--; + if (current_pos == start_search - 1) + break; + } else { + current_pos++; + if (current_pos == end_search + 1) + break; + } + } + mc_search->error = MC_SEARCH_E_NOTFOUND; + mc_search->error_str = g_strdup (_(STR_E_NOTFOUND)); + return FALSE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +mc_search__run_regex (mc_search_t * mc_search, const void *user_data, + gsize start_search, gsize end_search, gsize * founded_len) +{ + +} + +/*** public functions ****************************************************************************/ + +mc_search_t * +mc_search_new (const gchar * original, guint str_len) +{ + mc_search_t *mc_search; + if (!original) + return NULL; + + if (str_len == -1) { + str_len = strlen (original); + if (str_len == 0) + return NULL; + } + + mc_search = g_malloc0 (sizeof (mc_search_t)); + mc_search->original = g_strndup (original, str_len); + mc_search->original_len = str_len; + return mc_search; +} + +/* --------------------------------------------------------------------------------------------- */ +void +mc_search_free (mc_search_t * mc_search) +{ + if (!mc_search) + return; + + g_free (mc_search->original); + + if (mc_search->error_str) + g_free (mc_search->error_str); + + mc_search__conditions_free (mc_search->conditions); + g_free (mc_search); +} + +/* --------------------------------------------------------------------------------------------- */ +gboolean +mc_search_run (mc_search_t * mc_search, const void *user_data, gsize start_search, gsize end_search, + gsize * founded_len) +{ + gboolean ret; + if (!mc_search) + return FALSE; + + mc_search->conditions = mc_search__conditions_new (mc_search->original, mc_search->original_len, + mc_search->is_all_charsets, + mc_search->is_case_sentitive); + + mc_search->error = MC_SEARCH_E_OK; + if (mc_search->error_str) { + g_free (mc_search->error_str); + mc_search->error_str = NULL; + } + + switch (mc_search->search_type) { + case MC_SEARCH_T_NORMAL: + ret = mc_search__run_normal (mc_search, user_data, start_search, end_search, founded_len); + break; + case MC_SEARCH_T_REGEX: +// ret = mc_search__run_regex (mc_search, user_data, start_search, end_search, founded_len); + break; + case MC_SEARCH_T_HEX: + case MC_SEARCH_T_SCANF: + case MC_SEARCH_T_GLOB: + default: + mc_search->error = MC_SEARCH_E_INPUT; + mc_search->error_str = g_strdup (_(STR_E_UNKNOWN_TYPE)); + ret = FALSE; + break; + } + mc_search__conditions_free(mc_search->conditions); + return ret; +} + +/* --------------------------------------------------------------------------------------------- */ diff --git a/src/search.h b/src/search.h new file mode 100644 index 000000000..cf53eb2ad --- /dev/null +++ b/src/search.h @@ -0,0 +1,91 @@ +#ifndef MC__SEARCH_H +#define MC__SEARCH_H + +/*** typedefs(not structures) and defined constants **********************************************/ + +typedef int (*mc_search_fn) (const void *user_data, gsize char_offset); + +#define MC_SEARCH__NUM_REPL_ARGS 64 +#define MC_SEARCH__MAX_REPL_LEN 1024 + +/*** enums ***************************************************************************************/ + +typedef enum { + MC_SEARCH_E_OK, + MC_SEARCH_E_INPUT, + MC_SEARCH_E_REGEX, + MC_SEARCH_E_NOTFOUND, +} mc_search_error_t; + +typedef enum { + MC_SEARCH_T_NORMAL, + MC_SEARCH_T_REGEX, + MC_SEARCH_T_SCANF, + MC_SEARCH_T_HEX, + MC_SEARCH_T_GLOB, +} mc_search_type_t; + + +/*** structures declarations (and typedefs of structures)*****************************************/ + +typedef struct mc_search_struct { + +/* public input data */ + + /* search in all charsets */ + gboolean is_all_charsets; + + /* case sentitive search */ + gboolean is_case_sentitive; + + /* backward search */ + gboolean is_backward; + + /* search only once. Is this for replace? */ + gboolean is_once_only; + + /* function, used for getting data. NULL if not used */ + mc_search_fn search_fn; + + /* type of search */ + mc_search_type_t search_type; + + +/* public output data */ + + /* some data for normal */ + gsize normal_offset; + + /* some data for regexp */ + GRegex *regex_handle; + GMatchInfo *regex_match_info; + + /* some data for sscanf */ + + /* some data for glob */ + GPatternSpec *glob_handle; + +/* private data */ + + /* prepared conditions */ + GPtrArray *conditions; + + /* original search string */ + gchar *original; + guint original_len; + + /* error code after search */ + mc_search_error_t error; + gchar *error_str; + +} mc_search_t; + +/*** global variables defined in .c file *********************************************************/ + +/*** declarations of public functions ************************************************************/ + +mc_search_t *mc_search_new (const gchar * original, guint original_len); + +void mc_search_free (mc_search_t * mc_search); + +#endif