From 44d8213f4ebc16bce3cd68395af885d9e8f3edad Mon Sep 17 00:00:00 2001 From: Andrew Borodin Date: Sun, 31 Mar 2024 18:53:59 +0300 Subject: [PATCH] Ticket #4533: External editor does not work with arguments in $EDITOR When using an external editor (i.e. "Use internal edit" in the Configure Options is unchecked) the environment variable EDITOR is used. However, if $EDITOR contains a command line argument after the executable name, these arguments are not processed properly, and the editor might not be started at all. How to reproduce: (Precondition: vi is available on the system) 1) On the command line, execute: export EDITOR="vi +" && mc (the + argument should let vi start at the document's end instead of the beginning). 2) Go to the Options menu -> Configuration -> uncheck "Use internal edit". 3) Move the cursor to a file that is larger than a single screen (e.g. ABOUT-NLS in mc's source directory). 4) Press F4 to start the external editor. Result: Nothing visible happens Expected result: vi is opened showing the end of the file ABOUT-NLS The bug: my_system_make_arg_array() doesn't perform full-feature parsing of the comman line. * (str_tokenize): mew function based on history_tokenize_internal() from GNU readline-8.2. * (str_tokenize_word): mew function based on history_tokenize_word() from GNU readline-8.2. * (my_system_make_arg_array): reimplement using str_tokenize(). * (my_systemv_flags): use modified my_system_make_arg_array(). * (fork_child_tokens): new test for string tokenization. * (fork_child_tokens2): likewise. Signed-off-by: Andrew Borodin --- lib/strutil.h | 2 + lib/strutil/Makefile.am | 1 + lib/strutil/tokenize.c | 251 +++++++++++++++++++++ lib/utilunix.c | 33 ++- tests/lib/utilunix__my_system-fork_child.c | 76 +++++++ 5 files changed, 344 insertions(+), 19 deletions(-) create mode 100644 lib/strutil/tokenize.c diff --git a/lib/strutil.h b/lib/strutil.h index e11dfda78..ed13bc118 100644 --- a/lib/strutil.h +++ b/lib/strutil.h @@ -607,6 +607,8 @@ char *strrstr_skip_count (const char *haystack, const char *needle, size_t skip_ char *str_replace_all (const char *haystack, const char *needle, const char *replacement); +GPtrArray *str_tokenize (const char *string); + strtol_error_t xstrtoumax (const char *s, char **ptr, int base, uintmax_t * val, const char *valid_suffixes); uintmax_t parse_integer (const char *str, gboolean * invalid); diff --git a/lib/strutil/Makefile.am b/lib/strutil/Makefile.am index 5936a3664..85ec16e59 100644 --- a/lib/strutil/Makefile.am +++ b/lib/strutil/Makefile.am @@ -9,6 +9,7 @@ libmcstrutil_la_SOURCES = \ strutil.c \ strutilutf8.c \ strverscmp.c \ + tokenize.c \ xstrtol.c AM_CPPFLAGS = $(GLIB_CFLAGS) -I$(top_srcdir) diff --git a/lib/strutil/tokenize.c b/lib/strutil/tokenize.c new file mode 100644 index 000000000..3c861dee0 --- /dev/null +++ b/lib/strutil/tokenize.c @@ -0,0 +1,251 @@ +/* + Parse string into tokens. + + Copyright (C) 2024 + Free Software Foundation, Inc. + + Written by: + Andrew Borodin 2010-2024 + + The str_tokenize() and str_tokenize_word routines are mostly from + GNU readline-8.2. + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +/** \file tokenize.c + * \brief Source: parse string into tokens. + */ + +#include + +#include +#include + +#include "lib/global.h" +#include "lib/util.h" /* whiteness() */ + +#include "lib/strutil.h" + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +#define WORD_DELIMITERS " \t\n;&()|<>" +#define QUOTE_CHARACTERS "\"'`" + +#define slashify_in_quotes "\\`\"$" + +#define member(c, s) ((c != '\0') ? (strchr ((s), (c)) != NULL) : FALSE) + +/*** file scope type declarations ****************************************************************/ + +/*** forward declarations (file scope functions) *************************************************/ + +/*** file scope variables ************************************************************************/ + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +/* + * Based on history_tokenize_word() from GNU readline-8.2 + */ +static int +str_tokenize_word (const char *string, int start) +{ + int i = start; + char delimiter = '\0'; + char delimopen; + int nestdelim = 0; + + if (member (string[i], "()\n")) /* XXX - included \n, but why? been here forever */ + return (i + 1); + + if (g_ascii_isdigit (string[i])) + { + int j; + + for (j = i; string[j] != '\0' && g_ascii_isdigit (string[j]); j++) + ; + + if (string[j] == '\0') + return j; + + if (string[j] == '<' || string[j] == '>') + i = j; /* digit sequence is a file descriptor */ + else + { + i = j; /* digit sequence is part of a word */ + goto get_word; + } + } + + if (member (string[i], "<>;&|")) + { + char peek = string[i + 1]; + + if (peek == string[i]) + { + if (peek == '<' && (string[i + 2] == '-' || string[i + 2] == '<')) + i++; + return (i + 2); + } + + if (peek == '&' && (string[i] == '>' || string[i] == '<')) + { + int j; + + /* file descriptor */ + for (j = i + 2; string[j] != '\0' && g_ascii_isdigit (string[j]); j++) + ; + if (string[j] == '-') /* <&[digits]-, >&[digits]- */ + j++; + return j; + } + + if ((peek == '>' && string[i] == '&') || (peek == '|' && string[i] == '>')) + return (i + 2); + + /* XXX - process substitution -- separated out for later -- bash-4.2 */ + if (peek == '(' && (string[i] == '>' || string[i] == '<')) + { + /* ) */ + i += 2; + delimopen = '('; + delimiter = ')'; + nestdelim = 1; + goto get_word; + } + + return (i + 1); + } + + get_word: + /* Get word from string + i; */ + + if (delimiter == '\0' && member (string[i], QUOTE_CHARACTERS)) + { + delimiter = string[i]; + i++; + } + + for (; string[i] != '\0'; i++) + { + if (string[i] == '\\' && string[i + 1] == '\n') + { + i++; + continue; + } + + if (string[i] == '\\' && delimiter != '\'' && + (delimiter != '"' || member (string[i], slashify_in_quotes))) + { + i++; + continue; + } + + /* delimiter must be set and set to something other than a quote if + nestdelim is set, so these tests are safe. */ + if (nestdelim != 0 && string[i] == delimopen) + { + nestdelim++; + continue; + } + if (nestdelim != 0 && string[i] == delimiter) + { + nestdelim--; + if (nestdelim == 0) + delimiter = '\0'; + continue; + } + + if (delimiter != '\0' && string[i] == delimiter) + { + delimiter = '\0'; + continue; + } + + /* Command and process substitution; shell extended globbing patterns */ + if (nestdelim == 0 && delimiter == '\0' && member (string[i], "<>$!@?+*") + && string[i + 1] == '(') + { + /* ) */ + i += 2; + delimopen = '('; + delimiter = ')'; + nestdelim = 1; + continue; + } + + if (delimiter == '\0' && member (string[i], WORD_DELIMITERS)) + break; + + if (delimiter == '\0' && member (string[i], QUOTE_CHARACTERS)) + delimiter = string[i]; + } + + return i; +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +/* Parse string into tokens. + * + * Based on history_tokenize_internal() from GNU readline-8.2 + */ +GPtrArray * +str_tokenize (const char *string) +{ + GPtrArray *result = NULL; + int i = 0; + + /* Get a token, and stuff it into RESULT. The tokens are split + exactly where the shell would split them. */ + while (string[i] != '\0') + { + int start; + + /* Skip leading whitespace */ + for (; string[i] != '\0' && whiteness (string[i]); i++) + ; + + if (string[i] == '\0') + return result; + + start = i; + i = str_tokenize_word (string, start); + + /* If we have a non-whitespace delimiter character (which would not be + skipped by the loop above), use it and any adjacent delimiters to + make a separate field. Any adjacent white space will be skipped the + next time through the loop. */ + if (i == start) + for (i++; string[i] != '\0' && member (string[i], WORD_DELIMITERS); i++) + ; + + if (result == NULL) + result = g_ptr_array_new (); + + g_ptr_array_add (result, g_strndup (string + start, i - start)); + } + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ diff --git a/lib/utilunix.c b/lib/utilunix.c index e652e516c..fc7a011f0 100644 --- a/lib/utilunix.c +++ b/lib/utilunix.c @@ -10,7 +10,7 @@ Dugan Porter, 1994, 1995, 1996 Jakub Jelinek, 1994, 1995, 1996 Mauricio Plaza, 1994, 1995, 1996 - Andrew Borodin 2010-2022 + Andrew Borodin 2010-2024 The mc_realpath routine is mostly from uClibc package, written by Rick Sladkey @@ -61,7 +61,7 @@ #include "lib/unixcompat.h" #include "lib/vfs/vfs.h" /* VFS_ENCODING_PREFIX */ -#include "lib/strutil.h" /* str_move() */ +#include "lib/strutil.h" /* str_move(), str_tokenize() */ #include "lib/util.h" #include "lib/widget.h" /* message() */ #include "lib/vfs/xdirentry.h" @@ -198,30 +198,24 @@ my_system__restore_sigaction_handlers (my_system_sigactions_t * sigactions) /* --------------------------------------------------------------------------------------------- */ static GPtrArray * -my_system_make_arg_array (int flags, const char *shell, char **execute_name) +my_system_make_arg_array (int flags, const char *shell) { GPtrArray *args_array; - args_array = g_ptr_array_new (); - if ((flags & EXECUTE_AS_SHELL) != 0) { + args_array = g_ptr_array_new (); g_ptr_array_add (args_array, (gpointer) shell); g_ptr_array_add (args_array, (gpointer) "-c"); - *execute_name = g_strdup (shell); + } + else if (shell == NULL || *shell == '\0') + { + args_array = g_ptr_array_new (); + g_ptr_array_add (args_array, NULL); } else - { - char *shell_token; + args_array = str_tokenize (shell); - shell_token = shell != NULL ? strchr (shell, ' ') : NULL; - if (shell_token == NULL) - *execute_name = g_strdup (shell); - else - *execute_name = g_strndup (shell, (gsize) (shell_token - shell)); - - g_ptr_array_add (args_array, (gpointer) shell); - } return args_array; } @@ -472,11 +466,13 @@ my_systemv (const char *command, char *const argv[]) int my_systemv_flags (int flags, const char *command, char *const argv[]) { - char *execute_name = NULL; + const char *execute_name; GPtrArray *args_array; int status = 0; - args_array = my_system_make_arg_array (flags, command, &execute_name); + args_array = my_system_make_arg_array (flags, command); + + execute_name = g_ptr_array_index (args_array, 0); for (; argv != NULL && *argv != NULL; argv++) g_ptr_array_add (args_array, *argv); @@ -484,7 +480,6 @@ my_systemv_flags (int flags, const char *command, char *const argv[]) g_ptr_array_add (args_array, NULL); status = my_systemv (execute_name, (char *const *) args_array->pdata); - g_free (execute_name); g_ptr_array_free (args_array, TRUE); return status; diff --git a/tests/lib/utilunix__my_system-fork_child.c b/tests/lib/utilunix__my_system-fork_child.c index 46c1fa260..10d972e52 100644 --- a/tests/lib/utilunix__my_system-fork_child.c +++ b/tests/lib/utilunix__my_system-fork_child.c @@ -66,6 +66,80 @@ END_TEST /* --------------------------------------------------------------------------------------------- */ +/* *INDENT-OFF* */ +START_TEST (fork_child_tokens) +/* *INDENT-ON* */ +{ + int actual_value; + /* given */ + fork__return_value = 0; + + /* when */ + actual_value = my_system (0, "vi +", "foo.txt"); + + /* then */ + ck_assert_int_eq (actual_value, 0); + + VERIFY_SIGACTION_CALLS (); + VERIFY_SIGNAL_CALLS (); + + mctest_assert_str_eq (execvp__file__captured, "vi"); + ck_assert_int_eq (execvp__args__captured->len, 3); + + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 0), "vi"); + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 1), "+"); + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 2), "foo.txt"); + + /* All exec* calls is mocked, so call to _exit() function with 127 status code it's a normal situation */ + ck_assert_int_eq (my_exit__status__captured, 127); +} +/* *INDENT-OFF* */ +END_TEST +/* *INDENT-ON* */ + +/* --------------------------------------------------------------------------------------------- */ + +/* *INDENT-OFF* */ +START_TEST (fork_child_tokens2) +/* *INDENT-ON* */ +{ + int actual_value; + /* given */ + fork__return_value = 0; + + /* when */ + actual_value = my_system (0, "qwe -a 'aa bb' -b -c cc -d \"dd ee\" -f ff\\ gg", "foo.txt"); + + /* then */ + ck_assert_int_eq (actual_value, 0); + + VERIFY_SIGACTION_CALLS (); + VERIFY_SIGNAL_CALLS (); + + mctest_assert_str_eq (execvp__file__captured, "qwe"); + ck_assert_int_eq (execvp__args__captured->len, 11); + + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 0), "qwe"); + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 1), "-a"); + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 2), "'aa bb'"); + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 3), "-b"); + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 4), "-c"); + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 5), "cc"); + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 6), "-d"); + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 7), "\"dd ee\""); + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 8), "-f"); + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 9), "ff\\ gg"); + mctest_assert_str_eq (g_ptr_array_index (execvp__args__captured, 10), "foo.txt"); + + /* All exec* calls is mocked, so call to _exit() function with 127 status code it's a normal situation */ + ck_assert_int_eq (my_exit__status__captured, 127); +} +/* *INDENT-OFF* */ +END_TEST +/* *INDENT-ON* */ + +/* --------------------------------------------------------------------------------------------- */ + int main (void) { @@ -77,6 +151,8 @@ main (void) /* Add new tests here: *************** */ tcase_add_test (tc_core, fork_child); + tcase_add_test (tc_core, fork_child_tokens); + tcase_add_test (tc_core, fork_child_tokens2); /* *********************************** */ return mctest_run_all (tc_core);