2022-10-30 22:11:31 +03:00
|
|
|
/* $NetBSD: read.c,v 1.108 2022/10/30 19:11:31 christos Exp $ */
|
1997-01-11 09:47:47 +03:00
|
|
|
|
1994-05-06 10:01:42 +04:00
|
|
|
/*-
|
|
|
|
* Copyright (c) 1992, 1993
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
*
|
|
|
|
* This code is derived from software contributed to Berkeley by
|
|
|
|
* Christos Zoulas of Cornell University.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2003-08-07 20:42:00 +04:00
|
|
|
* 3. Neither the name of the University nor the names of its contributors
|
1994-05-06 10:01:42 +04:00
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
2002-03-18 19:00:50 +03:00
|
|
|
#include "config.h"
|
1994-05-06 10:01:42 +04:00
|
|
|
#if !defined(lint) && !defined(SCCSID)
|
1997-01-11 09:47:47 +03:00
|
|
|
#if 0
|
1994-05-06 10:01:42 +04:00
|
|
|
static char sccsid[] = "@(#)read.c 8.1 (Berkeley) 6/4/93";
|
1997-01-11 09:47:47 +03:00
|
|
|
#else
|
2022-10-30 22:11:31 +03:00
|
|
|
__RCSID("$NetBSD: read.c,v 1.108 2022/10/30 19:11:31 christos Exp $");
|
1997-01-11 09:47:47 +03:00
|
|
|
#endif
|
1994-05-06 10:01:42 +04:00
|
|
|
#endif /* not lint && not SCCSID */
|
1997-01-11 09:47:47 +03:00
|
|
|
|
1994-05-06 10:01:42 +04:00
|
|
|
/*
|
2016-05-25 16:01:11 +03:00
|
|
|
* read.c: Terminal read functions
|
1994-05-06 10:01:42 +04:00
|
|
|
*/
|
2016-02-17 01:53:14 +03:00
|
|
|
#include <ctype.h>
|
1999-01-12 01:39:59 +03:00
|
|
|
#include <errno.h>
|
2003-09-13 08:18:00 +04:00
|
|
|
#include <fcntl.h>
|
2016-02-17 22:47:49 +03:00
|
|
|
#include <limits.h>
|
1994-05-06 10:01:42 +04:00
|
|
|
#include <stdlib.h>
|
2016-02-17 01:53:14 +03:00
|
|
|
#include <string.h>
|
2016-02-17 22:47:49 +03:00
|
|
|
#include <unistd.h>
|
2016-02-17 01:53:14 +03:00
|
|
|
|
1994-05-06 10:01:42 +04:00
|
|
|
#include "el.h"
|
From Ingo Schwarze:
* Replace fcns.c by a shorter and simpler func.h
and include it only in the one file needing it, map.c.
* Combine help.h and help.c into a simplified help.h
and include it only in the one file needing it, map.c.
* Check the very simple, static files editline.c, historyn.c, and
tokenizern.c into CVS rather than needlessly generating them.
* So we no longer autogenerate any C files. :-)
* Shorten and simplify makelist by deleting the options -n, -e, -bc,
and -m; the latter was unused and useless in the first place.
* Move the declaration of el_func_t from fcns.h to the header
actually needing it, map.h. Since that header is already
included by el.h for unrelated reasons, that makes el_func_t
just as globally available as before.
* No longer include the simplified fcns.h into el.h,
include it directly into the *.c files needing it.
2016-04-18 20:01:19 +03:00
|
|
|
#include "fcns.h"
|
2016-04-19 22:50:53 +03:00
|
|
|
#include "read.h"
|
|
|
|
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read module's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
#define EL_MAXMACRO 10	/* number of slots allocated for macros.macro[] */

/* Stack of pushed input macros; module-local to read.c. */
struct macros {
	wchar_t **macro;	/* array of EL_MAXMACRO strings; el_wpush() stores wcsdup() copies */
	int level;		/* index of the most recently pushed entry, -1 when empty */
	int offset;		/* set to 0 by read_init(); presumably the read position
				 * inside the current macro -- confirm against the readers */
};
|
|
|
|
|
2016-04-19 22:50:53 +03:00
|
|
|
/* Private state of the read module, reached through el->el_read. */
struct el_read_t {
	struct macros macros;	/* pushed macro input, see el_wpush() */
	el_rfunc_t read_char;	/* Function to read a character. */
	int read_errno;		/* NOTE(review): looks like the errno saved from a
				 * failed read; its users are outside this view */
};
|
1994-05-06 10:01:42 +04:00
|
|
|
|
2016-04-11 21:56:31 +03:00
|
|
|
/* Forward declarations of the helpers that are private to this file. */
static int read__fixio(int, int);
static int read_char(EditLine *, wchar_t *);
static int read_getcmd(EditLine *, el_action_t *, wchar_t *);
static void read_clearmacros(struct macros *);
static void read_pop(struct macros *);
static const wchar_t *noedit_wgets(EditLine *, int *);
|
2001-09-27 23:29:50 +04:00
|
|
|
|
|
|
|
/* read_init():
|
|
|
|
* Initialize the read stuff
|
|
|
|
*/
|
2016-05-10 00:46:56 +03:00
|
|
|
libedit_private int
|
2001-09-27 23:29:50 +04:00
|
|
|
read_init(EditLine *el)
|
|
|
|
{
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read modules's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
struct macros *ma;
|
|
|
|
|
2016-04-19 22:50:53 +03:00
|
|
|
if ((el->el_read = el_malloc(sizeof(*el->el_read))) == NULL)
|
|
|
|
return -1;
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read modules's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
|
|
|
|
ma = &el->el_read->macros;
|
2022-10-30 22:11:31 +03:00
|
|
|
if ((ma->macro = el_calloc(EL_MAXMACRO, sizeof(*ma->macro))) == NULL)
|
|
|
|
goto out;
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read modules's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
ma->level = -1;
|
|
|
|
ma->offset = 0;
|
|
|
|
|
2001-09-27 23:29:50 +04:00
|
|
|
/* builtin read_char */
|
2016-04-19 22:50:53 +03:00
|
|
|
el->el_read->read_char = read_char;
|
2001-09-27 23:29:50 +04:00
|
|
|
return 0;
|
2022-10-30 22:11:31 +03:00
|
|
|
out:
|
|
|
|
read_end(el);
|
|
|
|
return -1;
|
2001-09-27 23:29:50 +04:00
|
|
|
}
|
|
|
|
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read module's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
/* el_read_end():
|
|
|
|
* Free the data structures used by the read stuff.
|
|
|
|
*/
|
|
|
|
libedit_private void
|
2022-10-30 22:11:31 +03:00
|
|
|
read_end(EditLine *el)
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read modules's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
{
|
2022-10-30 22:11:31 +03:00
|
|
|
|
|
|
|
read_clearmacros(&el->el_read->macros);
|
|
|
|
el_free(el->el_read->macros.macro);
|
|
|
|
el->el_read->macros.macro = NULL;
|
|
|
|
el_free(el->el_read);
|
|
|
|
el->el_read = NULL;
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read modules's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
}
|
2001-09-27 23:29:50 +04:00
|
|
|
|
|
|
|
/* el_read_setfn():
|
|
|
|
* Set the read char function to the one provided.
|
|
|
|
* If it is set to EL_BUILTIN_GETCFN, then reset to the builtin one.
|
|
|
|
*/
|
2016-05-10 00:46:56 +03:00
|
|
|
libedit_private int
|
2016-04-19 22:50:53 +03:00
|
|
|
el_read_setfn(struct el_read_t *el_read, el_rfunc_t rc)
|
2001-09-27 23:29:50 +04:00
|
|
|
{
|
2016-04-19 22:50:53 +03:00
|
|
|
el_read->read_char = (rc == EL_BUILTIN_GETCFN) ? read_char : rc;
|
2001-09-27 23:29:50 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* el_read_getfn():
|
|
|
|
* return the current read char function, or EL_BUILTIN_GETCFN
|
|
|
|
* if it is the default one
|
|
|
|
*/
|
2016-05-10 00:46:56 +03:00
|
|
|
libedit_private el_rfunc_t
|
2016-04-19 22:50:53 +03:00
|
|
|
el_read_getfn(struct el_read_t *el_read)
|
2001-09-27 23:29:50 +04:00
|
|
|
{
|
2016-04-19 22:50:53 +03:00
|
|
|
return el_read->read_char == read_char ?
|
|
|
|
EL_BUILTIN_GETCFN : el_read->read_char;
|
2001-09-27 23:29:50 +04:00
|
|
|
}
|
|
|
|
|
1994-05-06 10:01:42 +04:00
|
|
|
|
|
|
|
/* read__fixio():
 *	Try to recover from a read error.
 *	e is the error code of the failed read.  Returns 0 if the caller
 *	may retry the read and -1 if the error cannot be repaired.
 *	EINTR is always retryable.  For the "would block" family of
 *	errors the descriptor is switched back to blocking mode with
 *	whichever of fcntl(F_SETFL) / ioctl(FIONBIO) was available at
 *	compile time; if neither was, the result is -1.
 */
/* ARGSUSED */
static int
read__fixio(int fd __attribute__((__unused__)), int e)
{
	int repaired;

	switch (e) {
	case EINTR:
		return 0;

	case -1:		/* Make sure that the code is reachable */

#ifdef EWOULDBLOCK
	case EWOULDBLOCK:
#ifndef TRY_AGAIN
#define TRY_AGAIN
#endif
#endif /* EWOULDBLOCK */

#if defined(POSIX) && defined(EAGAIN)
#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
	case EAGAIN:
#ifndef TRY_AGAIN
#define TRY_AGAIN
#endif
#endif /* EWOULDBLOCK && EWOULDBLOCK != EAGAIN */
#endif /* POSIX && EAGAIN */

		repaired = 0;
#ifdef TRY_AGAIN
#if defined(F_SETFL) && defined(O_NDELAY)
		{
			/* Clear O_NDELAY in the file status flags. */
			int flags = fcntl(fd, F_GETFL, 0);

			if (flags == -1)
				return -1;
			if (fcntl(fd, F_SETFL, flags & ~O_NDELAY) == -1)
				return -1;
			repaired = 1;
		}
#endif /* F_SETFL && O_NDELAY */

#ifdef FIONBIO
		{
			/* Turn non-blocking I/O off through the ioctl too. */
			int off = 0;

			if (ioctl(fd, FIONBIO, &off) == -1)
				return -1;
			repaired = 1;
		}
#endif /* FIONBIO */

#endif /* TRY_AGAIN */
		return repaired ? 0 : -1;

	default:
		return -1;
	}
}
|
|
|
|
|
|
|
|
|
|
|
|
/* el_push():
|
|
|
|
* Push a macro
|
|
|
|
*/
|
2016-04-11 21:56:31 +03:00
|
|
|
void
|
2016-04-11 03:50:13 +03:00
|
|
|
el_wpush(EditLine *el, const wchar_t *str)
|
1994-05-06 10:01:42 +04:00
|
|
|
{
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read modules's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
struct macros *ma = &el->el_read->macros;
|
2000-09-05 02:06:28 +04:00
|
|
|
|
|
|
|
if (str != NULL && ma->level + 1 < EL_MAXMACRO) {
|
|
|
|
ma->level++;
|
2016-04-11 03:22:48 +03:00
|
|
|
if ((ma->macro[ma->level] = wcsdup(str)) != NULL)
|
2003-10-19 03:48:42 +04:00
|
|
|
return;
|
|
|
|
ma->level--;
|
2000-09-05 02:06:28 +04:00
|
|
|
}
|
2011-07-28 04:44:35 +04:00
|
|
|
terminal_beep(el);
|
|
|
|
terminal__flush(el);
|
1994-05-06 10:01:42 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* read_getcmd():
|
2016-04-12 03:16:06 +03:00
|
|
|
* Get next command from the input stream,
|
|
|
|
* return 0 on success or -1 on EOF or error.
|
2009-12-31 01:37:40 +03:00
|
|
|
* Character values > 255 are not looked up in the map, but inserted.
|
1994-05-06 10:01:42 +04:00
|
|
|
*/
|
2016-04-11 21:56:31 +03:00
|
|
|
static int
|
2016-04-11 03:50:13 +03:00
|
|
|
read_getcmd(EditLine *el, el_action_t *cmdnum, wchar_t *ch)
|
1994-05-06 10:01:42 +04:00
|
|
|
{
|
2016-04-11 03:50:13 +03:00
|
|
|
static const wchar_t meta = (wchar_t)0x80;
|
2002-11-15 17:32:32 +03:00
|
|
|
el_action_t cmd;
|
1994-05-06 10:01:42 +04:00
|
|
|
|
2002-11-15 17:32:32 +03:00
|
|
|
do {
|
2017-06-28 02:24:19 +03:00
|
|
|
if (el_wgetc(el, ch) != 1)
|
2016-04-12 03:16:06 +03:00
|
|
|
return -1;
|
1994-05-06 10:01:42 +04:00
|
|
|
|
|
|
|
#ifdef KANJI
|
From Ingo Schwarze:
If CHARSET_IS_UTF8 is not set, read_char() is broken in a large
number of ways:
1. The isascii(3) check can yield false positives. If a string in
an arbitrary encoding contains a byte in the range 0..127,
that does not at all imply that it forms a character all by
itself, and even less that it represents the same character
as in ASCII. Consequently, read_char() may return characters
the user never typed.
Even if the encoding is not state dependent, the assumption that
bytes in the range 0..127 represent ASCII characters is broken.
Consider UTF-16, for example.
2. The reverse problem can also occur. In an arbitrary encoding,
there is no guarantee that a character that can be represented
by ASCII is represented by a seven-bit byte, and even less by
the same byte as in ASCII.
Even for single-byte encodings, these assumptions are broken.
Consider the ISO 646 national variants, for example.
Consequently, the current code is insufficient to keep ASCII
characters working even for single-byte encodings.
3. The condition "++cbp != 1" can never trigger (because initially,
cbp is 0, and the code can only go back up via the final goto,
which has another cbp = 0 right before it) and it has no effect
(because cbp isn't used afterwards).
4. bytes = ct_mbtowc(cp, cbuf, cbp) is broken. If this returns -1,
the code assumes that it can just call mbtowc(3) again for later
input bytes. In some implementations, that may even be broken
for state-independent encodings, but trying again after mbtowc(3)
failure certainly produces completely erratic and meaningless
results in state-dependent encodings.
5. The assignment "*cp = (Char)(unsigned char)cbuf[0]" is
completely bogus. Even if the byte cbuf[0] represents a
character all by itself, which it usually will not, whether
or not the cast produces the desired result depends on the
internal representation of wchar_t in the C library, which
the application program can know nothing about. Even for ASCII
in the C/POSIX locale, an ASCII character other than '\0' ==
L'\0' == 0 need not have the same numeric value as a char and
as a wchar_t.
To summarize, this code only works if all of the following
conditions hold:
- The encoding is a single-byte encoding.
- ASCII is a subset of the encoding.
- The implementation of mbtowc(3) in the C library does not
require re-initialization after encoding errors.
- The implementation of wchar_t in the C library uses the
same numerical values as ASCII.
Otherwise, it silently produces wrong results.
The simplest way to fix this is to just use the same code as for
UTF-8 (right above). Of course, that causes functional changes
but that shouldn't matter since current behaviour is undefined.
The patch below provides the following improvements:
- It works for all stateless single-byte encodings, no matter
whether they are somehow related to ASCII, no matter how
mb[r]towc(3) are internally implemented, and no matter how
wchar_t is internally represented.
- Instead of producing unpredictable and definitely wrong
results for non-UTF-8 multibyte characters, it behaves in
a well-defined way: It aborts input processing, sets errno,
and returns failure.
Note that short of providing full support for arbitrary locales,
it is impossible to do better. We cannot know whether a given
unsupported locale is state-dependent, and for a state-dependent
locale, it makes no sense to retry parsing after an encoding
error, so the best we can do is abort processing for *any*
unsupported multi-byte character.
- Note that single-byte characters in arbitrary state-independent
locales still work, even in locales that may potentially also
contain multibyte characters, as long as those don't occur in
input. I'm not sure whether any such locales exist in practice...
Tested with UTF-8 and C/POSIX on OpenBSD. Also tested that in the
C/POSIX locale, non-ASCII bytes get through unmangled. You may
wish to test with ISO-LATIN on NetBSD if NetBSD supports that.
----
Also use a constant for meta to avoid warnings.
2016-02-12 18:11:09 +03:00
|
|
|
if ((*ch & meta)) {
|
2000-09-05 02:06:28 +04:00
|
|
|
el->el_state.metanext = 0;
|
|
|
|
cmd = CcViMap[' '];
|
|
|
|
break;
|
|
|
|
} else
|
1994-05-06 10:01:42 +04:00
|
|
|
#endif /* KANJI */
|
|
|
|
|
2000-09-05 02:06:28 +04:00
|
|
|
if (el->el_state.metanext) {
|
|
|
|
el->el_state.metanext = 0;
|
From Ingo Schwarze:
If CHARSET_IS_UTF8 is not set, read_char() is broken in a large
number of ways:
1. The isascii(3) check can yield false positives. If a string in
an arbitrary encoding contains a byte in the range 0..127,
that does not at all imply that it forms a character all by
itself, and even less that it represents the same character
as in ASCII. Consequently, read_char() may return characters
the user never typed.
Even if the encoding is not state dependent, the assumption that
bytes in the range 0..127 represent ASCII characters is broken.
Consider UTF-16, for example.
2. The reverse problem can also occur. In an arbitrary encoding,
there is no guarantee that a character that can be represented
by ASCII is represented by a seven-bit byte, and even less by
the same byte as in ASCII.
Even for single-byte encodings, these assumptions are broken.
Consider the ISO 646 national variants, for example.
Consequently, the current code is insufficient to keep ASCII
characters working even for single-byte encodings.
3. The condition "++cbp != 1" can never trigger (because initially,
cbp is 0, and the code can only go back up via the final goto,
which has another cbp = 0 right before it) and it has no effect
(because cbp isn't used afterwards).
4. bytes = ct_mbtowc(cp, cbuf, cbp) is broken. If this returns -1,
the code assumes that it can just call mbtowc(3) again for later
input bytes. In some implementations, that may even be broken
for state-independent encodings, but trying again after mbtowc(3)
failure certainly produces completely erratic and meaningless
results in state-dependent encodings.
5. The assignment "*cp = (Char)(unsigned char)cbuf[0]" is
completely bogus. Even if the byte cbuf[0] represents a
character all by itself, which it usually will not, whether
or not the cast produces the desired result depends on the
internal representation of wchar_t in the C library, which
the application program can know nothing about. Even for ASCII
in the C/POSIX locale, an ASCII character other than '\0' ==
L'\0' == 0 need not have the same numeric value as a char and
as a wchar_t.
To summarize, this code only works if all of the following
conditions hold:
- The encoding is a single-byte encoding.
- ASCII is a subset of the encoding.
- The implementation of mbtowc(3) in the C library does not
require re-initialization after encoding errors.
- The implementation of wchar_t in the C library uses the
same numerical values as ASCII.
Otherwise, it silently produces wrong results.
The simplest way to fix this is to just use the same code as for
UTF-8 (right above). Of course, that causes functional changes
but that shouldn't matter since current behaviour is undefined.
The patch below provides the following improvements:
- It works for all stateless single-byte encodings, no matter
whether they are somehow related to ASCII, no matter how
mb[r]towc(3) are internally implemented, and no matter how
wchar_t is internally represented.
- Instead of producing unpredictable and definitely wrong
results for non-UTF-8 multibyte characters, it behaves in
a well-defined way: It aborts input processing, sets errno,
and returns failure.
Note that short of providing full support for arbitrary locales,
it is impossible to do better. We cannot know whether a given
unsupported locale is state-dependent, and for a state-dependent
locale, it makes no sense to retry parsing after an encoding
error, so the best we can do is abort processing for *any*
unsupported multi-byte character.
- Note that single-byte characters in arbitrary state-independent
locales still work, even in locales that may potentially also
contain multibyte characters, as long as those don't occur in
input. I'm not sure whether any such locales exist in practice...
Tested with UTF-8 and C/POSIX on OpenBSD. Also tested that in the
C/POSIX locale, non-ASCII bytes get through unmangled. You may
wish to test with ISO-LATIN on NetBSD if NetBSD supports that.
----
Also use a constant for meta to avoid warnings.
2016-02-12 18:11:09 +03:00
|
|
|
*ch |= meta;
|
2000-09-05 02:06:28 +04:00
|
|
|
}
|
2011-07-30 03:44:44 +04:00
|
|
|
if (*ch >= N_KEYS)
|
|
|
|
cmd = ED_INSERT;
|
2009-12-31 01:37:40 +03:00
|
|
|
else
|
2011-07-30 03:44:44 +04:00
|
|
|
cmd = el->el_map.current[(unsigned char) *ch];
|
2000-09-05 02:06:28 +04:00
|
|
|
if (cmd == ED_SEQUENCE_LEAD_IN) {
|
2011-07-28 05:56:26 +04:00
|
|
|
keymacro_value_t val;
|
|
|
|
switch (keymacro_get(el, ch, &val)) {
|
2000-09-05 02:06:28 +04:00
|
|
|
case XK_CMD:
|
|
|
|
cmd = val.cmd;
|
|
|
|
break;
|
|
|
|
case XK_STR:
|
2016-04-11 03:22:48 +03:00
|
|
|
el_wpush(el, val.str);
|
2000-09-05 02:06:28 +04:00
|
|
|
break;
|
From Ingo Schwarze:
Reduce obfuscation of errno handling. There is only one purpose
non-local errno handling is needed for: Inside el_wgets(), several
functions call down indirectly to el_wgetc(), many of them via the
dispatch table. When el_wgetc() fails, it does properly report
failure, but then various cleanup is done which may clobber errno.
But when returning due to failure, el_wgets() wants to have errno
set to the reason of the original read failure, not to the reason
of some subsequent failure of some cleanup operation. So el_wgetc()
needs to save errno, and if it's non-zero, el_wgets() needs to
restore it on failure.
This core logic is currently obscured by the fact that el_errno
is set and inspected at some additional places where it isn't needed.
Besides, since el_wgetc() and el_wgets() are both in read.c,
el_errno does not need to be in struct editline, it can and should
be local to read.c in struct el_read_t.
Let's look at what can be simplified.
1. keymacro_get() abuses el_errno instead of having a proper
error return code. Adding that error return code is easy
because node_trav() already detects the condition and an
adequate code is already defined. Returning it, testing
for it in read_getcmd(), and returning with error from there
removes the need to inspect el_errno from el_wgets() after
calling read_getcmd().
Note that resetting lastchar and cursor and clearing buffer[0]
is irrelevant. The code returns from el_wgets() right afterwards.
Outside el_wgets(), these variables are no longer relevant.
When el_wgets() is called the next time, it will call ch_reset()
anyway, resetting the two pointers. And as long as lastchar
points to the beginning of the buffer, the contents of the
buffer won't be used for anything.
2. read_getcmd() doesn't need to set el_errno again after el_wgetc()
failure since el_wgetc() already did so. While here, remove
the silly "if EOF or error" comments from the el_wgetc()
return value tests. It's a public interface documented in a
manual, so people working on the implementation can obviously
be expected to know how it works. It's a case of
count++; /* Increment count. */
3. In the two code paths of el_wgets() that lead up to "goto noedit",
there is no need to save the errno because nothing that might
change it happens before returning.
For clarity, since el_wgets() is the function restoring the errno,
also move initializing it to the same function.
Finally, note that restoring errno when the saved value is zero is
wrong. No library code is ever allowed to clear a previously set
value of errno. Only application programs are allowed to do that,
and even they usually don't need to do so, except when using certain
ill-designed interfaces like strtol(3).
I tested that the behaviour remains sane in the following cases,
all during execution of el_wgets(3) and with a signal handler
for USR1 installed without SA_RESTART.
* Enter some text and maybe move around a bit.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now el_wgets() sets errno=EINTR and returns -1.
* Press Ctrl-V to activate ed-quoted-insert.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
ed_quoted_insert() returns ed_end_of_file(), i.e. CC_EOF,
and el_wgets() returns 0.
* Press a key starting a keyboard macro.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now el_wgets() sets errno=EINTR and returns -1.
* Press : to enter builtin command mode.
Start typing a command.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now c_gets() returns -1, ed_command() beeps and returns CC_REFRESH,
and el_wgets() resumes operation as it should.
I also tested with "el_set(el, EL_EDITMODE, 0)", and it returns
the right value and sets errno correctly.
2016-05-24 18:00:45 +03:00
|
|
|
case XK_NOD:
|
|
|
|
return -1;
|
2000-09-05 02:06:28 +04:00
|
|
|
default:
|
2000-11-12 01:18:57 +03:00
|
|
|
EL_ABORT((el->el_errfile, "Bad XK_ type \n"));
|
2000-09-05 02:06:28 +04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2002-11-15 17:32:32 +03:00
|
|
|
} while (cmd == ED_SEQUENCE_LEAD_IN);
|
2000-09-05 02:06:28 +04:00
|
|
|
*cmdnum = cmd;
|
2016-04-12 03:16:06 +03:00
|
|
|
return 0;
|
1994-05-06 10:01:42 +04:00
|
|
|
}
|
|
|
|
|
1997-10-26 23:19:48 +03:00
|
|
|
/* read_char():
|
|
|
|
* Read a character from the tty.
|
|
|
|
*/
|
2016-04-11 21:56:31 +03:00
|
|
|
static int
|
2016-02-24 20:13:22 +03:00
|
|
|
read_char(EditLine *el, wchar_t *cp)
|
1997-10-26 23:19:48 +03:00
|
|
|
{
|
2009-02-16 00:55:23 +03:00
|
|
|
ssize_t num_read;
|
2021-08-15 13:08:41 +03:00
|
|
|
int tried = (el->el_flags & FIXIO) == 0;
|
2011-07-30 03:44:44 +04:00
|
|
|
char cbuf[MB_LEN_MAX];
|
|
|
|
size_t cbp = 0;
|
UTF-8 fixes from Ingo Schwarze:
1. Assume that errno is non-zero when entering read_char()
and that read(2) returns 0 (indicating end of file).
Then, the code will clear errno before returning.
(Obviously, the statement "errno = 0" is almost always
a bug unless there is save_errno = errno right before it
and the previous value is properly restored later,
in all reachable code paths.)
2. When encountering an invalid byte sequence, the code discards
all following bytes until MB_LEN_MAX overflows; consider, for
example, 0xc2 immediately followed by a few valid ASCII bytes.
Three of those ASCII bytes will be discarded.
3. On a POSIX system, EILSEQ will always be set after reading a
valid (yes, valid, not invalid!) UTF-8 character. The reason
is that mbtowc(3) will first be called with a length limit
(third argument) of 1, which will fail, return -1, and - on
a POSIX system - set errno to EILSEQ.
This third bug is mitigated a bit because i couldn't find any
system that actually conforms to POSIX in this respect: None
of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno
when an incomplete character is passed to mbtowc(3), even though
that is required by POSIX.
Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD
after release unlock, so it would be good to fix this bug in
libedit before fixing the bug in mbtowc(3).
How can these three bugs be fixed?
1. As far as i understand it, the intention of the bogus errno = 0
is to undo the effects of failing system calls in el_wset(),
sig_set(), and read__fixio() if the subsequent read(2) indicates
end of file. So, restoring errno has to be moved right after
read__fixio(). Of course, neither 0 nor e is the right value
to restore: 0 is wrong if errno happened to be set on entry, e
would be wrong because if one read(2) fails but a second attempt
succeeds after read__fixio(), errno should not be touched. So,
the errno to be restored in this case has to be saved before
calling read(2) for the first time.
2. Solving the second issue requires distinguishing invalid and
incomplete characters, but that is impossible with the function
mbtowc(3) because it returns -1 in both cases and sets errno
to EILSEQ in both cases (once properly implemented).
It is vital that each input character is processed right away.
It is not acceptable to wait for the next input character before
processing the previous one because this is an interactive
library, not a batch system. Consequently, the only situation
where it is acceptable to wait for the next byte without first
processing the previous one(s) is when the previous one(s) form
an incomplete sequence that can be continued to form a valid
character.
Consequently, short of reimplementing a full UTF-8 state machine
by hand, the only correct way forward is to use mbrtowc(3).
Even then, care is needed to always have the state object
properly initialized before using it, and to not discard a valid
ASCII or UTF-8 lead byte if it happens to follow an invalid
sequence.
3. Fortunately, solution 2. also solves issue 3. as a side effect,
by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
|
|
|
int save_errno = errno;
|
2000-09-05 02:06:28 +04:00
|
|
|
|
2009-02-19 18:20:22 +03:00
|
|
|
again:
|
|
|
|
el->el_signal->sig_no = 0;
|
2011-07-30 03:44:44 +04:00
|
|
|
while ((num_read = read(el->el_infd, cbuf + cbp, (size_t)1)) == -1) {
|
2012-09-11 00:53:18 +04:00
|
|
|
int e = errno;
|
2010-07-19 21:18:13 +04:00
|
|
|
switch (el->el_signal->sig_no) {
|
|
|
|
case SIGCONT:
|
2016-04-11 03:22:48 +03:00
|
|
|
el_wset(el, EL_REFRESH);
|
2010-07-21 22:18:52 +04:00
|
|
|
/*FALLTHROUGH*/
|
2010-07-19 21:18:13 +04:00
|
|
|
case SIGWINCH:
|
2009-02-19 18:20:22 +03:00
|
|
|
sig_set(el);
|
|
|
|
goto again;
|
2010-07-19 21:18:13 +04:00
|
|
|
default:
|
|
|
|
break;
|
2009-02-19 18:20:22 +03:00
|
|
|
}
|
UTF-8 fixes from Ingo Schwarze:
1. Assume that errno is non-zero when entering read_char()
and that read(2) returns 0 (indicating end of file).
Then, the code will clear errno before returning.
(Obviously, the statement "errno = 0" is almost always
a bug unless there is save_errno = errno right before it
and the previous value is properly restored later,
in all reachable code paths.)
2. When encountering an invalid byte sequence, the code discards
all following bytes until MB_LEN_MAX overflows; consider, for
example, 0xc2 immediately followed by a few valid ASCII bytes.
Three of those ASCII bytes will be discarded.
3. On a POSIX system, EILSEQ will always be set after reading a
valid (yes, valid, not invalid!) UTF-8 character. The reason
is that mbtowc(3) will first be called with a length limit
(third argument) of 1, which will fail, return -1, and - on
a POSIX system - set errno to EILSEQ.
This third bug is mitigated a bit because i couldn't find any
system that actually conforms to POSIX in this respect: None
of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno
when an incomplete character is passed to mbtowc(3), even though
that is required by POSIX.
Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD
after release unlock, so it would be good to fix this bug in
libedit before fixing the bug in mbtowc(3).
How can these three bugs be fixed?
1. As far as i understand it, the intention of the bogus errno = 0
is to undo the effects of failing system calls in el_wset(),
sig_set(), and read__fixio() if the subsequent read(2) indicates
end of file. So, restoring errno has to be moved right after
read__fixio(). Of course, neither 0 nor e is the right value
to restore: 0 is wrong if errno happened to be set on entry, e
would be wrong because if one read(2) fails but a second attempt
succeeds after read__fixio(), errno should not be touched. So,
the errno to be restored in this case has to be saved before
calling read(2) for the first time.
2. Solving the second issue requires distinguishing invalid and
incomplete characters, but that is impossible with the function
mbtowc(3) because it returns -1 in both cases and sets errno
to EILSEQ in both cases (once properly implemented).
It is vital that each input character is processed right away.
It is not acceptable to wait for the next input character before
processing the previous one because this is an interactive
library, not a batch system. Consequently, the only situation
where it is acceptable to wait for the next byte without first
processing the previous one(s) is when the previous one(s) form
an incomplete sequence that can be continued to form a valid
character.
Consequently, short of reimplementing a full UTF-8 state machine
by hand, the only correct way forward is to use mbrtowc(3).
Even then, care is needed to always have the state object
properly initialized before using it, and to not discard a valid
ASCII or UTF-8 lead byte if it happens to follow an invalid
sequence.
3. Fortunately, solution 2. also solves issue 3. as a side effect,
by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
|
|
|
if (!tried && read__fixio(el->el_infd, e) == 0) {
|
|
|
|
errno = save_errno;
|
2000-09-05 02:06:28 +04:00
|
|
|
tried = 1;
|
UTF-8 fixes from Ingo Schwarze:
1. Assume that errno is non-zero when entering read_char()
and that read(2) returns 0 (indicating end of file).
Then, the code will clear errno before returning.
(Obviously, the statement "errno = 0" is almost always
a bug unless there is save_errno = errno right before it
and the previous value is properly restored later,
in all reachable code paths.)
2. When encountering an invalid byte sequence, the code discards
all following bytes until MB_LEN_MAX overflows; consider, for
example, 0xc2 immediately followed by a few valid ASCII bytes.
Three of those ASCII bytes will be discarded.
3. On a POSIX system, EILSEQ will always be set after reading a
valid (yes, valid, not invalid!) UTF-8 character. The reason
is that mbtowc(3) will first be called with a length limit
(third argument) of 1, which will fail, return -1, and - on
a POSIX system - set errno to EILSEQ.
This third bug is mitigated a bit because i couldn't find any
system that actually conforms to POSIX in this respect: None
of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno
when an incomplete character is passed to mbtowc(3), even though
that is required by POSIX.
Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD
after release unlock, so it would be good to fix this bug in
libedit before fixing the bug in mbtowc(3).
How can these three bugs be fixed?
1. As far as i understand it, the intention of the bogus errno = 0
is to undo the effects of failing system calls in el_wset(),
sig_set(), and read__fixio() if the subsequent read(2) indicates
end of file. So, restoring errno has to be moved right after
read__fixio(). Of course, neither 0 nor e is the right value
to restore: 0 is wrong if errno happened to be set on entry, e
would be wrong because if one read(2) fails but a second attempt
succeeds after read__fixio(), errno should not be touched. So,
the errno to be restored in this case has to be saved before
calling read(2) for the first time.
2. Solving the second issue requires distinguishing invalid and
incomplete characters, but that is impossible with the function
mbtowc(3) because it returns -1 in both cases and sets errno
to EILSEQ in both cases (once properly implemented).
It is vital that each input character is processed right away.
It is not acceptable to wait for the next input character before
processing the previous one because this is an interactive
library, not a batch system. Consequently, the only situation
where it is acceptable to wait for the next byte without first
processing the previous one(s) is when the previous one(s) form
an incomplete sequence that can be continued to form a valid
character.
Consequently, short of reimplementing a full UTF-8 state machine
by hand, the only correct way forward is to use mbrtowc(3).
Even then, care is needed to always have the state object
properly initialized before using it, and to not discard a valid
ASCII or UTF-8 lead byte if it happens to follow an invalid
sequence.
3. Fortunately, solution 2. also solves issue 3. as a side effect,
by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
|
|
|
} else {
|
2012-09-11 00:53:18 +04:00
|
|
|
errno = e;
|
2016-02-24 20:13:22 +03:00
|
|
|
*cp = L'\0';
|
2011-07-29 19:16:33 +04:00
|
|
|
return -1;
|
2000-09-05 02:06:28 +04:00
|
|
|
}
|
2009-02-19 18:20:22 +03:00
|
|
|
}
|
2009-12-31 01:37:40 +03:00
|
|
|
|
2013-05-28 03:55:55 +04:00
|
|
|
/* Test for EOF */
|
|
|
|
if (num_read == 0) {
|
2016-02-24 20:13:22 +03:00
|
|
|
*cp = L'\0';
|
2013-05-28 03:55:55 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
From Ingo Schwarze:
Next step: Remove #ifdef'ing in read_char(), in the same style
as we did for setlocale(3) in el.c.
A few remarks are required to explain the choices made.
* On first sight, handling mbrtowc(3) seems a bit less trivial
than handling setlocale(3) because its prototype uses the data
type mbstate_t from <wchar.h>. However, it turns out that
"histedit.h" already includes <wchar.h> unconditionally (i don't
like headers including other headers, but that ship has sailed,
people are by now certainly used to the fact that including
"histedit.h" doesn't require including <wchar.h> before), and
"histedit.h" is of course included all over the place. So from
that perspective, there is no problem with using mbrtowc(3)
unconditionally, even for !WIDECHAR.
* However, <wchar.h> also defines the mbrtowc(3) prototype,
so we cannot just #define mbrtowc away, or including the header
will break. It would also be a bad idea to provide a local
implementation of mbrtowc() and hope that it overrides the one
in libc. Besides, the required prototype is subtly different:
While mbrtowc(3) takes "wchar_t *" as its first argument, we
need a function that takes "Char *". So unfortunately, we have
to keep a ct_mbrtowc #define, at least until we can maybe get
rid of "Char *" in the more remote future.
* After getting rid of the #else clause in read_char(), we can
pull "return 1;" into the default: clause. After that, we can
get rid of the ugly "goto again_lastbyte;" and just "break;".
As a bonus, that also gets rid of the ugly CONSTCOND.
* While here, delete the unused ct_mbtowc() from chartype.h.
2016-02-14 17:47:48 +03:00
|
|
|
for (;;) {
|
2016-04-09 21:43:17 +03:00
|
|
|
mbstate_t mbs;
|
From Ingo Schwarze:
Next step: Remove #ifdef'ing in read_char(), in the same style
as we did for setlocale(3) in el.c.
A few remarks are required to explain the choices made.
* On first sight, handling mbrtowc(3) seems a bit less trivial
than handling setlocale(3) because its prototype uses the data
type mbstate_t from <wchar.h>. However, it turns out that
"histedit.h" already includes <wchar.h> unconditionally (i don't
like headers including other headers, but that ship has sailed,
people are by now certainly used to the fact that including
"histedit.h" doesn't require including <wchar.h> before), and
"histedit.h" is of course included all over the place. So from
that perspective, there is no problem with using mbrtowc(3)
unconditionally, even for !WIDECHAR.
* However, <wchar.h> also defines the mbrtowc(3) prototype,
so we cannot just #define mbrtowc away, or including the header
will break. It would also be a bad idea to provide a local
implementation of mbrtowc() and hope that it overrides the one
in libc. Besides, the required prototype is subtly different:
While mbrtowc(3) takes "wchar_t *" as its first argument, we
need a function that takes "Char *". So unfortunately, we have
to keep a ct_mbrtowc #define, at least until we can maybe get
rid of "Char *" in the more remote future.
* After getting rid of the #else clause in read_char(), we can
pull "return 1;" into the default: clause. After that, we can
get rid of the ugly "goto again_lastbyte;" and just "break;".
As a bonus, that also gets rid of the ugly CONSTCOND.
* While here, delete the unused ct_mbtowc() from chartype.h.
2016-02-14 17:47:48 +03:00
|
|
|
|
2009-12-31 01:37:40 +03:00
|
|
|
++cbp;
|
2016-04-09 21:43:17 +03:00
|
|
|
/* This only works because UTF8 is stateless. */
|
|
|
|
memset(&mbs, 0, sizeof(mbs));
|
|
|
|
switch (mbrtowc(cp, cbuf, cbp, &mbs)) {
|
UTF-8 fixes from Ingo Schwarze:
1. Assume that errno is non-zero when entering read_char()
and that read(2) returns 0 (indicating end of file).
Then, the code will clear errno before returning.
(Obviously, the statement "errno = 0" is almost always
a bug unless there is save_errno = errno right before it
and the previous value is properly restored later,
in all reachable code paths.)
2. When encountering an invalid byte sequence, the code discards
all following bytes until MB_LEN_MAX overflows; consider, for
example, 0xc2 immediately followed by a few valid ASCII bytes.
Three of those ASCII bytes will be discarded.
3. On a POSIX system, EILSEQ will always be set after reading a
valid (yes, valid, not invalid!) UTF-8 character. The reason
is that mbtowc(3) will first be called with a length limit
(third argument) of 1, which will fail, return -1, and - on
a POSIX system - set errno to EILSEQ.
This third bug is mitigated a bit because i couldn't find any
system that actually conforms to POSIX in this respect: None
of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno
when an incomplete character is passed to mbtowc(3), even though
that is required by POSIX.
Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD
after release unlock, so it would be good to fix this bug in
libedit before fixing the bug in mbtowc(3).
How can these three bugs be fixed?
1. As far as i understand it, the intention of the bogus errno = 0
is to undo the effects of failing system calls in el_wset(),
sig_set(), and read__fixio() if the subsequent read(2) indicates
end of file. So, restoring errno has to be moved right after
read__fixio(). Of course, neither 0 nor e is the right value
to restore: 0 is wrong if errno happened to be set on entry, e
would be wrong because if one read(2) fails but a second attempt
succeeds after read__fixio(), errno should not be touched. So,
the errno to be restored in this case has to be saved before
calling read(2) for the first time.
2. Solving the second issue requires distinguishing invalid and
incomplete characters, but that is impossible with the function
mbtowc(3) because it returns -1 in both cases and sets errno
to EILSEQ in both cases (once properly implemented).
It is vital that each input character is processed right away.
It is not acceptable to wait for the next input character before
processing the previous one because this is an interactive
library, not a batch system. Consequently, the only situation
where it is acceptable to wait for the next byte without first
processing the previous one(s) is when the previous one(s) form
an incomplete sequence that can be continued to form a valid
character.
Consequently, short of reimplementing a full UTF-8 state machine
by hand, the only correct way forward is to use mbrtowc(3).
Even then, care is needed to always have the state object
properly initialized before using it, and to not discard a valid
ASCII or UTF-8 lead byte if it happens to follow an invalid
sequence.
3. Fortunately, solution 2. also solves issue 3. as a side effect,
by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
|
|
|
case (size_t)-1:
|
|
|
|
if (cbp > 1) {
|
|
|
|
/*
|
|
|
|
* Invalid sequence, discard all bytes
|
|
|
|
* except the last one.
|
|
|
|
*/
|
|
|
|
cbuf[0] = cbuf[cbp - 1];
|
|
|
|
cbp = 0;
|
From Ingo Schwarze:
Next step: Remove #ifdef'ing in read_char(), in the same style
as we did for setlocale(3) in el.c.
A few remarks are required to explain the choices made.
* On first sight, handling mbrtowc(3) seems a bit less trivial
than handling setlocale(3) because its prototype uses the data
type mbstate_t from <wchar.h>. However, it turns out that
"histedit.h" already includes <wchar.h> unconditionally (i don't
like headers including other headers, but that ship has sailed,
people are by now certainly used to the fact that including
"histedit.h" doesn't require including <wchar.h> before), and
"histedit.h" is of course included all over the place. So from
that perspective, there is no problem with using mbrtowc(3)
unconditionally even for !WIDECHAR.
* However, <wchar.h> also defines the mbrtowc(3) prototype,
so we cannot just #define mbrtowc away, or including the header
will break. It would also be a bad idea to provide a local
implementation of mbrtowc() and hope that it overrides the one
in libc. Besides, the required prototype is subtly different:
While mbrtowc(3) takes "wchar_t *" as its first argument, we
need a function that takes "Char *". So unfortunately, we have
to keep a ct_mbrtowc #define, at least until we can maybe get
rid of "Char *" in the more remote future.
* After getting rid of the #else clause in read_char(), we can
pull "return 1;" into the default: clause. After that, we can
get rid of the ugly "goto again_lastbyte;" and just "break;".
As a bonus, that also gets rid of the ugly CONSTCOND.
* While here, delete the unused ct_mbtowc() from chartype.h.
2016-02-14 17:47:48 +03:00
|
|
|
break;
|
UTF-8 fixes from Ingo Schwarze:
1. Assume that errno is non-zero when entering read_char()
and that read(2) returns 0 (indicating end of file).
Then, the code will clear errno before returning.
(Obviously, the statement "errno = 0" is almost always
a bug unless there is save_errno = errno right before it
and the previous value is properly restored later,
in all reachable code paths.)
2. When encountering an invalid byte sequence, the code discards
all following bytes until MB_LEN_MAX overflows; consider, for
example, 0xc2 immediately followed by a few valid ASCII bytes.
Three of those ASCII bytes will be discarded.
3. On a POSIX system, EILSEQ will always be set after reading a
valid (yes, valid, not invalid!) UTF-8 character. The reason
is that mbtowc(3) will first be called with a length limit
(third argument) of 1, which will fail, return -1, and - on
a POSIX system - set errno to EILSEQ.
This third bug is mitigated a bit because i couldn't find any
system that actually conforms to POSIX in this respect: None
of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno
when an incomplete character is passed to mbtowc(3), even though
that is required by POSIX.
Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD
after release unlock, so it would be good to fix this bug in
libedit before fixing the bug in mbtowc(3).
How can these three bugs be fixed?
1. As far as i understand it, the intention of the bogus errno = 0
is to undo the effects of failing system calls in el_wset(),
sig_set(), and read__fixio() if the subsequent read(2) indicates
end of file. So, restoring errno has to be moved right after
read__fixio(). Of course, neither 0 nor e is the right value
to restore: 0 is wrong if errno happened to be set on entry, e
would be wrong because if one read(2) fails but a second attempt
succeeds after read__fixio(), errno should not be touched. So,
the errno to be restored in this case has to be saved before
calling read(2) for the first time.
2. Solving the second issue requires distinguishing invalid and
incomplete characters, but that is impossible with the function
mbtowc(3) because it returns -1 in both cases and sets errno
to EILSEQ in both cases (once properly implemented).
It is vital that each input character is processed right away.
It is not acceptable to wait for the next input character before
processing the previous one because this is an interactive
library, not a batch system. Consequently, the only situation
where it is acceptable to wait for the next byte without first
processing the previous one(s) is when the previous one(s) form
an incomplete sequence that can be continued to form a valid
character.
Consequently, short of reimplementing a full UTF-8 state machine
by hand, the only correct way forward is to use mbrtowc(3).
Even then, care is needed to always have the state object
properly initialized before using it, and to not discard a valid
ASCII or UTF-8 lead byte if it happens to follow an invalid
sequence.
3. Fortunately, solution 2. also solves issue 3. as a side effect,
by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
|
|
|
} else {
|
|
|
|
/* Invalid byte, discard it. */
|
|
|
|
cbp = 0;
|
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
case (size_t)-2:
|
2018-11-25 19:21:04 +03:00
|
|
|
if (cbp >= MB_LEN_MAX) {
|
|
|
|
errno = EILSEQ;
|
|
|
|
*cp = L'\0';
|
|
|
|
return -1;
|
|
|
|
}
|
UTF-8 fixes from Ingo Schwarze:
1. Assume that errno is non-zero when entering read_char()
and that read(2) returns 0 (indicating end of file).
Then, the code will clear errno before returning.
(Obviously, the statement "errno = 0" is almost always
a bug unless there is save_errno = errno right before it
and the previous value is properly restored later,
in all reachable code paths.)
2. When encountering an invalid byte sequence, the code discards
all following bytes until MB_LEN_MAX overflows; consider, for
example, 0xc2 immediately followed by a few valid ASCII bytes.
Three of those ASCII bytes will be discarded.
3. On a POSIX system, EILSEQ will always be set after reading a
valid (yes, valid, not invalid!) UTF-8 character. The reason
is that mbtowc(3) will first be called with a length limit
(third argument) of 1, which will fail, return -1, and - on
a POSIX system - set errno to EILSEQ.
This third bug is mitigated a bit because i couldn't find any
system that actually conforms to POSIX in this respect: None
of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno
when an incomplete character is passed to mbtowc(3), even though
that is required by POSIX.
Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD
after release unlock, so it would be good to fix this bug in
libedit before fixing the bug in mbtowc(3).
How can these three bugs be fixed?
1. As far as i understand it, the intention of the bogus errno = 0
is to undo the effects of failing system calls in el_wset(),
sig_set(), and read__fixio() if the subsequent read(2) indicates
end of file. So, restoring errno has to be moved right after
read__fixio(). Of course, neither 0 nor e is the right value
to restore: 0 is wrong if errno happened to be set on entry, e
would be wrong because if one read(2) fails but a second attempt
succeeds after read__fixio(), errno should not be touched. So,
the errno to be restored in this case has to be saved before
calling read(2) for the first time.
2. Solving the second issue requires distinguishing invalid and
incomplete characters, but that is impossible with the function
mbtowc(3) because it returns -1 in both cases and sets errno
to EILSEQ in both cases (once properly implemented).
It is vital that each input character is processed right away.
It is not acceptable to wait for the next input character before
processing the previous one because this is an interactive
library, not a batch system. Consequently, the only situation
where it is acceptable to wait for the next byte without first
processing the previous one(s) is when the previous one(s) form
an incomplete sequence that can be continued to form a valid
character.
Consequently, short of reimplementing a full UTF-8 state machine
by hand, the only correct way forward is to use mbrtowc(3).
Even then, care is needed to always have the state object
properly initialized before using it, and to not discard a valid
ASCII or UTF-8 lead byte if it happens to follow an invalid
sequence.
3. Fortunately, solution 2. also solves issue 3. as a side effect,
by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
|
|
|
/* Incomplete sequence, read another byte. */
|
2009-12-31 01:37:40 +03:00
|
|
|
goto again;
|
UTF-8 fixes from Ingo Schwarze:
1. Assume that errno is non-zero when entering read_char()
and that read(2) returns 0 (indicating end of file).
Then, the code will clear errno before returning.
(Obviously, the statement "errno = 0" is almost always
a bug unless there is save_errno = errno right before it
and the previous value is properly restored later,
in all reachable code paths.)
2. When encountering an invalid byte sequence, the code discards
all following bytes until MB_LEN_MAX overflows; consider, for
example, 0xc2 immediately followed by a few valid ASCII bytes.
Three of those ASCII bytes will be discarded.
3. On a POSIX system, EILSEQ will always be set after reading a
valid (yes, valid, not invalid!) UTF-8 character. The reason
is that mbtowc(3) will first be called with a length limit
(third argument) of 1, which will fail, return -1, and - on
a POSIX system - set errno to EILSEQ.
This third bug is mitigated a bit because i couldn't find any
system that actually conforms to POSIX in this respect: None
of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno
when an incomplete character is passed to mbtowc(3), even though
that is required by POSIX.
Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD
after release unlock, so it would be good to fix this bug in
libedit before fixing the bug in mbtowc(3).
How can these three bugs be fixed?
1. As far as i understand it, the intention of the bogus errno = 0
is to undo the effects of failing system calls in el_wset(),
sig_set(), and read__fixio() if the subsequent read(2) indicates
end of file. So, restoring errno has to be moved right after
read__fixio(). Of course, neither 0 nor e is the right value
to restore: 0 is wrong if errno happened to be set on entry, e
would be wrong because if one read(2) fails but a second attempt
succeeds after read__fixio(), errno should not be touched. So,
the errno to be restored in this case has to be saved before
calling read(2) for the first time.
2. Solving the second issue requires distinguishing invalid and
incomplete characters, but that is impossible with the function
mbtowc(3) because it returns -1 in both cases and sets errno
to EILSEQ in both cases (once properly implemented).
It is vital that each input character is processed right away.
It is not acceptable to wait for the next input character before
processing the previous one because this is an interactive
library, not a batch system. Consequently, the only situation
where it is acceptable to wait for the next byte without first
processing the previous one(s) is when the previous one(s) form
an incomplete sequence that can be continued to form a valid
character.
Consequently, short of reimplementing a full UTF-8 state machine
by hand, the only correct way forward is to use mbrtowc(3).
Even then, care is needed to always have the state object
properly initialized before using it, and to not discard a valid
ASCII or UTF-8 lead byte if it happens to follow an invalid
sequence.
3. Fortunately, solution 2. also solves issue 3. as a side effect,
by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
|
|
|
default:
|
|
|
|
/* Valid character, process it. */
|
From Ingo Schwarze:
Next step: Remove #ifdef'ing in read_char(), in the same style
as we did for setlocale(3) in el.c.
A few remarks are required to explain the choices made.
* On first sight, handling mbrtowc(3) seems a bit less trivial
than handling setlocale(3) because its prototype uses the data
type mbstate_t from <wchar.h>. However, it turns out that
"histedit.h" already includes <wchar.h> unconditionally (i don't
like headers including other headers, but that ship has sailed,
people are by now certainly used to the fact that including
"histedit.h" doesn't require including <wchar.h> before), and
"histedit.h" is of course included all over the place. So from
that perspective, there is no problem with using mbrtowc(3)
unconditionally even for !WIDECHAR.
* However, <wchar.h> also defines the mbrtowc(3) prototype,
so we cannot just #define mbrtowc away, or including the header
will break. It would also be a bad idea to provide a local
implementation of mbrtowc() and hope that it overrides the one
in libc. Besides, the required prototype is subtly different:
While mbrtowc(3) takes "wchar_t *" as its first argument, we
need a function that takes "Char *". So unfortunately, we have
to keep a ct_mbrtowc #define, at least until we can maybe get
rid of "Char *" in the more remote future.
* After getting rid of the #else clause in read_char(), we can
pull "return 1;" into the default: clause. After that, we can
get rid of the ugly "goto again_lastbyte;" and just "break;".
As a bonus, that also gets rid of the ugly CONSTCOND.
* While here, delete the unused ct_mbtowc() from chartype.h.
2016-02-14 17:47:48 +03:00
|
|
|
return 1;
|
2009-12-31 01:37:40 +03:00
|
|
|
}
|
From Ingo Schwarze:
Next step: Remove #ifdef'ing in read_char(), in the same style
as we did for setlocale(3) in el.c.
A few remarks are required to explain the choices made.
* On first sight, handling mbrtowc(3) seems a bit less trivial
than handling setlocale(3) because its prototype uses the data
type mbstate_t from <wchar.h>. However, it turns out that
"histedit.h" already includes <wchar.h> unconditionally (i don't
like headers including other headers, but that ship has sailed,
people are by now certainly used to the fact that including
"histedit.h" doesn't require including <wchar.h> before), and
"histedit.h" is of course included all over the place. So from
that perspective, there is no problem with using mbrtowc(3)
unconditionally even for !WIDECHAR.
* However, <wchar.h> also defines the mbrtowc(3) prototype,
so we cannot just #define mbrtowc away, or including the header
will break. It would also be a bad idea to provide a local
implementation of mbrtowc() and hope that it overrides the one
in libc. Besides, the required prototype is subtly different:
While mbrtowc(3) takes "wchar_t *" as its first argument, we
need a function that takes "Char *". So unfortunately, we have
to keep a ct_mbrtowc #define, at least until we can maybe get
rid of "Char *" in the more remote future.
* After getting rid of the #else clause in read_char(), we can
pull "return 1;" into the default: clause. After that, we can
get rid of the ugly "goto again_lastbyte;" and just "break;".
As a bonus, that also gets rid of the ugly CONSTCOND.
* While here, delete the unused ct_mbtowc() from chartype.h.
2016-02-14 17:47:48 +03:00
|
|
|
}
|
1997-10-26 23:19:48 +03:00
|
|
|
}
|
|
|
|
|
2007-03-02 00:41:45 +03:00
|
|
|
/* read_pop():
|
|
|
|
* Pop a macro from the stack
|
|
|
|
*/
|
2016-04-11 21:56:31 +03:00
|
|
|
static void
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read modules's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
read_pop(struct macros *ma)
|
2007-03-02 00:41:45 +03:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
el_free(ma->macro[0]);
|
2009-06-08 19:10:53 +04:00
|
|
|
for (i = 0; i < ma->level; i++)
|
|
|
|
ma->macro[i] = ma->macro[i + 1];
|
2009-06-09 17:04:33 +04:00
|
|
|
ma->level--;
|
2007-03-02 00:41:45 +03:00
|
|
|
ma->offset = 0;
|
|
|
|
}
|
1994-05-06 10:01:42 +04:00
|
|
|
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read module's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
static void
|
|
|
|
read_clearmacros(struct macros *ma)
|
|
|
|
{
|
|
|
|
while (ma->level >= 0)
|
|
|
|
el_free(ma->macro[ma->level--]);
|
|
|
|
ma->offset = 0;
|
|
|
|
}
|
|
|
|
|
2016-02-24 17:25:38 +03:00
|
|
|
/* el_wgetc():
|
|
|
|
* Read a wide character
|
1994-05-06 10:01:42 +04:00
|
|
|
*/
|
2016-04-11 21:56:31 +03:00
|
|
|
int
|
2016-02-24 17:25:38 +03:00
|
|
|
el_wgetc(EditLine *el, wchar_t *cp)
|
1994-05-06 10:01:42 +04:00
|
|
|
{
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read modules's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
struct macros *ma = &el->el_read->macros;
|
2000-09-05 02:06:28 +04:00
|
|
|
int num_read;
|
1999-07-02 19:14:07 +04:00
|
|
|
|
2011-07-28 04:44:35 +04:00
|
|
|
terminal__flush(el);
|
2000-09-05 02:06:28 +04:00
|
|
|
for (;;) {
|
|
|
|
if (ma->level < 0)
|
|
|
|
break;
|
|
|
|
|
2007-03-02 00:41:45 +03:00
|
|
|
if (ma->macro[0][ma->offset] == '\0') {
|
|
|
|
read_pop(ma);
|
2000-09-05 02:06:28 +04:00
|
|
|
continue;
|
|
|
|
}
|
2007-03-02 00:41:45 +03:00
|
|
|
|
2009-12-31 01:37:40 +03:00
|
|
|
*cp = ma->macro[0][ma->offset++];
|
2007-03-02 00:41:45 +03:00
|
|
|
|
|
|
|
if (ma->macro[0][ma->offset] == '\0') {
|
2003-10-19 03:48:42 +04:00
|
|
|
/* Needed for QuoteMode On */
|
2007-03-02 00:41:45 +03:00
|
|
|
read_pop(ma);
|
2000-09-05 02:06:28 +04:00
|
|
|
}
|
2007-03-02 00:41:45 +03:00
|
|
|
|
2011-07-29 19:16:33 +04:00
|
|
|
return 1;
|
1994-05-06 10:01:42 +04:00
|
|
|
}
|
|
|
|
|
2000-09-05 02:06:28 +04:00
|
|
|
if (tty_rawmode(el) < 0)/* make sure the tty is set up correctly */
|
2011-07-29 19:16:33 +04:00
|
|
|
return 0;
|
1994-05-06 10:01:42 +04:00
|
|
|
|
2016-04-19 22:50:53 +03:00
|
|
|
num_read = (*el->el_read->read_char)(el, cp);
|
From Ingo Schwarze:
Reduce obfuscation of errno handling. There is only one purpose
non-local errno handling is needed for: Inside el_wgets(), several
functions call down indirectly to el_wgetc(), many of them via the
dispatch table. When el_wgetc() fails, it does properly report
failure, but then various cleanup is done which may clobber errno.
But when returning due to failure, el_wgets() wants to have errno
set to the reason of the original read failure, not to the reason
of some subsequent failure of some cleanup operation. So el_wgetc()
needs to save errno, and if it's non-zero, el_wgets() needs to
restore it on failure.
This core logic is currently obscured by the fact that el_errno
is set and inspected at some additional places where it isn't needed.
Besides, since el_wgetc() and and el_wgets() are both in read.c,
el_errno does not need to be in struct editline, it can and should
be local to read.c in struct el_read_t.
Let's look at what can be simplified.
1. keymacro_get() abuses el_errno instead of having a proper
error return code. Adding that error return code is easy
because node_trav() already detects the condition and an
adequate code is already defined. Returning it, testing
for it in read_getcmd(), and returning with error from there
removes the need to inspect el_errno from el_wgets() after
calling read_getcmd().
Note that resetting lastchar and cursor and clearing buffer[0]
is irrelevant. The code returns from el_wgets() right afterwards.
Outside el_wgets(), these variables are no longer relevant.
When el_wgets() is called the next time, it will call ch_reset()
anyway, resetting the two pointers. And as long as lastchar
points to the beginning of the buffer, the contents of the
buffer won't be used for anything.
2. read_getcmd() doesn't need to set el_errno again after el_wgetc()
failure since el_wgetc() already did so. While here, remove
the silly "if EOF or error" comments from the el_wgetc()
return value tests. It's a public interface documented in a
manual, so people working on the implementation can obviously
be expected to know how it works. It's a case of
count++; /* Increment count. */
3. In the two code paths of el_wgets() that lead up to "goto noedit",
there is no need to save the errno because nothing that might
change it happens before returning.
For clarity, since el_wgets() is the function restoring the errno,
also move initializing it to the same function.
Finally, note that restoring errno when the saved value is zero is
wrong. No library code is ever allowed to clear a previously set
value of errno. Only application programs are allowed to do that,
and even they usually don't need to do so, except when using certain
ill-designed interfaces like strtol(3).
I tested that the behaviour remains sane in the following cases,
all during execution of el_wgets(3) and with a signal handler
for USR1 installed without SA_RESTART.
* Enter some text and maybe move around a bit.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now el_wgets() sets errno=EINTR and returns -1.
* Press Ctrl-V to activate ed-quoted-insert.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
ed_quoted_insert() returns ed_end_of_file(), i.e. CC_EOF,
and el_wgets() returns 0.
* Press a key starting a keyboard macro.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now el_wgets() sets errno=EINTR and returns -1.
* Press : to enter builtin command mode.
Start typing a command.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now c_gets() returns -1, ed_command() beeps and returns CC_REFRESH,
and el_wgets() resumes operation as it should.
I also tested with "el_set(el, EL_EDITMODE, 0)", and it returns
the right value and sets errno correctly.
2016-05-24 18:00:45 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Remember the original reason of a read failure
|
|
|
|
* such that el_wgets() can restore it after doing
|
|
|
|
* various cleanup operation that might change errno.
|
|
|
|
*/
|
2012-09-11 00:53:18 +04:00
|
|
|
if (num_read < 0)
|
From Ingo Schwarze:
Reduce obfuscation of errno handling. There is only one purpose
non-local errno handling is needed for: Inside el_wgets(), several
functions call down indirectly to el_wgetc(), many of them via the
dispatch table. When el_wgetc() fails, it does properly report
failure, but then various cleanup is done which may clobber errno.
But when returning due to failure, el_wgets() wants to have errno
set to the reason of the original read failure, not to the reason
of some subsequent failure of some cleanup operation. So el_wgetc()
needs to save errno, and if it's non-zero, el_wgets() needs to
restore it on failure.
This core logic is currently obscured by the fact that el_errno
is set and inspected at some additional places where it isn't needed.
Besides, since el_wgetc() and and el_wgets() are both in read.c,
el_errno does not need to be in struct editline, it can and should
be local to read.c in struct el_read_t.
Let's look at what can be simplified.
1. keymacro_get() abuses el_errno instead of having a proper
error return code. Adding that error return code is easy
because node_trav() already detects the condition and an
adequate code is already defined. Returning it, testing
for it in read_getcmd(), and returning with error from there
removes the need to inspect el_errno from el_wgets() after
calling read_getcmd().
Note that resetting lastchar and cursor and clearing buffer[0]
is irrelevant. The code returns from el_wgets() right afterwards.
Outside el_wgets(), these variables are no longer relevant.
When el_wgets() is called the next time, it will call ch_reset()
anyway, resetting the two pointers. And as long as lastchar
points to the beginning of the buffer, the contents of the
buffer won't be used for anything.
2. read_getcmd() doesn't need to set el_errno again after el_wgetc()
failure since el_wgetc() already did so. While here, remove
the silly "if EOF or error" comments from the el_wgetc()
return value tests. It's a public interface documented in a
manual, so people working on the implementation can obviously
be expected to know how it works. It's a case of
count++; /* Increment count. */
3. In the two code paths of el_wgets() that lead up to "goto noedit",
there is no need to save the errno because nothing that might
change it happens before returning.
For clarity, since el_wgets() is the function restoring the errno,
also move initializing it to the same function.
Finally, note that restoring errno when the saved value is zero is
wrong. No library code is ever allowed to clear a previously set
value of errno. Only application programs are allowed to do that,
and even they usually don't need to do so, except when using certain
ill-designed interfaces like strtol(3).
I tested that the behaviour remains sane in the following cases,
all during execution of el_wgets(3) and with a signal handler
for USR1 installed without SA_RESTART.
* Enter some text and maybe move around a bit.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now el_wgets() sets errno=EINTR and returns -1.
* Press Ctrl-V to activate ed-quoted-insert.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
ed_quoted_insert() returns ed_end_of_file(), i.e. CC_EOF,
and el_wgets() returns 0.
* Press a key starting a keyboard macro.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now el_wgets() sets errno=EINTR and returns -1.
* Press : to enter builtin command mode.
Start typing a command.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now c_gets() returns -1, ed_command() beeps and returns CC_REFRESH,
and el_wgets() resumes operation as it should.
I also tested with "el_set(el, EL_EDITMODE, 0)", and it returns
the right value and sets errno correctly.
2016-05-24 18:00:45 +03:00
|
|
|
el->el_read->read_errno = errno;
|
|
|
|
|
2011-07-29 19:16:33 +04:00
|
|
|
return num_read;
|
1994-05-06 10:01:42 +04:00
|
|
|
}
|
|
|
|
|
2016-05-10 00:46:56 +03:00
|
|
|
libedit_private void
|
2004-02-27 17:52:18 +03:00
|
|
|
read_prepare(EditLine *el)
|
2003-09-26 21:44:51 +04:00
|
|
|
{
|
|
|
|
if (el->el_flags & HANDLE_SIGNALS)
|
|
|
|
sig_set(el);
|
|
|
|
if (el->el_flags & NO_TTY)
|
|
|
|
return;
|
|
|
|
if ((el->el_flags & (UNBUFFERED|EDIT_DISABLED)) == UNBUFFERED)
|
|
|
|
tty_rawmode(el);
|
|
|
|
|
|
|
|
/* This is relatively cheap, and things go terribly wrong if
|
|
|
|
we have the wrong size. */
|
|
|
|
el_resize(el);
|
|
|
|
re_clear_display(el); /* reset the display stuff */
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read modules's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
ch_reset(el);
|
2003-09-26 21:44:51 +04:00
|
|
|
re_refresh(el); /* print the prompt */
|
2005-03-10 02:55:02 +03:00
|
|
|
|
|
|
|
if (el->el_flags & UNBUFFERED)
|
2011-07-28 04:44:35 +04:00
|
|
|
terminal__flush(el);
|
2003-09-26 21:44:51 +04:00
|
|
|
}
|
|
|
|
|
2016-05-10 00:46:56 +03:00
|
|
|
libedit_private void
|
2003-09-26 21:44:51 +04:00
|
|
|
read_finish(EditLine *el)
|
|
|
|
{
|
|
|
|
if ((el->el_flags & UNBUFFERED) == 0)
|
|
|
|
(void) tty_cookedmode(el);
|
|
|
|
if (el->el_flags & HANDLE_SIGNALS)
|
|
|
|
sig_clr(el);
|
|
|
|
}
|
1994-05-06 10:01:42 +04:00
|
|
|
|
2016-05-25 16:01:11 +03:00
|
|
|
static const wchar_t *
|
|
|
|
noedit_wgets(EditLine *el, int *nread)
|
|
|
|
{
|
|
|
|
el_line_t *lp = &el->el_line;
|
|
|
|
int num;
|
|
|
|
|
|
|
|
while ((num = (*el->el_read->read_char)(el, lp->lastchar)) == 1) {
|
|
|
|
if (lp->lastchar + 1 >= lp->limit &&
|
|
|
|
!ch_enlargebufs(el, (size_t)2))
|
|
|
|
break;
|
|
|
|
lp->lastchar++;
|
|
|
|
if (el->el_flags & UNBUFFERED ||
|
|
|
|
lp->lastchar[-1] == '\r' ||
|
|
|
|
lp->lastchar[-1] == '\n')
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (num == -1 && errno == EINTR)
|
|
|
|
lp->lastchar = lp->buffer;
|
|
|
|
lp->cursor = lp->lastchar;
|
|
|
|
*lp->lastchar = '\0';
|
|
|
|
*nread = (int)(lp->lastchar - lp->buffer);
|
|
|
|
return *nread ? lp->buffer : NULL;
|
|
|
|
}
|
|
|
|
|
2016-04-11 21:56:31 +03:00
|
|
|
const wchar_t *
|
2016-04-11 03:22:48 +03:00
|
|
|
el_wgets(EditLine *el, int *nread)
|
1994-05-06 10:01:42 +04:00
|
|
|
{
|
2000-09-05 02:06:28 +04:00
|
|
|
int retval;
|
|
|
|
el_action_t cmdnum = 0;
|
|
|
|
int num; /* how many chars we have read at NL */
|
2016-05-25 16:01:11 +03:00
|
|
|
wchar_t ch;
|
2009-03-10 23:46:15 +03:00
|
|
|
int nrb;
|
1994-05-06 10:01:42 +04:00
|
|
|
|
2009-03-10 23:46:15 +03:00
|
|
|
if (nread == NULL)
|
|
|
|
nread = &nrb;
|
2009-07-22 19:57:00 +04:00
|
|
|
*nread = 0;
|
From Ingo Schwarze:
Reduce obfuscation of errno handling. There is only one purpose
non-local errno handling is needed for: Inside el_wgets(), several
functions call down indirectly to el_wgetc(), many of them via the
dispatch table. When el_wgetc() fails, it does properly report
failure, but then various cleanup is done which may clobber errno.
But when returning due to failure, el_wgets() wants to have errno
set to the reason of the original read failure, not to the reason
of some subsequent failure of some cleanup operation. So el_wgetc()
needs to save errno, and if it's non-zero, el_wgets() needs to
restore it on failure.
This core logic is currently obscured by the fact that el_errno
is set and inspected at some additional places where it isn't needed.
Besides, since el_wgetc() and and el_wgets() are both in read.c,
el_errno does not need to be in struct editline, it can and should
be local to read.c in struct el_read_t.
Let's look at what can be simplified.
1. keymacro_get() abuses el_errno instead of having a proper
error return code. Adding that error return code is easy
because node_trav() already detects the condition and an
adequate code is already defined. Returning it, testing
for it in read_getcmd(), and returning with error from there
removes the need to inspect el_errno from el_wgets() after
calling read_getcmd().
Note that resetting lastchar and cursor and clearing buffer[0]
is irrelevant. The code returns from el_wgets() right afterwards.
Outside el_wgets(), these variables are no longer relevant.
When el_wgets() is called the next time, it will call ch_reset()
anyway, resetting the two pointers. And as long as lastchar
points to the beginning of the buffer, the contents of the
buffer won't be used for anything.
2. read_getcmd() doesn't need to set el_errno again after el_wgetc()
failure since el_wgetc() already did so. While here, remove
the silly "if EOF or error" comments from the el_wgetc()
return value tests. It's a public interface documented in a
manual, so people working on the implementation can obviously
be expected to know how it works. It's a case of
count++; /* Increment count. */
3. In the two code paths of el_wgets() that lead up to "goto noedit",
there is no need to save the errno because nothing that might
change it happens before returning.
For clarity, since el_wgets() is the function restoring the errno,
also move initializing it to the same function.
Finally, note that restoring errno when the saved value is zero is
wrong. No library code is ever allowed to clear a previously set
value of errno. Only application programs are allowed to do that,
and even they usually don't need to do so, except when using certain
ill-designed interfaces like strtol(3).
I tested that the behaviour remains sane in the following cases,
all during execution of el_wgets(3) and with a signal handler
for USR1 installed without SA_RESTART.
* Enter some text and maybe move around a bit.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now el_wgets() sets errno=EINTR and returns -1.
* Press Ctrl-V to activate ed-quoted-insert.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
ed_quoted_insert() returns ed_end_of_file(), i.e. CC_EOF,
and el_wgets() returns 0.
* Press a key starting a keyboard macro.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now el_wgets() sets errno=EINTR and returns -1.
* Press : to enter builtin command mode.
Start typing a command.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now c_gets() returns -1, ed_command() beeps and returns CC_REFRESH,
and el_wgets() resumes operation as it should.
I also tested with "el_set(el, EL_EDITMODE, 0)", and it returns
the right value and sets errno correctly.
2016-05-24 18:00:45 +03:00
|
|
|
el->el_read->read_errno = 0;
|
2009-03-10 23:46:15 +03:00
|
|
|
|
2000-09-05 02:06:28 +04:00
|
|
|
if (el->el_flags & NO_TTY) {
|
2016-05-25 16:01:11 +03:00
|
|
|
el->el_line.lastchar = el->el_line.buffer;
|
|
|
|
return noedit_wgets(el, nread);
|
1997-12-20 22:15:50 +03:00
|
|
|
}
|
2002-10-28 00:41:50 +03:00
|
|
|
|
1994-05-06 10:01:42 +04:00
|
|
|
#ifdef FIONREAD
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read modules's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
if (el->el_tty.t_mode == EX_IO && el->el_read->macros.level < 0) {
|
2016-04-12 14:15:46 +03:00
|
|
|
int chrs = 0;
|
2000-09-05 02:06:28 +04:00
|
|
|
|
2011-07-29 00:50:55 +04:00
|
|
|
(void) ioctl(el->el_infd, FIONREAD, &chrs);
|
2000-09-05 02:06:28 +04:00
|
|
|
if (chrs == 0) {
|
|
|
|
if (tty_rawmode(el) < 0) {
|
2009-03-10 23:46:15 +03:00
|
|
|
errno = 0;
|
|
|
|
*nread = 0;
|
2011-07-29 19:16:33 +04:00
|
|
|
return NULL;
|
2000-09-05 02:06:28 +04:00
|
|
|
}
|
|
|
|
}
|
1994-05-06 10:01:42 +04:00
|
|
|
}
|
|
|
|
#endif /* FIONREAD */
|
|
|
|
|
2003-09-26 21:44:51 +04:00
|
|
|
if ((el->el_flags & UNBUFFERED) == 0)
|
|
|
|
read_prepare(el);
|
1994-05-06 10:01:42 +04:00
|
|
|
|
2000-09-05 02:06:28 +04:00
|
|
|
if (el->el_flags & EDIT_DISABLED) {
|
2004-07-08 04:51:36 +04:00
|
|
|
if ((el->el_flags & UNBUFFERED) == 0)
|
2016-05-25 16:01:11 +03:00
|
|
|
el->el_line.lastchar = el->el_line.buffer;
|
2011-07-28 04:44:35 +04:00
|
|
|
terminal__flush(el);
|
2016-05-25 16:01:11 +03:00
|
|
|
return noedit_wgets(el, nread);
|
1999-08-02 05:01:55 +04:00
|
|
|
}
|
2002-10-28 00:41:50 +03:00
|
|
|
|
2016-04-12 03:16:06 +03:00
|
|
|
for (num = -1; num == -1;) { /* while still editing this line */
|
2000-09-05 02:06:28 +04:00
|
|
|
/* if EOF or error */
|
2016-05-24 20:42:54 +03:00
|
|
|
if (read_getcmd(el, &cmdnum, &ch) == -1)
|
2000-09-05 02:06:28 +04:00
|
|
|
break;
|
2016-05-24 20:42:54 +03:00
|
|
|
if ((size_t)cmdnum >= el->el_map.nfunc) /* BUG CHECK command */
|
2000-09-05 02:06:28 +04:00
|
|
|
continue; /* try again */
|
|
|
|
/* now do the real command */
|
2002-11-15 17:32:32 +03:00
|
|
|
/* vi redo needs these way down the levels... */
|
|
|
|
el->el_state.thiscmd = cmdnum;
|
|
|
|
el->el_state.thisch = ch;
|
|
|
|
if (el->el_map.type == MAP_VI &&
|
|
|
|
el->el_map.current == el->el_map.key &&
|
|
|
|
el->el_chared.c_redo.pos < el->el_chared.c_redo.lim) {
|
|
|
|
if (cmdnum == VI_DELETE_PREV_CHAR &&
|
|
|
|
el->el_chared.c_redo.pos != el->el_chared.c_redo.buf
|
2016-04-09 21:43:17 +03:00
|
|
|
&& iswprint(el->el_chared.c_redo.pos[-1]))
|
2002-11-15 17:32:32 +03:00
|
|
|
el->el_chared.c_redo.pos--;
|
|
|
|
else
|
|
|
|
*el->el_chared.c_redo.pos++ = ch;
|
|
|
|
}
|
2000-09-05 02:06:28 +04:00
|
|
|
retval = (*el->el_map.func[cmdnum]) (el, ch);
|
|
|
|
|
|
|
|
/* save the last command here */
|
|
|
|
el->el_state.lastcmd = cmdnum;
|
|
|
|
|
|
|
|
/* use any return value */
|
|
|
|
switch (retval) {
|
|
|
|
case CC_CURSOR:
|
|
|
|
re_refresh_cursor(el);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case CC_REDISPLAY:
|
|
|
|
re_clear_lines(el);
|
|
|
|
re_clear_display(el);
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
|
|
|
|
case CC_REFRESH:
|
|
|
|
re_refresh(el);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case CC_REFRESH_BEEP:
|
|
|
|
re_refresh(el);
|
2011-07-28 04:44:35 +04:00
|
|
|
terminal_beep(el);
|
2000-09-05 02:06:28 +04:00
|
|
|
break;
|
|
|
|
|
|
|
|
case CC_NORM: /* normal char */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case CC_ARGHACK: /* Suggested by Rich Salz */
|
|
|
|
/* <rsalz@pineapple.bbn.com> */
|
2002-11-15 17:32:32 +03:00
|
|
|
continue; /* keep going... */
|
2000-09-05 02:06:28 +04:00
|
|
|
|
|
|
|
case CC_EOF: /* end of file typed */
|
2003-10-09 04:42:28 +04:00
|
|
|
if ((el->el_flags & UNBUFFERED) == 0)
|
|
|
|
num = 0;
|
|
|
|
else if (num == -1) {
|
2004-01-17 20:57:40 +03:00
|
|
|
*el->el_line.lastchar++ = CONTROL('d');
|
2003-10-09 04:42:28 +04:00
|
|
|
el->el_line.cursor = el->el_line.lastchar;
|
|
|
|
num = 1;
|
|
|
|
}
|
2000-09-05 02:06:28 +04:00
|
|
|
break;
|
|
|
|
|
|
|
|
case CC_NEWLINE: /* normal end of line */
|
2009-02-16 00:55:23 +03:00
|
|
|
num = (int)(el->el_line.lastchar - el->el_line.buffer);
|
2000-09-05 02:06:28 +04:00
|
|
|
break;
|
|
|
|
|
|
|
|
case CC_FATAL: /* fatal error, reset to known state */
|
|
|
|
/* put (real) cursor in a known place */
|
|
|
|
re_clear_display(el); /* reset the display stuff */
|
Stop the read module from poking the el_chared.c_macro data structure
currently belonging to the chared module. The read module does so
from three of its functions, while no other module uses the macro
data, not even the chared module itself. That's quite logical
because macros are a feature of input handling, all of which is
done by the read module, and none by the chared module. So move
the data into the read modules's own opaque data structure, struct
el_read_t.
That simplifies internal interfaces in several respects: The
semi-public chared.h has one fewer struct, one fewer #define, and
one fewer member in struct el_chared_t; all three move to one single
C file, read.c, and are now module-local. And the internal interface
function ch_reset() needs one fewer argument, making the code of many
functions in various modules more readable.
The price is one additional internal interface function, read_end(),
10 lines long including comments, called publicly from exactly one
place: el_end() in el.c. That's hardly an increase in complexity
since most other modules already have their *_end() function, read.c
was the odd one out not having one.
From Ingo Schwarze
2016-05-22 22:44:26 +03:00
|
|
|
ch_reset(el); /* reset the input pointers */
|
|
|
|
read_clearmacros(&el->el_read->macros);
|
2011-07-30 03:44:44 +04:00
|
|
|
re_refresh(el); /* print the prompt again */
|
2000-09-05 02:06:28 +04:00
|
|
|
break;
|
|
|
|
|
|
|
|
case CC_ERROR:
|
|
|
|
default: /* functions we don't know about */
|
2011-07-28 04:44:35 +04:00
|
|
|
terminal_beep(el);
|
|
|
|
terminal__flush(el);
|
2000-09-05 02:06:28 +04:00
|
|
|
break;
|
|
|
|
}
|
2002-11-15 17:32:32 +03:00
|
|
|
el->el_state.argument = 1;
|
|
|
|
el->el_state.doingarg = 0;
|
|
|
|
el->el_chared.c_vcmd.action = NOP;
|
2003-09-26 21:44:51 +04:00
|
|
|
if (el->el_flags & UNBUFFERED)
|
|
|
|
break;
|
1994-05-06 10:01:42 +04:00
|
|
|
}
|
2000-09-05 02:06:28 +04:00
|
|
|
|
2011-07-28 04:44:35 +04:00
|
|
|
terminal__flush(el); /* flush any buffered output */
|
2002-10-28 00:41:50 +03:00
|
|
|
/* make sure the tty is set up correctly */
|
2003-09-26 21:44:51 +04:00
|
|
|
if ((el->el_flags & UNBUFFERED) == 0) {
|
|
|
|
read_finish(el);
|
2009-03-10 23:46:15 +03:00
|
|
|
*nread = num != -1 ? num : 0;
|
2016-05-25 16:01:11 +03:00
|
|
|
} else
|
2009-03-10 23:46:15 +03:00
|
|
|
*nread = (int)(el->el_line.lastchar - el->el_line.buffer);
|
2016-05-25 16:01:11 +03:00
|
|
|
|
2009-03-10 23:46:15 +03:00
|
|
|
if (*nread == 0) {
|
|
|
|
if (num == -1) {
|
|
|
|
*nread = -1;
|
From Ingo Schwarze:
Reduce obfuscation of errno handling. There is only one purpose
non-local errno handling is needed for: Inside el_wgets(), several
functions call down indirectly to el_wgetc(), many of them via the
dispatch table. When el_wgetc() fails, it does properly report
failure, but then various cleanup is done which may clobber errno.
But when returning due to failure, el_wgets() wants to have errno
set to the reason of the original read failure, not to the reason
of some subsequent failure of some cleanup operation. So el_wgetc()
needs to save errno, and if it's non-zero, el_wgets() needs to
restore it on failure.
This core logic is currently obscured by the fact that el_errno
is set and inspected at some additional places where it isn't needed.
Besides, since el_wgetc() and and el_wgets() are both in read.c,
el_errno does not need to be in struct editline, it can and should
be local to read.c in struct el_read_t.
Let's look at what can be simplified.
1. keymacro_get() abuses el_errno instead of having a proper
error return code. Adding that error return code is easy
because node_trav() already detects the condition and an
adequate code is already defined. Returning it, testing
for it in read_getcmd(), and returning with error from there
removes the need to inspect el_errno from el_wgets() after
calling read_getcmd().
Note that resetting lastchar and cursor and clearing buffer[0]
is irrelevant. The code returns from el_wgets() right afterwards.
Outside el_wgets(), these variables are no longer relevant.
When el_wgets() is called the next time, it will call ch_reset()
anyway, resetting the two pointers. And as long as lastchar
points to the beginning of the buffer, the contents of the
buffer won't be used for anything.
2. read_getcmd() doesn't need to set el_errno again after el_wgetc()
failure since el_wgetc() already did so. While here, remove
the silly "if EOF or error" comments from the el_wgetc()
return value tests. It's a public interface documented in a
manual, so people working on the implementation can obviously
be expected to know how it works. It's a case of
count++; /* Increment count. */
3. In the two code paths of el_wgets() that lead up to "goto noedit",
there is no need to save the errno because nothing that might
change it happens before returning.
For clarity, since el_wgets() is the function restoring the errno,
also move initializing it to the same function.
Finally, note that restoring errno when the saved value is zero is
wrong. No library code is ever allowed to clear a previously set
value of errno. Only application programs are allowed to do that,
and even they usually don't need to do so, except when using certain
ill-designed interfaces like strtol(3).
I tested that the behaviour remains sane in the following cases,
all during execution of el_wgets(3) and with a signal handler
for USR1 installed without SA_RESTART.
* Enter some text and maybe move around a bit.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now el_wgets() sets errno=EINTR and returns -1.
* Press Ctrl-V to activate ed-quoted-insert.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
ed_quoted_insert() returns ed_end_of_file(), i.e. CC_EOF,
and el_wgets() returns 0.
* Press a key starting a keyboard macro.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now el_wgets() sets errno=EINTR and returns -1.
* Press : to enter builtin command mode.
Start typing a command.
Then send a USR1 signal.
The signal gets processed, then read_char() resumes reading.
Send another USR1 signal.
Now c_gets() returns -1, ed_command() beeps and returns CC_REFRESH,
and el_wgets() resumes operation as it should.
I also tested with "el_set(el, EL_EDITMODE, 0)", and it returns
the right value and sets errno correctly.
2016-05-24 18:00:45 +03:00
|
|
|
if (el->el_read->read_errno)
|
|
|
|
errno = el->el_read->read_errno;
|
2009-03-10 23:46:15 +03:00
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
} else
|
|
|
|
return el->el_line.buffer;
|
1994-05-06 10:01:42 +04:00
|
|
|
}
|