NetBSD/lib/libedit/read.c

683 lines
15 KiB
C
Raw Normal View History

/* $NetBSD: read.c,v 1.92 2016/04/12 00:16:06 christos Exp $ */
1994-05-06 10:01:42 +04:00
/*-
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Christos Zoulas of Cornell University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
1994-05-06 10:01:42 +04:00
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
1994-05-06 10:01:42 +04:00
#if !defined(lint) && !defined(SCCSID)
#if 0
1994-05-06 10:01:42 +04:00
static char sccsid[] = "@(#)read.c 8.1 (Berkeley) 6/4/93";
#else
__RCSID("$NetBSD: read.c,v 1.92 2016/04/12 00:16:06 christos Exp $");
#endif
1994-05-06 10:01:42 +04:00
#endif /* not lint && not SCCSID */
1994-05-06 10:01:42 +04:00
/*
* read.c: Clean this junk up! This is horrible code.
* Terminal read functions
*/
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
1994-05-06 10:01:42 +04:00
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
1994-05-06 10:01:42 +04:00
#include "el.h"
static int read__fixio(int, int);
static int read_char(EditLine *, wchar_t *);
static int read_getcmd(EditLine *, el_action_t *, wchar_t *);
static void read_pop(c_macro_t *);
/* read_init():
* Initialize the read stuff
*/
protected int
read_init(EditLine *el)
{
/* builtin read_char */
el->el_read.read_char = read_char;
return 0;
}
/* el_read_setfn():
* Set the read char function to the one provided.
* If it is set to EL_BUILTIN_GETCFN, then reset to the builtin one.
*/
protected int
el_read_setfn(EditLine *el, el_rfunc_t rc)
{
el->el_read.read_char = (rc == EL_BUILTIN_GETCFN) ? read_char : rc;
return 0;
}
/* el_read_getfn():
* return the current read char function, or EL_BUILTIN_GETCFN
* if it is the default one
*/
protected el_rfunc_t
el_read_getfn(EditLine *el)
{
2011-07-29 19:16:33 +04:00
return el->el_read.read_char == read_char ?
EL_BUILTIN_GETCFN : el->el_read.read_char;
}
1994-05-06 10:01:42 +04:00
#ifdef DEBUG_EDIT
static void
read_debug(EditLine *el)
1994-05-06 10:01:42 +04:00
{
if (el->el_line.cursor > el->el_line.lastchar)
(void) fprintf(el->el_errfile, "cursor > lastchar\r\n");
if (el->el_line.cursor < el->el_line.buffer)
(void) fprintf(el->el_errfile, "cursor < buffer\r\n");
if (el->el_line.cursor > el->el_line.limit)
(void) fprintf(el->el_errfile, "cursor > limit\r\n");
if (el->el_line.lastchar > el->el_line.limit)
(void) fprintf(el->el_errfile, "lastchar > limit\r\n");
if (el->el_line.limit != &el->el_line.buffer[EL_BUFSIZ - 2])
(void) fprintf(el->el_errfile, "limit != &buffer[EL_BUFSIZ-2]\r\n");
1994-05-06 10:01:42 +04:00
}
#endif /* DEBUG_EDIT */
1994-05-06 10:01:42 +04:00
/* read__fixio():
* Try to recover from a read error
*/
1998-12-12 23:08:21 +03:00
/* ARGSUSED */
static int
read__fixio(int fd __attribute__((__unused__)), int e)
1994-05-06 10:01:42 +04:00
{
switch (e) {
case -1: /* Make sure that the code is reachable */
1994-05-06 10:01:42 +04:00
#ifdef EWOULDBLOCK
case EWOULDBLOCK:
#ifndef TRY_AGAIN
2011-07-30 03:44:44 +04:00
#define TRY_AGAIN
#endif
1994-05-06 10:01:42 +04:00
#endif /* EWOULDBLOCK */
#if defined(POSIX) && defined(EAGAIN)
#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
case EAGAIN:
#ifndef TRY_AGAIN
2011-07-30 03:44:44 +04:00
#define TRY_AGAIN
#endif
#endif /* EWOULDBLOCK && EWOULDBLOCK != EAGAIN */
1994-05-06 10:01:42 +04:00
#endif /* POSIX && EAGAIN */
e = 0;
1994-05-06 10:01:42 +04:00
#ifdef TRY_AGAIN
#if defined(F_SETFL) && defined(O_NDELAY)
if ((e = fcntl(fd, F_GETFL, 0)) == -1)
2011-07-29 19:16:33 +04:00
return -1;
if (fcntl(fd, F_SETFL, e & ~O_NDELAY) == -1)
2011-07-29 19:16:33 +04:00
return -1;
1997-07-06 22:25:21 +04:00
else
e = 1;
#endif /* F_SETFL && O_NDELAY */
#ifdef FIONBIO
{
int zero = 0;
if (ioctl(fd, FIONBIO, &zero) == -1)
2011-07-29 19:16:33 +04:00
return -1;
else
e = 1;
}
#endif /* FIONBIO */
1994-05-06 10:01:42 +04:00
#endif /* TRY_AGAIN */
2011-07-29 19:16:33 +04:00
return e ? 0 : -1;
1994-05-06 10:01:42 +04:00
case EINTR:
2011-07-29 19:16:33 +04:00
return 0;
1994-05-06 10:01:42 +04:00
default:
2011-07-29 19:16:33 +04:00
return -1;
}
1994-05-06 10:01:42 +04:00
}
/* el_push():
* Push a macro
*/
void
2016-04-11 03:50:13 +03:00
el_wpush(EditLine *el, const wchar_t *str)
1994-05-06 10:01:42 +04:00
{
c_macro_t *ma = &el->el_chared.c_macro;
if (str != NULL && ma->level + 1 < EL_MAXMACRO) {
ma->level++;
if ((ma->macro[ma->level] = wcsdup(str)) != NULL)
return;
ma->level--;
}
terminal_beep(el);
terminal__flush(el);
1994-05-06 10:01:42 +04:00
}
/* read_getcmd():
* Get next command from the input stream,
* return 0 on success or -1 on EOF or error.
* Character values > 255 are not looked up in the map, but inserted.
1994-05-06 10:01:42 +04:00
*/
static int
2016-04-11 03:50:13 +03:00
read_getcmd(EditLine *el, el_action_t *cmdnum, wchar_t *ch)
1994-05-06 10:01:42 +04:00
{
2016-04-11 03:50:13 +03:00
static const wchar_t meta = (wchar_t)0x80;
el_action_t cmd;
int num;
1994-05-06 10:01:42 +04:00
el->el_errno = 0;
do {
2016-04-11 03:50:13 +03:00
if ((num = el_wgetc(el, ch)) != 1) {/* if EOF or error */
el->el_errno = num == 0 ? 0 : errno;
return -1;
}
1994-05-06 10:01:42 +04:00
#ifdef KANJI
From Ingo Schwarze: If CHARSET_IS_UTF8 is not set, read_char() is broken in a large number of ways: 1. The isascii(3) check can yield false positives. If a string in an arbitrary encoding contains a byte in the range 0..127, that does not at all imply that it forms a character all by itself, and even less that it represents the same character as in ASCII. Consequently, read_char() may return characters the user never typed. Even if the encoding is not state dependent, the assumption that bytes in the range 0..127 represent ASCII characters is broken. Consider UTF-16, for example. 2. The reverse problem can also occur. In an arbitrary encoding, there is no guarantee that a character that can be represented by ASCII is represented by a seven-bit byte, and even less by the same byte as in ASCII. Even for single-byte encodings, these assumptions are broken. Consider the ISO 646 national variants, for example. Consequently, the current code is insufficient to keep ASCII characters working even for single-byte encodings. 3. The condition "++cbp != 1" can never trigger (because initially, cbp is 0, and the code can only go back up via the final goto, which has another cbp = 0 right before it) and it has no effect (because cbp isn't used afterwards). 4. bytes = ct_mbtowc(cp, cbuf, cbp) is broken. If this returns -1, the code assumes that is can just call mbtowc(3) again for later input bytes. In some implementations, that may even be broken for state-independent encodings, but trying again after mbtowc(3) failure certainly produces completely erratic and meaningless results in state-dependent encodings. 5. The assignment "*cp = (Char)(unsigned char)cbuf[0]" is completely bogus. Even if the byte cbuf[0] represents a character all by itself, which it usually will not, whether or not the cast produces the desired result depends on the internal representation of wchar_t in the C library, which the application program can know nothing about. Even for ASCII in the C/POSIX locale, an ASCII character other than '\0' == L'\0' == 0 need not have the same numeric value as a char and as a wchar_t. To summarize, this code only works if all of the following conditions hold: - The encoding is a single-byte encoding. - ASCII is a subset of the encoding. - The implementation of mbtowc(3) in the C library does not require re-initialization after encoding errors. - The implementation of wchar_t in the C library uses the same numerical values as ASCII. Otherwise, it silently produces wrong results. The simplest way to fix this is to just use the same code as for UTF-8 (right above). Of course, that causes functional changes but that shouldn't matter since current behaviour is undefined. The patch below provides the following improvements: - It works for all stateless single-byte encodings, no matter whether they are somehow related to ASCII, no matter how mb[r]towc(3) are internally implemented, and no matter how wchar_t is internally represented. - Instead of producing unpredictable and definitely wrong results for non-UTF-8 multibyte characters, it behaves in a well-defined way: It aborts input processing, sets errno, and returns failure. Note that short of providing full support for arbitrary locales, it is impossible to do better. We cannot know whether a given unsupported locale is state-dependent, and for a state-dependent locale, it makes no sense to retry parsing after an encoding error, so the best we can do is abort processing for *any* unsupported multi-byte character. - Note that single-byte characters in arbitrary state-independent locales still work, even in locales that may potentially also contain multibyte characters, as long as those don't occur in input. I'm not sure whether any such locales exist in practice... Tested with UTF-8 and C/POSIX on OpenBSD. Also tested that in the C/POSIX locale, non-ASCII bytes get through unmangled. You may wish to test with ISO-LATIN on NetBSD if NetBSD supports that. ---- Also use a constant for meta to avoid warnings.
2016-02-12 18:11:09 +03:00
if ((*ch & meta)) {
el->el_state.metanext = 0;
cmd = CcViMap[' '];
break;
} else
1994-05-06 10:01:42 +04:00
#endif /* KANJI */
if (el->el_state.metanext) {
el->el_state.metanext = 0;
From Ingo Schwarze: If CHARSET_IS_UTF8 is not set, read_char() is broken in a large number of ways: 1. The isascii(3) check can yield false positives. If a string in an arbitrary encoding contains a byte in the range 0..127, that does not at all imply that it forms a character all by itself, and even less that it represents the same character as in ASCII. Consequently, read_char() may return characters the user never typed. Even if the encoding is not state dependent, the assumption that bytes in the range 0..127 represent ASCII characters is broken. Consider UTF-16, for example. 2. The reverse problem can also occur. In an arbitrary encoding, there is no guarantee that a character that can be represented by ASCII is represented by a seven-bit byte, and even less by the same byte as in ASCII. Even for single-byte encodings, these assumptions are broken. Consider the ISO 646 national variants, for example. Consequently, the current code is insufficient to keep ASCII characters working even for single-byte encodings. 3. The condition "++cbp != 1" can never trigger (because initially, cbp is 0, and the code can only go back up via the final goto, which has another cbp = 0 right before it) and it has no effect (because cbp isn't used afterwards). 4. bytes = ct_mbtowc(cp, cbuf, cbp) is broken. If this returns -1, the code assumes that is can just call mbtowc(3) again for later input bytes. In some implementations, that may even be broken for state-independent encodings, but trying again after mbtowc(3) failure certainly produces completely erratic and meaningless results in state-dependent encodings. 5. The assignment "*cp = (Char)(unsigned char)cbuf[0]" is completely bogus. Even if the byte cbuf[0] represents a character all by itself, which it usually will not, whether or not the cast produces the desired result depends on the internal representation of wchar_t in the C library, which the application program can know nothing about. Even for ASCII in the C/POSIX locale, an ASCII character other than '\0' == L'\0' == 0 need not have the same numeric value as a char and as a wchar_t. To summarize, this code only works if all of the following conditions hold: - The encoding is a single-byte encoding. - ASCII is a subset of the encoding. - The implementation of mbtowc(3) in the C library does not require re-initialization after encoding errors. - The implementation of wchar_t in the C library uses the same numerical values as ASCII. Otherwise, it silently produces wrong results. The simplest way to fix this is to just use the same code as for UTF-8 (right above). Of course, that causes functional changes but that shouldn't matter since current behaviour is undefined. The patch below provides the following improvements: - It works for all stateless single-byte encodings, no matter whether they are somehow related to ASCII, no matter how mb[r]towc(3) are internally implemented, and no matter how wchar_t is internally represented. - Instead of producing unpredictable and definitely wrong results for non-UTF-8 multibyte characters, it behaves in a well-defined way: It aborts input processing, sets errno, and returns failure. Note that short of providing full support for arbitrary locales, it is impossible to do better. We cannot know whether a given unsupported locale is state-dependent, and for a state-dependent locale, it makes no sense to retry parsing after an encoding error, so the best we can do is abort processing for *any* unsupported multi-byte character. - Note that single-byte characters in arbitrary state-independent locales still work, even in locales that may potentially also contain multibyte characters, as long as those don't occur in input. I'm not sure whether any such locales exist in practice... Tested with UTF-8 and C/POSIX on OpenBSD. Also tested that in the C/POSIX locale, non-ASCII bytes get through unmangled. You may wish to test with ISO-LATIN on NetBSD if NetBSD supports that. ---- Also use a constant for meta to avoid warnings.
2016-02-12 18:11:09 +03:00
*ch |= meta;
}
2011-07-30 03:44:44 +04:00
if (*ch >= N_KEYS)
cmd = ED_INSERT;
else
2011-07-30 03:44:44 +04:00
cmd = el->el_map.current[(unsigned char) *ch];
if (cmd == ED_SEQUENCE_LEAD_IN) {
keymacro_value_t val;
switch (keymacro_get(el, ch, &val)) {
case XK_CMD:
cmd = val.cmd;
break;
case XK_STR:
el_wpush(el, val.str);
break;
default:
EL_ABORT((el->el_errfile, "Bad XK_ type \n"));
break;
}
}
if (el->el_map.alt == NULL)
el->el_map.current = el->el_map.key;
} while (cmd == ED_SEQUENCE_LEAD_IN);
*cmdnum = cmd;
return 0;
1994-05-06 10:01:42 +04:00
}
/* read_char():
* Read a character from the tty.
*/
static int
read_char(EditLine *el, wchar_t *cp)
{
2009-02-16 00:55:23 +03:00
ssize_t num_read;
int tried = 0;
2011-07-30 03:44:44 +04:00
char cbuf[MB_LEN_MAX];
size_t cbp = 0;
UTF-8 fixes from Ingo Schwarze: 1. Assume that errno is non-zero when entering read_char() and that read(2) returns 0 (indicating end of file). Then, the code will clear errno before returning. (Obviously, the statement "errno = 0" is almost always a bug unless there is save_errno = errno right before it and the previous value is properly restored later, in all reachable code paths.) 2. When encountering an invalid byte sequence, the code discards all following bytes until MB_LEN_MAX overflows; consider, for example, 0xc2 immediately followed by a few valid ASCII bytes. Three of those ASCII bytes will be discarded. 3. On a POSIX system, EILSEQ will always be set after reading a valid (yes, valid, not invalid!) UTF-8 character. The reason is that mbtowc(3) will first be called with a length limit (third argument) of 1, which will fail, return -1, and - on a POSIX system - set errno to EILSEQ. This third bug is mitigated a bit because i couldn't find any system that actually conforms to POSIX in this respect: None of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno when an incomplete character is passed to mbtowc(3), even though that is required by POSIX. Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD after release unlock, so it would be good to fix this bug in libedit before fixing the bug in mbtowc(3). How can these three bugs be fixed? 1. As far as i understand it, the intention of the bogus errno = 0 is to undo the effects of failing system calls in el_wset(), sig_set(), and read__fixio() if the subsequent read(2) indicates end of file. So, restoring errno has to be moved right after read__fixio(). Of course, neither 0 nor e is the right value to restore: 0 is wrong if errno happened to be set on entry, e would be wrong because if one read(2) fails but a second attempt succeeds after read__fixio(), errno should not be touched. So, the errno to be restored in this case has to be saved before calling read(2) for the first time. 2. Solving the second issue requires distinguishing invalid and incomplete characters, but that is impossible with the function mbtowc(3) because it returns -1 in both cases and sets errno to EILSEQ in both cases (once properly implemented). It is vital that each input character is processed right away. It is not acceptable to wait for the next input character before processing the previous one because this is an interactive library, not a batch system. Consequently, the only situation where it is acceptable to wait for the next byte without first processing the previous one(s) is when the previous one(s) form an incomplete sequence that can be continued to form a valid character. Consequently, short of reimplementing a full UTF-8 state machine by hand, the only correct way forward is to use mbrtowc(3). Even then, care is needed to always have the state object properly initialized before using it, and to not discard a valid ASCII or UTF-8 lead byte if it happens to follow an invalid sequence. 3. Fortunately, solution 2. also solves issue 3. as a side effect, by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
int save_errno = errno;
again:
el->el_signal->sig_no = 0;
2011-07-30 03:44:44 +04:00
while ((num_read = read(el->el_infd, cbuf + cbp, (size_t)1)) == -1) {
int e = errno;
switch (el->el_signal->sig_no) {
case SIGCONT:
el_wset(el, EL_REFRESH);
/*FALLTHROUGH*/
case SIGWINCH:
sig_set(el);
goto again;
default:
break;
}
UTF-8 fixes from Ingo Schwarze: 1. Assume that errno is non-zero when entering read_char() and that read(2) returns 0 (indicating end of file). Then, the code will clear errno before returning. (Obviously, the statement "errno = 0" is almost always a bug unless there is save_errno = errno right before it and the previous value is properly restored later, in all reachable code paths.) 2. When encountering an invalid byte sequence, the code discards all following bytes until MB_LEN_MAX overflows; consider, for example, 0xc2 immediately followed by a few valid ASCII bytes. Three of those ASCII bytes will be discarded. 3. On a POSIX system, EILSEQ will always be set after reading a valid (yes, valid, not invalid!) UTF-8 character. The reason is that mbtowc(3) will first be called with a length limit (third argument) of 1, which will fail, return -1, and - on a POSIX system - set errno to EILSEQ. This third bug is mitigated a bit because i couldn't find any system that actually conforms to POSIX in this respect: None of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno when an incomplete character is passed to mbtowc(3), even though that is required by POSIX. Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD after release unlock, so it would be good to fix this bug in libedit before fixing the bug in mbtowc(3). How can these three bugs be fixed? 1. As far as i understand it, the intention of the bogus errno = 0 is to undo the effects of failing system calls in el_wset(), sig_set(), and read__fixio() if the subsequent read(2) indicates end of file. So, restoring errno has to be moved right after read__fixio(). Of course, neither 0 nor e is the right value to restore: 0 is wrong if errno happened to be set on entry, e would be wrong because if one read(2) fails but a second attempt succeeds after read__fixio(), errno should not be touched. So, the errno to be restored in this case has to be saved before calling read(2) for the first time. 2. Solving the second issue requires distinguishing invalid and incomplete characters, but that is impossible with the function mbtowc(3) because it returns -1 in both cases and sets errno to EILSEQ in both cases (once properly implemented). It is vital that each input character is processed right away. It is not acceptable to wait for the next input character before processing the previous one because this is an interactive library, not a batch system. Consequently, the only situation where it is acceptable to wait for the next byte without first processing the previous one(s) is when the previous one(s) form an incomplete sequence that can be continued to form a valid character. Consequently, short of reimplementing a full UTF-8 state machine by hand, the only correct way forward is to use mbrtowc(3). Even then, care is needed to always have the state object properly initialized before using it, and to not discard a valid ASCII or UTF-8 lead byte if it happens to follow an invalid sequence. 3. Fortunately, solution 2. also solves issue 3. as a side effect, by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
if (!tried && read__fixio(el->el_infd, e) == 0) {
errno = save_errno;
tried = 1;
UTF-8 fixes from Ingo Schwarze: 1. Assume that errno is non-zero when entering read_char() and that read(2) returns 0 (indicating end of file). Then, the code will clear errno before returning. (Obviously, the statement "errno = 0" is almost always a bug unless there is save_errno = errno right before it and the previous value is properly restored later, in all reachable code paths.) 2. When encountering an invalid byte sequence, the code discards all following bytes until MB_LEN_MAX overflows; consider, for example, 0xc2 immediately followed by a few valid ASCII bytes. Three of those ASCII bytes will be discarded. 3. On a POSIX system, EILSEQ will always be set after reading a valid (yes, valid, not invalid!) UTF-8 character. The reason is that mbtowc(3) will first be called with a length limit (third argument) of 1, which will fail, return -1, and - on a POSIX system - set errno to EILSEQ. This third bug is mitigated a bit because i couldn't find any system that actually conforms to POSIX in this respect: None of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno when an incomplete character is passed to mbtowc(3), even though that is required by POSIX. Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD after release unlock, so it would be good to fix this bug in libedit before fixing the bug in mbtowc(3). How can these three bugs be fixed? 1. As far as i understand it, the intention of the bogus errno = 0 is to undo the effects of failing system calls in el_wset(), sig_set(), and read__fixio() if the subsequent read(2) indicates end of file. So, restoring errno has to be moved right after read__fixio(). Of course, neither 0 nor e is the right value to restore: 0 is wrong if errno happened to be set on entry, e would be wrong because if one read(2) fails but a second attempt succeeds after read__fixio(), errno should not be touched. So, the errno to be restored in this case has to be saved before calling read(2) for the first time. 2. Solving the second issue requires distinguishing invalid and incomplete characters, but that is impossible with the function mbtowc(3) because it returns -1 in both cases and sets errno to EILSEQ in both cases (once properly implemented). It is vital that each input character is processed right away. It is not acceptable to wait for the next input character before processing the previous one because this is an interactive library, not a batch system. Consequently, the only situation where it is acceptable to wait for the next byte without first processing the previous one(s) is when the previous one(s) form an incomplete sequence that can be continued to form a valid character. Consequently, short of reimplementing a full UTF-8 state machine by hand, the only correct way forward is to use mbrtowc(3). Even then, care is needed to always have the state object properly initialized before using it, and to not discard a valid ASCII or UTF-8 lead byte if it happens to follow an invalid sequence. 3. Fortunately, solution 2. also solves issue 3. as a side effect, by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
} else {
errno = e;
*cp = L'\0';
2011-07-29 19:16:33 +04:00
return -1;
}
}
/* Test for EOF */
if (num_read == 0) {
*cp = L'\0';
return 0;
}
From Ingo Schwartze: Next step: Remove #ifdef'ing in read_char(), in the same style as we did for setlocale(3) in el.c. A few remarks are required to explain the choices made. * On first sight, handling mbrtowc(3) seems a bit less trivial than handling setlocale(3) because its prototype uses the data type mbstate_t from <wchar.h>. However, it turns out that "histedit.h" already includes <wchar.h> unconditionally (i don't like headers including other headers, but that ship has sailed, people are by now certainly used to the fact that including "histedit.h" doesn't require including <wchar.h> before), and "histedit.h" is of course included all over the place. So from that perspective, there is no problem with using mbrtowc(3) unconditionally ever for !WIDECHAR. * However, <wchar.h> also defines the mbrtowc(3) prototype, so we cannot just #define mbrtowc away, or including the header will break. It would also be a bad idea to porovide a local implementation of mbrtowc() and hope that it overrides the one in libc. Besides, the required prototype is subtly different: While mbrtowc(3) takes "wchar_t *" as its first argument, we need a function that takes "Char *". So unfortunately, we have to keep a ct_mbrtowc #define, at least until we can maybe get rid of "Char *" in the more remote future. * After getting rid of the #else clause in read_char(), we can pull "return 1;" into the default: clause. After that, we can get rid of the ugly "goto again_lastbyte;" and just "break;". As a bonus, that also gets rid of the ugly CONSTCOND. * While here, delete the unused ct_mbtowc() from chartype.h.
2016-02-14 17:47:48 +03:00
for (;;) {
mbstate_t mbs;
From Ingo Schwartze: Next step: Remove #ifdef'ing in read_char(), in the same style as we did for setlocale(3) in el.c. A few remarks are required to explain the choices made. * On first sight, handling mbrtowc(3) seems a bit less trivial than handling setlocale(3) because its prototype uses the data type mbstate_t from <wchar.h>. However, it turns out that "histedit.h" already includes <wchar.h> unconditionally (i don't like headers including other headers, but that ship has sailed, people are by now certainly used to the fact that including "histedit.h" doesn't require including <wchar.h> before), and "histedit.h" is of course included all over the place. So from that perspective, there is no problem with using mbrtowc(3) unconditionally ever for !WIDECHAR. * However, <wchar.h> also defines the mbrtowc(3) prototype, so we cannot just #define mbrtowc away, or including the header will break. It would also be a bad idea to porovide a local implementation of mbrtowc() and hope that it overrides the one in libc. Besides, the required prototype is subtly different: While mbrtowc(3) takes "wchar_t *" as its first argument, we need a function that takes "Char *". So unfortunately, we have to keep a ct_mbrtowc #define, at least until we can maybe get rid of "Char *" in the more remote future. * After getting rid of the #else clause in read_char(), we can pull "return 1;" into the default: clause. After that, we can get rid of the ugly "goto again_lastbyte;" and just "break;". As a bonus, that also gets rid of the ugly CONSTCOND. * While here, delete the unused ct_mbtowc() from chartype.h.
2016-02-14 17:47:48 +03:00
++cbp;
/* This only works because UTF8 is stateless. */
memset(&mbs, 0, sizeof(mbs));
switch (mbrtowc(cp, cbuf, cbp, &mbs)) {
UTF-8 fixes from Ingo Schwarze: 1. Assume that errno is non-zero when entering read_char() and that read(2) returns 0 (indicating end of file). Then, the code will clear errno before returning. (Obviously, the statement "errno = 0" is almost always a bug unless there is save_errno = errno right before it and the previous value is properly restored later, in all reachable code paths.) 2. When encountering an invalid byte sequence, the code discards all following bytes until MB_LEN_MAX overflows; consider, for example, 0xc2 immediately followed by a few valid ASCII bytes. Three of those ASCII bytes will be discarded. 3. On a POSIX system, EILSEQ will always be set after reading a valid (yes, valid, not invalid!) UTF-8 character. The reason is that mbtowc(3) will first be called with a length limit (third argument) of 1, which will fail, return -1, and - on a POSIX system - set errno to EILSEQ. This third bug is mitigated a bit because i couldn't find any system that actually conforms to POSIX in this respect: None of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno when an incomplete character is passed to mbtowc(3), even though that is required by POSIX. Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD after release unlock, so it would be good to fix this bug in libedit before fixing the bug in mbtowc(3). How can these three bugs be fixed? 1. As far as i understand it, the intention of the bogus errno = 0 is to undo the effects of failing system calls in el_wset(), sig_set(), and read__fixio() if the subsequent read(2) indicates end of file. So, restoring errno has to be moved right after read__fixio(). Of course, neither 0 nor e is the right value to restore: 0 is wrong if errno happened to be set on entry, e would be wrong because if one read(2) fails but a second attempt succeeds after read__fixio(), errno should not be touched. So, the errno to be restored in this case has to be saved before calling read(2) for the first time. 2. Solving the second issue requires distinguishing invalid and incomplete characters, but that is impossible with the function mbtowc(3) because it returns -1 in both cases and sets errno to EILSEQ in both cases (once properly implemented). It is vital that each input character is processed right away. It is not acceptable to wait for the next input character before processing the previous one because this is an interactive library, not a batch system. Consequently, the only situation where it is acceptable to wait for the next byte without first processing the previous one(s) is when the previous one(s) form an incomplete sequence that can be continued to form a valid character. Consequently, short of reimplementing a full UTF-8 state machine by hand, the only correct way forward is to use mbrtowc(3). Even then, care is needed to always have the state object properly initialized before using it, and to not discard a valid ASCII or UTF-8 lead byte if it happens to follow an invalid sequence. 3. Fortunately, solution 2. also solves issue 3. as a side effect, by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
case (size_t)-1:
if (cbp > 1) {
/*
* Invalid sequence, discard all bytes
* except the last one.
*/
cbuf[0] = cbuf[cbp - 1];
cbp = 0;
From Ingo Schwartze: Next step: Remove #ifdef'ing in read_char(), in the same style as we did for setlocale(3) in el.c. A few remarks are required to explain the choices made. * On first sight, handling mbrtowc(3) seems a bit less trivial than handling setlocale(3) because its prototype uses the data type mbstate_t from <wchar.h>. However, it turns out that "histedit.h" already includes <wchar.h> unconditionally (i don't like headers including other headers, but that ship has sailed, people are by now certainly used to the fact that including "histedit.h" doesn't require including <wchar.h> before), and "histedit.h" is of course included all over the place. So from that perspective, there is no problem with using mbrtowc(3) unconditionally ever for !WIDECHAR. * However, <wchar.h> also defines the mbrtowc(3) prototype, so we cannot just #define mbrtowc away, or including the header will break. It would also be a bad idea to porovide a local implementation of mbrtowc() and hope that it overrides the one in libc. Besides, the required prototype is subtly different: While mbrtowc(3) takes "wchar_t *" as its first argument, we need a function that takes "Char *". So unfortunately, we have to keep a ct_mbrtowc #define, at least until we can maybe get rid of "Char *" in the more remote future. * After getting rid of the #else clause in read_char(), we can pull "return 1;" into the default: clause. After that, we can get rid of the ugly "goto again_lastbyte;" and just "break;". As a bonus, that also gets rid of the ugly CONSTCOND. * While here, delete the unused ct_mbtowc() from chartype.h.
2016-02-14 17:47:48 +03:00
break;
UTF-8 fixes from Ingo Schwarze: 1. Assume that errno is non-zero when entering read_char() and that read(2) returns 0 (indicating end of file). Then, the code will clear errno before returning. (Obviously, the statement "errno = 0" is almost always a bug unless there is save_errno = errno right before it and the previous value is properly restored later, in all reachable code paths.) 2. When encountering an invalid byte sequence, the code discards all following bytes until MB_LEN_MAX overflows; consider, for example, 0xc2 immediately followed by a few valid ASCII bytes. Three of those ASCII bytes will be discarded. 3. On a POSIX system, EILSEQ will always be set after reading a valid (yes, valid, not invalid!) UTF-8 character. The reason is that mbtowc(3) will first be called with a length limit (third argument) of 1, which will fail, return -1, and - on a POSIX system - set errno to EILSEQ. This third bug is mitigated a bit because i couldn't find any system that actually conforms to POSIX in this respect: None of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno when an incomplete character is passed to mbtowc(3), even though that is required by POSIX. Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD after release unlock, so it would be good to fix this bug in libedit before fixing the bug in mbtowc(3). How can these three bugs be fixed? 1. As far as i understand it, the intention of the bogus errno = 0 is to undo the effects of failing system calls in el_wset(), sig_set(), and read__fixio() if the subsequent read(2) indicates end of file. So, restoring errno has to be moved right after read__fixio(). Of course, neither 0 nor e is the right value to restore: 0 is wrong if errno happened to be set on entry, e would be wrong because if one read(2) fails but a second attempt succeeds after read__fixio(), errno should not be touched. So, the errno to be restored in this case has to be saved before calling read(2) for the first time. 2. Solving the second issue requires distinguishing invalid and incomplete characters, but that is impossible with the function mbtowc(3) because it returns -1 in both cases and sets errno to EILSEQ in both cases (once properly implemented). It is vital that each input character is processed right away. It is not acceptable to wait for the next input character before processing the previous one because this is an interactive library, not a batch system. Consequently, the only situation where it is acceptable to wait for the next byte without first processing the previous one(s) is when the previous one(s) form an incomplete sequence that can be continued to form a valid character. Consequently, short of reimplementing a full UTF-8 state machine by hand, the only correct way forward is to use mbrtowc(3). Even then, care is needed to always have the state object properly initialized before using it, and to not discard a valid ASCII or UTF-8 lead byte if it happens to follow an invalid sequence. 3. Fortunately, solution 2. also solves issue 3. as a side effect, by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
} else {
/* Invalid byte, discard it. */
cbp = 0;
goto again;
}
case (size_t)-2:
From Ingo Schwarze: If CHARSET_IS_UTF8 is not set, read_char() is broken in a large number of ways: 1. The isascii(3) check can yield false positives. If a string in an arbitrary encoding contains a byte in the range 0..127, that does not at all imply that it forms a character all by itself, and even less that it represents the same character as in ASCII. Consequently, read_char() may return characters the user never typed. Even if the encoding is not state dependent, the assumption that bytes in the range 0..127 represent ASCII characters is broken. Consider UTF-16, for example. 2. The reverse problem can also occur. In an arbitrary encoding, there is no guarantee that a character that can be represented by ASCII is represented by a seven-bit byte, and even less by the same byte as in ASCII. Even for single-byte encodings, these assumptions are broken. Consider the ISO 646 national variants, for example. Consequently, the current code is insufficient to keep ASCII characters working even for single-byte encodings. 3. The condition "++cbp != 1" can never trigger (because initially, cbp is 0, and the code can only go back up via the final goto, which has another cbp = 0 right before it) and it has no effect (because cbp isn't used afterwards). 4. bytes = ct_mbtowc(cp, cbuf, cbp) is broken. If this returns -1, the code assumes that is can just call mbtowc(3) again for later input bytes. In some implementations, that may even be broken for state-independent encodings, but trying again after mbtowc(3) failure certainly produces completely erratic and meaningless results in state-dependent encodings. 5. The assignment "*cp = (Char)(unsigned char)cbuf[0]" is completely bogus. Even if the byte cbuf[0] represents a character all by itself, which it usually will not, whether or not the cast produces the desired result depends on the internal representation of wchar_t in the C library, which the application program can know nothing about. Even for ASCII in the C/POSIX locale, an ASCII character other than '\0' == L'\0' == 0 need not have the same numeric value as a char and as a wchar_t. To summarize, this code only works if all of the following conditions hold: - The encoding is a single-byte encoding. - ASCII is a subset of the encoding. - The implementation of mbtowc(3) in the C library does not require re-initialization after encoding errors. - The implementation of wchar_t in the C library uses the same numerical values as ASCII. Otherwise, it silently produces wrong results. The simplest way to fix this is to just use the same code as for UTF-8 (right above). Of course, that causes functional changes but that shouldn't matter since current behaviour is undefined. The patch below provides the following improvements: - It works for all stateless single-byte encodings, no matter whether they are somehow related to ASCII, no matter how mb[r]towc(3) are internally implemented, and no matter how wchar_t is internally represented. - Instead of producing unpredictable and definitely wrong results for non-UTF-8 multibyte characters, it behaves in a well-defined way: It aborts input processing, sets errno, and returns failure. Note that short of providing full support for arbitrary locales, it is impossible to do better. We cannot know whether a given unsupported locale is state-dependent, and for a state-dependent locale, it makes no sense to retry parsing after an encoding error, so the best we can do is abort processing for *any* unsupported multi-byte character. - Note that single-byte characters in arbitrary state-independent locales still work, even in locales that may potentially also contain multibyte characters, as long as those don't occur in input. I'm not sure whether any such locales exist in practice... Tested with UTF-8 and C/POSIX on OpenBSD. Also tested that in the C/POSIX locale, non-ASCII bytes get through unmangled. You may wish to test with ISO-LATIN on NetBSD if NetBSD supports that. ---- Also use a constant for meta to avoid warnings.
2016-02-12 18:11:09 +03:00
/*
* We don't support other multibyte charsets.
* The second condition shouldn't happen
* and is here merely for additional safety.
*/
if ((el->el_flags & CHARSET_IS_UTF8) == 0 ||
cbp >= MB_LEN_MAX) {
errno = EILSEQ;
*cp = L'\0';
2011-07-29 19:16:33 +04:00
return -1;
}
UTF-8 fixes from Ingo Schwarze: 1. Assume that errno is non-zero when entering read_char() and that read(2) returns 0 (indicating end of file). Then, the code will clear errno before returning. (Obviously, the statement "errno = 0" is almost always a bug unless there is save_errno = errno right before it and the previous value is properly restored later, in all reachable code paths.) 2. When encountering an invalid byte sequence, the code discards all following bytes until MB_LEN_MAX overflows; consider, for example, 0xc2 immediately followed by a few valid ASCII bytes. Three of those ASCII bytes will be discarded. 3. On a POSIX system, EILSEQ will always be set after reading a valid (yes, valid, not invalid!) UTF-8 character. The reason is that mbtowc(3) will first be called with a length limit (third argument) of 1, which will fail, return -1, and - on a POSIX system - set errno to EILSEQ. This third bug is mitigated a bit because i couldn't find any system that actually conforms to POSIX in this respect: None of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno when an incomplete character is passed to mbtowc(3), even though that is required by POSIX. Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD after release unlock, so it would be good to fix this bug in libedit before fixing the bug in mbtowc(3). How can these three bugs be fixed? 1. As far as i understand it, the intention of the bogus errno = 0 is to undo the effects of failing system calls in el_wset(), sig_set(), and read__fixio() if the subsequent read(2) indicates end of file. So, restoring errno has to be moved right after read__fixio(). Of course, neither 0 nor e is the right value to restore: 0 is wrong if errno happened to be set on entry, e would be wrong because if one read(2) fails but a second attempt succeeds after read__fixio(), errno should not be touched. So, the errno to be restored in this case has to be saved before calling read(2) for the first time. 2. Solving the second issue requires distinguishing invalid and incomplete characters, but that is impossible with the function mbtowc(3) because it returns -1 in both cases and sets errno to EILSEQ in both cases (once properly implemented). It is vital that each input character is processed right away. It is not acceptable to wait for the next input character before processing the previous one because this is an interactive library, not a batch system. Consequently, the only situation where it is acceptable to wait for the next byte without first processing the previous one(s) is when the previous one(s) form an incomplete sequence that can be continued to form a valid character. Consequently, short of reimplementing a full UTF-8 state machine by hand, the only correct way forward is to use mbrtowc(3). Even then, care is needed to always have the state object properly initialized before using it, and to not discard a valid ASCII or UTF-8 lead byte if it happens to follow an invalid sequence. 3. Fortunately, solution 2. also solves issue 3. as a side effect, by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
/* Incomplete sequence, read another byte. */
goto again;
UTF-8 fixes from Ingo Schwarze: 1. Assume that errno is non-zero when entering read_char() and that read(2) returns 0 (indicating end of file). Then, the code will clear errno before returning. (Obviously, the statement "errno = 0" is almost always a bug unless there is save_errno = errno right before it and the previous value is properly restored later, in all reachable code paths.) 2. When encountering an invalid byte sequence, the code discards all following bytes until MB_LEN_MAX overflows; consider, for example, 0xc2 immediately followed by a few valid ASCII bytes. Three of those ASCII bytes will be discarded. 3. On a POSIX system, EILSEQ will always be set after reading a valid (yes, valid, not invalid!) UTF-8 character. The reason is that mbtowc(3) will first be called with a length limit (third argument) of 1, which will fail, return -1, and - on a POSIX system - set errno to EILSEQ. This third bug is mitigated a bit because i couldn't find any system that actually conforms to POSIX in this respect: None of OpenBSD, NetBSD, FreeBSD, Solaris 11, and glibc set errno when an incomplete character is passed to mbtowc(3), even though that is required by POSIX. Anyway, that mbtowc(3) bug will be fixed at least in OpenBSD after release unlock, so it would be good to fix this bug in libedit before fixing the bug in mbtowc(3). How can these three bugs be fixed? 1. As far as i understand it, the intention of the bogus errno = 0 is to undo the effects of failing system calls in el_wset(), sig_set(), and read__fixio() if the subsequent read(2) indicates end of file. So, restoring errno has to be moved right after read__fixio(). Of course, neither 0 nor e is the right value to restore: 0 is wrong if errno happened to be set on entry, e would be wrong because if one read(2) fails but a second attempt succeeds after read__fixio(), errno should not be touched. So, the errno to be restored in this case has to be saved before calling read(2) for the first time. 2. Solving the second issue requires distinguishing invalid and incomplete characters, but that is impossible with the function mbtowc(3) because it returns -1 in both cases and sets errno to EILSEQ in both cases (once properly implemented). It is vital that each input character is processed right away. It is not acceptable to wait for the next input character before processing the previous one because this is an interactive library, not a batch system. Consequently, the only situation where it is acceptable to wait for the next byte without first processing the previous one(s) is when the previous one(s) form an incomplete sequence that can be continued to form a valid character. Consequently, short of reimplementing a full UTF-8 state machine by hand, the only correct way forward is to use mbrtowc(3). Even then, care is needed to always have the state object properly initialized before using it, and to not discard a valid ASCII or UTF-8 lead byte if it happens to follow an invalid sequence. 3. Fortunately, solution 2. also solves issue 3. as a side effect, by no longer using mbtowc(3) in the first place.
2016-02-08 20:18:43 +03:00
default:
/* Valid character, process it. */
From Ingo Schwartze: Next step: Remove #ifdef'ing in read_char(), in the same style as we did for setlocale(3) in el.c. A few remarks are required to explain the choices made. * On first sight, handling mbrtowc(3) seems a bit less trivial than handling setlocale(3) because its prototype uses the data type mbstate_t from <wchar.h>. However, it turns out that "histedit.h" already includes <wchar.h> unconditionally (i don't like headers including other headers, but that ship has sailed, people are by now certainly used to the fact that including "histedit.h" doesn't require including <wchar.h> before), and "histedit.h" is of course included all over the place. So from that perspective, there is no problem with using mbrtowc(3) unconditionally ever for !WIDECHAR. * However, <wchar.h> also defines the mbrtowc(3) prototype, so we cannot just #define mbrtowc away, or including the header will break. It would also be a bad idea to porovide a local implementation of mbrtowc() and hope that it overrides the one in libc. Besides, the required prototype is subtly different: While mbrtowc(3) takes "wchar_t *" as its first argument, we need a function that takes "Char *". So unfortunately, we have to keep a ct_mbrtowc #define, at least until we can maybe get rid of "Char *" in the more remote future. * After getting rid of the #else clause in read_char(), we can pull "return 1;" into the default: clause. After that, we can get rid of the ugly "goto again_lastbyte;" and just "break;". As a bonus, that also gets rid of the ugly CONSTCOND. * While here, delete the unused ct_mbtowc() from chartype.h.
2016-02-14 17:47:48 +03:00
return 1;
}
From Ingo Schwartze: Next step: Remove #ifdef'ing in read_char(), in the same style as we did for setlocale(3) in el.c. A few remarks are required to explain the choices made. * On first sight, handling mbrtowc(3) seems a bit less trivial than handling setlocale(3) because its prototype uses the data type mbstate_t from <wchar.h>. However, it turns out that "histedit.h" already includes <wchar.h> unconditionally (i don't like headers including other headers, but that ship has sailed, people are by now certainly used to the fact that including "histedit.h" doesn't require including <wchar.h> before), and "histedit.h" is of course included all over the place. So from that perspective, there is no problem with using mbrtowc(3) unconditionally ever for !WIDECHAR. * However, <wchar.h> also defines the mbrtowc(3) prototype, so we cannot just #define mbrtowc away, or including the header will break. It would also be a bad idea to porovide a local implementation of mbrtowc() and hope that it overrides the one in libc. Besides, the required prototype is subtly different: While mbrtowc(3) takes "wchar_t *" as its first argument, we need a function that takes "Char *". So unfortunately, we have to keep a ct_mbrtowc #define, at least until we can maybe get rid of "Char *" in the more remote future. * After getting rid of the #else clause in read_char(), we can pull "return 1;" into the default: clause. After that, we can get rid of the ugly "goto again_lastbyte;" and just "break;". As a bonus, that also gets rid of the ugly CONSTCOND. * While here, delete the unused ct_mbtowc() from chartype.h.
2016-02-14 17:47:48 +03:00
}
}
/* read_pop():
* Pop a macro from the stack
*/
static void
read_pop(c_macro_t *ma)
{
int i;
el_free(ma->macro[0]);
for (i = 0; i < ma->level; i++)
ma->macro[i] = ma->macro[i + 1];
ma->level--;
ma->offset = 0;
}
1994-05-06 10:01:42 +04:00
/* el_wgetc():
* Read a wide character
1994-05-06 10:01:42 +04:00
*/
int
el_wgetc(EditLine *el, wchar_t *cp)
1994-05-06 10:01:42 +04:00
{
int num_read;
c_macro_t *ma = &el->el_chared.c_macro;
1999-07-02 19:14:07 +04:00
terminal__flush(el);
for (;;) {
if (ma->level < 0)
break;
if (ma->macro[0][ma->offset] == '\0') {
read_pop(ma);
continue;
}
*cp = ma->macro[0][ma->offset++];
if (ma->macro[0][ma->offset] == '\0') {
/* Needed for QuoteMode On */
read_pop(ma);
}
2011-07-29 19:16:33 +04:00
return 1;
1994-05-06 10:01:42 +04:00
}
#ifdef DEBUG_READ
(void) fprintf(el->el_errfile, "Turning raw mode on\n");
1994-05-06 10:01:42 +04:00
#endif /* DEBUG_READ */
if (tty_rawmode(el) < 0)/* make sure the tty is set up correctly */
2011-07-29 19:16:33 +04:00
return 0;
1994-05-06 10:01:42 +04:00
#ifdef DEBUG_READ
(void) fprintf(el->el_errfile, "Reading a character\n");
1994-05-06 10:01:42 +04:00
#endif /* DEBUG_READ */
num_read = (*el->el_read.read_char)(el, cp);
if (num_read < 0)
el->el_errno = errno;
1994-05-06 10:01:42 +04:00
#ifdef DEBUG_READ
(void) fprintf(el->el_errfile, "Got it %lc\n", *cp);
1994-05-06 10:01:42 +04:00
#endif /* DEBUG_READ */
2011-07-29 19:16:33 +04:00
return num_read;
1994-05-06 10:01:42 +04:00
}
protected void
read_prepare(EditLine *el)
{
if (el->el_flags & HANDLE_SIGNALS)
sig_set(el);
if (el->el_flags & NO_TTY)
return;
if ((el->el_flags & (UNBUFFERED|EDIT_DISABLED)) == UNBUFFERED)
tty_rawmode(el);
/* This is relatively cheap, and things go terribly wrong if
we have the wrong size. */
el_resize(el);
re_clear_display(el); /* reset the display stuff */
ch_reset(el, 0);
re_refresh(el); /* print the prompt */
if (el->el_flags & UNBUFFERED)
terminal__flush(el);
}
protected void
read_finish(EditLine *el)
{
if ((el->el_flags & UNBUFFERED) == 0)
(void) tty_cookedmode(el);
if (el->el_flags & HANDLE_SIGNALS)
sig_clr(el);
}
1994-05-06 10:01:42 +04:00
const wchar_t *
el_wgets(EditLine *el, int *nread)
1994-05-06 10:01:42 +04:00
{
int retval;
el_action_t cmdnum = 0;
int num; /* how many chars we have read at NL */
wchar_t wc;
2016-04-11 03:50:13 +03:00
wchar_t ch, *cp;
int crlf = 0;
int nrb;
1994-05-06 10:01:42 +04:00
if (nread == NULL)
nread = &nrb;
*nread = 0;
if (el->el_flags & NO_TTY) {
size_t idx;
cp = el->el_line.buffer;
while ((num = (*el->el_read.read_char)(el, &wc)) == 1) {
2016-04-11 03:50:13 +03:00
*cp = wc;
/* make sure there is space for next character */
if (cp + 1 >= el->el_line.limit) {
2011-08-16 20:25:15 +04:00
idx = (size_t)(cp - el->el_line.buffer);
2011-07-30 03:44:44 +04:00
if (!ch_enlargebufs(el, (size_t)2))
break;
cp = &el->el_line.buffer[idx];
}
cp++;
if (el->el_flags & UNBUFFERED)
break;
if (cp[-1] == '\r' || cp[-1] == '\n')
break;
}
if (num == -1) {
if (errno == EINTR)
cp = el->el_line.buffer;
el->el_errno = errno;
}
goto noedit;
}
1994-05-06 10:01:42 +04:00
#ifdef FIONREAD
if (el->el_tty.t_mode == EX_IO && el->el_chared.c_macro.level < 0) {
long chrs = 0;
(void) ioctl(el->el_infd, FIONREAD, &chrs);
if (chrs == 0) {
if (tty_rawmode(el) < 0) {
errno = 0;
*nread = 0;
2011-07-29 19:16:33 +04:00
return NULL;
}
}
1994-05-06 10:01:42 +04:00
}
#endif /* FIONREAD */
if ((el->el_flags & UNBUFFERED) == 0)
read_prepare(el);
1994-05-06 10:01:42 +04:00
if (el->el_flags & EDIT_DISABLED) {
size_t idx;
if ((el->el_flags & UNBUFFERED) == 0)
cp = el->el_line.buffer;
else
cp = el->el_line.lastchar;
terminal__flush(el);
while ((num = (*el->el_read.read_char)(el, &wc)) == 1) {
2016-04-11 03:50:13 +03:00
*cp = wc;
/* make sure there is space next character */
if (cp + 1 >= el->el_line.limit) {
2011-08-16 20:25:15 +04:00
idx = (size_t)(cp - el->el_line.buffer);
2011-07-30 03:44:44 +04:00
if (!ch_enlargebufs(el, (size_t)2))
break;
cp = &el->el_line.buffer[idx];
}
cp++;
crlf = cp[-1] == '\r' || cp[-1] == '\n';
if (el->el_flags & UNBUFFERED)
break;
if (crlf)
break;
}
if (num == -1) {
if (errno == EINTR)
cp = el->el_line.buffer;
el->el_errno = errno;
}
goto noedit;
}
for (num = -1; num == -1;) { /* while still editing this line */
1994-05-06 10:01:42 +04:00
#ifdef DEBUG_EDIT
read_debug(el);
1994-05-06 10:01:42 +04:00
#endif /* DEBUG_EDIT */
/* if EOF or error */
if (read_getcmd(el, &cmdnum, &ch) == -1) {
1994-05-06 10:01:42 +04:00
#ifdef DEBUG_READ
(void) fprintf(el->el_errfile,
"Returning from el_gets\n");
1994-05-06 10:01:42 +04:00
#endif /* DEBUG_READ */
break;
}
if (el->el_errno == EINTR) {
el->el_line.buffer[0] = '\0';
el->el_line.lastchar =
el->el_line.cursor = el->el_line.buffer;
break;
}
if ((size_t)cmdnum >= el->el_map.nfunc) { /* BUG CHECK command */
1994-05-06 10:01:42 +04:00
#ifdef DEBUG_EDIT
(void) fprintf(el->el_errfile,
"ERROR: illegal command from key 0%o\r\n", ch);
1994-05-06 10:01:42 +04:00
#endif /* DEBUG_EDIT */
continue; /* try again */
}
/* now do the real command */
1994-05-06 10:01:42 +04:00
#ifdef DEBUG_READ
{
el_bindings_t *b;
for (b = el->el_map.help; b->name; b++)
if (b->func == cmdnum)
break;
if (b->name)
(void) fprintf(el->el_errfile,
"Executing %ls\n", b->name);
else
(void) fprintf(el->el_errfile,
"Error command = %d\n", cmdnum);
}
1994-05-06 10:01:42 +04:00
#endif /* DEBUG_READ */
/* vi redo needs these way down the levels... */
el->el_state.thiscmd = cmdnum;
el->el_state.thisch = ch;
if (el->el_map.type == MAP_VI &&
el->el_map.current == el->el_map.key &&
el->el_chared.c_redo.pos < el->el_chared.c_redo.lim) {
if (cmdnum == VI_DELETE_PREV_CHAR &&
el->el_chared.c_redo.pos != el->el_chared.c_redo.buf
&& iswprint(el->el_chared.c_redo.pos[-1]))
el->el_chared.c_redo.pos--;
else
*el->el_chared.c_redo.pos++ = ch;
}
retval = (*el->el_map.func[cmdnum]) (el, ch);
#ifdef DEBUG_READ
(void) fprintf(el->el_errfile,
"Returned state %d\n", retval );
#endif /* DEBUG_READ */
/* save the last command here */
el->el_state.lastcmd = cmdnum;
/* use any return value */
switch (retval) {
case CC_CURSOR:
re_refresh_cursor(el);
break;
case CC_REDISPLAY:
re_clear_lines(el);
re_clear_display(el);
/* FALLTHROUGH */
case CC_REFRESH:
re_refresh(el);
break;
case CC_REFRESH_BEEP:
re_refresh(el);
terminal_beep(el);
break;
case CC_NORM: /* normal char */
break;
case CC_ARGHACK: /* Suggested by Rich Salz */
/* <rsalz@pineapple.bbn.com> */
continue; /* keep going... */
case CC_EOF: /* end of file typed */
if ((el->el_flags & UNBUFFERED) == 0)
num = 0;
else if (num == -1) {
2004-01-17 20:57:40 +03:00
*el->el_line.lastchar++ = CONTROL('d');
el->el_line.cursor = el->el_line.lastchar;
num = 1;
}
break;
case CC_NEWLINE: /* normal end of line */
2009-02-16 00:55:23 +03:00
num = (int)(el->el_line.lastchar - el->el_line.buffer);
break;
case CC_FATAL: /* fatal error, reset to known state */
1994-05-06 10:01:42 +04:00
#ifdef DEBUG_READ
(void) fprintf(el->el_errfile,
"*** editor fatal ERROR ***\r\n\n");
1994-05-06 10:01:42 +04:00
#endif /* DEBUG_READ */
/* put (real) cursor in a known place */
re_clear_display(el); /* reset the display stuff */
ch_reset(el, 1); /* reset the input pointers */
2011-07-30 03:44:44 +04:00
re_refresh(el); /* print the prompt again */
break;
case CC_ERROR:
default: /* functions we don't know about */
1994-05-06 10:01:42 +04:00
#ifdef DEBUG_READ
(void) fprintf(el->el_errfile,
"*** editor ERROR ***\r\n\n");
1994-05-06 10:01:42 +04:00
#endif /* DEBUG_READ */
terminal_beep(el);
terminal__flush(el);
break;
}
el->el_state.argument = 1;
el->el_state.doingarg = 0;
el->el_chared.c_vcmd.action = NOP;
if (el->el_flags & UNBUFFERED)
break;
1994-05-06 10:01:42 +04:00
}
terminal__flush(el); /* flush any buffered output */
/* make sure the tty is set up correctly */
if ((el->el_flags & UNBUFFERED) == 0) {
read_finish(el);
*nread = num != -1 ? num : 0;
} else {
*nread = (int)(el->el_line.lastchar - el->el_line.buffer);
}
goto done;
noedit:
el->el_line.cursor = el->el_line.lastchar = cp;
*cp = '\0';
*nread = (int)(el->el_line.cursor - el->el_line.buffer);
done:
if (*nread == 0) {
if (num == -1) {
*nread = -1;
errno = el->el_errno;
}
return NULL;
} else
return el->el_line.buffer;
1994-05-06 10:01:42 +04:00
}