indent: don't touch comments in preprocessing lines

The indentation of multi-line comments was wrong, and the code for
handling them was too complicated.
This commit is contained in:
rillig 2023-05-11 19:01:35 +00:00
parent bacb7fc4b5
commit d5678748c0
5 changed files with 25 additions and 213 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: lsym_preprocessing.c,v 1.7 2023/05/11 18:44:14 rillig Exp $ */
/* $NetBSD: lsym_preprocessing.c,v 1.8 2023/05/11 19:01:35 rillig Exp $ */
/*
* Tests for the token lsym_preprocessing, which represents a '#' that starts
@ -79,16 +79,16 @@
#else
#endif
#if 0 /* if comment */
#else /* else comment */
#endif /* endif comment */
#if 0 /* if comment */
#else /* else comment */
#endif /* endif comment */
#if 0 /* outer if comment */
#if 0 /* outer if comment */
/* $ XXX: The indentation is removed, which can get confusing */
#if nested /* inner if comment */
#else /* inner else comment */
#endif /* inner endif comment */
#endif /* outer endif comment */
#if nested /* inner if comment */
#else /* inner else comment */
#endif /* inner endif comment */
#endif /* outer endif comment */
//indent end
@ -204,15 +204,7 @@ int unary_plus =
#endif/* comment */
//indent end
//indent run
#if 0 /* comment */
#else /* comment */
#endif /* comment */
#if 0 /* comment */
#else /* comment */
#endif /* comment */
//indent end
//indent run-equals-input
/*
@ -221,7 +213,7 @@ int unary_plus =
//indent input
#define eol_comment // EOL
#define wrap_comment /* line 1
#define no_wrap_comment /* line 1
* line 2
* line 3
*/
@ -235,20 +227,7 @@ int unary_plus =
#define three_comments /* first */ /* second */ /*third*/
//indent end
//indent run
#define eol_comment // EOL
#define wrap_comment /* line 1 line 2 line 3 */
/* $ FIXME: Keep the original indentation of the follow-up lines. */
#define fixed_comment /*- line 1
* line 2
* line 3
*/
#define two_comments /* 1 */ /* 2 */ /* 3 */
#define three_comments /* first */ /* second */ /* third */
//indent end
//indent run-equals-input
/*

View File

@ -1,4 +1,4 @@
/* $NetBSD: opt_v.c,v 1.10 2023/05/11 18:13:55 rillig Exp $ */
/* $NetBSD: opt_v.c,v 1.11 2023/05/11 19:01:35 rillig Exp $ */
/*
* Tests for the options '-v' and '-nv'.
@ -40,9 +40,9 @@ example(void)
#define macro1 /* prefix */ suffix
#define macro2 prefix /* suffix */
There were 12 output lines and 2 comments
(Lines with comments)/(Lines with code): 0.571
#define macro2 prefix /* suffix */
There were 12 output lines and 1 comments
(Lines with comments)/(Lines with code): 0.429
//indent end

View File

@ -1,4 +1,4 @@
/* $NetBSD: indent.c,v 1.251 2023/05/11 18:13:55 rillig Exp $ */
/* $NetBSD: indent.c,v 1.252 2023/05/11 19:01:35 rillig Exp $ */
/*-
* SPDX-License-Identifier: BSD-4-Clause
@ -43,7 +43,7 @@ static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93";
#include <sys/cdefs.h>
#if defined(__NetBSD__)
__RCSID("$NetBSD: indent.c,v 1.251 2023/05/11 18:13:55 rillig Exp $");
__RCSID("$NetBSD: indent.c,v 1.252 2023/05/11 19:01:35 rillig Exp $");
#elif defined(__FreeBSD__)
__FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
#endif
@ -927,7 +927,6 @@ read_preprocessing_line(void)
buf_add_char(&lab, '#');
state = PLAIN;
int com_start = 0, com_end = 0;
while (ch_isblank(inp_peek()))
inp_skip();
@ -944,7 +943,6 @@ read_preprocessing_line(void)
if (inp_peek() == '*' && state == PLAIN) {
state = COMM;
*lab.e++ = inp_next();
com_start = (int)buf_len(&lab) - 2;
}
break;
case '"':
@ -963,7 +961,6 @@ read_preprocessing_line(void)
if (inp_peek() == '/' && state == COMM) {
state = PLAIN;
*lab.e++ = inp_next();
com_end = (int)buf_len(&lab);
}
break;
}
@ -971,16 +968,6 @@ read_preprocessing_line(void)
while (lab.e > lab.s && ch_isblank(lab.e[-1]))
lab.e--;
if (lab.e - lab.s == com_end && !inp_comment_seen()) {
/* comment on preprocessor line */
inp_comment_init_preproc();
inp_comment_add_range(lab.s + com_start, lab.s + com_end);
lab.e = lab.s + com_start;
while (lab.e > lab.s && ch_isblank(lab.e[-1]))
lab.e--;
inp_comment_add_char(' '); /* add trailing blank, just in case */
inp_from_comment();
}
buf_terminate(&lab);
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: indent.h,v 1.118 2023/05/11 18:26:56 rillig Exp $ */
/* $NetBSD: indent.h,v 1.119 2023/05/11 19:01:35 rillig Exp $ */
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
@ -389,19 +389,6 @@ char inp_lookahead(size_t);
void inp_skip(void);
char inp_next(void);
void inp_comment_init_preproc(void);
void inp_comment_add_char(char);
void inp_comment_add_range(const char *, const char *);
bool inp_comment_seen(void);
void inp_from_comment(void);
#ifdef debug
void debug_inp(const char *);
#else
#define debug_inp(prefix) do { } while (false)
#endif
lexer_symbol lexi(void);
void diag(int, const char *, ...)__printflike(2, 3);
void output_line(void);

View File

@ -1,4 +1,4 @@
/* $NetBSD: io.c,v 1.152 2023/05/11 18:44:14 rillig Exp $ */
/* $NetBSD: io.c,v 1.153 2023/05/11 19:01:35 rillig Exp $ */
/*-
* SPDX-License-Identifier: BSD-4-Clause
@ -43,7 +43,7 @@ static char sccsid[] = "@(#)io.c 8.1 (Berkeley) 6/6/93";
#include <sys/cdefs.h>
#if defined(__NetBSD__)
__RCSID("$NetBSD: io.c,v 1.152 2023/05/11 18:44:14 rillig Exp $");
__RCSID("$NetBSD: io.c,v 1.153 2023/05/11 19:01:35 rillig Exp $");
#elif defined(__FreeBSD__)
__FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
#endif
@ -56,40 +56,12 @@ __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $
#include "indent.h"
/*
* There are 3 modes for reading the input.
*
* default: In this mode, the input comes from the input file. The buffer
* 'inp' contains the current line, terminated with '\n'. The current read
* position is inp.s, and there is always inp.buf <= inp.s < inp.e. All other
* pointers are null.
*
* copy-in: After reading 'if (expr)' or similar tokens, the input still comes
* from 'inp', but instead of processing it, it is copied to 'save_com'. The
* goal of this mode is to move the comments after the '{', that is to
* transform 'if (expr) comment {' to 'if (expr) { comment'. When the next
* token cannot be part of this transformation, switch to copy-out.
*
* copy-out: In this mode, the input comes from 'save_com', which contains the
* tokens to be placed after the '{'. The input still comes from the range
* [inp.s, inp.e), but these two members have been overwritten with pointers
* into save_com_buf, so inp.buf and inp.s are unrelated, which is unusual.
* In this mode, inp.e[-1] is usually not terminated with '\n'. After reading
* all tokens from save_com, switch to default mode again.
* The buffer 'inp' contains the current line, terminated with '\n'. The
* current read position is inp.s, and inp.buf <= inp.s < inp.e holds.
*/
static struct {
struct buffer inp; /* one line of input, ready to be split into
* tokens; occasionally 's' and 'e' switch
* to save_com_buf */
char save_com_buf[5000]; /* input text is saved here when looking for
* the brace after an if, while, etc */
char *save_com_s; /* start of the comment in save_com_buf, or
* null */
char *save_com_e; /* end of the comment in save_com_buf, or
* null */
char *saved_inp_s; /* saved value of inp.s when taking input from
* save_com, or null */
char *saved_inp_e; /* saved value of inp.e, or null */
* tokens */
} inbuf;
static int paren_indent;
@ -114,7 +86,7 @@ inp_p(void)
const char *
inp_line_start(void)
{
return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf;
return inbuf.inp.buf;
}
const char *
@ -154,116 +126,6 @@ inp_next(void)
return ch;
}
#ifdef debug
/*
* Debug helper: print the byte range [s, e) on one line, labelled with
* 'name'. Nothing is printed when either pointer is null, so callers may
* pass ranges that are currently unset.
*/
static void
debug_inp_buf(const char *name, const char *s, const char *e)
{
if (s != NULL && e != NULL) {
/* the label is left-aligned in a 12-character column */
debug_printf(" %-12s ", name);
/*
* NOTE(review): debug_vis_range presumably prints the bytes in a
* visible form between the given prefix and suffix -- confirm.
*/
debug_vis_range("\"", s, e, "\"\n");
}
}
/*
* Debug helper: dump the state of the input buffers, preceded by a heading
* that consists of this function's name and 'prefix'.
*
* Asserts that the read position inp.s lies between the start of the
* current line (as reported by inp_line_start) and inp.e.
*/
void
debug_inp(const char *prefix)
{
assert(inp_line_start() <= inbuf.inp.s);
assert(inbuf.inp.s <= inbuf.inp.e);
debug_println("%s %s:", __func__, prefix);
/*
* The range from inp.buf to inp.s is only dumped while saved_inp_s is
* null, that is, while inp.s has not been redirected by
* inp_from_comment.
*/
if (inbuf.saved_inp_s == NULL)
debug_inp_buf("inp.buf", inbuf.inp.buf, inbuf.inp.s);
debug_inp_buf("inp", inbuf.inp.s, inbuf.inp.e); /* never null */
/* the remaining ranges are skipped by debug_inp_buf while unset */
debug_inp_buf("save_com.buf", inbuf.save_com_buf, inbuf.save_com_s);
debug_inp_buf("save_com", inbuf.save_com_s, inbuf.save_com_e);
debug_inp_buf("saved_inp", inbuf.saved_inp_s, inbuf.saved_inp_e);
}
#endif
/*
* Ensure that 'n' more bytes fit into save_com_buf behind the current end
* save_com_e. If they do not fit, report the overflow via diag, flush the
* output and terminate the program with exit status 1.
*
* NOTE(review): the pointer difference assumes that save_com_e already
* points into save_com_buf, i.e. that the buffer has been set up before
* this is called -- confirm against the callers.
*/
static void
inp_comment_check_size(size_t n)
{
/* bytes already used + bytes requested must not exceed the capacity */
if ((size_t)(inbuf.save_com_e - inbuf.save_com_buf) + n <=
array_length(inbuf.save_com_buf))
return;
diag(1, "Internal buffer overflow - "
"Move big comment from right after if, while, or whatever");
fflush(output);
exit(1);
}
/*
* Prepare save_com_buf for receiving a comment taken from a preprocessing
* line.
*
* When no comment is pending (save_com_e is null), the buffer is set up as
* empty, with save_com_s and save_com_e both at its start. Otherwise a
* newline and a blank are appended to separate the new comment from the
* pending one, and line_no is decremented.
*/
void
inp_comment_init_preproc(void)
{
if (inbuf.save_com_e == NULL) { /* if this is the first comment, we
* must set up the buffer */
inbuf.save_com_s = inbuf.save_com_buf;
inbuf.save_com_e = inbuf.save_com_s;
} else {
inp_comment_add_char('\n'); /* add newline between comments */
inp_comment_add_char(' ');
/*
* NOTE(review): presumably compensates for the extra newline added
* above being counted as an input line later -- confirm.
*/
--line_no;
}
}
/*
* Append the single character 'ch' at save_com_e and advance save_com_e.
* The capacity check terminates the program if save_com_buf is full.
*/
void
inp_comment_add_char(char ch)
{
inp_comment_check_size(1);
*inbuf.save_com_e++ = ch;
}
/*
* Append the bytes of the range [s, e) at save_com_e and advance save_com_e
* by the length of the range. The capacity check terminates the program if
* the range does not fit into save_com_buf.
*/
void
inp_comment_add_range(const char *s, const char *e)
{
size_t len = (size_t)(e - s);
inp_comment_check_size(len);
memcpy(inbuf.save_com_e, s, len);
inbuf.save_com_e += len;
}
/*
* Tell whether a saved comment is pending, that is, whether save_com_e is
* non-null.
*/
bool
inp_comment_seen(void)
{
return inbuf.save_com_e != NULL;
}
/*
* Switch the input to come from save_com, replaying the copied tokens while
* looking for the next '{'.
*
* The current read range inp.s/inp.e is remembered in saved_inp_s and
* saved_inp_e, then inp.s/inp.e are pointed at the saved text, and
* save_com_s/save_com_e are reset to null so that no comment counts as
* pending anymore. The buffer state is dumped before and after the switch
* in debug builds.
*/
void
inp_from_comment(void)
{
debug_inp("before inp_from_comment");
inbuf.saved_inp_s = inbuf.inp.s;
inbuf.saved_inp_e = inbuf.inp.e;
inbuf.inp.s = inbuf.save_com_s;
inbuf.inp.e = inbuf.save_com_e;
inbuf.save_com_s = NULL;
inbuf.save_com_e = NULL;
debug_inp("after inp_from_comment");
}
/*
* After having read from save_com, continue with the rest of the input line
* before reading the next line from the input file.
*
* Returns false if the input was not redirected (saved_inp_s is null) or if
* the restored range is empty; returns true if the restored range still
* contains unread characters.
*/
static bool
inp_from_file(void)
{
if (inbuf.saved_inp_s == NULL)
return false;
/* restore the read range that inp_from_comment had set aside */
inbuf.inp.s = inbuf.saved_inp_s;
inbuf.inp.e = inbuf.saved_inp_e;
inbuf.saved_inp_s = inbuf.saved_inp_e = NULL;
debug_println("switched inp.s back to saved_inp_s");
return inbuf.inp.s < inbuf.inp.e;
}
static void
inp_add(char ch)
{
@ -582,9 +444,6 @@ parse_indent_comment(void)
void
inp_read_line(void)
{
if (inp_from_file())
return;
inp_read_next_line(input);
parse_indent_comment();