indent: clean up the code, add a few tests

This commit is contained in:
rillig 2023-06-14 20:46:08 +00:00
parent cdfe968337
commit 940b85009c
10 changed files with 307 additions and 83 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: fmt_block.c,v 1.7 2023/05/22 23:03:16 rillig Exp $ */
/* $NetBSD: fmt_block.c,v 1.8 2023/06/14 20:46:08 rillig Exp $ */
/*
* Tests for formatting blocks of statements and declarations.
@ -6,7 +6,6 @@
* See also:
* lsym_lbrace.c
* psym_stmt.c
* psym_stmt_list.c
*/
//indent input

View File

@ -1,4 +1,4 @@
/* $NetBSD: fmt_decl.c,v 1.54 2023/06/10 17:56:29 rillig Exp $ */
/* $NetBSD: fmt_decl.c,v 1.55 2023/06/14 20:46:08 rillig Exp $ */
/*
* Tests for declarations of global variables, external functions, and local
@ -1085,3 +1085,104 @@ die(void)
{
}
//indent end
/*
* In very rare cases, the type of a declarator might include literal tab
* characters. Such a tab might affect the indentation of the declarator, but
* only if it occurs before the declarator, and that is hard to achieve.
*/
//indent input
int arr[sizeof " "];
//indent end
//indent run-equals-input
/*
* The '}' of an initializer is not supposed to end the statement; it only ends
* the brace level of the initializer expression.
*/
//indent input
int multi_line[1][1][1] = {
{
{
1
},
},
};
int single_line[2][1][1] = {{{1},},{{2}}};
//indent end
//indent run -di0
int multi_line[1][1][1] = {
{
{
1
},
},
};
int single_line[2][1][1] = {{{1},}, {{2}}};
//indent end
/*
* The '}' of an initializer is not supposed to end the statement; it only ends
* the brace level of the initializer expression. In this variant, the
* declarations with initializers are placed inside a block.
*/
//indent input
{
int multi_line = {
{
{
b
},
},
};
int single_line = {{{b},},{}};
}
//indent end
//indent run -di0
{
int multi_line = {
{
{
b
},
},
};
int single_line = {{{b},}, {}};
}
//indent end
/*
* In initializers, multi-line expressions don't have their second line
* indented, even though they should.
*/
//indent input
{
multi_line = (int[]){
{1
+1},
{1
+1},
{1
+1},
};
}
//indent end
//indent run
{
multi_line = (int[]){
{1
+ 1},
{1
+ 1},
{1
+ 1},
};
}
//indent end

View File

@ -1,4 +1,4 @@
/* $NetBSD: lsym_preprocessing.c,v 1.13 2023/06/14 17:07:32 rillig Exp $ */
/* $NetBSD: lsym_preprocessing.c,v 1.14 2023/06/14 20:46:08 rillig Exp $ */
/*
* Tests for the token lsym_preprocessing, which represents a '#' that starts
@ -307,3 +307,38 @@ error: Standard Input:2: Unmatched #elif
error: Standard Input:3: Unmatched #elifdef
error: Standard Input:4: Unmatched #endif
//indent end
/*
* In valid code, the '#' of a preprocessing directive can only occur at the
* beginning of a line; therefore, indent does not treat a '#' in the middle of
* a line specially and simply breaks the line before it.
*/
//indent input
int no = #;
//indent end
//indent run -di0
int no =
#;
//indent end
/*
* Preprocessing directives may be indented; indent moves them to the beginning
* of a line.
*/
//indent input
#if 0
#if 1 \
|| 2
#endif
#endif
//indent end
//indent run
#if 0
#if 1 \
|| 2
#endif
#endif
//indent end

View File

@ -1,4 +1,4 @@
/* $NetBSD: opt_bc.c,v 1.11 2023/06/14 14:11:28 rillig Exp $ */
/* $NetBSD: opt_bc.c,v 1.12 2023/06/14 20:46:08 rillig Exp $ */
/*
* Tests for the options '-bc' and '-nbc'.
@ -121,6 +121,17 @@ int a = (1),
//indent end
//indent input
int a,
b,
c;
//indent end
//indent run -nbc -di0
int a, b, c;
//indent end
/*
* When declarations are too long to fit in a single line, they should not be
* joined.

View File

@ -1,10 +1,13 @@
/* $NetBSD: ps_ind_level.c,v 1.7 2023/05/15 14:55:47 rillig Exp $ */
/* $NetBSD: ps_ind_level.c,v 1.8 2023/06/14 20:46:08 rillig Exp $ */
/*
* The indentation of the very first line of a file determines the
* indentation of the remaining code. Even if later code has a smaller
* indentation, it is nevertheless indented to the level given by the first
* line of code.
* indentation of the remaining code. This mode is meant for code snippets from
* function bodies. At this level, function definitions are not recognized
* properly.
*
* Even if later code has a smaller indentation, it is nevertheless indented to
* the level given by the first line of code.
*
* In this particular test, the indentation is set to 5 and the tabulator
* width is set to 8, to demonstrate an off-by-one error in
@ -18,6 +21,9 @@
int indented_by_24;
void function_in_column_1(void){}
#if indented
#endif
//indent end
/* 5 spaces indentation, 8 spaces per tabulator */
@ -26,6 +32,9 @@ void function_in_column_1(void){}
void function_in_column_1(void) {
}
#if indented
#endif
//indent end

View File

@ -1,4 +1,4 @@
/* $NetBSD: psym_rbrace.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
/* $NetBSD: psym_rbrace.c,v 1.4 2023/06/14 20:46:08 rillig Exp $ */
/*
* Tests for the parser symbol psym_rbrace, which represents '}' and finishes
@ -8,8 +8,81 @@
* psym_lbrace.c
*/
/*
* While it is a syntax error to have an unfinished declaration between braces,
* indent is forgiving enough to accept this input.
*/
//indent input
// TODO: add input
{
int
}
//indent end
//indent run
{
int
}
exit 1
error: Standard Input:3: Statement nesting error
error: Standard Input:3: Stuff missing from end of file
//indent end
//indent input
{
do {
} while (cond)
}
//indent end
// XXX: Why doesn't indent complain about the missing semicolon?
//indent run-equals-input
//indent input
{
if (cond)
}
//indent end
//indent run
{
if (cond)
}
exit 1
error: Standard Input:3: Statement nesting error
error: Standard Input:3: Stuff missing from end of file
//indent end
//indent input
{
switch (expr)
}
//indent end
//indent run
{
switch (expr)
}
exit 1
error: Standard Input:3: Statement nesting error
error: Standard Input:3: Stuff missing from end of file
//indent end
//indent input
{
while (cond)
}
//indent end
//indent run
{
while (cond)
}
exit 1
error: Standard Input:3: Statement nesting error
error: Standard Input:3: Stuff missing from end of file
//indent end

View File

@ -1,4 +1,4 @@
/* $NetBSD: debug.c,v 1.60 2023/06/14 16:14:30 rillig Exp $ */
/* $NetBSD: debug.c,v 1.61 2023/06/14 20:46:08 rillig Exp $ */
/*-
* Copyright (c) 2023 The NetBSD Foundation, Inc.
@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
__RCSID("$NetBSD: debug.c,v 1.60 2023/06/14 16:14:30 rillig Exp $");
__RCSID("$NetBSD: debug.c,v 1.61 2023/06/14 20:46:08 rillig Exp $");
#include <stdarg.h>
#include <string.h>
@ -367,7 +367,7 @@ debug_parser_state(void)
state.heading = "vertical spacing";
debug_ps_bool(break_after_comma);
debug_ps_bool(force_nl);
debug_ps_bool(want_newline);
debug_ps_enum(declaration, declaration_name);
debug_ps_bool(blank_line_after_decl);

View File

@ -1,4 +1,4 @@
/* $NetBSD: indent.c,v 1.366 2023/06/14 19:05:40 rillig Exp $ */
/* $NetBSD: indent.c,v 1.367 2023/06/14 20:46:08 rillig Exp $ */
/*-
* SPDX-License-Identifier: BSD-4-Clause
@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
__RCSID("$NetBSD: indent.c,v 1.366 2023/06/14 19:05:40 rillig Exp $");
__RCSID("$NetBSD: indent.c,v 1.367 2023/06/14 20:46:08 rillig Exp $");
#include <sys/param.h>
#include <err.h>
@ -187,7 +187,7 @@ ind_add(int ind, const char *s, size_t len)
static void
init_globals(void)
{
ps_push(psym_stmt, false);
ps_push(psym_stmt, false); /* as a stop symbol */
ps.prev_lsym = lsym_semicolon;
ps.lbrace_kind = psym_lbrace_block;
@ -345,9 +345,11 @@ update_ps_lbrace_kind(lexer_symbol lsym)
ps.lbrace_kind = token.s[0] == 's' ? psym_lbrace_struct :
token.s[0] == 'u' ? psym_lbrace_union :
psym_lbrace_enum;
} else if (lsym != lsym_type_outside_parentheses
&& lsym != lsym_word
&& lsym != lsym_lbrace)
} else if (lsym == lsym_type_outside_parentheses
|| lsym == lsym_word
|| lsym == lsym_lbrace) {
/* Keep the current '{' kind. */
} else
ps.lbrace_kind = psym_lbrace_block;
}
@ -355,21 +357,19 @@ static void
indent_declarator(int decl_ind, bool tabs_to_var)
{
int base = ps.ind_level * opt.indent_size;
int ind = base + (int)code.len;
int ind = ind_add(base, code.s, code.len);
int target = base + decl_ind;
size_t orig_code_len = code.len;
if (tabs_to_var)
for (int next; (next = next_tab(ind)) <= target; ind = next)
buf_add_char(&code, '\t');
for (; ind < target; ind++)
buf_add_char(&code, ' ');
if (code.len == orig_code_len && ps.want_blank) {
if (code.len == orig_code_len && ps.want_blank)
buf_add_char(&code, ' ');
ps.want_blank = false;
}
ps.want_blank = false;
ps.decl_indent_done = true;
}
@ -556,7 +556,7 @@ process_newline(void)
if (ps.psyms.sym[ps.psyms.len - 1] == psym_switch_expr
&& opt.brace_same_line
&& com.len == 0) {
ps.force_nl = true;
ps.want_newline = true;
goto stay_in_line;
}
@ -591,7 +591,7 @@ process_lparen(void)
else if (ps.want_blank && want_blank_before_lparen())
buf_add_char(&code, ' ');
ps.want_blank = false;
buf_add_char(&code, token.s[0]);
buf_add_buf(&code, &token);
if (opt.extra_expr_indent && ps.spaced_expr_psym != psym_0)
ps.extra_expr_indent = eei_maybe;
@ -617,17 +617,14 @@ process_lparen(void)
static void
process_rparen(void)
{
if (ps.paren.len == 0) {
if (ps.paren.len == 0)
diag(0, "Extra '%c'", *token.s);
goto unbalanced;
}
enum paren_level_cast cast = ps.paren.item[--ps.paren.len].cast;
if (ps.in_func_def_params || (ps.line_has_decl && !ps.in_init))
cast = cast_no;
ps.prev_paren_was_cast = cast == cast_maybe;
if (cast == cast_maybe) {
ps.prev_paren_was_cast = ps.paren.len > 0
&& ps.paren.item[--ps.paren.len].cast == cast_maybe
&& !ps.in_func_def_params
&& !(ps.line_has_decl && !ps.in_init);
if (ps.prev_paren_was_cast) {
ps.next_unary = true;
ps.want_blank = opt.space_after_cast;
} else
@ -636,19 +633,19 @@ process_rparen(void)
if (code.len == 0)
ps.ind_paren_level = (int)ps.paren.len;
unbalanced:
buf_add_char(&code, token.s[0]);
buf_add_buf(&code, &token);
if (ps.spaced_expr_psym != psym_0 && ps.paren.len == 0) {
if (ps.extra_expr_indent == eei_maybe)
ps.extra_expr_indent = eei_last;
ps.force_nl = true;
ps.next_unary = true;
ps.in_stmt_or_decl = false;
parse(ps.spaced_expr_psym);
ps.spaced_expr_psym = psym_0;
ps.want_newline = true;
ps.next_unary = true;
ps.in_stmt_or_decl = false;
ps.want_blank = true;
out.line_kind = lk_stmt_head;
if (ps.extra_expr_indent == eei_maybe)
ps.extra_expr_indent = eei_last;
}
}
@ -658,8 +655,8 @@ process_lbracket(void)
if (code.len > 0
&& (ps.prev_lsym == lsym_comma || ps.prev_lsym == lsym_binary_op))
buf_add_char(&code, ' ');
buf_add_buf(&code, &token);
ps.want_blank = false;
buf_add_char(&code, token.s[0]);
paren_stack_push(&ps.paren, ind_add(0, code.s, code.len), cast_no);
}
@ -667,18 +664,16 @@ process_lbracket(void)
static void
process_rbracket(void)
{
if (ps.paren.len == 0) {
if (ps.paren.len == 0)
diag(0, "Extra '%c'", *token.s);
goto unbalanced;
}
ps.paren.len--;
if (ps.paren.len > 0)
ps.paren.len--;
ps.want_blank = true;
if (code.len == 0)
ps.ind_paren_level = (int)ps.paren.len;
unbalanced:
buf_add_char(&code, token.s[0]);
buf_add_buf(&code, &token);
ps.want_blank = true;
}
static void
@ -697,7 +692,7 @@ process_lbrace(void)
if (ps.in_init)
ps.init_level++;
else
ps.force_nl = true;
ps.want_newline = true;
if (code.len > 0 && !ps.in_init) {
if (!opt.brace_same_line ||
@ -789,7 +784,7 @@ process_rbrace(void)
if (!ps.in_var_decl
&& ps.psyms.sym[ps.psyms.len - 1] != psym_do_stmt
&& ps.psyms.sym[ps.psyms.len - 1] != psym_if_expr_stmt)
ps.force_nl = true;
ps.want_newline = true;
}
static void
@ -846,7 +841,7 @@ process_comma(void)
if (ps.break_after_comma && (opt.break_after_comma ||
ind_add(compute_code_indent(), code.s, code.len)
>= opt.max_line_length - typical_varname_length))
ps.force_nl = true;
ps.want_newline = true;
}
}
@ -860,7 +855,7 @@ process_label_colon(void)
if (ps.seen_case)
out.line_kind = lk_case_or_default;
ps.in_stmt_or_decl = false;
ps.force_nl = ps.seen_case;
ps.want_newline = ps.seen_case;
ps.seen_case = false;
ps.want_blank = false;
}
@ -915,7 +910,7 @@ process_semicolon(void)
if (ps.spaced_expr_psym == psym_0) {
parse(psym_stmt);
ps.force_nl = true;
ps.want_newline = true;
}
}
@ -968,7 +963,7 @@ process_word(lexer_symbol lsym)
}
} else if (ps.spaced_expr_psym != psym_0 && ps.paren.len == 0) {
ps.force_nl = true;
ps.want_newline = true;
ps.in_stmt_or_decl = false;
ps.next_unary = true;
parse(ps.spaced_expr_psym);
@ -985,7 +980,7 @@ process_do(void)
if (code.len > 0)
output_line();
ps.force_nl = true;
ps.want_newline = true;
parse(psym_do);
}
@ -998,7 +993,7 @@ process_else(void)
&& !(opt.cuddle_else && code.s[code.len - 1] == '}'))
output_line();
ps.force_nl = true;
ps.want_newline = true;
parse(psym_else);
}
@ -1085,15 +1080,15 @@ indent(void)
if (lsym == lsym_if && ps.prev_lsym == lsym_else
&& opt.else_if_in_same_line)
ps.force_nl = false;
ps.want_newline = false;
if (lsym == lsym_preprocessing || lsym == lsym_newline)
ps.force_nl = false;
ps.want_newline = false;
else if (lsym == lsym_comment) {
/* no special processing */
} else {
if (ps.force_nl && should_break_line(lsym)) {
ps.force_nl = false;
if (ps.want_newline && should_break_line(lsym)) {
ps.want_newline = false;
output_line();
}
ps.in_stmt_or_decl = true;

View File

@ -1,4 +1,4 @@
/* $NetBSD: indent.h,v 1.196 2023/06/14 19:05:40 rillig Exp $ */
/* $NetBSD: indent.h,v 1.197 2023/06/14 20:46:08 rillig Exp $ */
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
@ -272,10 +272,10 @@ struct paren_level {
struct psym_stack {
parser_symbol *sym;
int *ind_level;
size_t len; /* points to one behind the top of the stack;
* 1 at the top level of the file outside a
* declaration or statement; 2 at the top
* level */
size_t len; /* points to one behind the top of the stack; 1
* at the top level of the file outside a
* declaration or statement; 2 at the top level
*/
size_t cap;
};
@ -393,9 +393,9 @@ extern struct parser_state {
bool break_after_comma; /* whether to add a newline after the next
* comma; used in declarations but not in
* initializer lists */
bool force_nl; /* whether the next token is forced to go to a
* new line; used after 'if (expr)' and in
* similar situations; tokens like '{' may
bool want_newline; /* whether the next token should go to a new
* line; used after 'if (expr)' and in similar
* situations; tokens like '{' or ';' may
* ignore this */
enum declaration {

View File

@ -1,4 +1,4 @@
/* $NetBSD: parse.c,v 1.76 2023/06/14 19:05:40 rillig Exp $ */
/* $NetBSD: parse.c,v 1.77 2023/06/14 20:46:08 rillig Exp $ */
/*-
* SPDX-License-Identifier: BSD-4-Clause
@ -38,9 +38,8 @@
*/
#include <sys/cdefs.h>
__RCSID("$NetBSD: parse.c,v 1.76 2023/06/14 19:05:40 rillig Exp $");
__RCSID("$NetBSD: parse.c,v 1.77 2023/06/14 20:46:08 rillig Exp $");
#include <err.h>
#include <stdlib.h>
#include "indent.h"
@ -50,8 +49,9 @@ __RCSID("$NetBSD: parse.c,v 1.76 2023/06/14 19:05:40 rillig Exp $");
* directly below it, replacing these two symbols with a single symbol.
*/
static bool
psyms_reduce_stmt(struct psym_stack *psyms)
psyms_reduce_stmt(void)
{
struct psym_stack *psyms = &ps.psyms;
switch (psyms->sym[psyms->len - 2]) {
case psym_stmt:
@ -105,9 +105,9 @@ ps_push(parser_symbol psym, bool follow)
if (ps.psyms.len == ps.psyms.cap) {
ps.psyms.cap += 16;
ps.psyms.sym = nonnull(realloc(ps.psyms.sym,
sizeof(ps.psyms.sym[0]) * ps.psyms.cap));
sizeof(ps.psyms.sym[0]) * ps.psyms.cap));
ps.psyms.ind_level = nonnull(realloc(ps.psyms.ind_level,
sizeof(ps.psyms.ind_level[0]) * ps.psyms.cap));
sizeof(ps.psyms.ind_level[0]) * ps.psyms.cap));
}
ps.psyms.len++;
ps.psyms.sym[ps.psyms.len - 1] = psym;
@ -120,11 +120,12 @@ ps_push(parser_symbol psym, bool follow)
* symbol, until no more reductions are possible.
*/
static void
psyms_reduce(struct psym_stack *psyms)
psyms_reduce(void)
{
struct psym_stack *psyms = &ps.psyms;
again:
if (psyms->len >= 2 && psyms->sym[psyms->len - 1] == psym_stmt
&& psyms_reduce_stmt(psyms))
&& psyms_reduce_stmt())
goto again;
if (psyms->sym[psyms->len - 1] == psym_while_expr &&
psyms->sym[psyms->len - 2] == psym_do_stmt) {
@ -156,7 +157,7 @@ parse(parser_symbol psym)
if (psym != psym_else) {
while (psyms->sym[psyms->len - 1] == psym_if_expr_stmt) {
psyms->sym[psyms->len - 1] = psym_stmt;
psyms_reduce(&ps.psyms);
psyms_reduce();
}
}
@ -186,7 +187,7 @@ parse(parser_symbol psym)
break;
case psym_rbrace:
/* stack should have <lbrace> <stmt> or <lbrace> <stmt_list> */
/* stack should have <lbrace> <stmt> or <lbrace> <decl> */
if (!(psyms->len >= 2
&& is_lbrace(psyms->sym[psyms->len - 2]))) {
diag(1, "Statement nesting error");
@ -257,6 +258,6 @@ parse(parser_symbol psym)
}
debug_psyms_stack("before reduction");
psyms_reduce(&ps.psyms);
psyms_reduce();
debug_psyms_stack("after reduction");
}