diff --git a/tests/usr.bin/indent/fmt_block.c b/tests/usr.bin/indent/fmt_block.c index 71cb3ea16a41..89073d1c2e88 100644 --- a/tests/usr.bin/indent/fmt_block.c +++ b/tests/usr.bin/indent/fmt_block.c @@ -1,4 +1,4 @@ -/* $NetBSD: fmt_block.c,v 1.7 2023/05/22 23:03:16 rillig Exp $ */ +/* $NetBSD: fmt_block.c,v 1.8 2023/06/14 20:46:08 rillig Exp $ */ /* * Tests for formatting blocks of statements and declarations. @@ -6,7 +6,6 @@ * See also: * lsym_lbrace.c * psym_stmt.c - * psym_stmt_list.c */ //indent input diff --git a/tests/usr.bin/indent/fmt_decl.c b/tests/usr.bin/indent/fmt_decl.c index a516005b836c..088faddd23c4 100644 --- a/tests/usr.bin/indent/fmt_decl.c +++ b/tests/usr.bin/indent/fmt_decl.c @@ -1,4 +1,4 @@ -/* $NetBSD: fmt_decl.c,v 1.54 2023/06/10 17:56:29 rillig Exp $ */ +/* $NetBSD: fmt_decl.c,v 1.55 2023/06/14 20:46:08 rillig Exp $ */ /* * Tests for declarations of global variables, external functions, and local @@ -1085,3 +1085,104 @@ die(void) { } //indent end + + +/* + * In very rare cases, the type of a declarator might include literal tab + * characters. This tab might affect the indentation of the declarator, but + * only if it occurs before the declarator, and that is hard to achieve. + */ +//indent input +int arr[sizeof " "]; +//indent end + +//indent run-equals-input + + +/* + * The '}' of an initializer is not supposed to end the statement, it only ends + * the brace level of the initializer expression. + */ +//indent input +int multi_line[1][1][1] = { +{ +{ +1 +}, +}, +}; +int single_line[2][1][1] = {{{1},},{{2}}}; +//indent end + +//indent run -di0 +int multi_line[1][1][1] = { + { + { + 1 + }, + }, +}; +int single_line[2][1][1] = {{{1},}, {{2}}}; +//indent end + + +/* + * The '}' of an initializer is not supposed to end the statement, it only ends + * the brace level of the initializer expression. + */ +//indent input +{ +int multi_line = { +{ +{ +b +}, +}, +}; +int single_line = {{{b},},{}}; +} +//indent end + +//indent run -di0 +{ + int multi_line = { + { + { + b + }, + }, + }; + int single_line = {{{b},}, {}}; +} +//indent end + + +/* + * In initializers, multi-line expressions don't have their second line + * indented, even though they should. + */ +//indent input +{ +multi_line = (int[]){ +{1 ++1}, +{1 ++1}, +{1 ++1}, +}; +} +//indent end + +//indent run +{ + multi_line = (int[]){ + {1 + + 1}, + {1 + + 1}, + {1 + + 1}, + }; +} +//indent end diff --git a/tests/usr.bin/indent/lsym_preprocessing.c b/tests/usr.bin/indent/lsym_preprocessing.c index ce8c6980ffdc..87f195f742d5 100644 --- a/tests/usr.bin/indent/lsym_preprocessing.c +++ b/tests/usr.bin/indent/lsym_preprocessing.c @@ -1,4 +1,4 @@ -/* $NetBSD: lsym_preprocessing.c,v 1.13 2023/06/14 17:07:32 rillig Exp $ */ +/* $NetBSD: lsym_preprocessing.c,v 1.14 2023/06/14 20:46:08 rillig Exp $ */ /* * Tests for the token lsym_preprocessing, which represents a '#' that starts @@ -307,3 +307,38 @@ error: Standard Input:2: Unmatched #elif error: Standard Input:3: Unmatched #elifdef error: Standard Input:4: Unmatched #endif //indent end + + +/* + * The '#' can only occur at the beginning of a line, therefore indent does not + * care when it occurs in the middle of a line. + */ +//indent input +int no = #; +//indent end + +//indent run -di0 +int no = +#; +//indent end + + +/* + * Preprocessing directives may be indented; indent moves them to the beginning + * of a line. + */ +//indent input +#if 0 + #if 1 \ + || 2 + #endif +#endif +//indent end + +//indent run +#if 0 +#if 1 \ + || 2 +#endif +#endif +//indent end diff --git a/tests/usr.bin/indent/opt_bc.c b/tests/usr.bin/indent/opt_bc.c index 664f92292fc5..275f2920389a 100644 --- a/tests/usr.bin/indent/opt_bc.c +++ b/tests/usr.bin/indent/opt_bc.c @@ -1,4 +1,4 @@ -/* $NetBSD: opt_bc.c,v 1.11 2023/06/14 14:11:28 rillig Exp $ */ +/* $NetBSD: opt_bc.c,v 1.12 2023/06/14 20:46:08 rillig Exp $ */ /* * Tests for the options '-bc' and '-nbc'. @@ -121,6 +121,17 @@ int a = (1), //indent end +//indent input +int a, +b, +c; +//indent end + +//indent run -nbc -di0 +int a, b, c; +//indent end + + /* * When declarations are too long to fit in a single line, they should not be * joined. diff --git a/tests/usr.bin/indent/ps_ind_level.c b/tests/usr.bin/indent/ps_ind_level.c index 0f33269537e7..2ca8cbd9934d 100644 --- a/tests/usr.bin/indent/ps_ind_level.c +++ b/tests/usr.bin/indent/ps_ind_level.c @@ -1,10 +1,13 @@ -/* $NetBSD: ps_ind_level.c,v 1.7 2023/05/15 14:55:47 rillig Exp $ */ +/* $NetBSD: ps_ind_level.c,v 1.8 2023/06/14 20:46:08 rillig Exp $ */ /* * The indentation of the very first line of a file determines the - * indentation of the remaining code. Even if later code has a smaller - * indentation, it is nevertheless indented to the level given by the first - * line of code. + * indentation of the remaining code. This mode is meant for code snippets from + * function bodies. At this level, function definitions are not recognized + * properly. + * + * Even if later code has a smaller indentation, it is nevertheless indented to + * the level given by the first line of code. * * In this particular test, the indentation is set to 5 and the tabulator * width is set to 8, to demonstrate an off-by-one error in @@ -18,6 +21,9 @@ int indented_by_24; void function_in_column_1(void){} + + #if indented +#endif //indent end /* 5 spaces indentation, 8 spaces per tabulator */ @@ -26,6 +32,9 @@ void function_in_column_1(void){} void function_in_column_1(void) { } + +#if indented +#endif //indent end diff --git a/tests/usr.bin/indent/psym_rbrace.c b/tests/usr.bin/indent/psym_rbrace.c index 8390762bdf5e..34b0fb81c647 100644 --- a/tests/usr.bin/indent/psym_rbrace.c +++ b/tests/usr.bin/indent/psym_rbrace.c @@ -1,4 +1,4 @@ -/* $NetBSD: psym_rbrace.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */ +/* $NetBSD: psym_rbrace.c,v 1.4 2023/06/14 20:46:08 rillig Exp $ */ /* * Tests for the parser symbol psym_rbrace, which represents '}' and finishes @@ -8,8 +8,81 @@ * psym_lbrace.c */ + +/* + * While it is a syntax error to have an unfinished declaration between braces, + * indent is forgiving enough to accept this input. + */ //indent input -// TODO: add input +{ + int +} //indent end +//indent run +{ + int + } +exit 1 +error: Standard Input:3: Statement nesting error +error: Standard Input:3: Stuff missing from end of file +//indent end + + +//indent input +{ + do { + } while (cond) +} +//indent end + +// XXX: Why doesn't indent complain about the missing semicolon? //indent run-equals-input + + +//indent input +{ + if (cond) +} +//indent end + +//indent run +{ + if (cond) + } +exit 1 +error: Standard Input:3: Statement nesting error +error: Standard Input:3: Stuff missing from end of file +//indent end + + +//indent input +{ + switch (expr) +} +//indent end + +//indent run +{ + switch (expr) + } +exit 1 +error: Standard Input:3: Statement nesting error +error: Standard Input:3: Stuff missing from end of file +//indent end + + +//indent input +{ + while (cond) +} +//indent end + +//indent run +{ + while (cond) + } +exit 1 +error: Standard Input:3: Statement nesting error +error: Standard Input:3: Stuff missing from end of file +//indent end diff --git a/usr.bin/indent/debug.c b/usr.bin/indent/debug.c index 9a121a462ef0..b52c942163e2 100644 --- a/usr.bin/indent/debug.c +++ b/usr.bin/indent/debug.c @@ -1,4 +1,4 @@ -/* $NetBSD: debug.c,v 1.60 2023/06/14 16:14:30 rillig Exp $ */ +/* $NetBSD: debug.c,v 1.61 2023/06/14 20:46:08 rillig Exp $ */ /*- * Copyright (c) 2023 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include -__RCSID("$NetBSD: debug.c,v 1.60 2023/06/14 16:14:30 rillig Exp $"); +__RCSID("$NetBSD: debug.c,v 1.61 2023/06/14 20:46:08 rillig Exp $"); #include #include @@ -367,7 +367,7 @@ debug_parser_state(void) state.heading = "vertical spacing"; debug_ps_bool(break_after_comma); - debug_ps_bool(force_nl); + debug_ps_bool(want_newline); debug_ps_enum(declaration, declaration_name); debug_ps_bool(blank_line_after_decl); diff --git a/usr.bin/indent/indent.c b/usr.bin/indent/indent.c index ef7c1e487ef0..8df19a5d3b5f 100644 --- a/usr.bin/indent/indent.c +++ b/usr.bin/indent/indent.c @@ -1,4 +1,4 @@ -/* $NetBSD: indent.c,v 1.366 2023/06/14 19:05:40 rillig Exp $ */ +/* $NetBSD: indent.c,v 1.367 2023/06/14 20:46:08 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -38,7 +38,7 @@ */ #include -__RCSID("$NetBSD: indent.c,v 1.366 2023/06/14 19:05:40 rillig Exp $"); +__RCSID("$NetBSD: indent.c,v 1.367 2023/06/14 20:46:08 rillig Exp $"); #include #include @@ -187,7 +187,7 @@ ind_add(int ind, const char *s, size_t len) static void init_globals(void) { - ps_push(psym_stmt, false); + ps_push(psym_stmt, false); /* as a stop symbol */ ps.prev_lsym = lsym_semicolon; ps.lbrace_kind = psym_lbrace_block; @@ -345,9 +345,11 @@ update_ps_lbrace_kind(lexer_symbol lsym) ps.lbrace_kind = token.s[0] == 's' ? psym_lbrace_struct : token.s[0] == 'u' ? psym_lbrace_union : psym_lbrace_enum; - } else if (lsym != lsym_type_outside_parentheses - && lsym != lsym_word - && lsym != lsym_lbrace) + } else if (lsym == lsym_type_outside_parentheses + || lsym == lsym_word + || lsym == lsym_lbrace) { + /* Keep the current '{' kind. */ + } else ps.lbrace_kind = psym_lbrace_block; } @@ -355,21 +357,19 @@ static void indent_declarator(int decl_ind, bool tabs_to_var) { int base = ps.ind_level * opt.indent_size; - int ind = base + (int)code.len; + int ind = ind_add(base, code.s, code.len); int target = base + decl_ind; size_t orig_code_len = code.len; if (tabs_to_var) for (int next; (next = next_tab(ind)) <= target; ind = next) buf_add_char(&code, '\t'); - for (; ind < target; ind++) buf_add_char(&code, ' '); - - if (code.len == orig_code_len && ps.want_blank) { + if (code.len == orig_code_len && ps.want_blank) buf_add_char(&code, ' '); - ps.want_blank = false; - } + + ps.want_blank = false; ps.decl_indent_done = true; } @@ -556,7 +556,7 @@ process_newline(void) if (ps.psyms.sym[ps.psyms.len - 1] == psym_switch_expr && opt.brace_same_line && com.len == 0) { - ps.force_nl = true; + ps.want_newline = true; goto stay_in_line; } @@ -591,7 +591,7 @@ process_lparen(void) else if (ps.want_blank && want_blank_before_lparen()) buf_add_char(&code, ' '); ps.want_blank = false; - buf_add_char(&code, token.s[0]); + buf_add_buf(&code, &token); if (opt.extra_expr_indent && ps.spaced_expr_psym != psym_0) ps.extra_expr_indent = eei_maybe; @@ -617,17 +617,14 @@ process_lparen(void) static void process_rparen(void) { - if (ps.paren.len == 0) { + if (ps.paren.len == 0) diag(0, "Extra '%c'", *token.s); - goto unbalanced; - } - enum paren_level_cast cast = ps.paren.item[--ps.paren.len].cast; - if (ps.in_func_def_params || (ps.line_has_decl && !ps.in_init)) - cast = cast_no; - - ps.prev_paren_was_cast = cast == cast_maybe; - if (cast == cast_maybe) { + ps.prev_paren_was_cast = ps.paren.len > 0 + && ps.paren.item[--ps.paren.len].cast == cast_maybe + && !ps.in_func_def_params + && !(ps.line_has_decl && !ps.in_init); + if (ps.prev_paren_was_cast) { ps.next_unary = true; ps.want_blank = opt.space_after_cast; } else @@ -636,19 +633,19 @@ process_rparen(void) if (code.len == 0) ps.ind_paren_level = (int)ps.paren.len; -unbalanced: - buf_add_char(&code, token.s[0]); + buf_add_buf(&code, &token); if (ps.spaced_expr_psym != psym_0 && ps.paren.len == 0) { - if (ps.extra_expr_indent == eei_maybe) - ps.extra_expr_indent = eei_last; - ps.force_nl = true; - ps.next_unary = true; - ps.in_stmt_or_decl = false; parse(ps.spaced_expr_psym); ps.spaced_expr_psym = psym_0; + + ps.want_newline = true; + ps.next_unary = true; + ps.in_stmt_or_decl = false; ps.want_blank = true; out.line_kind = lk_stmt_head; + if (ps.extra_expr_indent == eei_maybe) + ps.extra_expr_indent = eei_last; } } @@ -658,8 +655,8 @@ process_lbracket(void) if (code.len > 0 && (ps.prev_lsym == lsym_comma || ps.prev_lsym == lsym_binary_op)) buf_add_char(&code, ' '); + buf_add_buf(&code, &token); ps.want_blank = false; - buf_add_char(&code, token.s[0]); paren_stack_push(&ps.paren, ind_add(0, code.s, code.len), cast_no); } @@ -667,18 +664,16 @@ process_lbracket(void) static void process_rbracket(void) { - if (ps.paren.len == 0) { + if (ps.paren.len == 0) diag(0, "Extra '%c'", *token.s); - goto unbalanced; - } - ps.paren.len--; + if (ps.paren.len > 0) + ps.paren.len--; - ps.want_blank = true; if (code.len == 0) ps.ind_paren_level = (int)ps.paren.len; -unbalanced: - buf_add_char(&code, token.s[0]); + buf_add_buf(&code, &token); + ps.want_blank = true; } static void @@ -697,7 +692,7 @@ process_lbrace(void) if (ps.in_init) ps.init_level++; else - ps.force_nl = true; + ps.want_newline = true; if (code.len > 0 && !ps.in_init) { if (!opt.brace_same_line || @@ -789,7 +784,7 @@ process_rbrace(void) if (!ps.in_var_decl && ps.psyms.sym[ps.psyms.len - 1] != psym_do_stmt && ps.psyms.sym[ps.psyms.len - 1] != psym_if_expr_stmt) - ps.force_nl = true; + ps.want_newline = true; } static void @@ -846,7 +841,7 @@ process_comma(void) if (ps.break_after_comma && (opt.break_after_comma || ind_add(compute_code_indent(), code.s, code.len) >= opt.max_line_length - typical_varname_length)) - ps.force_nl = true; + ps.want_newline = true; } } @@ -860,7 +855,7 @@ process_label_colon(void) if (ps.seen_case) out.line_kind = lk_case_or_default; ps.in_stmt_or_decl = false; - ps.force_nl = ps.seen_case; + ps.want_newline = ps.seen_case; ps.seen_case = false; ps.want_blank = false; } @@ -915,7 +910,7 @@ process_semicolon(void) if (ps.spaced_expr_psym == psym_0) { parse(psym_stmt); - ps.force_nl = true; + ps.want_newline = true; } } @@ -968,7 +963,7 @@ process_word(lexer_symbol lsym) } } else if (ps.spaced_expr_psym != psym_0 && ps.paren.len == 0) { - ps.force_nl = true; + ps.want_newline = true; ps.in_stmt_or_decl = false; ps.next_unary = true; parse(ps.spaced_expr_psym); @@ -985,7 +980,7 @@ process_do(void) if (code.len > 0) output_line(); - ps.force_nl = true; + ps.want_newline = true; parse(psym_do); } @@ -998,7 +993,7 @@ process_else(void) && !(opt.cuddle_else && code.s[code.len - 1] == '}')) output_line(); - ps.force_nl = true; + ps.want_newline = true; parse(psym_else); } @@ -1085,15 +1080,15 @@ indent(void) if (lsym == lsym_if && ps.prev_lsym == lsym_else && opt.else_if_in_same_line) - ps.force_nl = false; + ps.want_newline = false; if (lsym == lsym_preprocessing || lsym == lsym_newline) - ps.force_nl = false; + ps.want_newline = false; else if (lsym == lsym_comment) { /* no special processing */ } else { - if (ps.force_nl && should_break_line(lsym)) { - ps.force_nl = false; + if (ps.want_newline && should_break_line(lsym)) { + ps.want_newline = false; output_line(); } ps.in_stmt_or_decl = true; diff --git a/usr.bin/indent/indent.h b/usr.bin/indent/indent.h index e05e3ca4563e..58c349e71276 100644 --- a/usr.bin/indent/indent.h +++ b/usr.bin/indent/indent.h @@ -1,4 +1,4 @@ -/* $NetBSD: indent.h,v 1.196 2023/06/14 19:05:40 rillig Exp $ */ +/* $NetBSD: indent.h,v 1.197 2023/06/14 20:46:08 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD @@ -272,10 +272,10 @@ struct paren_level { struct psym_stack { parser_symbol *sym; int *ind_level; - size_t len; /* points to one behind the top of the stack; - * 1 at the top level of the file outside a - * declaration or statement; 2 at the top - * level */ + size_t len; /* points to one behind the top of the stack; 1 + * at the top level of the file outside a + * declaration or statement; 2 at the top level + */ size_t cap; }; @@ -393,9 +393,9 @@ extern struct parser_state { bool break_after_comma; /* whether to add a newline after the next * comma; used in declarations but not in * initializer lists */ - bool force_nl; /* whether the next token is forced to go to a - * new line; used after 'if (expr)' and in - * similar situations; tokens like '{' may + bool want_newline; /* whether the next token should go to a new + * line; used after 'if (expr)' and in similar + * situations; tokens like '{' or ';' may * ignore this */ enum declaration { diff --git a/usr.bin/indent/parse.c b/usr.bin/indent/parse.c index 31d78c1eda18..b15529530059 100644 --- a/usr.bin/indent/parse.c +++ b/usr.bin/indent/parse.c @@ -1,4 +1,4 @@ -/* $NetBSD: parse.c,v 1.76 2023/06/14 19:05:40 rillig Exp $ */ +/* $NetBSD: parse.c,v 1.77 2023/06/14 20:46:08 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -38,9 +38,8 @@ */ #include -__RCSID("$NetBSD: parse.c,v 1.76 2023/06/14 19:05:40 rillig Exp $"); +__RCSID("$NetBSD: parse.c,v 1.77 2023/06/14 20:46:08 rillig Exp $"); -#include #include #include "indent.h" @@ -50,8 +49,9 @@ __RCSID("$NetBSD: parse.c,v 1.76 2023/06/14 19:05:40 rillig Exp $"); * directly below it, replacing these two symbols with a single symbol. */ static bool -psyms_reduce_stmt(struct psym_stack *psyms) +psyms_reduce_stmt(void) { + struct psym_stack *psyms = &ps.psyms; switch (psyms->sym[psyms->len - 2]) { case psym_stmt: @@ -105,9 +105,9 @@ ps_push(parser_symbol psym, bool follow) if (ps.psyms.len == ps.psyms.cap) { ps.psyms.cap += 16; ps.psyms.sym = nonnull(realloc(ps.psyms.sym, - sizeof(ps.psyms.sym[0]) * ps.psyms.cap)); + sizeof(ps.psyms.sym[0]) * ps.psyms.cap)); ps.psyms.ind_level = nonnull(realloc(ps.psyms.ind_level, - sizeof(ps.psyms.ind_level[0]) * ps.psyms.cap)); + sizeof(ps.psyms.ind_level[0]) * ps.psyms.cap)); } ps.psyms.len++; ps.psyms.sym[ps.psyms.len - 1] = psym; @@ -120,11 +120,12 @@ ps_push(parser_symbol psym, bool follow) * symbol, until no more reductions are possible. */ static void -psyms_reduce(struct psym_stack *psyms) +psyms_reduce(void) { + struct psym_stack *psyms = &ps.psyms; again: if (psyms->len >= 2 && psyms->sym[psyms->len - 1] == psym_stmt - && psyms_reduce_stmt(psyms)) + && psyms_reduce_stmt()) goto again; if (psyms->sym[psyms->len - 1] == psym_while_expr && psyms->sym[psyms->len - 2] == psym_do_stmt) { @@ -156,7 +157,7 @@ parse(parser_symbol psym) if (psym != psym_else) { while (psyms->sym[psyms->len - 1] == psym_if_expr_stmt) { psyms->sym[psyms->len - 1] = psym_stmt; - psyms_reduce(&ps.psyms); + psyms_reduce(); } } @@ -186,7 +187,7 @@ parse(parser_symbol psym) break; case psym_rbrace: - /* stack should have or */ + /* stack should have or */ if (!(psyms->len >= 2 && is_lbrace(psyms->sym[psyms->len - 2]))) { diag(1, "Statement nesting error"); @@ -257,6 +258,6 @@ parse(parser_symbol psym) } debug_psyms_stack("before reduction"); - psyms_reduce(&ps.psyms); + psyms_reduce(); debug_psyms_stack("after reduction"); }