indent: only null-terminate the buffers if necessary

The only case where a buffer is used as a C-style string is when looking up a keyword. No functional change.
2023-05-14 22:26:37 +00:00 · 2023-05-14 22:26:37 +00:00 · 506e3bec5c
parent a44e145462
commit 506e3bec5c
6 changed files with 23 additions and 71 deletions
--- a/tests/usr.bin/indent/lsym_comment.c
+++ b/tests/usr.bin/indent/lsym_comment.c
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_comment.c,v 1.8 2023/05/14 17:53:38 rillig Exp $ */
+/* $NetBSD: lsym_comment.c,v 1.9 2023/05/14 22:26:37 rillig Exp $ */

 /*
 * Tests for the token lsym_comment, which starts a comment.
@@ -1042,20 +1042,6 @@ line 4
 //indent run-equals-input


-/*
- * Cover the code for expanding the comment buffer in com_terminate. As of
- * 2021-11-07, the default buffer size is 200, with a safety margin of 1 at
- * the beginning and another safety margin of 5 at the end. To force the
- * comment buffer to expanded in com_terminate, the comment must be exactly
- * 193 bytes long.
- */
-//indent input
-/*-_____10________20________30________40________50________60________70________80________90_______100_______110_______120_______130_______140_______150_______160_______170_______180_______190 */
-//indent end
-
-//indent run-equals-input
-
-
 /*
 * Since 2019-04-04 and before pr_comment.c 1.123 from 2021-11-25, the
 * function analyze_comment wrongly joined the two comments.
--- a/usr.bin/indent/indent.c
+++ b/usr.bin/indent/indent.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: indent.c,v 1.266 2023/05/14 12:12:02 rillig Exp $	*/
+/*	$NetBSD: indent.c,v 1.267 2023/05/14 22:26:37 rillig Exp $	*/

 /*-
 * SPDX-License-Identifier: BSD-4-Clause
@@ -38,7 +38,7 @@
 */

 #include <sys/cdefs.h>
-__RCSID("$NetBSD: indent.c,v 1.266 2023/05/14 12:12:02 rillig Exp $");
+__RCSID("$NetBSD: indent.c,v 1.267 2023/05/14 22:26:37 rillig Exp $");

 #include <sys/param.h>
 #include <err.h>
@@ -109,7 +109,6 @@ buf_init(struct buffer *buf)
    buf->s = buf->mem + 1;	/* allow accessing buf->e[-1] */
    buf->e = buf->s;
    buf->mem[0] = ' ';
-    buf->e[0] = '\0';
 }

 static size_t
@@ -127,7 +126,6 @@ buf_expand(struct buffer *buf, size_t add_size)
    buf->limit = buf->mem + new_size - 5;
    buf->s = buf->mem + 1;
    buf->e = buf->s + len;
-    /* At this point, the buffer may not be null-terminated anymore. */
 }

 static void
@@ -159,13 +157,6 @@ buf_add_buf(struct buffer *buf, const struct buffer *add)
    buf_add_range(buf, add->s, add->e);
 }

-static void
-buf_terminate(struct buffer *buf)
-{
-    buf_reserve(buf, 1);
-    *buf->e = '\0';
-}
-
 static void
 buf_reset(struct buffer *buf)
 {
@@ -356,7 +347,7 @@ code_add_decl_indent(int decl_ind, bool tabs_to_var)
    int base_ind = ps.ind_level * opt.indent_size;
    int ind = base_ind + (int)buf_len(&code);
    int target_ind = base_ind + decl_ind;
-    char *orig_code_e = code.e;
+    const char *orig_code_e = code.e;

    if (tabs_to_var)
 	for (int next; (next = next_tab(ind)) <= target_ind; ind = next)
@@ -407,7 +398,6 @@ move_com_to_code(void)
 	buf_add_char(&code, ' ');
    buf_add_buf(&code, &com);
    buf_add_char(&code, ' ');
-    buf_terminate(&code);
    buf_reset(&com);
    ps.want_blank = false;
 }
@@ -610,7 +600,6 @@ process_colon(void)

    buf_add_buf(&lab, &code);	/* 'case' or 'default' or named label */
    buf_add_char(&lab, ':');
-    buf_terminate(&lab);
    buf_reset(&code);

    ps.in_stmt_or_decl = false;
@@ -827,12 +816,10 @@ process_ident(lexer_symbol lsym)
    if (ps.in_decl) {
 	if (lsym == lsym_funcname) {
 	    ps.in_decl = false;
-	    if (opt.procnames_start_line && code.s != code.e) {
-		*code.e = '\0';
+	    if (opt.procnames_start_line && code.s != code.e)
 		output_line();
-	    } else if (ps.want_blank) {
+	    else if (ps.want_blank)
 		*code.e++ = ' ';
-	    }
 	    ps.want_blank = false;

 	} else if (!ps.block_init && !ps.decl_indent_done &&
@@ -936,7 +923,6 @@ read_preprocessing_line(void)

    while (lab.e > lab.s && ch_isblank(lab.e[-1]))
 	lab.e--;
-    buf_terminate(&lab);
 }

 typedef struct {
@@ -1167,7 +1153,6 @@ main_loop(void)
 	    break;
 	}

-	*code.e = '\0';
 	if (lsym != lsym_comment && lsym != lsym_newline &&
 		lsym != lsym_preprocessing)
 	    ps.prev_token = lsym;
--- a/usr.bin/indent/indent.h
+++ b/usr.bin/indent/indent.h
@@ -1,4 +1,4 @@
-/*	$NetBSD: indent.h,v 1.131 2023/05/14 14:14:07 rillig Exp $	*/
+/*	$NetBSD: indent.h,v 1.132 2023/05/14 22:26:37 rillig Exp $	*/

 /*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
@@ -123,7 +123,7 @@ typedef enum parser_symbol {
    psym_while_expr,		/* 'while' '(' expr ')' */
 } parser_symbol;

-/* A range of characters, in some cases null-terminated. */
+/* A range of characters, not null-terminated. */
 struct buffer {
    char *s;			/* start of the usable text */
    char *e;			/* end of the usable text */
--- a/usr.bin/indent/io.c
+++ b/usr.bin/indent/io.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: io.c,v 1.164 2023/05/14 14:14:07 rillig Exp $	*/
+/*	$NetBSD: io.c,v 1.165 2023/05/14 22:26:37 rillig Exp $	*/

 /*-
 * SPDX-License-Identifier: BSD-4-Clause
@@ -38,7 +38,7 @@
 */

 #include <sys/cdefs.h>
-__RCSID("$NetBSD: io.c,v 1.164 2023/05/14 14:14:07 rillig Exp $");
+__RCSID("$NetBSD: io.c,v 1.165 2023/05/14 22:26:37 rillig Exp $");

 #include <assert.h>
 #include <stdio.h>
@@ -190,7 +190,6 @@ output_line_label(void)

    while (lab.e > lab.s && ch_isblank(lab.e[-1]))
 	lab.e--;
-    *lab.e = '\0';

    ind = output_indent(0, compute_label_indent());
    output_range(lab.s, lab.e);
@@ -304,9 +303,9 @@ output_complete_line(char line_terminator)
    ps.in_stmt_cont = ps.in_stmt_or_decl && !ps.in_decl;
    ps.decl_indent_done = false;

-    *(lab.e = lab.s) = '\0';	/* reset buffers */
-    *(code.e = code.s) = '\0';
-    *(com.e = com.s = com.mem + 1) = '\0';
+    lab.e = lab.s;		/* reset buffers */
+    code.e = code.s;
+    com.e = com.s = com.mem + 1;

    ps.ind_level = ps.ind_level_follow;
    ps.line_start_nparen = ps.nparen;
--- a/usr.bin/indent/lexi.c
+++ b/usr.bin/indent/lexi.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: lexi.c,v 1.183 2023/05/14 14:14:07 rillig Exp $	*/
+/*	$NetBSD: lexi.c,v 1.184 2023/05/14 22:26:37 rillig Exp $	*/

 /*-
 * SPDX-License-Identifier: BSD-4-Clause
@@ -38,7 +38,7 @@
 */

 #include <sys/cdefs.h>
-__RCSID("$NetBSD: lexi.c,v 1.183 2023/05/14 14:14:07 rillig Exp $");
+__RCSID("$NetBSD: lexi.c,v 1.184 2023/05/14 22:26:37 rillig Exp $");

 #include <stdlib.h>
 #include <string.h>
@@ -381,15 +381,12 @@ lexi_alnum(void)
 	    ps.next_unary = false;

 	    check_size_token(1);
-	    *token.e = '\0';

 	    return lsym_word;
 	}
    } else
 	return lsym_eof;	/* just as a placeholder */

-    *token.e = '\0';
-
    while (ch_isblank(inp_peek()))
 	inp_skip();

@@ -398,6 +395,8 @@ lexi_alnum(void)
    if (ps.prev_token == lsym_tag && ps.nparen == 0)
 	return lsym_type_outside_parentheses;

+    token_add_char('\0');
+    token.e--;
    const struct keyword *kw = bsearch(token.s, keywords,
 	array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name);
    bool is_type = false;
@@ -509,7 +508,6 @@ lexi(void)

    check_size_token(3);	/* for things like "<<=" */
    *token.e++ = inp_next();
-    *token.e = '\0';

    lexer_symbol lsym;
    bool next_unary;
@@ -582,10 +580,8 @@ lexi(void)
    case '=':
 	if (ps.init_or_struct)
 	    ps.block_init = true;
-	if (inp_peek() == '=') {	/* == */
+	if (inp_peek() == '=')
 	    *token.e++ = inp_next();
-	    *token.e = '\0';
-	}
 	lsym = lsym_binary_op;
 	next_unary = true;
 	break;
@@ -638,7 +634,6 @@ lexi(void)
    ps.next_unary = next_unary;

    check_size_token(1);
-    *token.e = '\0';

    return lexi_end(lsym);
 }
--- a/usr.bin/indent/pr_comment.c
+++ b/usr.bin/indent/pr_comment.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: pr_comment.c,v 1.138 2023/05/14 18:05:52 rillig Exp $	*/
+/*	$NetBSD: pr_comment.c,v 1.139 2023/05/14 22:26:37 rillig Exp $	*/

 /*-
 * SPDX-License-Identifier: BSD-4-Clause
@@ -38,7 +38,7 @@
 */

 #include <sys/cdefs.h>
-__RCSID("$NetBSD: pr_comment.c,v 1.138 2023/05/14 18:05:52 rillig Exp $");
+__RCSID("$NetBSD: pr_comment.c,v 1.139 2023/05/14 22:26:37 rillig Exp $");

 #include <string.h>

@@ -61,14 +61,6 @@ com_add_delim(void)
    buf_add_range(&com, delim, delim + 3);
 }

-static void
-com_terminate(void)
-{
-    if (1 >= com.limit - com.e)
-	buf_expand(&com, 1);
-    *com.e = '\0';
-}
-
 static bool
 fits_in_one_line(int com_ind, int max_line_length)
 {
@@ -241,14 +233,13 @@ copy_comment_wrap(int line_length, bool delim)
 		    com_add_char(' ');
 		com_add_char('*');
 		com_add_char('/');
-		com_terminate();
 		return;

 	    } else		/* handle isolated '*' */
 		com_add_char('*');
 	    break;

-	default:		/* we have a random char */
+	default:
 	    ;
 	    int now_len = ind_add(ps.com_ind, com.s, com.e);
 	    for (;;) {
@@ -294,10 +285,8 @@ copy_comment_nowrap(void)
 {
    for (;;) {
 	if (inp_peek() == '\n') {
-	    if (token.e[-1] == '/') {
-		com_terminate();
+	    if (token.e[-1] == '/')
 		return;
-	    }

 	    if (had_eof) {
 		diag(1, "Unterminated comment");
@@ -314,10 +303,8 @@ copy_comment_nowrap(void)
 	}

 	com_add_char(inp_next());
-	if (com.e[-2] == '*' && com.e[-1] == '/' && token.e[-1] == '*') {
-	    com_terminate();
+	if (com.e[-2] == '*' && com.e[-1] == '/' && token.e[-1] == '*')
 	    return;
-	}
    }
 }