lint: move code for parsing balanced token sequences to lex.c

To access yytext from lex.c, the lexer needs to be in %pointer mode,
which was specified in IEEE Std 1003.1, 2004 Edition and thus should be
old enough to be available on platforms other than NetBSD, for use in
tools/lint1 where lint1 is built before yacc and lex.
This commit is contained in:
rillig 2024-05-12 08:48:36 +00:00
parent abfde61b92
commit 922b235ff9
4 changed files with 94 additions and 88 deletions

View File

@ -1,5 +1,5 @@
%{
/* $NetBSD: cgram.y,v 1.501 2024/05/11 16:58:59 rillig Exp $ */
/* $NetBSD: cgram.y,v 1.502 2024/05/12 08:48:36 rillig Exp $ */
/*
* Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved.
@ -35,7 +35,7 @@
#include <sys/cdefs.h>
#if defined(__RCSID)
__RCSID("$NetBSD: cgram.y,v 1.501 2024/05/11 16:58:59 rillig Exp $");
__RCSID("$NetBSD: cgram.y,v 1.502 2024/05/12 08:48:36 rillig Exp $");
#endif
#include <limits.h>
@ -2562,78 +2562,11 @@ read_until_rparen(void)
yyclearin;
}
/*
 * Fill *tok from the parser's current lookahead token.
 *
 * The token code is taken from yychar, its semantic value from yylval and,
 * where the value is not used, its text from yytext.  Identifier, string
 * and punctuator text is duplicated with xstrdup; a constant is copied by
 * value.
 */
static void
fill_token(token *tok)
{
	if (yychar == T_NAME || yychar == T_TYPENAME) {
		tok->kind = TK_IDENTIFIER;
		tok->u.identifier = xstrdup(yylval.y_name->sb_name);
		return;
	}

	if (yychar == T_CON) {
		tok->kind = TK_CONSTANT;
		tok->u.constant = *yylval.y_val;
		return;
	}

	if (yychar == T_NAMED_CONSTANT) {
		/* The name is taken from the token text, not from yylval. */
		tok->kind = TK_IDENTIFIER;
		tok->u.identifier = xstrdup(yytext);
		return;
	}

	if (yychar == T_STRING) {
		/*
		 * NOTE(review): len and cap are copied verbatim while data
		 * is duplicated with xstrdup, which presumably stops at the
		 * first NUL byte -- confirm that this is acceptable for
		 * literals containing embedded NUL characters.
		 */
		tok->kind = TK_STRING_LITERALS;
		tok->u.string_literals.data = xstrdup(yylval.y_string->data);
		tok->u.string_literals.len = yylval.y_string->len;
		tok->u.string_literals.cap = yylval.y_string->cap;
		return;
	}

	/* Everything else is recorded by its text as a punctuator. */
	tok->kind = TK_PUNCTUATOR;
	tok->u.punctuator = xstrdup(yytext);
}
/*
 * Make sure that the sequence has room for at least one more token.
 *
 * If the sequence is full, its capacity grows to 16 + 2 * cap, a new array
 * is obtained from block_zero_alloc and the existing tokens are copied
 * over.  The previous array is not released here.
 */
static void
seq_reserve(balanced_token_sequence *seq)
{
	if (seq->len < seq->cap)
		return;

	seq->cap = 16 + 2 * seq->cap;
	balanced_token *new_tokens = block_zero_alloc(
	    seq->cap * sizeof(*seq->tokens), "balanced_tokens");

	/*
	 * A sequence that has not grown yet may have no token array at all;
	 * passing a null pointer to memcpy is undefined behavior even when
	 * the length is 0, so only copy when there is something to copy.
	 */
	if (seq->len > 0)
		memcpy(new_tokens, seq->tokens,
		    seq->len * sizeof(*seq->tokens));
	seq->tokens = new_tokens;
}
static balanced_token_sequence
read_balanced(int opening)
{
debug_enter();
int closing = opening == T_LPAREN ? T_RPAREN
: opening == T_LBRACK ? T_RBRACK : T_RBRACE;
balanced_token_sequence seq = { NULL, 0, 0 };
debug_step("opening %d, closing %d", opening, closing);
while (yychar = yylex(), yychar > 0 && yychar != closing) {
debug_step("reading token %d", yychar);
seq_reserve(&seq);
if (yychar == T_LPAREN
|| yychar == T_LBRACK
|| yychar == T_LBRACE) {
seq.tokens[seq.len].kind = yychar == T_LPAREN ? '('
: yychar == T_LBRACK ? '[' : '{';
seq.tokens[seq.len++].u.tokens = read_balanced(yychar);
} else
fill_token(&seq.tokens[seq.len++].u.token);
}
debug_leave();
return seq;
}
/*
 * Read a balanced token sequence that ends at the matching right
 * parenthesis.
 *
 * The parser must not hold a lookahead token when this is called
 * (asserted via yychar < 0).  After reading, yyclearin is used so that
 * the parser does not keep a lookahead token.
 *
 * This hunk shows two initializers for 'seq': the earlier call to
 * read_balanced(T_LPAREN) and its replacement lex_balanced(), which the
 * commit moves to lex.c.
 */
static balanced_token_sequence
read_balanced_token_sequence(void)
{
lint_assert(yychar < 0);
balanced_token_sequence seq = read_balanced(T_LPAREN);
balanced_token_sequence seq = lex_balanced();
yyclearin;
return seq;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: externs1.h,v 1.226 2024/05/11 16:12:28 rillig Exp $ */
/* $NetBSD: externs1.h,v 1.227 2024/05/12 08:48:36 rillig Exp $ */
/*
* Copyright (c) 1994, 1995 Jochen Pohl
@ -72,6 +72,7 @@ extern int yydebug;
int yyerror(const char *);
int yyparse(void);
extern char *yytext;
/*
* lex.c
@ -402,6 +403,7 @@ void lex_slash_slash_comment(void);
void lex_unknown_character(int);
int lex_input(void);
bool quoted_next(const buffer *, quoted_iterator *);
balanced_token_sequence lex_balanced(void);
/*
* ckbool.c

View File

@ -1,4 +1,4 @@
/* $NetBSD: lex.c,v 1.225 2024/05/09 11:08:07 rillig Exp $ */
/* $NetBSD: lex.c,v 1.226 2024/05/12 08:48:36 rillig Exp $ */
/*
* Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved.
@ -38,7 +38,7 @@
#include <sys/cdefs.h>
#if defined(__RCSID)
__RCSID("$NetBSD: lex.c,v 1.225 2024/05/09 11:08:07 rillig Exp $");
__RCSID("$NetBSD: lex.c,v 1.226 2024/05/12 08:48:36 rillig Exp $");
#endif
#include <ctype.h>
@ -468,15 +468,15 @@ lex_keyword(sym_t *sym)
* member, tag, ...).
*/
extern int
lex_name(const char *yytext, size_t yyleng)
lex_name(const char *text, size_t len)
{
sym_t *sym = symtab_search(yytext);
sym_t *sym = symtab_search(text);
if (sym != NULL && sym->s_keyword != NULL)
return lex_keyword(sym);
sbuf_t *sb = xmalloc(sizeof(*sb));
sb->sb_len = yyleng;
sb->sb_len = len;
sb->sb_sym = sym;
yylval.y_name = sb;
@ -486,8 +486,8 @@ lex_name(const char *yytext, size_t yyleng)
return sym->s_scl == TYPEDEF ? T_TYPENAME : T_NAME;
}
char *name = block_zero_alloc(yyleng + 1, "string");
(void)memcpy(name, yytext, yyleng + 1);
char *name = block_zero_alloc(len + 1, "string");
(void)memcpy(name, text, len + 1);
sb->sb_name = name;
return T_NAME;
}
@ -547,10 +547,9 @@ integer_constant_type_unsigned(unsigned l, uint64_t ui, bool warned)
}
int
lex_integer_constant(const char *yytext, size_t yyleng, int base)
lex_integer_constant(const char *text, size_t len, int base)
{
const char *cp = yytext;
size_t len = yyleng;
const char *cp = text;
/* skip 0[xX] or 0[bB] */
if (base == 16 || base == 2) {
@ -627,10 +626,9 @@ convert_integer(int64_t si, tspec_t t, unsigned int bits)
}
int
lex_floating_constant(const char *yytext, size_t yyleng)
lex_floating_constant(const char *text, size_t len)
{
const char *cp = yytext;
size_t len = yyleng;
const char *cp = text;
bool imaginary = cp[len - 1] == 'i';
if (imaginary)
@ -1072,9 +1070,9 @@ set_csrc_pos(void)
* # lineno "filename" [GCC-flag...]
*/
void
lex_directive(const char *yytext)
lex_directive(const char *text)
{
const char *p = yytext + 1; /* skip '#' */
const char *p = text + 1; /* skip '#' */
while (*p == ' ' || *p == '\t')
p++;
@ -1511,6 +1509,78 @@ pushdown(const sym_t *sym)
return nsym;
}
/*
 * Fill *tok from a token that the lexer has just returned.
 *
 * tk is the token code, text is the token's text; the semantic value is
 * read from yylval.  Identifier, string and punctuator text is duplicated
 * with xstrdup and a constant is copied by value, so the result does not
 * point into yylval.
 */
static void
fill_token(int tk, const char *text, token *tok)
{
	if (tk == T_NAME || tk == T_TYPENAME) {
		tok->kind = TK_IDENTIFIER;
		tok->u.identifier = xstrdup(yylval.y_name->sb_name);
		return;
	}

	if (tk == T_CON) {
		tok->kind = TK_CONSTANT;
		tok->u.constant = *yylval.y_val;
		return;
	}

	if (tk == T_NAMED_CONSTANT) {
		/* The name is taken from the token text, not from yylval. */
		tok->kind = TK_IDENTIFIER;
		tok->u.identifier = xstrdup(text);
		return;
	}

	if (tk == T_STRING) {
		/*
		 * NOTE(review): len and cap are copied verbatim while data
		 * is duplicated with xstrdup, which presumably stops at the
		 * first NUL byte -- confirm that this is acceptable for
		 * literals containing embedded NUL characters.
		 */
		tok->kind = TK_STRING_LITERALS;
		tok->u.string_literals.data = xstrdup(yylval.y_string->data);
		tok->u.string_literals.len = yylval.y_string->len;
		tok->u.string_literals.cap = yylval.y_string->cap;
		return;
	}

	/* Everything else is recorded by its text as a punctuator. */
	tok->kind = TK_PUNCTUATOR;
	tok->u.punctuator = xstrdup(text);
}
/*
 * Make sure that the sequence has room for at least one more token.
 *
 * If the sequence is full, its capacity grows to 16 + 2 * cap, a new array
 * is obtained from block_zero_alloc and the existing tokens are copied
 * over.  The previous array is not released here.
 */
static void
seq_reserve(balanced_token_sequence *seq)
{
	if (seq->len < seq->cap)
		return;

	seq->cap = 16 + 2 * seq->cap;
	balanced_token *new_tokens = block_zero_alloc(
	    seq->cap * sizeof(*seq->tokens), "balanced_tokens");

	/*
	 * A sequence that has not grown yet may have no token array at all;
	 * passing a null pointer to memcpy is undefined behavior even when
	 * the length is 0, so only copy when there is something to copy.
	 */
	if (seq->len > 0)
		memcpy(new_tokens, seq->tokens,
		    seq->len * sizeof(*seq->tokens));
	seq->tokens = new_tokens;
}
static balanced_token_sequence
read_balanced(int opening)
{
int closing = opening == T_LPAREN ? T_RPAREN
: opening == T_LBRACK ? T_RBRACK : T_RBRACE;
balanced_token_sequence seq = { NULL, 0, 0 };
int tok;
while (tok = yylex(), tok > 0 && tok != closing) {
seq_reserve(&seq);
if (tok == T_LPAREN || tok == T_LBRACK || tok == T_LBRACE) {
seq.tokens[seq.len].kind = tok == T_LPAREN ? '('
: tok == T_LBRACK ? '[' : '{';
seq.tokens[seq.len].u.tokens = read_balanced(tok);
} else {
fill_token(tok, yytext, &seq.tokens[seq.len].u.token);
freeyyv(&yylval, tok);
}
seq.len++;
}
return seq;
}
/*
 * Read a balanced token sequence that ends at the right parenthesis
 * matching an opening parenthesis; presumably the caller has already
 * consumed that opening parenthesis -- confirm against the grammar.
 */
balanced_token_sequence
lex_balanced(void)
{
	balanced_token_sequence seq = read_balanced(T_LPAREN);

	return seq;
}
/*
* Free any dynamically allocated memory referenced by
* the value stack or yylval.

View File

@ -1,5 +1,5 @@
%{
/* $NetBSD: scan.l,v 1.141 2024/05/11 16:12:28 rillig Exp $ */
/* $NetBSD: scan.l,v 1.142 2024/05/12 08:48:36 rillig Exp $ */
/*
* Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved.
@ -35,7 +35,7 @@
#include <sys/cdefs.h>
#if defined(__RCSID)
__RCSID("$NetBSD: scan.l,v 1.141 2024/05/11 16:12:28 rillig Exp $");
__RCSID("$NetBSD: scan.l,v 1.142 2024/05/12 08:48:36 rillig Exp $");
#endif
#include "lint1.h"
@ -49,6 +49,7 @@ EXP ([eE][+-]?[0-9]+)
PEXP (p[+-]?[0-9A-Fa-f]+)
FSUF ([fFlL]?[i]?)
%pointer
%option nounput
%%