lint: parse but otherwise ignore C23 attributes

The C23 attributes are only parsed before an expression in an expression
statement, as a proof of concept.  Other places will follow later.
This commit is contained in:
rillig 2024-05-11 16:12:28 +00:00
parent 9cfc443f88
commit 4bd7e6e13f
6 changed files with 369 additions and 26 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: c23.c,v 1.12 2024/05/09 20:56:41 rillig Exp $ */
/* $NetBSD: c23.c,v 1.13 2024/05/11 16:12:28 rillig Exp $ */
# 3 "c23.c"
// Tests for the option -Ac23, which allows features from C23 and all earlier
@ -109,3 +109,57 @@ thread_local extern int extern_thread_local_2;
static thread_local int static_thread_local_1;
/* expect+1: warning: static variable 'static_thread_local_2' unused [226] */
thread_local static int static_thread_local_2;
/*
 * Exercise the syntactic forms of C23 attribute specifier sequences in
 * front of an expression statement.  Every statement below increments 'i'
 * once; the attributes themselves carry no meaning for the test.
 */
int
attributes(int i)
{
// An attribute specifier list may be empty.
[[]]i++;
// There may be leading or trailing commas.
[[,]]i++;
// There may be arbitrary commas around or between the attributes.
[[,,,,,]]i++;
// An attribute may be a plain identifier without arguments.
[[identifier]]i++;
// The identifier may be prefixed with one additional identifier.
[[prefix::identifier]]i++;
// An attribute may have empty arguments.
[[identifier()]]i++;
// The arguments of an attribute may be arbitrary tokens.
[[identifier([])]]i++;
// The commas in this "argument list" are ordinary punctuator tokens,
// they do not separate any arguments.
// The structure of the attribute argument is:
// 1. empty balanced token sequence between '[' and ']'
// 2. token ','
// 3. empty balanced token sequence between '{' and '}'
// 4. token ','
// 5. empty balanced token sequence between '(' and ')'
[[identifier([], {}, ())]]i++;
// Inside an argument, parentheses may be nested.
[[identifier(((((())))))]]i++;
// Inside an argument, brackets may be nested.
[[identifier([[[[[]]]]])]]i++;
// Inside an argument, braces may be nested.
[[identifier({{{{{}}}}})]]i++;
// An attribute argument may contain arbitrary punctuation.
[[identifier(++++ ? ? ? : : :: )]]i++;
// An attribute argument may contain constants and string literals.
[[identifier(0, 0.0, "hello" " " "world")]]i++;
// There may be multiple attribute specifier sequences in a row.
[[]][[]][[]]i++;
return i;
}

View File

@ -1,5 +1,5 @@
%{
/* $NetBSD: cgram.y,v 1.499 2024/05/09 20:56:41 rillig Exp $ */
/* $NetBSD: cgram.y,v 1.500 2024/05/11 16:12:28 rillig Exp $ */
/*
* Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved.
@ -35,7 +35,7 @@
#include <sys/cdefs.h>
#if defined(__RCSID)
__RCSID("$NetBSD: cgram.y,v 1.499 2024/05/09 20:56:41 rillig Exp $");
__RCSID("$NetBSD: cgram.y,v 1.500 2024/05/11 16:12:28 rillig Exp $");
#endif
#include <limits.h>
@ -69,6 +69,7 @@ static int saved_lwarn = LWARN_NOTHING_SAVED;
static void cgram_declare(sym_t *, bool, sbuf_t *);
static void read_until_rparen(void);
static balanced_token_sequence read_balanced_token_sequence(void);
static sym_t *symbolrename(sym_t *, sbuf_t *);
@ -110,6 +111,42 @@ is_either(const char *s, const char *a, const char *b)
return strcmp(s, a) == 0 || strcmp(s, b) == 0;
}
/*
 * Append 'attr' to 'list', growing the backing array when it is full.
 *
 * On growth, the capacity becomes 16 + 2 * cap and the existing elements
 * are copied into a new zero-initialized array from block_zero_alloc.  The
 * previous array is not released here.
 */
static void
attribute_list_add(attribute_list *list, attribute attr)
{
	if (list->len >= list->cap) {
		attribute *old_attrs = list->attrs;
		list->cap = 16 + 2 * list->cap;
		list->attrs = block_zero_alloc(
		    list->cap * sizeof(*list->attrs), "attribute_list.attrs");
		/*
		 * A list that has never grown may still have attrs == NULL;
		 * passing a null pointer to memcpy is undefined behavior
		 * even when the length is 0, so skip the copy in that case.
		 */
		if (list->len > 0)
			memcpy(list->attrs, old_attrs,
			    list->len * sizeof(*list->attrs));
	}
	list->attrs[list->len++] = attr;
}
/*
 * Append every attribute of 'src' to 'dst', preserving their order.
 * 'src' is passed by value and is left untouched.
 */
static void
attribute_list_add_all(attribute_list *dst, attribute_list src)
{
	size_t idx = 0;

	while (idx < src.len) {
		attribute_list_add(dst, src.attrs[idx]);
		idx++;
	}
}
/*
 * Build an attribute value from its parts.
 *
 * 'name' is required; 'prefix' and 'arg' may each be NULL, in which case
 * the corresponding member of the result stays NULL.  The name and prefix
 * strings are duplicated with xstrdup; the argument sequence is copied
 * shallowly into memory obtained from block_zero_alloc.
 */
static attribute
new_attribute(const sbuf_t *prefix, const sbuf_t *name,
    const balanced_token_sequence *arg)
{
	attribute result = { .name = xstrdup(name->sb_name) };

	if (prefix != NULL)
		result.prefix = xstrdup(prefix->sb_name);

	if (arg != NULL) {
		balanced_token_sequence *copy = block_zero_alloc(
		    sizeof(*copy), "balanced_token_sequence");
		*copy = *arg;
		result.arg = copy;
	}

	return result;
}
#if YYDEBUG && YYBYACC
#define YYSTYPE_TOSTRING cgram_to_string
#endif
@ -141,6 +178,9 @@ is_either(const char *s, const char *a, const char *b)
bool y_in_system_header;
designation y_designation;
named_constant y_named_constant;
attribute y_attribute;
attribute_list y_attribute_list;
balanced_token_sequence y_tokens;
};
/* for Bison:
@ -231,6 +271,7 @@ is_either(const char *s, const char *a, const char *b)
%token T_COMMA
%token T_SEMI
%token T_ELLIPSIS
%token T_DCOLON
%token T_REAL
%token T_IMAG
%token T_GENERIC
@ -388,6 +429,11 @@ is_either(const char *s, const char *a, const char *b)
%type <y_range> range
/* No type for init_lbrace. */
/* No type for init_rbrace. */
%type <y_attribute_list> attribute_specifier_sequence
%type <y_attribute_list> attribute_specifier
%type <y_attribute_list> attribute_list
%type <y_attribute> attribute
%type <y_tokens> attribute_argument_clause
%type <y_name> asm_or_symbolrename_opt
/* No type for statement. */
/* No type for no_attr_statement. */
@ -1854,27 +1900,71 @@ init_rbrace: /* helper */
}
;
/* TODO: Implement 'attribute_specifier_sequence' from C23 6.7.13.2. */
/* C23 6.7.13.2 */
attribute_specifier_sequence:
/* First specifier: start a fresh list and copy its attributes into it. */
attribute_specifier {
$$ = (attribute_list) { NULL, 0, 0 };
attribute_list_add_all(&$$, $1);
}
/* Each further specifier appends its attributes to the sequence so far. */
| attribute_specifier_sequence attribute_specifier {
$$ = $1;
attribute_list_add_all(&$$, $2);
}
;
/* TODO: Implement 'attribute_specifier' from C23 6.7.13.2. */
/* C23 6.7.13.2 */
attribute_specifier:
/*
 * The double brackets are matched as two separate T_LBRACK and two
 * separate T_RBRACK tokens; the value is the enclosed attribute list.
 */
T_LBRACK T_LBRACK attribute_list T_RBRACK T_RBRACK {
$$ = $3;
}
;
/* TODO: Implement 'attribute_list' from C23 6.7.13.2. */
/* C23 6.7.13.2 */
attribute_list:
/* An empty list, as in '[[]]'. */
/* empty */ {
$$ = (attribute_list) { NULL, 0, 0 };
}
/* A list that starts with an attribute. */
| attribute {
$$ = (attribute_list) { NULL, 0, 0 };
attribute_list_add(&$$, $1);
}
/*
 * A comma that is not followed by an attribute.  There is no explicit
 * action, so yacc's default action '$$ = $1' keeps the list unchanged.
 */
| attribute_list T_COMMA
/* A comma followed by an attribute appends that attribute. */
| attribute_list T_COMMA attribute {
$$ = $1;
attribute_list_add(&$$, $3);
}
;
/* TODO: Implement 'attribute' from C23 6.7.13.2. */
/* C23 6.7.13.2 */
attribute:
/* name */
identifier {
$$ = new_attribute(NULL, $1, NULL);
}
/* prefix::name */
| identifier T_DCOLON identifier {
$$ = new_attribute($1, $3, NULL);
}
/* name(balanced tokens) */
| identifier attribute_argument_clause {
$$ = new_attribute(NULL, $1, &$2);
}
/* prefix::name(balanced tokens) */
| identifier T_DCOLON identifier attribute_argument_clause {
$$ = new_attribute($1, $3, &$4);
}
;
/* TODO: Implement 'attribute_token' from C23 6.7.13.2. */
/* The rule 'attribute_token' is inlined into 'attribute'. */
/* The rule 'standard_attribute' is inlined into 'attribute_token'. */
/* The rule 'attribute_prefixed_token' is inlined into 'attribute_token'. */
/* The rule 'attribute_prefix' is inlined into 'attribute_token'. */
/* TODO: Implement 'standard_attribute' from C23 6.7.13.2. */
/* C23 6.7.13.2 */
attribute_argument_clause:
/*
 * Only the opening parenthesis is matched by the grammar.  The tokens
 * that follow are read directly from the lexer by
 * read_balanced_token_sequence, up to the matching closing parenthesis.
 */
T_LPAREN {
$$ = read_balanced_token_sequence();
}
;
/* TODO: Implement 'attribute_prefixed_token' from C23 6.7.13.2. */
/* TODO: Implement 'attribute_prefix' from C23 6.7.13.2. */
/* TODO: Implement 'attribute_argument_clause' from C23 6.7.13.2. */
/* TODO: Implement 'balanced_token_sequence' from C23 6.7.13.2. */
/* TODO: Implement 'balanced_token' from C23 6.7.13.2. */
/* The rule 'balanced_token_sequence' is inlined into 'attribute_argument_clause'. */
/* The rule 'balanced_token' is inlined into 'balanced_token_sequence'. */
asm_or_symbolrename_opt: /* GCC extensions */
/* empty */ {
@ -2013,6 +2103,11 @@ expression_statement:
check_statement_reachable();
suppress_fallthrough = false;
}
| attribute_specifier_sequence expression T_SEMI {
debug_attribute_list(&$1);
expr($2, false, false, false, false);
suppress_fallthrough = false;
}
;
/* C99 6.8.4, C23 6.8.5.1 */
@ -2404,10 +2499,10 @@ yyerror(const char *msg)
#if YYDEBUG && YYBYACC
static const char *
cgram_to_string(int token, YYSTYPE val)
cgram_to_string(int tok, YYSTYPE val)
{
switch (token) {
switch (tok) {
case T_INCDEC:
return val.y_inc ? "++" : "--";
case T_MULTIPLICATIVE:
@ -2467,6 +2562,82 @@ read_until_rparen(void)
yyclearin;
}
/*
 * Store the token most recently returned by the lexer (described by
 * yychar, yylval and yytext) in '*tok'.
 *
 * Names, type names and named constants become identifiers, constants
 * and string literals keep their value, and every other token is stored
 * as a punctuator using its source text.
 */
static void
fill_token(token *tok)
{
	if (yychar == T_NAME || yychar == T_TYPENAME) {
		tok->kind = TK_IDENTIFIER;
		tok->u.identifier = xstrdup(yylval.y_name->sb_name);
	} else if (yychar == T_CON) {
		tok->kind = TK_CONSTANT;
		tok->u.constant = *yylval.y_val;
	} else if (yychar == T_NAMED_CONSTANT) {
		/* The spelling is taken from the lexer's text buffer. */
		tok->kind = TK_IDENTIFIER;
		tok->u.identifier = xstrdup(yytext);
	} else if (yychar == T_STRING) {
		tok->kind = TK_STRING_LITERALS;
		tok->u.string_literals.len = yylval.y_string->len;
		tok->u.string_literals.cap = yylval.y_string->cap;
		tok->u.string_literals.data = xstrdup(yylval.y_string->data);
	} else {
		tok->kind = TK_PUNCTUATOR;
		tok->u.punctuator = xstrdup(yytext);
	}
}
/*
 * Ensure that 'seq' has room for at least one more token.
 *
 * When the sequence is full, the capacity becomes 16 + 2 * cap and the
 * existing tokens are copied into a new zero-initialized array from
 * block_zero_alloc.  The previous array is not released here.
 */
static void
seq_reserve(balanced_token_sequence *seq)
{
	if (seq->len >= seq->cap) {
		seq->cap = 16 + 2 * seq->cap;
		const balanced_token *old_tokens = seq->tokens;
		balanced_token *new_tokens = block_zero_alloc(
		    seq->cap * sizeof(*seq->tokens), "balanced_tokens");
		/*
		 * A sequence that has never grown may still have
		 * tokens == NULL; passing a null pointer to memcpy is
		 * undefined behavior even when the length is 0.
		 */
		if (seq->len > 0)
			memcpy(new_tokens, old_tokens,
			    seq->len * sizeof(*seq->tokens));
		seq->tokens = new_tokens;
	}
}
static balanced_token_sequence
read_balanced(int opening)
{
debug_enter();
int closing = opening == T_LPAREN ? T_RPAREN
: opening == T_LBRACK ? T_RBRACK : T_RBRACE;
balanced_token_sequence seq = { NULL, 0, 0 };
debug_step("opening %d, closing %d", opening, closing);
while (yychar = yylex(), yychar > 0 && yychar != closing) {
debug_step("reading token %d", yychar);
seq_reserve(&seq);
if (yychar == T_LPAREN
|| yychar == T_LBRACK
|| yychar == T_LBRACE) {
seq.tokens[seq.len].kind = yychar == T_LPAREN ? '('
: yychar == T_LBRACK ? '[' : '{';
seq.tokens[seq.len++].u.tokens = read_balanced(yychar);
} else
fill_token(&seq.tokens[seq.len++].u.token);
}
debug_leave();
return seq;
}
/*
 * Read the balanced token sequence that follows an already consumed '(',
 * up to the matching ')'.
 *
 * Asserts that yychar is negative on entry (presumably meaning that the
 * parser holds no lookahead token) and clears the lookahead afterwards,
 * since read_balanced leaves its last token in yychar.
 */
static balanced_token_sequence
read_balanced_token_sequence(void)
{
	lint_assert(yychar < 0);

	balanced_token_sequence tokens = read_balanced(T_LPAREN);

	yyclearin;
	return tokens;
}
static sym_t *
symbolrename(sym_t *s, sbuf_t *sb)
{

View File

@ -1,4 +1,4 @@
/* $NetBSD: debug.c,v 1.78 2024/05/09 11:08:07 rillig Exp $ */
/* $NetBSD: debug.c,v 1.79 2024/05/11 16:12:28 rillig Exp $ */
/*-
* Copyright (c) 2021 The NetBSD Foundation, Inc.
@ -35,7 +35,7 @@
#include <sys/cdefs.h>
#if defined(__RCSID)
__RCSID("$NetBSD: debug.c,v 1.78 2024/05/09 11:08:07 rillig Exp $");
__RCSID("$NetBSD: debug.c,v 1.79 2024/05/11 16:12:28 rillig Exp $");
#endif
#include <stdlib.h>
@ -526,4 +526,73 @@ debug_dcs_all(void)
debug_decl_level(dl);
}
}
/*
 * Print a single token to the debug output, in a form that depends on its
 * kind: identifiers, string literals and punctuators as text, constants
 * according to their type.
 */
static void
debug_token(const token *tok)
{
	switch (tok->kind) {
	case TK_IDENTIFIER:
		debug_printf("%s", tok->u.identifier);
		return;
	case TK_CONSTANT: {
		val_t value = tok->u.constant;
		tspec_t type = value.v_tspec;

		if (is_floating(type)) {
			debug_printf("%Lg", value.u.floating);
		} else if (is_uinteger(type)) {
			debug_printf("%llu",
			    (unsigned long long)value.u.integer);
		} else if (is_integer(type)) {
			debug_printf("%lld", (long long)value.u.integer);
		} else {
			/* The only remaining constant type is BOOL. */
			lint_assert(type == BOOL);
			debug_printf("%s",
			    value.u.integer != 0 ? "true" : "false");
		}
		return;
	}
	case TK_STRING_LITERALS:
		debug_printf("%s", tok->u.string_literals.data);
		return;
	case TK_PUNCTUATOR:
		debug_printf("%s", tok->u.punctuator);
		return;
	}
}
/*
 * Print a balanced token sequence to the debug output, separating
 * adjacent items with a single space.  Nested sequences are printed
 * recursively, enclosed in their bracket pair.
 */
static void
debug_balanced_token_sequence(const balanced_token_sequence *seq)
{
	for (size_t idx = 0; idx < seq->len; idx++) {
		const balanced_token *item = &seq->tokens[idx];
		const char *sep = idx > 0 ? " " : "";

		if (item->kind == '\0') {
			/* A plain token. */
			debug_printf("%s", sep);
			debug_token(&item->u.token);
			continue;
		}

		/* A nested sequence; 'kind' is its opening bracket. */
		char closing;
		if (item->kind == '(')
			closing = ')';
		else if (item->kind == '[')
			closing = ']';
		else
			closing = '}';

		debug_printf("%s%c", sep, item->kind);
		debug_balanced_token_sequence(&item->u.tokens);
		debug_printf("%c", closing);
	}
}
/*
 * Print each attribute of 'list' to the debug output, in the form
 * "attribute [[prefix::name(args)]]", where the prefix and the argument
 * part are only printed if present.
 */
void
debug_attribute_list(const attribute_list *list)
{
	size_t idx = 0;

	while (idx < list->len) {
		const attribute *attr = &list->attrs[idx];

		debug_printf("attribute [[");
		if (attr->prefix != NULL)
			debug_printf("%s::", attr->prefix);
		debug_printf("%s", attr->name);

		if (attr->arg != NULL) {
			debug_printf("(");
			debug_balanced_token_sequence(attr->arg);
			debug_printf(")");
		}

		/* debug_step rather than debug_printf ends the entry. */
		debug_step("]]");
		idx++;
	}
}
#endif

View File

@ -1,4 +1,4 @@
/* $NetBSD: externs1.h,v 1.225 2024/05/09 11:08:07 rillig Exp $ */
/* $NetBSD: externs1.h,v 1.226 2024/05/11 16:12:28 rillig Exp $ */
/*
* Copyright (c) 1994, 1995 Jochen Pohl
@ -152,6 +152,7 @@ void debug_pop_indented(bool);
void debug_enter_func(const char *);
void debug_step(const char *fmt, ...) __printflike(1, 2);
void debug_leave_func(const char *);
void debug_attribute_list(const attribute_list *);
#define debug_enter() debug_enter_func(__func__)
#define debug_leave() debug_leave_func(__func__)
#else
@ -171,6 +172,7 @@ void debug_leave_func(const char *);
#define debug_enter() debug_noop()
#define debug_step(...) debug_noop()
#define debug_leave() debug_noop()
#define debug_attribute_list(list) debug_noop()
#endif
/*

View File

@ -1,4 +1,4 @@
/* $NetBSD: lint1.h,v 1.226 2024/05/09 11:08:07 rillig Exp $ */
/* $NetBSD: lint1.h,v 1.227 2024/05/11 16:12:28 rillig Exp $ */
/*
* Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved.
@ -513,6 +513,52 @@ typedef struct {
bool unescaped_newline; /* stops iterating */
} quoted_iterator;
/* Selects which member of the union in 'struct token' is in use. */
typedef enum {
TK_IDENTIFIER,
TK_CONSTANT,
TK_STRING_LITERALS,
TK_PUNCTUATOR,
} token_kind;
/* A single token, tagged by 'kind'; the matching member of 'u' is valid. */
typedef struct token {
token_kind kind;
union {
/* for TK_IDENTIFIER */
const char *identifier;
/* for TK_CONSTANT */
val_t constant;
/* for TK_STRING_LITERALS */
buffer string_literals;
/* for TK_PUNCTUATOR */
const char *punctuator;
} u;
} token;
/*
 * The two types below refer to each other: a sequence contains balanced
 * tokens, and a balanced token may itself contain a nested sequence.
 */
typedef struct balanced_token_sequence balanced_token_sequence;
typedef struct balanced_token balanced_token;
/* A growable array of balanced tokens: 'len' used out of 'cap' slots. */
struct balanced_token_sequence {
balanced_token *tokens;
size_t len;
size_t cap;
};
/*
 * Either a plain token (kind '\0', 'u.token' is valid) or a bracketed,
 * nested sequence (kind is the opening bracket, 'u.tokens' is valid).
 */
struct balanced_token {
char kind; // '\0', '(', '[', '{'
union {
token token;
balanced_token_sequence tokens;
} u;
};
/* A single attribute of the form 'prefix::name(arg)'. */
typedef struct {
/* NULL if the attribute has no prefix */
const char *prefix;
const char *name;
/* NULL if the attribute has no argument clause */
balanced_token_sequence *arg;
} attribute;
/* A growable array of attributes: 'len' used out of 'cap' slots. */
typedef struct {
attribute *attrs;
size_t len;
size_t cap;
} attribute_list;
#include "externs1.h"
#define lint_assert(cond) \

View File

@ -1,5 +1,5 @@
%{
/* $NetBSD: scan.l,v 1.140 2023/09/14 22:20:08 rillig Exp $ */
/* $NetBSD: scan.l,v 1.141 2024/05/11 16:12:28 rillig Exp $ */
/*
* Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved.
@ -35,7 +35,7 @@
#include <sys/cdefs.h>
#if defined(__RCSID)
__RCSID("$NetBSD: scan.l,v 1.140 2023/09/14 22:20:08 rillig Exp $");
__RCSID("$NetBSD: scan.l,v 1.141 2024/05/11 16:12:28 rillig Exp $");
#endif
#include "lint1.h"
@ -111,6 +111,7 @@ FSUF ([fFlL]?[i]?)
"(" return T_LPAREN;
")" return T_RPAREN;
"..." return T_ELLIPSIS;
"::" return T_DCOLON;
"'" return lex_character_constant();
"L'" return lex_wide_character_constant();
^#.*$ lex_directive(yytext);