snprintb: reject combinations of 'f' with ':' as well as 'F' with '='

These combinations would lead to garbled output.
This commit is contained in:
rillig 2024-04-07 12:05:23 +00:00
parent 864a50a39a
commit 442f4c6c59
3 changed files with 184 additions and 190 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: snprintb.c,v 1.46 2024/04/07 10:10:54 rillig Exp $ */
/* $NetBSD: snprintb.c,v 1.47 2024/04/07 12:05:23 rillig Exp $ */
/*-
* Copyright (c) 2002, 2024 The NetBSD Foundation, Inc.
@ -35,7 +35,7 @@
# include <sys/cdefs.h>
# if defined(LIBC_SCCS)
__RCSID("$NetBSD: snprintb.c,v 1.46 2024/04/07 10:10:54 rillig Exp $");
__RCSID("$NetBSD: snprintb.c,v 1.47 2024/04/07 12:05:23 rillig Exp $");
# endif
# include <sys/types.h>
@ -46,7 +46,7 @@ __RCSID("$NetBSD: snprintb.c,v 1.46 2024/04/07 10:10:54 rillig Exp $");
# include <errno.h>
# else /* ! _KERNEL */
# include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: snprintb.c,v 1.46 2024/04/07 10:10:54 rillig Exp $");
__KERNEL_RCSID(0, "$NetBSD: snprintb.c,v 1.47 2024/04/07 12:05:23 rillig Exp $");
# include <sys/param.h>
# include <sys/inttypes.h>
# include <sys/systm.h>
@ -150,7 +150,8 @@ old_style(state *s)
static int
new_style(state *s)
{
uint64_t field = s->val;
uint8_t field_kind = 0; // 0 or 'f' or 'F'
uint64_t field = 0; // valid if field_kind != '\0'
int matched = 1;
const char *prev_bitfmt = s->bitfmt;
while (*s->bitfmt != '\0') {
@ -158,6 +159,7 @@ new_style(state *s)
uint8_t kind = cur_bitfmt[0];
switch (kind) {
case 'b':
field_kind = 0;
prev_bitfmt = cur_bitfmt;
uint8_t b_bit = cur_bitfmt[1];
if (b_bit >= 64)
@ -174,6 +176,7 @@ new_style(state *s)
break;
case 'f':
case 'F':
field_kind = kind;
prev_bitfmt = cur_bitfmt;
matched = 0;
uint8_t f_lsb = cur_bitfmt[1];
@ -200,6 +203,10 @@ new_style(state *s)
case '=':
case ':':
s->bitfmt += 2;
if (kind == '=' && field_kind != 'f')
return -1;
if (kind == ':' && field_kind != 'F')
return -1;
uint8_t cmp = cur_bitfmt[1];
if (cur_bitfmt[2] == '\0')
return -1;
@ -213,6 +220,8 @@ new_style(state *s)
maybe_wrap_line(s, prev_bitfmt);
break;
case '*':
if (field_kind == 0)
return -1;
if (cur_bitfmt[1] == '\0')
return -1;
s->bitfmt++;

View File

@ -1,4 +1,4 @@
.\" $NetBSD: snprintb.3,v 1.36 2024/02/29 21:08:54 rillig Exp $
.\" $NetBSD: snprintb.3,v 1.37 2024/04/07 12:05:23 rillig Exp $
.\"
.\" Copyright (c) 1998, 2024 The NetBSD Foundation, Inc.
.\" All rights reserved.
@ -48,7 +48,6 @@ The
.Fn snprintb
function formats a bitmask into a mnemonic form suitable for printing.
.Pp
This conversion is useful for decoding bit fields in device registers.
It formats the integer
.Fa val
into the buffer
@ -103,13 +102,18 @@ character
The decoding directive in
.Fa fmt
describes how the bitfield is to be interpreted and displayed.
It follows two possible syntaxes, referred to as
It follows two possible formats, referred to as
.Dq old
and
.Dq new .
The main advantage of the
The
.Dq old
format is limited to describing single bits in a 32-bit value,
the bit positions are 1-based.
The
.Dq new
formatting is that it is capable of handling multi-bit fields.
format supports multi-bit fields and 64-bit values,
the bit positions are 0-based.
.Pp
If the first character of
.Fa fmt
@ -119,153 +123,149 @@ the remainder of the
.Fa fmt
argument follows the
.Dq new
syntax.
The second character
.Pq the first for the old format
is a binary character representation of the
output numeral base in which the bitfield will be printed before it is decoded.
Recognized radix values
format.
.Pp
The next character
.Po the first for the
.Dq old
format
.Pc
specifies the numeral base in which to print the numbers in the output.
The possible values
.Pq in C escape-character format
are
.Ql \e10
.Pq octal ,
.Ql \e12
.Pq decimal ,
and
.Ql \e20
.Pq hexadecimal .
.Ql \e010
for octal,
.Ql \e012
for decimal, and
.Ql \e020
for hexadecimal.
.Pp
The remaining characters in the
.Fa fmt
argument are interpreted as a list of formatting directives.
.
.Ss Old Syntax
.Pp
The
argument represent the formatting conversions,
according to the
.Dq old
format syntax is a series of bit-position\(endescription pairs.
or
.Dq new
format.
.
.Ss Old Format
.Pp
Each directive begins with a binary character value that represents
the position of the bit being described.
In the
.Dq old
format, each conversion specifies a bit position
and a description that is printed if the corresponding bit is set.
.Pp
.Sy NB :
the bit positions in the old syntax are
.Em 1-based\^ !
A bit position value of 1
.Pq Ql \e1
describes the least significant bit.
Whereas a position value of 32
.Po octal
.Ql \e040 ,
hexadecimal
.Ql \ex20 ,
the ASCII space character
.Pc
describes the most significant bit.
The old syntax is limited to 32-bit values.
The bit position is a 1-based single-byte binary value,
ranging from
.Ql \e001
(1) for the least significant bit up to
.Ql \e040
(32) for the most significant bit.
.Pp
The remaining characters are the description to print should the bit
being described be set.
.Pp
Descriptions are delimited by the next bit position value character
encountered
.Pq distinguishable by its value being \*[Le] 32 ,
The description is delimited by the next character whose value is 32 or less
.Po see
.Xr ascii 7
.Pc ,
or by the end of the format string itself.
.
.Ss New Syntax
.Ss New Format
.Pp
For the
In the
.Dq new
format syntax,
a formatting directive begins with a field type
followed by a binary field position and possibly a field length,
followed by a description.
format,
each conversion begins with a conversion type,
followed by type-specific parameters, each encoded as a single byte,
followed by a
.Tn NUL Ns -terminated description.
The bit positions are 0-based,
the least significant bit is bit-position zero.
Each description is terminated by a
.Tn NUL
byte.
they range from
.Sq \e000
for the least significant bit to
.Sq \e077
(63) for the most significant bit.
.
.Bl -tag -width Cm
.
.It Cm b\e Ns Ar B
Describes a single bit at bit-position
.Ar B .
The remaining characters are the description to print should the bit
being described be set.
This field directive is similar in function to the old format.
When converting old formats to the new syntax don't forget that the
new syntax uses zero-based bit positions.
.It Cm b Ar bit Ar descr
Prints the description from
.Ar descr
if the bit at the position
.Ar bit
is set.
.
.It Cm f\e Ns Ar B Ns Cm \e Ns Ar L
Describes a multi-bit field beginning at bit-position
.Ar B
and having a bit-length of
.Ar L .
The remaining characters are printed as a description of the field
followed by
.Ql \&=
and the value of the field.
The value of the field is printed in the base specified as the second
character of the
.Ar fmt
argument.
.It Cm f Ar lsb Ar width Ar descr
Prints the description from
.Ar descr ,
a delimiting
.Sq \&=
and the numerical value of the multi-bit field
whose least significant bit is at
.Ar lsb
and that spans
.Ar width
bits.
To print individual values of the field, see the
.Sq Cm \&=
and
.Sq Cm \&*
conversions below.
.
.It Cm F\e Ns Ar B Ns Cm \e Ns Ar L
.It Cm F Ar lsb Ar width Op Ar descr
Describes a multi-bit field like
.Sq Cm f ,
but just extracts the value for use with the
.Sq Cm \&=
.Sq Cm \&:
and
.Sq Cm \&:
formatting directives described below.
.Sq Cm \&*
conversions below.
The description from
.Ar descr
is ignored,
it is only present for uniformity with the other conversions.
.
.It Cm \&=\e Ns Ar V
The field previously extracted by the last
.It Cm \&= Ar cmp Ar descr
Compares the field value from the previous
.Sq Cm f
or
.Sq Cm F
directive is compared to the byte value
.Ar V
.Pq for values 0 through 255 .
If they are equal,
conversion to the single-byte value
.Ar cmp ,
which ranges from 0 to 255.
If they are equal, prints
.Ql \&=
followed by the description string following
.Ar V
is printed.
This and the
.Sq Cm \&:
directive may be repeated to annotate multiple possible values.
followed by the description from
.Ar descr .
This conversion may be repeated.
.
.It Cm \&:\e Ns Ar V
Operates like the
.It Cm \&: Ar cmp Ar descr
Compares the field value from the previous
.Sq Cm F
conversion to the single-byte value
.Ar cmp ,
which ranges from 0 to 255.
If they are equal, prints the description from
.Ar descr .
This conversion may be repeated.
.
.It Cm * Ar fmt
If none of the previous
.Sq Cm \&=
directive, but omits the leading
.Ql \&= .
.
.It Cm * Ns Ar FMT
This provides a
.Dq default
case that prints the extracted field value using the
or
.Sq Cm \&:
conversions matched, prints the field value, using the
.Xr printf 3
format
.Ar FMT
when other
.Sq Cm \&:
or
.Sq Cm \&=
directives have not matched.
.Ar FMT
may contain a
.Ar fmt .
The format string
.Ar fmt
may contain a single
.Vt uintmax_t
format specification that prints the value that
did not match, since the field can be more than 32 bits wide.
conversion specification that prints the value that did not match.
.El
.Pp
The new format is terminated by an additional
.Tn NUL
character at the end, following that delimiting the last
bit-position\(endescription pair.
character at the end, following that delimiting the last conversion.
This
.Tn NUL
is supplied by the compiler to terminate the string literal and
@ -302,8 +302,7 @@ An example of the new formatting style:
.Bd -literal -offset indent
snprintb(buf, bufsize,
"\e177\e020"
"b\e0LSB\e0" "b\e1BITONE\e0"
"f\e4\e4NIBBLE2\e0"
"b\e0LSB\e0" "b\e1BITONE\e0" "f\e4\e4NIBBLE2\e0"
"f\ex10\e4BURST\e0" "=\e4FOUR\e0" "=\exf""FIFTEEN\e0"
"b\ex1fMSB\e0",
0x800f0701)
@ -400,6 +399,9 @@ does not describe a supported numeral base,
or a bit number in the
.Ar fmt
argument is out of bounds,
or the sequence of conversions in the
.Ar fmt
argument is invalid,
or
.Fn snprintf
failed.

View File

@ -1,4 +1,4 @@
/* $NetBSD: t_snprintb.c,v 1.33 2024/04/07 10:10:54 rillig Exp $ */
/* $NetBSD: t_snprintb.c,v 1.34 2024/04/07 12:05:23 rillig Exp $ */
/*
* Copyright (c) 2002, 2004, 2008, 2010, 2024 The NetBSD Foundation, Inc.
@ -32,7 +32,7 @@
#include <sys/cdefs.h>
__COPYRIGHT("@(#) Copyright (c) 2008, 2010, 2024\
The NetBSD Foundation, inc. All rights reserved.");
__RCSID("$NetBSD: t_snprintb.c,v 1.33 2024/04/07 10:10:54 rillig Exp $");
__RCSID("$NetBSD: t_snprintb.c,v 1.34 2024/04/07 12:05:23 rillig Exp $");
#include <stdio.h>
#include <string.h>
@ -123,6 +123,9 @@ check_snprintb_m(const char *file, size_t line,
#define h_snprintb_error(bitfmt, want_buf) \
h_snprintb_m_len(1024, bitfmt, 0x00, 0, -1, want_buf)
#define h_snprintb_val_error(bitfmt, val, want_buf) \
h_snprintb_m_len(1024, bitfmt, val, 0, -1, want_buf)
#define h_snprintb_m(bitfmt, val, line_max, want_buf) \
h_snprintb_m_len(1024, bitfmt, val, line_max, \
sizeof(want_buf) - 1, want_buf)
@ -419,15 +422,13 @@ ATF_TC_BODY(snprintb, tc)
//
// The description of a 'b' conversion must not be empty, as the
// output would contain several commas in a row.
h_snprintb_len(
1024,
h_snprintb_val_error(
"\177\020"
"b\000lsb\0"
"b\001\0"
"b\002\0"
"b\007msb\0",
0xff,
-1,
"0xff<lsb#");
// new style single bits, bit number too large
@ -591,13 +592,11 @@ ATF_TC_BODY(snprintb, tc)
//
// The description of an 'f' conversion must not be empty, as the
// output would contain an isolated '='.
h_snprintb_len(
1024,
h_snprintb_val_error(
"\177\020"
"f\000\004\0"
"=\001one\0",
0x1,
-1,
"0x1#");
// new style bit-field, non-printable description
@ -620,29 +619,25 @@ ATF_TC_BODY(snprintb, tc)
//
// The description of a '=' conversion must not be empty, as the
// output would contain several '=' in a row.
h_snprintb_len(
1024,
h_snprintb_val_error(
"\177\020"
"f\000\004f\0"
"=\001one\0"
"=\001\0"
"=\001\0",
0x1,
-1,
"0x1<f=0x1=one#");
// new style bit-field, 'F' followed by ':' with empty description
//
// The description of a ':' conversion must not be empty, as the
// output would contain empty angle brackets.
h_snprintb_len(
1024,
h_snprintb_val_error(
"\177\020"
"F\000\004\0"
":\001\0"
"*default\0",
0x1,
-1,
"0x1<#");
// new style bit-field, 'F', ':' with empty description, '*'
@ -651,14 +646,12 @@ ATF_TC_BODY(snprintb, tc)
// output would contain empty angle brackets. Not in this particular
// test case, as the value is different, but the structural error is
// detected nevertheless.
h_snprintb_len(
1024,
h_snprintb_val_error(
"\177\020"
"F\000\004\0"
":\001\0"
"*default\0",
0x2,
-1,
"0x2<#");
// new style bit-field, 'f' with non-exhaustive '='
@ -819,29 +812,24 @@ ATF_TC_BODY(snprintb, tc)
"0xff<f=0xff=000000000000000000000000000255%>");
// new style unknown directive, at the beginning
h_snprintb_len(
128,
h_snprintb_val_error(
"\177\020"
"unknown\0",
0xff,
-1,
"0xff#");
// new style unknown directive, after a known directive
h_snprintb_len(
128,
h_snprintb_val_error(
"\177\020"
"b\007msb\0"
"unknown\0",
0xff,
-1,
"0xff<msb#");
// new style combinations, 'b' '='
//
// A '=' directive without a preceding 'f' or 'F' directive generates
// misleading output outside the angle brackets, which is a mistake.
h_snprintb(
// A '=' conversion requires a preceding 'f' conversion.
h_snprintb_val_error(
"\177\020"
"b\004bit4\0"
"=\000clear\0"
@ -852,14 +840,12 @@ ATF_TC_BODY(snprintb, tc)
"=\001set\0"
"=\245complete\0",
0xa5,
"0xa5=complete<bit0=complete>");
"0xa5#");
// new style combinations, 'b' ':'
//
// A ':' directive without a preceding 'f' or 'F' directive generates
// misleading output outside or inside the angle brackets, which is a
// mistake.
h_snprintb(
// A ':' conversion requires a preceding 'f' or 'F' conversion.
h_snprintb_val_error(
"\177\020"
"b\004bit4\0"
":\000clear\0"
@ -870,96 +856,93 @@ ATF_TC_BODY(snprintb, tc)
":\001set\0"
":\245complete\0",
0xa5,
"0xa5complete<bit0complete>");
"0xa5#");
// new style combinations, 'b' '*'
//
// A '*' directive without a preceding 'f' or 'F' directive is ignored.
h_snprintb(
// A '*' conversion requires a preceding 'f' or 'F' conversion.
h_snprintb_val_error(
"\177\020"
"b\004bit4\0"
"*default(%ju)\0"
"b\000bit0\0"
"*default(%ju)\0",
0xa5,
"0xa5<bit0>");
"0xa5#");
// new style combinations, 'f' 'b' '='
//
// A 'b' directive that occurs between an 'f' and an '=' directive
// generates misleading output, which is a mistake.
h_snprintb(
// A '=' conversion requires a preceding 'f' conversion, there must
// not be a 'b' conversion in between.
h_snprintb_val_error(
"\177\020"
"f\000\010f\0"
"b\005bit5\0"
"=\245match\0",
0xa5,
"0xa5<f=0xa5,bit5=match>");
"0xa5<f=0xa5,bit5#");
// new style combinations, 'f' 'b' ':'
// new style combinations, 'F' 'b' ':'
//
// A 'b' directive that occurs between an 'f' and a ':' directive
// generates misleading output, which is a mistake.
h_snprintb(
// A ':' conversion requires a preceding 'F' conversion, there must
// not be a 'b' conversion in between.
//
// The isolated leading comma is produced by the non-exhaustive 'F'
// conversion. Detecting these at runtime would be too costly.
h_snprintb_val_error(
"\177\020"
"f\000\010f\0"
"F\000\010f\0"
"b\005bit5\0"
":\245match\0",
0xa5,
"0xa5<f=0xa5,bit5match>");
"0xa5<,bit5#");
// new style combinations, 'f' ':'
//
// Combining the 'f' directive with the ':' directive produces the
// misleading output '0x1one', which is a mistake.
h_snprintb(
// The ':' conversion requires a preceding 'F' conversion, not 'f'.
h_snprintb_val_error(
"\177\20"
"f\000\004nibble\0"
":\001one\0",
0x01,
"0x1<nibble=0x1one>");
"0x1<nibble=0x1#");
// new style combinations, 'F' '='
//
// Combining the 'F' and '=' directives outputs an isolated '=', which
// is a mistake.
h_snprintb(
// A '=' conversion requires a preceding 'f' conversion, not 'F'.
h_snprintb_val_error(
"\177\20"
"F\000\004\0"
"=\001one\0",
0x01,
"0x1<=one>");
"0x1<#");
// new style combinations, '='
//
// A '=' directive without a preceding 'f' or 'F' directive generates
// output that doesn't match the standard '0xaa<description>' form,
// which is a mistake.
h_snprintb(
// A '=' conversion requires a preceding 'f' or 'F' conversion.
h_snprintb_val_error(
"\177\020"
"=\245match\0",
0xa5,
"0xa5=match");
"0xa5#");
// new style combinations, ':'
//
// A ':' directive without a preceding 'f' or 'F' directive generates
// misleading output, which is a mistake.
h_snprintb(
// A ':' conversion requires a preceding 'f' or 'F' conversion.
h_snprintb_val_error(
"\177\020"
":\245match\0",
0xa5,
"0xa5match");
"0xa5#");
// new style combinations, '*'
//
// A '*' directive without a preceding 'f' or 'F' is useless, which is
// a mistake.
h_snprintb(
// A '*' conversion requires a preceding 'f' or 'F' conversion.
h_snprintb_val_error(
"\177\020"
"*match\0",
0xa5,
"0xa5");
"0xa5#");
// new style combinations, 'f' '*' '='
//
@ -1492,7 +1475,7 @@ ATF_TC_BODY(snprintb_m, tc)
h_snprintb_m(
"\177\020"
"f\000\004bits\0"
":\000zero\0",
"=\000zero\0",
0xff,
11,
"0xff<bits=#\0");