mirror of
https://git.musl-libc.org/git/musl
synced 2025-02-13 16:54:16 +03:00
new restartable integer parsing framework.
this fixes a number of bugs in integer parsing due to lazy haphazard wrapping, as well as some misinterpretations of the standard. the new parser is able to work character-at-a-time or on whole strings, making it easy to support the wide functions without unbounded space for conversion. it will also be possible to update scanf to use the new parser.
This commit is contained in:
parent
0e2331c9b6
commit
ecc9c5fcfa
105
src/internal/intparse.c
Normal file
105
src/internal/intparse.c
Normal file
@ -0,0 +1,105 @@
|
||||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include "intparse.h"
|
||||
|
||||
/*
 * Digit-value lookup table, indexed by an unsigned char.
 * '0'-'9' map to 0-9 and the letters 'A'-'Z' / 'a'-'z' map to 10-35.
 * Every other byte is initialized with -1, which is stored as 255 in an
 * unsigned char and therefore compares >= any base up to 36: "not a digit".
 */
static const unsigned char digits[] = {
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
	-1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
	25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
	-1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
	25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};
|
||||
|
||||
#define SLIM (UINT_MAX/36-1)
|
||||
#define LLIM (UINTMAX_MAX/36-1)
|
||||
|
||||
int __intparse(struct intparse *v, const void *buf, size_t n)
|
||||
{
|
||||
const unsigned char *s = buf;
|
||||
int d, b = v->base;
|
||||
|
||||
v->cnt += n;
|
||||
for (; n; n--, s++) switch (v->state) {
|
||||
case 0:
|
||||
v->state++;
|
||||
if (*s=='+' || *s=='-') {
|
||||
v->neg = *s=='-';
|
||||
continue;
|
||||
}
|
||||
case 1:
|
||||
v->state++;
|
||||
if (*s=='0' && (!b || b==16)) continue;
|
||||
if (!b) v->base = b = 10;
|
||||
v->state++;
|
||||
goto firstdigit;
|
||||
case 2:
|
||||
v->state++;
|
||||
if ((!b || b==16) && (*s|32) == 'x') {
|
||||
v->base = b = 16;
|
||||
continue;
|
||||
}
|
||||
if (!b) v->base = b = 8;
|
||||
goto seconddigit;
|
||||
case 3:
|
||||
firstdigit:
|
||||
if (digits[*s] >= b) {
|
||||
v->err = EINVAL;
|
||||
return 0;
|
||||
}
|
||||
seconddigit:
|
||||
v->state++;
|
||||
case 4:
|
||||
if (b==10) {
|
||||
for (; n && *s-'0'<10U && v->small<=SLIM; n--, s++)
|
||||
v->small = v->small * 10 + (*s-'0');
|
||||
} else if ((b&-b) == b) {
|
||||
int bs = "\0\1\2\4\7\3\6\5"[(0x17*b)>>5&7];
|
||||
for (; n && (d=digits[*s])<b && v->small<=SLIM; n--, s++)
|
||||
v->small = (v->small<<bs) + d;
|
||||
} else {
|
||||
for (; n && (d=digits[*s])<b && v->small<=SLIM; n--, s++)
|
||||
v->small = v->small * b + d;
|
||||
}
|
||||
if (!n) return 1;
|
||||
v->state++;
|
||||
v->val = v->small;
|
||||
case 5:
|
||||
for (; n && (d=digits[*s])<b && v->val<=LLIM; n--, s++)
|
||||
v->val = v->val * b + d;
|
||||
if (!n) return 1;
|
||||
if (d >= b) goto finished;
|
||||
if (v->val < (UINTMAX_MAX-d)/b)
|
||||
v->val = v->val * b + d;
|
||||
else
|
||||
v->err = ERANGE;
|
||||
v->state++;
|
||||
n--; s++;
|
||||
case 6:
|
||||
if (n && digits[*s]<b) {
|
||||
v->err = ERANGE;
|
||||
v->val = UINTMAX_MAX;
|
||||
|
||||
n--; s++;
|
||||
}
|
||||
for (; n && digits[*s]<b; n--, s++);
|
||||
if (!n) return 1;
|
||||
}
|
||||
return 1;
|
||||
finished:
|
||||
v->cnt -= n;
|
||||
return 0;
|
||||
}
|
11
src/internal/intparse.h
Normal file
11
src/internal/intparse.h
Normal file
@ -0,0 +1,11 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/*
 * State carried across calls to __intparse. Zero-initialize the whole
 * structure, set base, then feed input.
 */
struct intparse {
	uintmax_t val;  /* full-width accumulator; the parsed magnitude */
	unsigned small; /* narrow accumulator used before switching to val */
	size_t cnt;     /* number of input bytes accepted as part of the number */
	/* neg:   nonzero if a '-' sign was seen
	 * base:  radix; 0 on entry requests auto-detection
	 * state: current position in the parser's state machine
	 * err:   0, EINVAL (no digits) or ERANGE (overflow) */
	char neg, base, state, err;
};

/* Feed n bytes from buf to the parser; returns 1 while more input can be
 * accepted and 0 once parsing has ended. */
int __intparse(struct intparse *v, const void *buf, size_t n);
|
@ -1,25 +1,38 @@
|
||||
#include <inttypes.h>
|
||||
#include <errno.h>
|
||||
#include <ctype.h>
|
||||
#include "intparse.h"
|
||||
|
||||
intmax_t strtoimax(const char *s1, char **p, int base)
|
||||
{
|
||||
const unsigned char *s = (const void *)s1;
|
||||
int sign = 0;
|
||||
uintmax_t x;
|
||||
const unsigned char *s = (void *)s1;
|
||||
struct intparse ip = {0};
|
||||
|
||||
if (p) *p = (char *)s1;
|
||||
|
||||
if (base && base-2U > 34) {
|
||||
errno = EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Initial whitespace */
|
||||
for (; isspace(*s); s++);
|
||||
|
||||
/* Optional sign */
|
||||
if (*s == '-') sign = *s++;
|
||||
else if (*s == '+') s++;
|
||||
ip.base = base;
|
||||
__intparse(&ip, s, SIZE_MAX);
|
||||
|
||||
x = strtoumax((const void *)s, p, base);
|
||||
if (x > INTMAX_MAX) {
|
||||
if (!sign || -x != INTMAX_MIN)
|
||||
errno = ERANGE;
|
||||
return sign ? INTMAX_MIN : INTMAX_MAX;
|
||||
if (p && ip.err != EINVAL)
|
||||
*p = (char *)s + ip.cnt;
|
||||
|
||||
if (ip.err) {
|
||||
errno = ip.err;
|
||||
if (ip.err = EINVAL) return 0;
|
||||
return ip.neg ? INTMAX_MIN : INTMAX_MAX;
|
||||
}
|
||||
return sign ? -x : x;
|
||||
|
||||
if (ip.val > INTMAX_MAX) {
|
||||
if (!ip.neg || -ip.val != INTMAX_MIN)
|
||||
errno = ERANGE;
|
||||
return ip.neg ? INTMAX_MIN : INTMAX_MAX;
|
||||
}
|
||||
return ip.neg ? -ip.val : ip.val;
|
||||
}
|
||||
|
@ -2,122 +2,33 @@
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Digit-value lookup table, indexed by an unsigned char.
 * '0'-'9' map to 0-9 and the letters 'A'-'Z' / 'a'-'z' map to 10-35.
 * Every other byte is initialized with -1, which is stored as 255 in an
 * unsigned char and therefore compares >= any base up to 36: "not a digit".
 */
static const unsigned char digits[] = {
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
	-1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
	25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
	-1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
	25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};
|
||||
#include "intparse.h"
|
||||
|
||||
uintmax_t strtoumax(const char *s1, char **p, int base)
|
||||
{
|
||||
const unsigned char *s = (void *)s1;
|
||||
size_t x1, z1;
|
||||
uintmax_t x, z=0;
|
||||
int sign = 0;
|
||||
int shift;
|
||||
struct intparse ip = {0};
|
||||
|
||||
if (!p) p = (char **)&s1;
|
||||
if (p) *p = (char *)s1;
|
||||
|
||||
/* Initial whitespace */
|
||||
for (; isspace(*s); s++);
|
||||
|
||||
/* Optional sign */
|
||||
if (*s == '-') sign = *s++;
|
||||
else if (*s == '+') s++;
|
||||
|
||||
/* Default base 8, 10, or 16 depending on prefix */
|
||||
if (base == 0) {
|
||||
if (s[0] == '0') {
|
||||
if ((s[1]|32) == 'x') base = 16;
|
||||
else base = 8;
|
||||
} else {
|
||||
base = 10;
|
||||
}
|
||||
}
|
||||
|
||||
if ((unsigned)base-2 > 36-2 || digits[*s]>=base) {
|
||||
*p = (char *)s1;
|
||||
if (base && base-2U > 34) {
|
||||
errno = EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Main loops. Only use big types if we have to. */
|
||||
if (base == 10) {
|
||||
for (x1=0; isdigit(*s) && x1<=SIZE_MAX/10-10; s++)
|
||||
x1 = 10*x1 + *s-'0';
|
||||
for (x=x1; isdigit(*s) && x<=UINTMAX_MAX/10-10; s++)
|
||||
x = 10*x + *s-'0';
|
||||
if (isdigit(*s)) {
|
||||
if (isdigit(s[1]) || 10*x>UINTMAX_MAX-(*s-'0'))
|
||||
goto overflow;
|
||||
x = 10*x + *s-'0';
|
||||
}
|
||||
} else if (!(base & base/2)) {
|
||||
if (base == 16) {
|
||||
if (s[0]=='0' && (s[1]|32)=='x' && digits[s[2]]<16)
|
||||
s+=2;
|
||||
shift=4;
|
||||
z1 = SIZE_MAX/16;
|
||||
z = UINTMAX_MAX/16;
|
||||
} else if (base == 8) {
|
||||
shift=3;
|
||||
z1 = SIZE_MAX/8;
|
||||
z = UINTMAX_MAX/8;
|
||||
} else if (base == 2) {
|
||||
shift=1;
|
||||
z1 = SIZE_MAX/2;
|
||||
z = UINTMAX_MAX/2;
|
||||
} else if (base == 4) {
|
||||
shift=2;
|
||||
z1 = SIZE_MAX/4;
|
||||
z = UINTMAX_MAX/4;
|
||||
} else /* if (base == 32) */ {
|
||||
shift=5;
|
||||
z1 = SIZE_MAX/32;
|
||||
z = UINTMAX_MAX/32;
|
||||
}
|
||||
for (x1=0; digits[*s]<base && x1<=z1; s++)
|
||||
x1 = (x1<<shift) + digits[*s];
|
||||
for (x=x1; digits[*s]<base && x<=z; s++)
|
||||
x = (x<<shift) + digits[*s];
|
||||
if (digits[*s] < base) goto overflow;
|
||||
} else {
|
||||
z1 = SIZE_MAX/base-base;
|
||||
for (x1=0; digits[*s]<base && x1<=z1; s++)
|
||||
x1 = x1*base + digits[*s];
|
||||
if (digits[*s]<base)
|
||||
z = UINTMAX_MAX/base-base;
|
||||
for (x=x1; digits[*s]<base && x<=z; s++)
|
||||
x = x*base + digits[*s];
|
||||
if (digits[*s] < base) {
|
||||
if (digits[s[1]]<base || x*base>UINTMAX_MAX-digits[*s])
|
||||
goto overflow;
|
||||
x = x*base + digits[*s];
|
||||
}
|
||||
for (; isspace(*s); s++);
|
||||
|
||||
ip.base = base;
|
||||
__intparse(&ip, s, SIZE_MAX);
|
||||
|
||||
if (p && ip.err != EINVAL)
|
||||
*p = (char *)s + ip.cnt;
|
||||
|
||||
if (ip.err) {
|
||||
errno = ip.err;
|
||||
if (ip.err = EINVAL) return 0;
|
||||
return UINTMAX_MAX;
|
||||
}
|
||||
|
||||
*p = (char *)s;
|
||||
return sign ? -x : x;
|
||||
|
||||
overflow:
|
||||
for (; digits[*s] < base; s++);
|
||||
*p = (char *)s;
|
||||
errno = ERANGE;
|
||||
return UINTMAX_MAX;
|
||||
return ip.neg ? -ip.val : ip.val;
|
||||
}
|
||||
|
@ -2,24 +2,38 @@
|
||||
#include <wctype.h>
|
||||
#include <inttypes.h>
|
||||
#include <errno.h>
|
||||
#include "intparse.h"
|
||||
|
||||
intmax_t wcstoimax(const wchar_t *s, wchar_t **p, int base)
|
||||
{
|
||||
int sign = 0;
|
||||
uintmax_t x;
|
||||
struct intparse ip = {0};
|
||||
unsigned char tmp;
|
||||
|
||||
if (p) *p = (wchar_t *)s;
|
||||
|
||||
if (base && base-2U > 34) {
|
||||
errno = EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Initial whitespace */
|
||||
for (; iswspace(*s); s++);
|
||||
|
||||
/* Optional sign */
|
||||
if (*s == '-') sign = *s++;
|
||||
else if (*s == '+') s++;
|
||||
ip.base = base;
|
||||
for (; *s<256 && (tmp=*s, __intparse(&ip, &tmp, 1)); s++);
|
||||
|
||||
x = wcstoumax(s, p, base);
|
||||
if (x > INTMAX_MAX) {
|
||||
if (!sign || -x != INTMAX_MIN)
|
||||
errno = ERANGE;
|
||||
return sign ? INTMAX_MIN : INTMAX_MAX;
|
||||
if (p && ip.err != EINVAL)
|
||||
*p = (wchar_t *)s;
|
||||
|
||||
if (ip.err) {
|
||||
errno = ip.err;
|
||||
if (ip.err = EINVAL) return 0;
|
||||
return ip.neg ? INTMAX_MIN : INTMAX_MAX;
|
||||
}
|
||||
return sign ? -x : x;
|
||||
|
||||
if (ip.val > INTMAX_MAX) {
|
||||
if (!ip.neg || -ip.val != INTMAX_MIN)
|
||||
errno = ERANGE;
|
||||
return ip.neg ? INTMAX_MIN : INTMAX_MAX;
|
||||
}
|
||||
return ip.neg ? -ip.val : ip.val;
|
||||
}
|
||||
|
@ -3,46 +3,33 @@
|
||||
#include <stdlib.h>
|
||||
#include <inttypes.h>
|
||||
#include <errno.h>
|
||||
#include "intparse.h"
|
||||
|
||||
uintmax_t wcstoumax(const wchar_t *s, wchar_t **p, int base)
|
||||
{
|
||||
/* Large enough for largest value in binary */
|
||||
char buf[sizeof(uintmax_t)*8+2];
|
||||
int sign = 0, skipped=0;
|
||||
struct intparse ip = {0};
|
||||
unsigned char tmp;
|
||||
|
||||
if (!p) p = (wchar_t **)&s;
|
||||
if (p) *p = (wchar_t *)s;
|
||||
|
||||
if (base && (unsigned)base-2 > 36-2) {
|
||||
*p = (wchar_t *)s;
|
||||
if (base && base-2U > 34) {
|
||||
errno = EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Initial whitespace */
|
||||
for (; iswspace(*s); s++);
|
||||
|
||||
/* Optional sign */
|
||||
if (*s == '-') sign = *s++;
|
||||
else if (*s == '+') s++;
|
||||
ip.base = base;
|
||||
for (; *s<256 && (tmp=*s, __intparse(&ip, &tmp, 1)); s++);
|
||||
|
||||
/* Skip leading zeros but don't allow leading zeros before "0x". */
|
||||
for (; s[0]=='0' && s[1]=='0'; s++) skipped=1;
|
||||
if (skipped && (base==0 || base==16) && (s[1]|32)=='x') {
|
||||
*p = (wchar_t *)(s+1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Convert to normal char string so we can use strtoumax */
|
||||
buf[0] = sign;
|
||||
if (wcstombs(buf+!!sign, s, sizeof buf-1) == -1) return 0;
|
||||
buf[sizeof buf-1]=0;
|
||||
|
||||
/* Compute final position */
|
||||
if (p) {
|
||||
if ((base==0 || base==16) && s[0]=='0' && (s[1]|32)=='x' && iswxdigit(s[2])) s+=2;
|
||||
for(;*s&&((unsigned)*s-'0'<base||((unsigned)*s|32)-'a'<base-10);s++);
|
||||
if (p && ip.err != EINVAL)
|
||||
*p = (wchar_t *)s;
|
||||
|
||||
if (ip.err) {
|
||||
errno = ip.err;
|
||||
if (ip.err = EINVAL) return 0;
|
||||
return UINTMAX_MAX;
|
||||
}
|
||||
|
||||
return strtoumax(buf, 0, base);
|
||||
return ip.neg ? -ip.val : ip.val;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user