From ccb962b2ea91a7764fac2f3755c9d25b9ccdde66 Mon Sep 17 00:00:00 2001 From: Kris Maglione Date: Mon, 19 Jul 2010 12:50:19 -0400 Subject: [PATCH] [9libs] Sync with plan9port. --- include/fmt.h | 18 +- include/stuff/util.h | 4 - include/utf.h | 9 +- lib/libbio/bgetrune.c | 2 +- lib/libbio/binit.c | 4 +- lib/libbio/bputrune.c | 2 +- lib/libbio/bvprint.c | 3 + lib/libfmt/Makefile | 1 + lib/libfmt/dofmt.c | 105 +++++- lib/libfmt/dorfmt.c | 12 +- lib/libfmt/fltfmt.c | 773 ++++++++++++++++++++++++++------------ lib/libfmt/fmt.c | 1 + lib/libfmt/fmtdef.h | 1 + lib/libfmt/fmtfd.c | 2 + lib/libfmt/fmtlocale.c | 55 +++ lib/libfmt/fmtquote.c | 6 +- lib/libfmt/fmtstr.c | 1 + lib/libfmt/nan64.c | 59 +-- lib/libfmt/runefmtstr.c | 1 + lib/libfmt/runevseprint.c | 1 + lib/libfmt/runevsmprint.c | 1 + lib/libfmt/runevsnprint.c | 1 + lib/libfmt/sprint.c | 5 +- lib/libfmt/strtod.c | 6 +- lib/libfmt/test.c | 19 + lib/libfmt/vseprint.c | 1 + lib/libfmt/vsmprint.c | 1 + lib/libfmt/vsnprint.c | 1 + lib/libregexp/rregexec.c | 19 +- lib/libutf/rune.c | 86 +++-- 30 files changed, 856 insertions(+), 344 deletions(-) create mode 100644 lib/libfmt/fmtlocale.c diff --git a/include/fmt.h b/include/fmt.h index 4b3a98af..db723231 100644 --- a/include/fmt.h +++ b/include/fmt.h @@ -27,10 +27,22 @@ struct Fmt{ void *farg; /* to make flush a closure */ int nfmt; /* num chars formatted so far */ va_list args; /* args passed to dofmt */ - int r; /* % format Rune */ + Rune r; /* % format Rune */ int width; int prec; unsigned long flags; + char *decimal; /* decimal point; cannot be "" */ + + /* For %'d */ + char *thousands; /* separator for thousands */ + + /* + * Each char is an integer indicating #digits before next separator. Values: + * \xFF: no more grouping (or \x7F; defined to be CHAR_MAX in POSIX) + * \x00: repeat previous indefinitely + * \x**: count that many + */ + char *grouping; /* descriptor of separator placement */ }; enum{ @@ -40,7 +52,8 @@ enum{ FmtSharp = FmtPrec << 1, FmtSpace = FmtSharp << 1, FmtSign = FmtSpace << 1, - FmtZero = FmtSign << 1, + FmtApost = FmtSign << 1, + FmtZero = FmtApost << 1, FmtUnsigned = FmtZero << 1, FmtShort = FmtUnsigned << 1, FmtLong = FmtShort << 1, @@ -121,6 +134,7 @@ double fmtcharstod(int(*f)(void*), void*); int fmtfdflush(Fmt*); int fmtfdinit(Fmt*, int fd, char *buf, int size); int fmtinstall(int, int (*f)(Fmt*)); +void fmtlocaleinit(Fmt*, char *decimal, char *thousands, char *grouping); int fmtprint(Fmt*, const char*, ...); int fmtrune(Fmt*, int); int fmtrunestrcpy(Fmt*, Rune*); diff --git a/include/stuff/util.h b/include/stuff/util.h index b7745284..89fa1ec3 100644 --- a/include/stuff/util.h +++ b/include/stuff/util.h @@ -28,10 +28,6 @@ enum { GInvert = 1<<0, }; -enum { - Runemax = (1 << (sizeof(Rune) * 8)) - 1, -}; - #define utf8locale() (!strcmp(nl_langinfo(CODESET), "UTF-8")) #ifdef VARARGCK diff --git a/include/utf.h b/include/utf.h index 66a92d70..d2c2152a 100644 --- a/include/utf.h +++ b/include/utf.h @@ -1,14 +1,15 @@ #ifndef _UTF_H_ #define _UTF_H_ 1 -typedef unsigned short Rune; /* 16 bits */ +typedef unsigned int Rune; /* 32 bits */ enum { - UTFmax = 3, /* maximum bytes per rune */ + UTFmax = 4, /* maximum bytes per rune */ Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ Runeself = 0x80, /* rune and UTF sequences are the same (<) */ - Runeerror = 0xFFFD, /* decoding error in UTF */ + Runeerror = 0xFFFD, /* decoding error in UTF */ + Runemax = 0x10FFFF /* maximum rune value */ }; /* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */ @@ -19,7 +20,7 @@ int islowerrune(Rune); int isspacerune(Rune); int istitlerune(Rune); int isupperrune(Rune); -int runelen(Rune); +int runelen(long); int runenlen(const Rune*, int); Rune* runestrcat(Rune*, const Rune*); Rune* runestrchr(const Rune*, Rune); diff --git a/lib/libbio/bgetrune.c b/lib/libbio/bgetrune.c index 1a6e8abf..1ea054bb 100644 --- a/lib/libbio/bgetrune.c +++ b/lib/libbio/bgetrune.c @@ -7,7 +7,7 @@ Bgetrune(Biobuf *bp) { int c, i; Rune rune; - char str[4]; + char str[UTFmax]; c = Bgetc(bp); if(c < Runeself) { /* one char */ diff --git a/lib/libbio/binit.c b/lib/libbio/binit.c index 19dbed0c..5472f45a 100644 --- a/lib/libbio/binit.c +++ b/lib/libbio/binit.c @@ -123,13 +123,13 @@ Bopen(const char *name, int mode) return 0; case OREAD: - f = open(name, OREAD); + f = open(name, mode); if(f < 0) return 0; break; case OWRITE: - f = creat(name, 0666); + f = creat(name, mode); if(f < 0) return 0; } diff --git a/lib/libbio/bputrune.c b/lib/libbio/bputrune.c index c7808c0f..e25f01ef 100644 --- a/lib/libbio/bputrune.c +++ b/lib/libbio/bputrune.c @@ -6,7 +6,7 @@ int Bputrune(Biobuf *bp, long c) { Rune rune; - char str[4]; + char str[UTFmax]; int n; rune = c; diff --git a/lib/libbio/bvprint.c b/lib/libbio/bvprint.c index efaf2ee5..7c0095e9 100644 --- a/lib/libbio/bvprint.c +++ b/lib/libbio/bvprint.c @@ -29,8 +29,11 @@ Bvprint(Biobuf *bp, const char *fmt, va_list arg) f.flush = fmtBflush; f.farg = bp; f.nfmt = 0; + fmtlocaleinit(&f, nil, nil, nil); n = fmtvprint(&f, fmt, arg); bp->ocount = (char*)f.to - (char*)f.stop; + if(n == 0) + n = f.nfmt; return n; } diff --git a/lib/libfmt/Makefile b/lib/libfmt/Makefile index 8afee20f..d55ebfcd 100644 --- a/lib/libfmt/Makefile +++ b/lib/libfmt/Makefile @@ -17,6 +17,7 @@ OBJ=\ fmt\ fmtfd\ fmtfdflush\ + fmtlocale\ fmtlock\ fmtprint\ fmtquote\ diff --git a/lib/libfmt/dofmt.c b/lib/libfmt/dofmt.c index a7d4d2fc..90524a1c 100644 --- a/lib/libfmt/dofmt.c +++ b/lib/libfmt/dofmt.c @@ -139,8 +139,10 @@ __fmtcpy(Fmt *f, const void *vm, int n, int sz) m = (char*)vm; me = m + sz; - w = f->width; fl = f->flags; + w = 0; + if(fl & FmtWidth) + w = f->width; if((fl & FmtPrec) && n > f->prec) n = f->prec; if(f->runes){ @@ -194,8 +196,10 @@ __fmtrcpy(Fmt *f, const void *vm, int n) int w; m = (Rune*)vm; - w = f->width; fl = f->flags; + w = 0; + if(fl & FmtWidth) + w = f->width; if((fl & FmtPrec) && n > f->prec) n = f->prec; if(f->runes){ @@ -324,10 +328,14 @@ __percentfmt(Fmt *f) int __ifmt(Fmt *f) { - char buf[70], *p, *conv; + char buf[140], *p, *conv; + /* 140: for 64 bits of binary + 3-byte sep every 4 digits */ uvlong vu; ulong u; int neg, base, i, n, fl, w, isv; + int ndig, len, excess, bytelen; + char *grouping; + char *thousands; neg = 0; fl = f->flags; @@ -366,21 +374,25 @@ __ifmt(Fmt *f) u = va_arg(f->args, int); } conv = "0123456789abcdef"; + grouping = "\4"; /* for hex, octal etc. (undefined by spec but nice) */ + thousands = f->thousands; switch(f->r){ case 'd': case 'i': case 'u': base = 10; - break; - case 'x': - base = 16; + grouping = f->grouping; break; case 'X': - base = 16; conv = "0123456789ABCDEF"; + /* fall through */ + case 'x': + base = 16; + thousands = ":"; break; case 'b': base = 2; + thousands = ":"; break; case 'o': base = 8; @@ -398,7 +410,11 @@ __ifmt(Fmt *f) } } p = buf + sizeof buf - 1; - n = 0; + n = 0; /* in runes */ + excess = 0; /* number of bytes > number runes */ + ndig = 0; + len = utflen(thousands); + bytelen = strlen(thousands); if(isv){ while(vu){ i = vu % base; @@ -407,6 +423,12 @@ __ifmt(Fmt *f) *p-- = ','; n++; } + if((fl & FmtApost) && __needsep(&ndig, &grouping)){ + n += len; + excess += bytelen - len; + p -= bytelen; + memmove(p+1, thousands, bytelen); + } *p-- = conv[i]; n++; } @@ -418,16 +440,47 @@ __ifmt(Fmt *f) *p-- = ','; n++; } + if((fl & FmtApost) && __needsep(&ndig, &grouping)){ + n += len; + excess += bytelen - len; + p -= bytelen; + memmove(p+1, thousands, bytelen); + } *p-- = conv[i]; n++; } } if(n == 0){ - *p-- = '0'; - n = 1; + /* + * "The result of converting a zero value with + * a precision of zero is no characters." - ANSI + * + * "For o conversion, # increases the precision, if and only if + * necessary, to force the first digit of the result to be a zero + * (if the value and precision are both 0, a single 0 is printed)." - ANSI + */ + if(!(fl & FmtPrec) || f->prec != 0 || (f->r == 'o' && (fl & FmtSharp))){ + *p-- = '0'; + n = 1; + if(fl & FmtApost) + __needsep(&ndig, &grouping); + } + + /* + * Zero values don't get 0x. + */ + if(f->r == 'x' || f->r == 'X') + fl &= ~FmtSharp; } - for(w = f->prec; n < w && p > buf+3; n++) + for(w = f->prec; n < w && p > buf+3; n++){ + if((fl & FmtApost) && __needsep(&ndig, &grouping)){ + n += len; + excess += bytelen - len; + p -= bytelen; + memmove(p+1, thousands, bytelen); + } *p-- = '0'; + } if(neg || (fl & (FmtSign|FmtSpace))) n++; if(fl & FmtSharp){ @@ -441,9 +494,19 @@ __ifmt(Fmt *f) } } if((fl & FmtZero) && !(fl & (FmtLeft|FmtPrec))){ - for(w = f->width; n < w && p > buf+3; n++) + w = 0; + if(fl & FmtWidth) + w = f->width; + for(; n < w && p > buf+3; n++){ + if((fl & FmtApost) && __needsep(&ndig, &grouping)){ + n += len; + excess += bytelen - len; + p -= bytelen; + memmove(p+1, thousands, bytelen); + } *p-- = '0'; - f->width = 0; + } + f->flags &= ~FmtWidth; } if(fl & FmtSharp){ if(base == 16) @@ -458,7 +521,7 @@ __ifmt(Fmt *f) else if(fl & FmtSpace) *p-- = ' '; f->flags &= ~FmtPrec; - return __fmtcpy(f, p + 1, n, n); + return __fmtcpy(f, p + 1, n, n + excess); } int @@ -499,6 +562,9 @@ __flagfmt(Fmt *f) case '#': f->flags |= FmtSharp; break; + case '\'': + f->flags |= FmtApost; + break; case ' ': f->flags |= FmtSpace; break; @@ -526,12 +592,13 @@ __flagfmt(Fmt *f) int __badfmt(Fmt *f) { - char x[3]; + char x[2+UTFmax]; + int n; x[0] = '%'; - x[1] = f->r; - x[2] = '%'; - f->prec = 3; - __fmtcpy(f, (const void*)x, 3, 3); + n = 1 + runetochar(x+1, &f->r); + x[n++] = '%'; + f->prec = n; + __fmtcpy(f, (const void*)x, n, n); return 0; } diff --git a/lib/libfmt/dorfmt.c b/lib/libfmt/dorfmt.c index 08f3c005..e517371f 100644 --- a/lib/libfmt/dorfmt.c +++ b/lib/libfmt/dorfmt.c @@ -19,6 +19,7 @@ /* format the output into f->to and return the number of characters fmted */ +/* BUG: THIS FILE IS NOT UPDATED TO THE NEW SPEC */ int dorfmt(Fmt *f, const Rune *fmt) { @@ -30,8 +31,8 @@ dorfmt(Fmt *f, const Rune *fmt) nfmt = f->nfmt; for(;;){ if(f->runes){ - rt = f->to; - rs = f->stop; + rt = (Rune*)f->to; + rs = (Rune*)f->stop; while((r = *fmt++) && r != '%'){ FMTRCHAR(f, rt, rs, r); } @@ -41,8 +42,8 @@ dorfmt(Fmt *f, const Rune *fmt) return f->nfmt - nfmt; f->stop = rs; }else{ - t = f->to; - s = f->stop; + t = (char*)f->to; + s = (char*)f->stop; while((r = *fmt++) && r != '%'){ FMTRUNE(f, t, f->stop, r); } @@ -53,8 +54,9 @@ dorfmt(Fmt *f, const Rune *fmt) f->stop = s; } - fmt = __fmtdispatch(f, (Rune*)fmt, 1); + fmt = (Rune*)__fmtdispatch(f, (Rune*)fmt, 1); if(fmt == nil) return -1; } + return 0; /* not reached */ } diff --git a/lib/libfmt/fltfmt.c b/lib/libfmt/fltfmt.c index b1e8c7b3..fd923216 100644 --- a/lib/libfmt/fltfmt.c +++ b/lib/libfmt/fltfmt.c @@ -18,8 +18,10 @@ #include #include #include -#include +#include +#include #include "plan9.h" +#include "fmt.h" #include "fmtdef.h" enum @@ -52,8 +54,8 @@ static double pows10[] = 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, }; - -#define pow10(x) fmtpow10(x) +#define npows10 ((int)(sizeof(pows10)/sizeof(pows10[0]))) +#define pow10(x) fmtpow10(x) static double pow10(int n) @@ -63,330 +65,615 @@ pow10(int n) neg = 0; if(n < 0){ - if(n < DBL_MIN_10_EXP){ - return 0.; - } neg = 1; n = -n; - }else if(n > DBL_MAX_10_EXP){ - return HUGE_VAL; } - if(n < (int)(sizeof(pows10)/sizeof(pows10[0]))) + + if(n < npows10) d = pows10[n]; else{ - d = pows10[sizeof(pows10)/sizeof(pows10[0]) - 1]; + d = pows10[npows10-1]; for(;;){ - n -= sizeof(pows10)/sizeof(pows10[0]) - 1; - if(n < (int)(sizeof(pows10)/sizeof(pows10[0]))){ + n -= npows10 - 1; + if(n < npows10){ d *= pows10[n]; break; } - d *= pows10[sizeof(pows10)/sizeof(pows10[0]) - 1]; + d *= pows10[npows10 - 1]; } } - if(neg){ + if(neg) return 1./d; - } return d; } +/* + * add 1 to the decimal integer string a of length n. + * if 99999 overflows into 10000, return 1 to tell caller + * to move the virtual decimal point. + */ static int -xadd(char *a, int n, int v) +xadd1(char *a, int n) { char *b; int c; - if(n < 0 || n >= NSIGNIF) + if(n < 0 || n > NSIGNIF) return 0; - for(b = a+n; b >= a; b--) { - c = *b + v; + for(b = a+n-1; b >= a; b--) { + c = *b + 1; if(c <= '9') { *b = c; return 0; } *b = '0'; - v = 1; } - *a = '1'; /* overflow adding */ + /* + * need to overflow adding digit. + * shift number down and insert 1 at beginning. + * decimal is known to be 0s or we wouldn't + * have gotten this far. (e.g., 99999+1 => 00000) + */ + a[0] = '1'; return 1; } +/* + * subtract 1 from the decimal integer string a. + * if 10000 underflows into 09999, make it 99999 + * and return 1 to tell caller to move the virtual + * decimal point. this way, xsub1 is inverse of xadd1. + */ static int -xsub(char *a, int n, int v) +xsub1(char *a, int n) { char *b; int c; - for(b = a+n; b >= a; b--) { - c = *b - v; + if(n < 0 || n > NSIGNIF) + return 0; + for(b = a+n-1; b >= a; b--) { + c = *b - 1; if(c >= '0') { + if(c == '0' && b == a) { + /* + * just zeroed the top digit; shift everyone up. + * decimal is known to be 9s or we wouldn't + * have gotten this far. (e.g., 10000-1 => 09999) + */ + *b = '9'; + return 1; + } *b = c; return 0; } *b = '9'; - v = 1; } - *a = '9'; /* underflow subtracting */ - return 1; + /* + * can't get here. the number a is always normalized + * so that it has a nonzero first digit. + */ + abort(); } +/* + * format exponent like sprintf(p, "e%+02d", e) + */ static void -xdtoa(Fmt *fmt, char *s2, double f) +xfmtexp(char *p, int e, int ucase) { - char s1[NSIGNIF+10]; - double g, h; - int e, d, i, n; - int c1, c2, c3, c4, ucase, sign, chr, prec; + char se[9]; + int i; - prec = FDEFLT; - if(fmt->flags & FmtPrec) - prec = fmt->prec; - if(prec > FDIGIT) - prec = FDIGIT; - if(__isNaN(f)) { - strcpy(s2, "NaN"); - return; + *p++ = ucase ? 'E' : 'e'; + if(e < 0) { + *p++ = '-'; + e = -e; + } else + *p++ = '+'; + i = 0; + while(e) { + se[i++] = e % 10 + '0'; + e /= 10; } - if(__isInf(f, 1)) { - strcpy(s2, "+Inf"); - return; - } - if(__isInf(f, -1)) { - strcpy(s2, "-Inf"); - return; - } - sign = 0; + while(i < 2) + se[i++] = '0'; + while(i > 0) + *p++ = se[--i]; + *p++ = '\0'; +} + +/* + * compute decimal integer m, exp such that: + * f = m*10^exp + * m is as short as possible with losing exactness + * assumes special cases (NaN, +Inf, -Inf) have been handled. + */ +static void +xdtoa(double f, char *s, int *exp, int *neg, int *ns) +{ + int c, d, e2, e, ee, i, ndigit, oerrno; + char tmp[NSIGNIF+10]; + double g; + + oerrno = errno; /* in case strtod smashes errno */ + + /* + * make f non-negative. + */ + *neg = 0; if(f < 0) { f = -f; - sign++; - } - ucase = 0; - chr = fmt->r; - if(isupper(chr)) { - ucase = 1; - chr = tolower(chr); - } - - e = 0; - g = f; - if(g != 0) { - frexp(f, &e); - e = e * .301029995664; - if(e >= -150 && e <= +150) { - d = 0; - h = f; - } else { - d = e/2; - h = f * pow10(-d); - } - g = h * pow10(d-e); - while(g < 1) { - e--; - g = h * pow10(d-e); - } - while(g >= 10) { - e++; - g = h * pow10(d-e); - } + *neg = 1; } /* - * convert NSIGNIF digits and convert - * back to get accuracy. + * must handle zero specially. + */ + if(f == 0){ + *exp = 0; + s[0] = '0'; + s[1] = '\0'; + *ns = 1; + return; + } + + /* + * find g,e such that f = g*10^e. + * guess 10-exponent using 2-exponent, then fine tune. + */ + frexp(f, &e2); + e = (int)(e2 * .301029995664); + g = f * pow10(-e); + while(g < 1) { + e--; + g = f * pow10(-e); + } + while(g >= 10) { + e++; + g = f * pow10(-e); + } + + /* + * convert NSIGNIF digits as a first approximation. */ for(i=0; i= NSIGNIF-2) { - strcpy(s2, s1); - d = e; - s1[NSIGNIF-2] = '0'; - s1[NSIGNIF-1] = '0'; - sprint(s1+NSIGNIF, "e%d", e-NSIGNIF+1); - g = strtod(s1, nil); - if(g == f) - goto found; - if(xadd(s1, NSIGNIF-3, 1)) { - e++; - sprint(s1+NSIGNIF, "e%d", e-NSIGNIF+1); - } - g = strtod(s1, nil); - if(g == f) - goto found; - strcpy(s1, s2); - e = d; - } + e -= NSIGNIF-1; + xfmtexp(s+NSIGNIF, e, 0); /* - * convert back so s1 gets exact answer + * adjust conversion until strtod(s) == f exactly. */ - for(;;) { - sprint(s1+NSIGNIF, "e%d", e-NSIGNIF+1); - g = strtod(s1, nil); + for(i=0; i<10; i++) { + g = fmtstrtod(s, nil); if(f > g) { - if(xadd(s1, NSIGNIF-1, 1)) + if(xadd1(s, NSIGNIF)) { + /* gained a digit */ e--; + xfmtexp(s+NSIGNIF, e, 0); + } continue; } if(f < g) { - if(xsub(s1, NSIGNIF-1, 1)) + if(xsub1(s, NSIGNIF)) { + /* lost a digit */ e++; + xfmtexp(s+NSIGNIF, e, 0); + } continue; } break; } -found: /* - * sign + * play with the decimal to try to simplify. */ - d = 0; - i = 0; - if(sign) - s2[d++] = '-'; - else if(fmt->flags & FmtSign) - s2[d++] = '+'; - else if(fmt->flags & FmtSpace) - s2[d++] = ' '; /* - * copy into final place - * c1 digits of leading '0' - * c2 digits from conversion - * c3 digits of trailing '0' - * c4 digits after '.' + * bump last few digits up to 9 if we can */ - c1 = 0; - c2 = prec + 1; - c3 = 0; - c4 = prec; - switch(chr) { - default: - if(xadd(s1, c2, 5)) - e++; - break; - case 'g': - /* - * decide on 'e' of 'f' style convers - */ - if(xadd(s1, c2, 5)) - e++; - if(e >= -5 && e <= prec) { - c1 = -e - 1; - c4 = prec - e; - chr = 'h'; // flag for 'f' style - } - break; - case 'f': - if(xadd(s1, c2+e, 5)) - e++; - c1 = -e; - if(c1 > prec) - c1 = c2; - c2 += e; - break; - } - - /* - * clean up c1 c2 and c3 - */ - if(c1 < 0) - c1 = 0; - if(c2 < 0) - c2 = 0; - if(c2 > NSIGNIF) { - c3 = c2-NSIGNIF; - c2 = NSIGNIF; - } - - /* - * copy digits - */ - while(c1 > 0) { - if(c1+c2+c3 == c4) - s2[d++] = '.'; - s2[d++] = '0'; - c1--; - } - while(c2 > 0) { - if(c2+c3 == c4) - s2[d++] = '.'; - s2[d++] = s1[i++]; - c2--; - } - while(c3 > 0) { - if(c3 == c4) - s2[d++] = '.'; - s2[d++] = '0'; - c3--; - } - - /* - * strip trailing '0' on g conv - */ - if(fmt->flags & FmtSharp) { - if(0 == c4) - s2[d++] = '.'; - } else - if(chr == 'g' || chr == 'h') { - for(n=d-1; n>=0; n--) - if(s2[n] != '0') - break; - for(i=n; i>=0; i--) - if(s2[i] == '.') { - d = n; - if(i != n) - d++; + for(i=NSIGNIF-1; i>=NSIGNIF-3; i--) { + c = s[i]; + if(c != '9') { + s[i] = '9'; + g = fmtstrtod(s, nil); + if(g != f) { + s[i] = c; break; } - } - if(chr == 'e' || chr == 'g') { - if(ucase) - s2[d++] = 'E'; - else - s2[d++] = 'e'; - c1 = e; - if(c1 < 0) { - s2[d++] = '-'; - c1 = -c1; - } else - s2[d++] = '+'; - if(c1 >= 100) { - s2[d++] = c1/100 + '0'; - c1 = c1%100; } - s2[d++] = c1/10 + '0'; - s2[d++] = c1%10 + '0'; } - s2[d] = 0; + + /* + * add 1 in hopes of turning 9s to 0s + */ + if(s[NSIGNIF-1] == '9') { + strcpy(tmp, s); + ee = e; + if(xadd1(tmp, NSIGNIF)) { + ee--; + xfmtexp(tmp+NSIGNIF, ee, 0); + } + g = fmtstrtod(tmp, nil); + if(g == f) { + strcpy(s, tmp); + e = ee; + } + } + + /* + * bump last few digits down to 0 as we can. + */ + for(i=NSIGNIF-1; i>=NSIGNIF-3; i--) { + c = s[i]; + if(c != '0') { + s[i] = '0'; + g = fmtstrtod(s, nil); + if(g != f) { + s[i] = c; + break; + } + } + } + + /* + * remove trailing zeros. + */ + ndigit = NSIGNIF; + while(ndigit > 1 && s[ndigit-1] == '0'){ + e++; + --ndigit; + } + s[ndigit] = 0; + *exp = e; + *ns = ndigit; + errno = oerrno; } -static int -floatfmt(Fmt *fmt, double f) -{ - char s[FDIGIT+10]; +#ifdef PLAN9PORT +static char *special[] = { "NaN", "NaN", "+Inf", "+Inf", "-Inf", "-Inf" }; +#else +static char *special[] = { "nan", "NAN", "inf", "INF", "-inf", "-INF" }; +#endif - xdtoa(fmt, s, f); - fmt->flags &= FmtWidth|FmtLeft; - __fmtcpy(fmt, s, strlen(s), strlen(s)); +int +__efgfmt(Fmt *fmt) +{ + char buf[NSIGNIF+10], *dot, *digits, *p, *s, suf[10], *t; + double f; + int c, chr, dotwid, e, exp, fl, ndigits, neg, newndigits; + int pad, point, prec, realchr, sign, sufwid, ucase, wid, z1, z2; + Rune r, *rs, *rt; + + if(fmt->flags&FmtLong) + f = va_arg(fmt->args, long double); + else + f = va_arg(fmt->args, double); + + /* + * extract formatting flags + */ + fl = fmt->flags; + fmt->flags = 0; + prec = FDEFLT; + if(fl & FmtPrec) + prec = fmt->prec; + chr = fmt->r; + ucase = 0; + switch(chr) { + case 'A': + case 'E': + case 'F': + case 'G': + chr += 'a'-'A'; + ucase = 1; + break; + } + + /* + * pick off special numbers. + */ + if(__isNaN(f)) { + s = special[0+ucase]; + special: + fmt->flags = fl & (FmtWidth|FmtLeft); + return __fmtcpy(fmt, s, strlen(s), strlen(s)); + } + if(__isInf(f, 1)) { + s = special[2+ucase]; + goto special; + } + if(__isInf(f, -1)) { + s = special[4+ucase]; + goto special; + } + + /* + * get exact representation. + */ + digits = buf; + xdtoa(f, digits, &exp, &neg, &ndigits); + + /* + * get locale's decimal point. + */ + dot = fmt->decimal; + if(dot == nil) + dot = "."; + dotwid = utflen(dot); + + /* + * now the formatting fun begins. + * compute parameters for actual fmt: + * + * pad: number of spaces to insert before/after field. + * z1: number of zeros to insert before digits + * z2: number of zeros to insert after digits + * point: number of digits to print before decimal point + * ndigits: number of digits to use from digits[] + * suf: trailing suffix, like "e-5" + */ + realchr = chr; + switch(chr){ + case 'g': + /* + * convert to at most prec significant digits. (prec=0 means 1) + */ + if(prec == 0) + prec = 1; + if(ndigits > prec) { + if(digits[prec] >= '5' && xadd1(digits, prec)) + exp++; + exp += ndigits-prec; + ndigits = prec; + } + + /* + * extra rules for %g (implemented below): + * trailing zeros removed after decimal unless FmtSharp. + * decimal point only if digit follows. + */ + + /* fall through to %e */ + default: + case 'e': + /* + * one significant digit before decimal, no leading zeros. + */ + point = 1; + z1 = 0; + + /* + * decimal point is after ndigits digits right now. + * slide to be after first. + */ + e = exp + (ndigits-1); + + /* + * if this is %g, check exponent and convert prec + */ + if(realchr == 'g') { + if(-4 <= e && e < prec) + goto casef; + prec--; /* one digit before decimal; rest after */ + } + + /* + * compute trailing zero padding or truncate digits. + */ + if(1+prec >= ndigits) + z2 = 1+prec - ndigits; + else { + /* + * truncate digits + */ + assert(realchr != 'g'); + newndigits = 1+prec; + if(digits[newndigits] >= '5' && xadd1(digits, newndigits)) { + /* + * had 999e4, now have 100e5 + */ + e++; + } + ndigits = newndigits; + z2 = 0; + } + xfmtexp(suf, e, ucase); + sufwid = strlen(suf); + break; + + casef: + case 'f': + /* + * determine where digits go with respect to decimal point + */ + if(ndigits+exp > 0) { + point = ndigits+exp; + z1 = 0; + } else { + point = 1; + z1 = 1 + -(ndigits+exp); + } + + /* + * %g specifies prec = number of significant digits + * convert to number of digits after decimal point + */ + if(realchr == 'g') + prec += z1 - point; + + /* + * compute trailing zero padding or truncate digits. + */ + if(point+prec >= z1+ndigits) + z2 = point+prec - (z1+ndigits); + else { + /* + * truncate digits + */ + assert(realchr != 'g'); + newndigits = point+prec - z1; + if(newndigits < 0) { + z1 += newndigits; + newndigits = 0; + } else if(newndigits == 0) { + /* perhaps round up */ + if(digits[0] >= '5'){ + digits[0] = '1'; + newndigits = 1; + goto newdigit; + } + } else if(digits[newndigits] >= '5' && xadd1(digits, newndigits)) { + /* + * digits was 999, is now 100; make it 1000 + */ + digits[newndigits++] = '0'; + newdigit: + /* + * account for new digit + */ + if(z1) /* 0.099 => 0.100 or 0.99 => 1.00*/ + z1--; + else /* 9.99 => 10.00 */ + point++; + } + z2 = 0; + ndigits = newndigits; + } + sufwid = 0; + break; + } + + /* + * if %g is given without FmtSharp, remove trailing zeros. + * must do after truncation, so that e.g. print %.3g 1.001 + * produces 1, not 1.00. sorry, but them's the rules. + */ + if(realchr == 'g' && !(fl & FmtSharp)) { + if(z1+ndigits+z2 >= point) { + if(z1+ndigits < point) + z2 = point - (z1+ndigits); + else{ + z2 = 0; + while(z1+ndigits > point && digits[ndigits-1] == '0') + ndigits--; + } + } + } + + /* + * compute width of all digits and decimal point and suffix if any + */ + wid = z1+ndigits+z2; + if(wid > point) + wid += dotwid; + else if(wid == point){ + if(fl & FmtSharp) + wid += dotwid; + else + point++; /* do not print any decimal point */ + } + wid += sufwid; + + /* + * determine sign + */ + sign = 0; + if(neg) + sign = '-'; + else if(fl & FmtSign) + sign = '+'; + else if(fl & FmtSpace) + sign = ' '; + if(sign) + wid++; + + /* + * compute padding + */ + pad = 0; + if((fl & FmtWidth) && fmt->width > wid) + pad = fmt->width - wid; + if(pad && !(fl & FmtLeft) && (fl & FmtZero)){ + z1 += pad; + point += pad; + pad = 0; + } + + /* + * format the actual field. too bad about doing this twice. + */ + if(fmt->runes){ + if(pad && !(fl & FmtLeft) && __rfmtpad(fmt, pad) < 0) + return -1; + rt = (Rune*)fmt->to; + rs = (Rune*)fmt->stop; + if(sign) + FMTRCHAR(fmt, rt, rs, sign); + while(z1>0 || ndigits>0 || z2>0) { + if(z1 > 0){ + z1--; + c = '0'; + }else if(ndigits > 0){ + ndigits--; + c = *digits++; + }else{ + z2--; + c = '0'; + } + FMTRCHAR(fmt, rt, rs, c); + if(--point == 0) { + for(p = dot; *p; ){ + p += chartorune(&r, p); + FMTRCHAR(fmt, rt, rs, r); + } + } + } + fmt->nfmt += rt - (Rune*)fmt->to; + fmt->to = rt; + if(sufwid && __fmtcpy(fmt, suf, sufwid, sufwid) < 0) + return -1; + if(pad && (fl & FmtLeft) && __rfmtpad(fmt, pad) < 0) + return -1; + }else{ + if(pad && !(fl & FmtLeft) && __fmtpad(fmt, pad) < 0) + return -1; + t = (char*)fmt->to; + s = (char*)fmt->stop; + if(sign) + FMTCHAR(fmt, t, s, sign); + while(z1>0 || ndigits>0 || z2>0) { + if(z1 > 0){ + z1--; + c = '0'; + }else if(ndigits > 0){ + ndigits--; + c = *digits++; + }else{ + z2--; + c = '0'; + } + FMTCHAR(fmt, t, s, c); + if(--point == 0) + for(p=dot; *p; p++) + FMTCHAR(fmt, t, s, *p); + } + fmt->nfmt += t - (char*)fmt->to; + fmt->to = t; + if(sufwid && __fmtcpy(fmt, suf, sufwid, sufwid) < 0) + return -1; + if(pad && (fl & FmtLeft) && __fmtpad(fmt, pad) < 0) + return -1; + } return 0; } -int -__efgfmt(Fmt *f) -{ - double d; - - d = va_arg(f->args, double); - return floatfmt(f, d); -} diff --git a/lib/libfmt/fmt.c b/lib/libfmt/fmt.c index 0d9639d4..491103b5 100644 --- a/lib/libfmt/fmt.c +++ b/lib/libfmt/fmt.c @@ -40,6 +40,7 @@ static Convfmt knownfmt[] = { ' ', __flagfmt, '#', __flagfmt, '%', __percentfmt, + '\'', __flagfmt, '+', __flagfmt, ',', __flagfmt, '-', __flagfmt, diff --git a/lib/libfmt/fmtdef.h b/lib/libfmt/fmtdef.h index d3678312..5c04cb91 100644 --- a/lib/libfmt/fmtdef.h +++ b/lib/libfmt/fmtdef.h @@ -53,6 +53,7 @@ void __fmtunlock(void); int __ifmt(Fmt *f); int __isInf(double d, int sign); int __isNaN(double d); +int __needsep(int *ndig, char **grouping); int __needsquotes(char *s, int *quotelenp); int __percentfmt(Fmt *f); void __quotesetup(char *s, Rune *r, int nin, int nout, Quoteinfo *q, int sharp, int runesout); diff --git a/lib/libfmt/fmtfd.c b/lib/libfmt/fmtfd.c index 9f35f02a..277864af 100644 --- a/lib/libfmt/fmtfd.c +++ b/lib/libfmt/fmtfd.c @@ -41,6 +41,8 @@ fmtfdinit(Fmt *f, int fd, char *buf, int size) f->stop = buf + size; f->flush = __fmtFdFlush; f->farg = (void*)(uintptr_t)fd; + f->flags = 0; f->nfmt = 0; + fmtlocaleinit(f, nil, nil, nil); return 0; } diff --git a/lib/libfmt/fmtlocale.c b/lib/libfmt/fmtlocale.c new file mode 100644 index 00000000..9ebdced3 --- /dev/null +++ b/lib/libfmt/fmtlocale.c @@ -0,0 +1,55 @@ +/* Copyright (c) 2004 Google Inc.; see LICENSE */ + +#include +#include +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * Fill in the internationalization stuff in the State structure. + * For nil arguments, provide the sensible defaults: + * decimal is a period + * thousands separator is a comma + * thousands are marked every three digits + */ +void +fmtlocaleinit(Fmt *f, char *decimal, char *thousands, char *grouping) +{ + if(decimal == nil || decimal[0] == '\0') + decimal = "."; + if(thousands == nil) + thousands = ","; + if(grouping == nil) + grouping = "\3"; + f->decimal = decimal; + f->thousands = thousands; + f->grouping = grouping; +} + +/* + * We are about to emit a digit in e.g. %'d. If that digit would + * overflow a thousands (e.g.) grouping, tell the caller to emit + * the thousands separator. Always advance the digit counter + * and pointer into the grouping descriptor. + */ +int +__needsep(int *ndig, char **grouping) +{ + int group; + + (*ndig)++; + group = *(unsigned char*)*grouping; + /* CHAR_MAX means no further grouping. \0 means we got the empty string */ + if(group == 0xFF || group == 0x7f || group == 0x00) + return 0; + if(*ndig > group){ + /* if we're at end of string, continue with this grouping; else advance */ + if((*grouping)[1] != '\0') + (*grouping)++; + *ndig = 1; + return 1; + } + return 0; +} + diff --git a/lib/libfmt/fmtquote.c b/lib/libfmt/fmtquote.c index b6f2e179..d23fd0a6 100644 --- a/lib/libfmt/fmtquote.c +++ b/lib/libfmt/fmtquote.c @@ -120,8 +120,10 @@ qstrfmt(char *sin, Rune *rin, Quoteinfo *q, Fmt *f) rm = rin; rme = rm + q->nrunesin; - w = f->width; fl = f->flags; + w = 0; + if(fl & FmtWidth) + w = f->width; if(f->runes){ if(!(fl & FmtLeft) && __rfmtpad(f, w - q->nrunesout) < 0) return -1; @@ -209,7 +211,7 @@ __quotestrfmt(int runesin, Fmt *f) outlen = (char*)f->stop - (char*)f->to; __quotesetup(s, r, nin, outlen, &q, f->flags&FmtSharp, f->runes); -//print("bytes in %d bytes out %d runes in %d runesout %d\n", q.nbytesin, q.nbytesout, q.nrunesin, q.nrunesout); +/*print("bytes in %d bytes out %d runes in %d runesout %d\n", q.nbytesin, q.nbytesout, q.nrunesin, q.nrunesout); */ if(runesin){ if(!q.quoted) diff --git a/lib/libfmt/fmtstr.c b/lib/libfmt/fmtstr.c index a5f8f8d7..df66051f 100644 --- a/lib/libfmt/fmtstr.c +++ b/lib/libfmt/fmtstr.c @@ -23,5 +23,6 @@ fmtstrflush(Fmt *f) if(f->start == nil) return nil; *(char*)f->to = '\0'; + f->to = f->start; return (char*)f->start; } diff --git a/lib/libfmt/nan64.c b/lib/libfmt/nan64.c index d7946b75..a7bd6460 100644 --- a/lib/libfmt/nan64.c +++ b/lib/libfmt/nan64.c @@ -6,58 +6,67 @@ */ #include "plan9.h" +#include #include "fmt.h" #include "fmtdef.h" -#if defined (__APPLE__) || (__powerpc__) -#define _NEEDLL -#endif - static uvlong uvnan = ((uvlong)0x7FF00000<<32)|0x00000001; static uvlong uvinf = ((uvlong)0x7FF00000<<32)|0x00000000; static uvlong uvneginf = ((uvlong)0xFFF00000<<32)|0x00000000; +/* gcc sees through the obvious casts. */ +static uvlong +d2u(double d) +{ + union { + uvlong v; + double d; + } u; + assert(sizeof(u.d) == sizeof(u.v)); + u.d = d; + return u.v; +} + +static double +u2d(uvlong v) +{ + union { + uvlong v; + double d; + } u; + assert(sizeof(u.d) == sizeof(u.v)); + u.v = v; + return u.d; +} + double __NaN(void) { - uvlong *p; - - /* gcc complains about "return *(double*)&uvnan;" */ - p = &uvnan; - return *(double*)p; + return u2d(uvnan); } int __isNaN(double d) { uvlong x; - double *p; - - p = &d; - x = *(uvlong*)p; - return (ulong)(x>>32)==0x7FF00000 && !__isInf(d, 0); + + x = d2u(d); + /* IEEE 754: exponent bits 0x7FF and non-zero mantissa */ + return (x&uvinf) == uvinf && (x&~uvneginf) != 0; } double __Inf(int sign) { - uvlong *p; - - if(sign < 0) - p = &uvinf; - else - p = &uvneginf; - return *(double*)p; + return u2d(sign < 0 ? uvneginf : uvinf); } int __isInf(double d, int sign) { uvlong x; - double *p; - - p = &d; - x = *(uvlong*)p; + + x = d2u(d); if(sign == 0) return x==uvinf || x==uvneginf; else if(sign > 0) diff --git a/lib/libfmt/runefmtstr.c b/lib/libfmt/runefmtstr.c index e17bc166..4ac51611 100644 --- a/lib/libfmt/runefmtstr.c +++ b/lib/libfmt/runefmtstr.c @@ -23,5 +23,6 @@ runefmtstrflush(Fmt *f) if(f->start == nil) return nil; *(Rune*)f->to = '\0'; + f->to = f->start; return f->start; } diff --git a/lib/libfmt/runevseprint.c b/lib/libfmt/runevseprint.c index ee9d9c5e..2eda859e 100644 --- a/lib/libfmt/runevseprint.c +++ b/lib/libfmt/runevseprint.c @@ -32,6 +32,7 @@ runevseprint(Rune *buf, Rune *e, const char *fmt, va_list args) f.farg = nil; f.nfmt = 0; va_copy(f.args,args); + fmtlocaleinit(&f, nil, nil, nil); dofmt(&f, fmt); va_end(f.args); *(Rune*)f.to = '\0'; diff --git a/lib/libfmt/runevsmprint.c b/lib/libfmt/runevsmprint.c index 002d35e0..121a00ae 100644 --- a/lib/libfmt/runevsmprint.c +++ b/lib/libfmt/runevsmprint.c @@ -63,6 +63,7 @@ runefmtstrinit(Fmt *f) f->flush = runeFmtStrFlush; f->farg = (void*)(uintptr_t)n; f->nfmt = 0; + fmtlocaleinit(f, nil, nil, nil); return 0; } diff --git a/lib/libfmt/runevsnprint.c b/lib/libfmt/runevsnprint.c index e602be0a..456201ef 100644 --- a/lib/libfmt/runevsnprint.c +++ b/lib/libfmt/runevsnprint.c @@ -32,6 +32,7 @@ runevsnprint(Rune *buf, int len, const char *fmt, va_list args) f.farg = nil; f.nfmt = 0; va_copy(f.args,args); + fmtlocaleinit(&f, nil, nil, nil); dofmt(&f, fmt); va_end(f.args); *(Rune*)f.to = '\0'; diff --git a/lib/libfmt/sprint.c b/lib/libfmt/sprint.c index 897c9786..df4e6209 100644 --- a/lib/libfmt/sprint.c +++ b/lib/libfmt/sprint.c @@ -26,8 +26,11 @@ sprint(char *buf, const char *fmt, ...) /* * on PowerPC, the stack is near the top of memory, so * we must be sure not to overflow a 32-bit pointer. + * + * careful! gcc-4.2 assumes buf+len < buf can never be true and + * optimizes the test away. casting to uintptr works around this bug. */ - if(buf+len < buf) + if((uintptr_t)buf+len < (uintptr_t)buf) len = -(uintptr_t)buf-1; va_start(args, fmt); diff --git a/lib/libfmt/strtod.c b/lib/libfmt/strtod.c index fbc1c59e..8859e5ff 100644 --- a/lib/libfmt/strtod.c +++ b/lib/libfmt/strtod.c @@ -67,7 +67,7 @@ enum S4, /* _+#.# #S4 eS5 */ S5, /* _+#.#e +S6 #S7 */ S6, /* _+#.#e+ #S7 */ - S7, /* _+#.#e+# #S7 */ + S7 /* _+#.#e+# #S7 */ }; static int xcmp(char*, char*); @@ -239,7 +239,7 @@ fmtstrtod(const char *as, char **aas) /* close approx by naive conversion */ mid[0] = 0; mid[1] = 1; - for(i=0; c=a[i]; i++) { + for(i=0; (c=a[i]) != '\0'; i++) { mid[0] = mid[0]*10 + (c-'0'); mid[1] = mid[1]*10; if(i >= 8) @@ -521,7 +521,7 @@ xcmp(char *a, char *b) { int c1, c2; - while(c1 = *b++) { + while((c1 = *b++) != '\0') { c2 = *a++; if(isupper(c2)) c2 = tolower(c2); diff --git a/lib/libfmt/test.c b/lib/libfmt/test.c index 04296e21..32f0fc2e 100644 --- a/lib/libfmt/test.c +++ b/lib/libfmt/test.c @@ -40,5 +40,24 @@ main(int argc, char *argv[]) print("%d\n", 23); print("%i\n", 23); print("%0.10d\n", 12345); + + /* test %4$d formats */ + print("%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222); + print("%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222); + print("%3$d %4$*5$06d %2$d %1$d\n", 444, 333, 111, 222, 20); + print("%3$hd %4$*5$06d %2$d %1$d\n", 444, 333, (short)111, 222, 20); + print("%3$lld %4$*5$06d %2$d %1$d\n", 444, 333, 111LL, 222, 20); + + /* test %'d formats */ + print("%'d %'d %'d\n", 1, 2222, 33333333); + print("%'019d\n", 0); + print("%08d %08d %08d\n", 1, 2222, 33333333); + print("%'08d %'08d %'08d\n", 1, 2222, 33333333); + print("%'x %'X %'b\n", 0x11111111, 0xabcd1234, 12345); + print("%'lld %'lld %'lld\n", 1LL, 222222222LL, 3333333333333LL); + print("%019lld %019lld %019lld\n", 1LL, 222222222LL, 3333333333333LL); + print("%'019lld %'019lld %'019lld\n", 1LL, 222222222LL, 3333333333333LL); + print("%'020lld %'020lld %'020lld\n", 1LL, 222222222LL, 3333333333333LL); + print("%'llx %'llX %'llb\n", 0x111111111111LL, 0xabcd12345678LL, 112342345LL); return 0; } diff --git a/lib/libfmt/vseprint.c b/lib/libfmt/vseprint.c index c55dc328..087bf9fc 100644 --- a/lib/libfmt/vseprint.c +++ b/lib/libfmt/vseprint.c @@ -31,6 +31,7 @@ vseprint(char *buf, char *e, const char *fmt, va_list args) f.farg = nil; f.nfmt = 0; va_copy(f.args,args); + fmtlocaleinit(&f, nil, nil, nil); dofmt(&f, fmt); va_end(f.args); *(char*)f.to = '\0'; diff --git a/lib/libfmt/vsmprint.c b/lib/libfmt/vsmprint.c index 2d9dd6f9..013e7ad1 100644 --- a/lib/libfmt/vsmprint.c +++ b/lib/libfmt/vsmprint.c @@ -63,6 +63,7 @@ fmtstrinit(Fmt *f) f->flush = fmtStrFlush; f->farg = (void*)(uintptr_t)n; f->nfmt = 0; + fmtlocaleinit(f, nil, nil, nil); return 0; } diff --git a/lib/libfmt/vsnprint.c b/lib/libfmt/vsnprint.c index 7c79b9c2..2b7c4b2c 100644 --- a/lib/libfmt/vsnprint.c +++ b/lib/libfmt/vsnprint.c @@ -32,6 +32,7 @@ vsnprint(char *buf, int len, const char *fmt, va_list args) f.farg = nil; f.nfmt = 0; va_copy(f.args,args); + fmtlocaleinit(&f, nil, nil, nil); dofmt(&f, fmt); va_end(f.args); *(char*)f.to = '\0'; diff --git a/lib/libregexp/rregexec.c b/lib/libregexp/rregexec.c index 6071e7d7..56b22e9a 100644 --- a/lib/libregexp/rregexec.c +++ b/lib/libregexp/rregexec.c @@ -25,6 +25,7 @@ rregexec1(Reprog *progp, /* program to run */ Relist* tle; /* ends of this and next list */ Relist* nle; int match; + Rune *p; match = 0; checkstart = j->startchar; @@ -44,20 +45,18 @@ rregexec1(Reprog *progp, /* program to run */ if(checkstart) { switch(j->starttype) { case RUNE: - while(*s != j->startchar) { - if(*s == 0 || s == j->reol) - return match; - s++; - } + p = runestrchr(s, j->startchar); + if(p == 0 || p == j->reol) + return match; + s = p; break; case BOL: if(s == bol) break; - while(*s != '\n') { - if(*s == 0 || s == j->reol) - return match; - s++; - } + p = runestrchr(s, '\n'); + if(p == 0 || s == j->reol) + return match; + s = p+1; break; } } diff --git a/lib/libutf/rune.c b/lib/libutf/rune.c index 19339ed9..ff68ff07 100644 --- a/lib/libutf/rune.c +++ b/lib/libutf/rune.c @@ -23,27 +23,30 @@ enum Bit2 = 5, Bit3 = 4, Bit4 = 3, + Bit5 = 2, T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ + T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ - Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ - Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ - Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ + Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ + Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ + Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ + Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ Maskx = (1< T4 Tx Tx Tx + */ + if(UTFmax >= 4) { + c3 = *(uchar*)(str+3) ^ Tx; + if(c3 & Testx) + goto bad; + if(c < T5) { + l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; + if(l <= Rune3) + goto bad; + if(l > Runemax) + goto bad; + *rune = l; + return 4; + } + } + /* * bad decoding */ @@ -113,7 +135,7 @@ runetochar(char *str, const Rune *rune) /* * two character sequence - * 0080-07FF => T2 Tx + * 00080-007FF => T2 Tx */ if(c <= Rune2) { str[0] = T2 | (c >> 1*Bitx); @@ -123,20 +145,36 @@ runetochar(char *str, const Rune *rune) /* * three character sequence - * 0800-FFFF => T3 Tx Tx + * 00800-0FFFF => T3 Tx Tx */ - str[0] = T3 | (c >> 2*Bitx); - str[1] = Tx | ((c >> 1*Bitx) & Maskx); - str[2] = Tx | (c & Maskx); - return 3; + if(c > Runemax) + c = Runeerror; + if(c <= Rune3) { + str[0] = T3 | (c >> 2*Bitx); + str[1] = Tx | ((c >> 1*Bitx) & Maskx); + str[2] = Tx | (c & Maskx); + return 3; + } + + /* + * four character sequence + * 010000-1FFFFF => T4 Tx Tx Tx + */ + str[0] = T4 | (c >> 3*Bitx); + str[1] = Tx | ((c >> 2*Bitx) & Maskx); + str[2] = Tx | ((c >> 1*Bitx) & Maskx); + str[3] = Tx | (c & Maskx); + return 4; } int -runelen(Rune c) +runelen(long c) { + Rune rune; char str[10]; - return runetochar(str, &c); + rune = c; + return runetochar(str, &rune); } int @@ -153,7 +191,10 @@ runenlen(const Rune *r, int nrune) if(c <= Rune2) nb += 2; else + if(c <= Rune3 || c > Runemax) nb += 3; + else + nb += 4; } return nb; } @@ -163,13 +204,14 @@ fullrune(const char *str, int n) { int c; - if(n > 0) { - c = *(uchar*)str; - if(c < Tx) - return 1; - if(n > 1) - if(c < T3 || n > 2) - return 1; - } - return 0; + if(n <= 0) + return 0; + c = *(uchar*)str; + if(c < Tx) + return 1; + if(c < T3) + return n >= 2; + if(UTFmax == 3 || c < T4) + return n >= 3; + return n >= 4; }