From 8c8d735eb7c610811da2cd6efc7a8d59420b4b37 Mon Sep 17 00:00:00 2001
From: Fabian Giesen <rygorous@gmail.com>
Date: Fri, 12 Aug 2016 13:44:11 -0700
Subject: [PATCH 01/23] stb_image: More input validation in deflate decoder

Fixes issue #312.
---
 stb_image.h | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/stb_image.h b/stb_image.h
index a3c1129..d4b623d 100644
--- a/stb_image.h
+++ b/stb_image.h
@@ -3721,6 +3721,7 @@ static int stbi__compute_huffman_codes(stbi__zbuf *a)
    int hlit  = stbi__zreceive(a,5) + 257;
    int hdist = stbi__zreceive(a,5) + 1;
    int hclen = stbi__zreceive(a,4) + 4;
+   int ntot  = hlit + hdist;
 
    memset(codelength_sizes, 0, sizeof(codelength_sizes));
    for (i=0; i < hclen; ++i) {
@@ -3730,27 +3731,29 @@ static int stbi__compute_huffman_codes(stbi__zbuf *a)
    if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
 
    n = 0;
-   while (n < hlit + hdist) {
+   while (n < ntot) {
       int c = stbi__zhuffman_decode(a, &z_codelength);
       if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
       if (c < 16)
          lencodes[n++] = (stbi_uc) c;
-      else if (c == 16) {
-         c = stbi__zreceive(a,2)+3;
-         memset(lencodes+n, lencodes[n-1], c);
-         n += c;
-      } else if (c == 17) {
-         c = stbi__zreceive(a,3)+3;
-         memset(lencodes+n, 0, c);
-         n += c;
-      } else {
-         STBI_ASSERT(c == 18);
-         c = stbi__zreceive(a,7)+11;
-         memset(lencodes+n, 0, c);
+      else {
+         stbi_uc fill = 0;
+         if (c == 16) {
+            c = stbi__zreceive(a,2)+3;
+            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
+            fill = lencodes[n-1];
+         } else if (c == 17)
+            c = stbi__zreceive(a,3)+3;
+         else {
+            STBI_ASSERT(c == 18);
+            c = stbi__zreceive(a,7)+11;
+         }
+         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
+         memset(lencodes+n, fill, c);
          n += c;
       }
    }
-   if (n != hlit+hdist) return stbi__err("bad codelengths","Corrupt PNG");
+   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
    if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
    if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
    return 1;

From 02190634c2ff6e3b68fcd99b84cf4acabb70ea2e Mon Sep 17 00:00:00 2001
From: Fabian Giesen <rygorous@gmail.com>
Date: Fri, 12 Aug 2016 17:22:46 -0700
Subject: [PATCH 02/23] stb_image: Overflow checking for image allocs.

Adds some helpers that check whether a product of multiple
factors (that need to be non-negative: this is enforced)
summed with another non-negative value overflows when
performed as int. Since stb_image mostly works in ints,
this seems like the safest route. Limits size of images
to 2GB but several of the decoders already enforce this
limit (or even lower ones).

Also adds wrappers for malloc that combine a mul-add-with-
overflow-check with the actual malloc, and return NULL
on failure. Then use them when allocating something that
is the product of multiple factors.

For image formats, also add a top-level "is this too big?"
check that gives a more useful error message; otherwise,
the failed mallocs result in an "out of memory" error.
The idea is that the top-level checks should be the primary
way to catch these bugs (and produce a useful error message).
But a misleading error message is still vastly preferable to
a buffer overflow exploit.

Fixes issues #310, #313, #314, #318. (Verified with the
provided test images)

Along the way, this fixes a previously unnoticed bug in
ldr_to_hdr / hdr_to_ldr (missing NULL check); these functions
are called with the result of an image decoder, so NULLs can
definitely happen.

Another bug noticed along the way is that handling of
interlaced 16-bit PNGs was incorrect. Fixing this (along
with the previous modifications) fixes issue #311.

Yet another bug noticed during this change is that reduce_png
did not check the right pointer during its out of memory
check. Fix that too.
---
 stb_image.h | 156 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 131 insertions(+), 25 deletions(-)

diff --git a/stb_image.h b/stb_image.h
index d4b623d..2a6c45a 100644
--- a/stb_image.h
+++ b/stb_image.h
@@ -566,6 +566,7 @@ STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch
 #include <stddef.h> // ptrdiff_t on osx
 #include <stdlib.h>
 #include <string.h>
+#include <limits.h>
 
 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
 #include <math.h>  // ldexp
@@ -900,6 +901,77 @@ static void *stbi__malloc(size_t size)
     return STBI_MALLOC(size);
 }
 
+// stb_image uses ints pervasively, including for offset calculations.
+// therefore the largest decoded image size we can support with the
+// current code, even on 64-bit targets, is INT_MAX. this is not a
+// significant limitation for the intended use case.
+//
+// we do, however, need to make sure our size calculations don't
+// overflow. hence a few helper functions for size calculations that
+// multiply integers together, making sure that they're non-negative
+// and no overflow occurs.
+
+// return 1 if the sum is valid, 0 on overflow.
+// negative terms are considered invalid.
+static int stbi__addsizes_valid(int a, int b)
+{
+   if (b < 0) return 0;
+   // now 0 <= b <= INT_MAX, hence also
+   // 0 <= INT_MAX - b <= INTMAX.
+   // And "a + b <= INT_MAX" (which might overflow) is the
+   // same as a <= INT_MAX - b (no overflow)
+   return a <= INT_MAX - b;
+}
+
+// returns 1 if the product is valid, 0 on overflow.
+// negative factors are considered invalid.
+static int stbi__mul2sizes_valid(int a, int b)
+{
+   if (a < 0 || b < 0) return 0;
+   if (b == 0) return 1; // mul-by-0 is always safe
+   // portable way to check for no overflows in a*b
+   return a <= INT_MAX/b;
+}
+
+// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
+static int stbi__mad2sizes_valid(int a, int b, int add)
+{
+   return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add);
+}
+
+// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
+static int stbi__mad3sizes_valid(int a, int b, int c, int add)
+{
+   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
+      stbi__addsizes_valid(a*b*c, add);
+}
+
+// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
+static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
+{
+   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
+      stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add);
+}
+
+// mallocs with size overflow checking
+static void *stbi__malloc_mad2(int a, int b, int add)
+{
+   if (!stbi__mad2sizes_valid(a, b, add)) return NULL;
+   return stbi__malloc(a*b + add);
+}
+
+static void *stbi__malloc_mad3(int a, int b, int c, int add)
+{
+   if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL;
+   return stbi__malloc(a*b*c + add);
+}
+
+static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
+{
+   if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
+   return stbi__malloc(a*b*c*d + add);
+}
+
 // stbi__err - error
 // stbi__errpf - error returning pointer to float
 // stbi__errpuc - error returning pointer to unsigned char
@@ -1346,7 +1418,7 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int r
    if (req_comp == img_n) return data;
    STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
 
-   good = (unsigned char *) stbi__malloc(req_comp * x * y);
+   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
    if (good == NULL) {
       STBI_FREE(data);
       return stbi__errpuc("outofmem", "Out of memory");
@@ -1386,7 +1458,9 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int r
 static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
 {
    int i,k,n;
-   float *output = (float *) stbi__malloc(x * y * comp * sizeof(float));
+   float *output;
+   if (!data) return NULL;
+   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
    if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
    // compute number of non-alpha components
    if (comp & 1) n = comp; else n = comp-1;
@@ -1406,7 +1480,9 @@ static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
 {
    int i,k,n;
-   stbi_uc *output = (stbi_uc *) stbi__malloc(x * y * comp);
+   stbi_uc *output;
+   if (!data) return NULL;
+   output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
    if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
    // compute number of non-alpha components
    if (comp & 1) n = comp; else n = comp-1;
@@ -2738,7 +2814,7 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
 
    if (scan != STBI__SCAN_load) return 1;
 
-   if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
+   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
 
    for (i=0; i < s->img_n; ++i) {
       if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
@@ -2750,6 +2826,7 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
    z->img_v_max = v_max;
    z->img_mcu_w = h_max * 8;
    z->img_mcu_h = v_max * 8;
+   // these sizes can't be more than 17 bits
    z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
    z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
 
@@ -2761,9 +2838,12 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
       // the bogus oversized data from using interleaved MCUs and their
       // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
       // discard the extra data until colorspace conversion
+      //
+      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
+      // so these muls can't overflow with 32-bit ints (which we require)
       z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
       z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
-      z->img_comp[i].raw_data = stbi__malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15);
+      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
 
       if (z->img_comp[i].raw_data == NULL) {
          for(--i; i >= 0; --i) {
@@ -2776,9 +2856,10 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
       z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
       z->img_comp[i].linebuf = NULL;
       if (z->progressive) {
-         z->img_comp[i].coeff_w = (z->img_comp[i].w2 + 7) >> 3;
-         z->img_comp[i].coeff_h = (z->img_comp[i].h2 + 7) >> 3;
-         z->img_comp[i].raw_coeff = STBI_MALLOC(z->img_comp[i].coeff_w * z->img_comp[i].coeff_h * 64 * sizeof(short) + 15);
+         // w2, h2 are multiples of 8 (see above)
+         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
+         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
+         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
          z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
       } else {
          z->img_comp[i].coeff = 0;
@@ -3368,7 +3449,7 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
       }
 
       // can't error after this so, this is safe
-      output = (stbi_uc *) stbi__malloc(n * z->s->img_x * z->s->img_y + 1);
+      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
       if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
 
       // now go ahead and resample
@@ -4019,7 +4100,7 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r
    int width = x;
 
    STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
-   a->out = (stbi_uc *) stbi__malloc(x * y * output_bytes); // extra bytes to write off the end into
+   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
    if (!a->out) return stbi__err("outofmem", "Out of memory");
 
    img_width_bytes = (((img_n * x * depth) + 7) >> 3);
@@ -4217,13 +4298,15 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r
 
 static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
 {
+   int bytes = (depth == 16 ? 2 : 1);
+   int out_bytes = out_n * bytes;
    stbi_uc *final;
    int p;
    if (!interlaced)
       return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
 
    // de-interlacing
-   final = (stbi_uc *) stbi__malloc(a->s->img_x * a->s->img_y * out_n);
+   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
    for (p=0; p < 7; ++p) {
       int xorig[] = { 0,4,0,2,0,1,0 };
       int yorig[] = { 0,0,4,0,2,0,1 };
@@ -4243,8 +4326,8 @@ static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint3
             for (i=0; i < x; ++i) {
                int out_y = j*yspc[p]+yorig[p];
                int out_x = i*xspc[p]+xorig[p];
-               memcpy(final + out_y*a->s->img_x*out_n + out_x*out_n,
-                      a->out + (j*x+i)*out_n, out_n);
+               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
+                      a->out + (j*x+i)*out_bytes, out_bytes);
             }
          }
          STBI_FREE(a->out);
@@ -4312,7 +4395,7 @@ static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int
    stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
    stbi_uc *p, *temp_out, *orig = a->out;
 
-   p = (stbi_uc *) stbi__malloc(pixel_count * pal_img_n);
+   p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
    if (p == NULL) return stbi__err("outofmem", "Out of memory");
 
    // between here and free(out) below, exitting would leak
@@ -4354,9 +4437,10 @@ static int stbi__reduce_png(stbi__png *p)
    if (p->depth != 16) return 1; // don't need to do anything if not 16-bit data
 
    reduced = (stbi_uc *)stbi__malloc(img_len);
-   if (p == NULL) return stbi__err("outofmem", "Out of memory");
+   if (reduced == NULL) return stbi__err("outofmem", "Out of memory");
 
-   for (i = 0; i < img_len; ++i) reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is a decent approx of 16->8 bit scaling
+   for (i = 0; i < img_len; ++i)
+      reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is a decent approx of 16->8 bit scaling
 
    p->out = reduced;
    STBI_FREE(orig);
@@ -4846,7 +4930,11 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
    else
       target = s->img_n; // if they want monochrome, we'll post-convert
 
-   out = (stbi_uc *) stbi__malloc(target * s->img_x * s->img_y);
+   // sanity-check size
+   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
+      return stbi__errpuc("too large", "Corrupt BMP");
+
+   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
    if (!out) return stbi__errpuc("outofmem", "Out of memory");
    if (info.bpp < 16) {
       int z=0;
@@ -5146,7 +5234,10 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
    *y = tga_height;
    if (comp) *comp = tga_comp;
 
-   tga_data = (unsigned char*)stbi__malloc( (size_t)tga_width * tga_height * tga_comp );
+   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
+      return stbi__errpuc("too large", "Corrupt TGA");
+
+   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
    if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
 
    // skip to the data's starting position (offset usually = 0)
@@ -5165,7 +5256,7 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
          //   any data to skip? (offset usually = 0)
          stbi__skip(s, tga_palette_start );
          //   load the palette
-         tga_palette = (unsigned char*)stbi__malloc( tga_palette_len * tga_comp );
+         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
          if (!tga_palette) {
             STBI_FREE(tga_data);
             return stbi__errpuc("outofmem", "Out of memory");
@@ -5365,8 +5456,12 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
    if (compression > 1)
       return stbi__errpuc("bad compression", "PSD has an unknown compression format");
 
+   // Check size
+   if (!stbi__mad3sizes_valid(4, w, h, 0))
+      return stbi__errpuc("too large", "Corrupt PSD");
+
    // Create the destination image.
-   out = (stbi_uc *) stbi__malloc(4 * w*h);
+   out = (stbi_uc *) stbi__malloc_mad3(4, w, h, 0);
    if (!out) return stbi__errpuc("outofmem", "Out of memory");
    pixelCount = w*h;
 
@@ -5668,14 +5763,14 @@ static stbi_uc *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int re
    x = stbi__get16be(s);
    y = stbi__get16be(s);
    if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
-   if ((1 << 28) / x < y) return stbi__errpuc("too large", "Image too large to decode");
+   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
 
    stbi__get32be(s); //skip `ratio'
    stbi__get16be(s); //skip `fields'
    stbi__get16be(s); //skip `pad'
 
    // intermediate buffer is RGBA
-   result = (stbi_uc *) stbi__malloc(x*y*4);
+   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
    memset(result, 0xff, x*y*4);
 
    if (!stbi__pic_load_core(s,x,y,comp, result)) {
@@ -5934,8 +6029,11 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
    if (g->out == 0 && !stbi__gif_header(s, g, comp,0))
       return 0; // stbi__g_failure_reason set by stbi__gif_header
 
+   if (!stbi__mad3sizes_valid(g->w, g->h, 4, 0))
+      return stbi__errpuc("too large", "GIF too large");
+
    prev_out = g->out;
-   g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
+   g->out = (stbi_uc *) stbi__malloc_mad3(4, g->w, g->h, 0);
    if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
 
    switch ((g->eflags & 0x1C) >> 2) {
@@ -6182,8 +6280,13 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
    if (comp) *comp = 3;
    if (req_comp == 0) req_comp = 3;
 
+   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
+      return stbi__errpf("too large", "HDR image is too large");
+
    // Read data
-   hdr_data = (float *) stbi__malloc(height * width * req_comp * sizeof(float));
+   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
+   if (!hdr_data)
+      return stbi__errpf("outofmem", "Out of memory");
 
    // Load image data
    // image data is stored as some number of sca
@@ -6431,7 +6534,10 @@ static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int
    *y = s->img_y;
    *comp = s->img_n;
 
-   out = (stbi_uc *) stbi__malloc(s->img_n * s->img_x * s->img_y);
+   if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
+      return stbi__errpuc("too large", "PNM too large");
+
+   out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
    if (!out) return stbi__errpuc("outofmem", "Out of memory");
    stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
 

From 62f372754f8dfb1bc5ae9e25fdc3f0c75fb08001 Mon Sep 17 00:00:00 2001
From: Fabian Giesen <rygorous@gmail.com>
Date: Fri, 12 Aug 2016 17:31:53 -0700
Subject: [PATCH 03/23] stb_image: Fix HDR/PSD RLE decoders.

Runs need to be bounds checked.

Fixes issues #315, #317.
---
 stb_image.h | 89 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 56 insertions(+), 33 deletions(-)

diff --git a/stb_image.h b/stb_image.h
index 2a6c45a..3ae6c48 100644
--- a/stb_image.h
+++ b/stb_image.h
@@ -5392,11 +5392,49 @@ static int stbi__psd_test(stbi__context *s)
    return r;
 }
 
+static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
+{
+   int count, nleft, len;
+
+   count = 0;
+   while ((nleft = pixelCount - count) > 0) {
+      len = stbi__get8(s);
+      if (len == 128) {
+         // No-op.
+      } else if (len < 128) {
+         // Copy next len+1 bytes literally.
+         len++;
+         if (len > nleft) return 0; // corrupt data
+         count += len;
+         while (len) {
+            *p = stbi__get8(s);
+            p += 4;
+            len--;
+         }
+      } else if (len > 128) {
+         stbi_uc   val;
+         // Next -len+1 bytes in the dest are replicated from next source byte.
+         // (Interpret len as a negative 8-bit int.)
+         len = 257 - len;
+         if (len > nleft) return 0; // corrupt data
+         val = stbi__get8(s);
+         count += len;
+         while (len) {
+            *p = val;
+            p += 4;
+            len--;
+         }
+      }
+   }
+
+   return 1;
+}
+
 static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
 {
-   int   pixelCount;
+   int pixelCount;
    int channelCount, compression;
-   int channel, i, count, len;
+   int channel, i;
    int bitdepth;
    int w,h;
    stbi_uc *out;
@@ -5493,34 +5531,9 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
                *p = (channel == 3 ? 255 : 0);
          } else {
             // Read the RLE data.
-            count = 0;
-            while (count < pixelCount) {
-               len = stbi__get8(s);
-               if (len == 128) {
-                  // No-op.
-               } else if (len < 128) {
-                  // Copy next len+1 bytes literally.
-                  len++;
-                  count += len;
-                  while (len) {
-                     *p = stbi__get8(s);
-                     p += 4;
-                     len--;
-                  }
-               } else if (len > 128) {
-                  stbi_uc   val;
-                  // Next -len+1 bytes in the dest are replicated from next source byte.
-                  // (Interpret len as a negative 8-bit int.)
-                  len ^= 0x0FF;
-                  len += 2;
-                  val = stbi__get8(s);
-                  count += len;
-                  while (len) {
-                     *p = val;
-                     p += 4;
-                     len--;
-                  }
-               }
+            if (!stbi__psd_decode_rle(s, p, pixelCount)) {
+               STBI_FREE(out);
+               return stbi__errpuc("corrupt", "bad RLE data");
             }
          }
       }
@@ -6325,20 +6338,29 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
          len <<= 8;
          len |= stbi__get8(s);
          if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
-         if (scanline == NULL) scanline = (stbi_uc *) stbi__malloc(width * 4);
+         if (scanline == NULL) {
+            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
+            if (!scanline) {
+               STBI_FREE(hdr_data);
+               return stbi__errpf("outofmem", "Out of memory");
+            }
+         }
 
          for (k = 0; k < 4; ++k) {
+            int nleft;
             i = 0;
-            while (i < width) {
+            while ((nleft = width - i) > 0) {
                count = stbi__get8(s);
                if (count > 128) {
                   // Run
                   value = stbi__get8(s);
                   count -= 128;
+                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                   for (z = 0; z < count; ++z)
                      scanline[i++ * 4 + k] = value;
                } else {
                   // Dump
+                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                   for (z = 0; z < count; ++z)
                      scanline[i++ * 4 + k] = stbi__get8(s);
                }
@@ -6347,7 +6369,8 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
          for (i=0; i < width; ++i)
             stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
       }
-      STBI_FREE(scanline);
+      if (scanline)
+         STBI_FREE(scanline);
    }
 
    return hdr_data;

From 6b66033e18c22de830b9ea599bf448843fd104cf Mon Sep 17 00:00:00 2001
From: Fabian Giesen <rygorous@gmail.com>
Date: Fri, 12 Aug 2016 17:46:43 -0700
Subject: [PATCH 04/23] stb_image: Fix memory leak and missing out-of-mem
 check.

stbi__process_frame_header had two bugs when dealing with progressive
JPEGs:
1. when malloc failed allocating raw_data, previous components'
   raw_coeff didn't get freed
2. no out-of-memory check in raw_coeff allocation

Fix both and share a bit more cleanup code in general.
---
 stb_image.h | 59 +++++++++++++++++++++++++++--------------------------
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/stb_image.h b/stb_image.h
index 3ae6c48..82e7081 100644
--- a/stb_image.h
+++ b/stb_image.h
@@ -2777,6 +2777,28 @@ static int stbi__process_scan_header(stbi__jpeg *z)
    return 1;
 }
 
+static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
+{
+   int i;
+   for (i=0; i < ncomp; ++i) {
+      if (z->img_comp[i].raw_data) {
+         STBI_FREE(z->img_comp[i].raw_data);
+         z->img_comp[i].raw_data = NULL;
+         z->img_comp[i].data = NULL;
+      }
+      if (z->img_comp[i].raw_coeff) {
+         STBI_FREE(z->img_comp[i].raw_coeff);
+         z->img_comp[i].raw_coeff = 0;
+         z->img_comp[i].coeff = 0;
+      }
+      if (z->img_comp[i].linebuf) {
+         STBI_FREE(z->img_comp[i].linebuf);
+         z->img_comp[i].linebuf = NULL;
+      }
+   }
+   return why;
+}
+
 static int stbi__process_frame_header(stbi__jpeg *z, int scan)
 {
    stbi__context *s = z->s;
@@ -2843,27 +2865,22 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
       // so these muls can't overflow with 32-bit ints (which we require)
       z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
       z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
+      z->img_comp[i].coeff = 0;
+      z->img_comp[i].raw_coeff = 0;
+      z->img_comp[i].linebuf = NULL;
       z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
-
-      if (z->img_comp[i].raw_data == NULL) {
-         for(--i; i >= 0; --i) {
-            STBI_FREE(z->img_comp[i].raw_data);
-            z->img_comp[i].raw_data = NULL;
-         }
-         return stbi__err("outofmem", "Out of memory");
-      }
+      if (z->img_comp[i].raw_data == NULL)
+         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
       // align blocks for idct using mmx/sse
       z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
-      z->img_comp[i].linebuf = NULL;
       if (z->progressive) {
          // w2, h2 are multiples of 8 (see above)
          z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
          z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
          z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
+         if (z->img_comp[i].raw_coeff == NULL)
+            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
          z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
-      } else {
-         z->img_comp[i].coeff = 0;
-         z->img_comp[i].raw_coeff = 0;
       }
    }
 
@@ -3369,23 +3386,7 @@ static void stbi__setup_jpeg(stbi__jpeg *j)
 // clean up the temporary component buffers
 static void stbi__cleanup_jpeg(stbi__jpeg *j)
 {
-   int i;
-   for (i=0; i < j->s->img_n; ++i) {
-      if (j->img_comp[i].raw_data) {
-         STBI_FREE(j->img_comp[i].raw_data);
-         j->img_comp[i].raw_data = NULL;
-         j->img_comp[i].data = NULL;
-      }
-      if (j->img_comp[i].raw_coeff) {
-         STBI_FREE(j->img_comp[i].raw_coeff);
-         j->img_comp[i].raw_coeff = 0;
-         j->img_comp[i].coeff = 0;
-      }
-      if (j->img_comp[i].linebuf) {
-         STBI_FREE(j->img_comp[i].linebuf);
-         j->img_comp[i].linebuf = NULL;
-      }
-   }
+   stbi__free_jpeg_components(j, j->s->img_n, 0);
 }
 
 typedef struct

From a2defc3d7a273416b306ce2cf2bee49431957f31 Mon Sep 17 00:00:00 2001
From: jon <jon@jon-Lemur>
Date: Mon, 28 Nov 2016 16:05:39 -0600
Subject: [PATCH 05/23] added support for RGBE header HDR files

---
 stb_image.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/stb_image.h b/stb_image.h
index a3c1129..f9ed5a6 100644
--- a/stb_image.h
+++ b/stb_image.h
@@ -6147,10 +6147,11 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
    int len;
    unsigned char count, value;
    int i, j, k, c1,c2, z;
-
+   const char *headerToken;
 
    // Check identifier
-   if (strcmp(stbi__hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
+   headerToken = stbi__hdr_gettoken(s,buffer);
+   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
       return stbi__errpf("not HDR", "Corrupt HDR image");
 
    // Parse header

From 2a170daee5dd339bc8f3f81024a97710935c360f Mon Sep 17 00:00:00 2001
From: jon <jon@jon-Lemur>
Date: Mon, 28 Nov 2016 16:24:11 -0600
Subject: [PATCH 06/23] warning fixes, more RGBE fix

---
 stb_image.h | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/stb_image.h b/stb_image.h
index f9ed5a6..e095fb8 100644
--- a/stb_image.h
+++ b/stb_image.h
@@ -4500,7 +4500,7 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
                if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
                has_trans = 1;
                if (z->depth == 16) {
-                  for (k = 0; k < s->img_n; ++k) tc16[k] = stbi__get16be(s); // copy the values as-is
+                  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
                } else {
                   for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
                }
@@ -5079,16 +5079,16 @@ errorEnd:
 // read 16bit value and convert to 24bit RGB
 void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
 {
-   stbi__uint16 px = stbi__get16le(s);
+   stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
    stbi__uint16 fiveBitMask = 31;
    // we have 3 channels with 5bits each
    int r = (px >> 10) & fiveBitMask;
    int g = (px >> 5) & fiveBitMask;
    int b = px & fiveBitMask;
    // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
-   out[0] = (r * 255)/31;
-   out[1] = (g * 255)/31;
-   out[2] = (b * 255)/31;
+   out[0] = (stbi_uc)((r * 255)/31);
+   out[1] = (stbi_uc)((g * 255)/31);
+   out[2] = (stbi_uc)((b * 255)/31);
 
    // some people claim that the most significant bit might be used for alpha
    // (possibly if an alpha-bit is set in the "image descriptor byte")
@@ -6069,20 +6069,24 @@ static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
 // Radiance RGBE HDR loader
 // originally by Nicolas Schulz
 #ifndef STBI_NO_HDR
-static int stbi__hdr_test_core(stbi__context *s)
+static int stbi__hdr_test_core(stbi__context *s, const char *signature)
 {
-   const char *signature = "#?RADIANCE\n";
    int i;
    for (i=0; signature[i]; ++i)
       if (stbi__get8(s) != signature[i])
-         return 0;
+          return 0;
+   stbi__rewind(s);
    return 1;
 }
 
 static int stbi__hdr_test(stbi__context* s)
 {
-   int r = stbi__hdr_test_core(s);
+   int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
    stbi__rewind(s);
+   if(!r) {
+       r = stbi__hdr_test_core(s, "#?RGBE\n");
+       stbi__rewind(s);
+   }
    return r;
 }
 

From 239a6718e185d3ecbf3da6329db0c0d94ee6d7ec Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Tue, 29 Nov 2016 03:03:07 -0800
Subject: [PATCH 07/23] rename stbi_load parameters to reduce confusion

---
 stb_image.h | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/stb_image.h b/stb_image.h
index a3c1129..38a9f3d 100644
--- a/stb_image.h
+++ b/stb_image.h
@@ -238,10 +238,10 @@ publish, and distribute this file as you see fit.
 //    stbi_image_free(data)
 //
 // Standard parameters:
-//    int *x       -- outputs image width in pixels
-//    int *y       -- outputs image height in pixels
-//    int *comp    -- outputs # of image components in image file
-//    int req_comp -- if non-zero, # of image components requested in result
+//    int *x                 -- outputs image width in pixels
+//    int *y                 -- outputs image height in pixels
+//    int *channels_in_file  -- outputs # of image components in image file
+//    int desired_channels   -- if non-zero, # of image components requested in result
 //
 // The return value from an image loader is an 'unsigned char *' which points
 // to the pixel data, or NULL on an allocation failure or if the image is
@@ -406,6 +406,7 @@ enum
 };
 
 typedef unsigned char stbi_uc;
+typedef unsigned short stbi_us;
 
 #ifdef __cplusplus
 extern "C" {
@@ -433,22 +434,24 @@ typedef struct
    int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
 } stbi_io_callbacks;
 
-STBIDEF stbi_uc *stbi_load               (char              const *filename,           int *x, int *y, int *comp, int req_comp);
-STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *comp, int req_comp);
-STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *comp, int req_comp);
+STBIDEF stbi_uc *stbi_load               (char              const *filename,           int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
 
 #ifndef STBI_NO_STDIO
-STBIDEF stbi_uc *stbi_load_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);
+STBIDEF stbi_uc *stbi_load_from_file  (FILE *f,                  int *x, int *y, int *channels_in_file, int desired_channels);
 // for stbi_load_from_file, file pointer is left pointing immediately after image
 #endif
 
+//STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+
 #ifndef STBI_NO_LINEAR
-   STBIDEF float *stbi_loadf                 (char const *filename,           int *x, int *y, int *comp, int req_comp);
-   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
-   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp);
+   STBIDEF float *stbi_loadf                 (char const *filename,           int *x, int *y, int *channels_in_file, int desired_channels);
+   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
+   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
 
    #ifndef STBI_NO_STDIO
-   STBIDEF float *stbi_loadf_from_file  (FILE *f,                int *x, int *y, int *comp, int req_comp);
+   STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
    #endif
 #endif
 

From e0700d8e2c1f2a8c8666f9776063e5f956b38230 Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Tue, 29 Nov 2016 04:13:17 -0800
Subject: [PATCH 08/23] 16-bit png changes

---
 stb_image.h | 415 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 291 insertions(+), 124 deletions(-)

diff --git a/stb_image.h b/stb_image.h
index 38a9f3d..6c67db3 100644
--- a/stb_image.h
+++ b/stb_image.h
@@ -1,4 +1,4 @@
-/* stb_image - v2.12 - public domain image loader - http://nothings.org/stb_image.h
+/* stb_image - v2.13 - public domain image loader - http://nothings.org/stb_image.h
                                      no warranty implied; use at your own risk
 
    Do this:
@@ -146,6 +146,7 @@
 
 
    Latest revision history:
+      2.13  (2016-11-29) experimental 16-bit API, only for PNG so far
       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
       2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
                          RGB-format JPEG; remove white matting in PSD;
@@ -157,21 +158,6 @@
       2.07  (2015-09-13) partial animated GIF support
                          limited 16-bit PSD support
                          minor bugs, code cleanup, and compiler warnings
-      2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
-      2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
-      2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
-      2.03  (2015-04-12) additional corruption checking
-                         stbi_set_flip_vertically_on_load
-                         fix NEON support; fix mingw support
-      2.02  (2015-01-19) fix incorrect assert, fix warning
-      2.01  (2015-01-17) fix various warnings
-      2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
-      2.00  (2014-12-25) optimize JPEG, including x86 SSE2 & ARM NEON SIMD
-                         progressive JPEG
-                         PGM/PPM support
-                         STBI_MALLOC,STBI_REALLOC,STBI_FREE
-                         STBI_NO_*, STBI_ONLY_*
-                         GIF bugfix
 
    See end of file for full revision history.
 
@@ -434,17 +420,35 @@ typedef struct
    int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
 } stbi_io_callbacks;
 
+////////////////////////////////////
+//
+// 8-bits-per-channel interface
+//
+
 STBIDEF stbi_uc *stbi_load               (char              const *filename,           int *x, int *y, int *channels_in_file, int desired_channels);
 STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
 
 #ifndef STBI_NO_STDIO
-STBIDEF stbi_uc *stbi_load_from_file  (FILE *f,                  int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_uc *stbi_load_from_file   (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
 // for stbi_load_from_file, file pointer is left pointing immediately after image
 #endif
 
-//STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+////////////////////////////////////
+//
+// 16-bits-per-channel interface
+//
 
+STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+#ifndef STBI_NO_STDIO
+STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
+#endif
+// @TODO the other variants
+
+////////////////////////////////////
+//
+// float-per-channel interface
+//
 #ifndef STBI_NO_LINEAR
    STBIDEF float *stbi_loadf                 (char const *filename,           int *x, int *y, int *channels_in_file, int desired_channels);
    STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
@@ -830,57 +834,69 @@ static void stbi__rewind(stbi__context *s)
    s->img_buffer_end = s->img_buffer_original_end;
 }
 
+enum
+{
+   STBI_ORDER_RGB,
+   STBI_ORDER_BGR
+};
+
+typedef struct
+{
+   int bits_per_channel;
+   int channel_order;
+} stbi__result_info;
+
 #ifndef STBI_NO_JPEG
 static int      stbi__jpeg_test(stbi__context *s);
-static stbi_uc *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_PNG
 static int      stbi__png_test(stbi__context *s);
-static stbi_uc *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_BMP
 static int      stbi__bmp_test(stbi__context *s);
-static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_TGA
 static int      stbi__tga_test(stbi__context *s);
-static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_PSD
 static int      stbi__psd_test(stbi__context *s);
-static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_HDR
 static int      stbi__hdr_test(stbi__context *s);
-static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_PIC
 static int      stbi__pic_test(stbi__context *s);
-static stbi_uc *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_GIF
 static int      stbi__gif_test(stbi__context *s);
-static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_PNM
 static int      stbi__pnm_test(stbi__context *s);
-static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
@@ -938,33 +954,37 @@ STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
     stbi__vertically_flip_on_load = flag_true_if_should_flip;
 }
 
-static unsigned char *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
+   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
+   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
+   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
+
    #ifndef STBI_NO_JPEG
-   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp);
+   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_PNG
-   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp);
+   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_BMP
-   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp);
+   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_GIF
-   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp);
+   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_PSD
-   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp);
+   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_PIC
-   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp);
+   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_PNM
-   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp);
+   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
    #endif
 
    #ifndef STBI_NO_HDR
    if (stbi__hdr_test(s)) {
-      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp);
+      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
       return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
    }
    #endif
@@ -972,35 +992,118 @@ static unsigned char *stbi__load_main(stbi__context *s, int *x, int *y, int *com
    #ifndef STBI_NO_TGA
    // test tga last because it's a crappy test!
    if (stbi__tga_test(s))
-      return stbi__tga_load(s,x,y,comp,req_comp);
+      return stbi__tga_load(s,x,y,comp,req_comp, ri);
    #endif
 
    return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
 }
 
-static unsigned char *stbi__load_flip(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
 {
-   unsigned char *result = stbi__load_main(s, x, y, comp, req_comp);
+   int i;
+   int img_len = w * h * channels;
+   stbi_uc *reduced;
 
-   if (stbi__vertically_flip_on_load && result != NULL) {
+   reduced = (stbi_uc *) stbi__malloc(img_len);
+   if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
+
+   for (i = 0; i < img_len; ++i)
+      reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
+
+   STBI_FREE(orig);
+   return reduced;
+}
+
+static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
+{
+   int i;
+   int img_len = w * h * channels;
+   stbi__uint16 *reduced;
+
+   reduced = (stbi__uint16 *) stbi__malloc(img_len*2);
+   if (reduced == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
+
+   for (i = 0; i < img_len; ++i)
+      reduced[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
+
+   STBI_FREE(orig);
+   return reduced;
+}
+
+static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__result_info ri;
+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri);
+
+   if (result == NULL)
+      return NULL;
+
+   if (ri.bits_per_channel != 8) {
+      STBI_ASSERT(ri.bits_per_channel == 16);
+      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
+      ri.bits_per_channel = 8;
+   }
+
+   // @TODO: move stbi__convert_format to here
+   // @TODO: special case RGB-to-Y for 8-bit-to-16-bit case in postprocess_16bit
+
+   if (stbi__vertically_flip_on_load) {
       int w = *x, h = *y;
-      int depth = req_comp ? req_comp : *comp;
+      int channels = req_comp ? req_comp : *comp;
       int row,col,z;
-      stbi_uc temp;
+      stbi_uc *image = (stbi_uc *) result;
 
       // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
       for (row = 0; row < (h>>1); row++) {
          for (col = 0; col < w; col++) {
-            for (z = 0; z < depth; z++) {
-               temp = result[(row * w + col) * depth + z];
-               result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
-               result[((h - row - 1) * w + col) * depth + z] = temp;
+            for (z = 0; z < channels; z++) {
+               stbi_uc temp = image[(row * w + col) * channels + z];
+               image[(row * w + col) * channels + z] = image[((h - row - 1) * w + col) * channels + z];
+               image[((h - row - 1) * w + col) * channels + z] = temp;
             }
          }
       }
    }
 
-   return result;
+   return (unsigned char *) result;
+}
+
+static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__result_info ri;
+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri);
+
+   if (result == NULL)
+      return NULL;
+
+   if (ri.bits_per_channel != 16) {
+      STBI_ASSERT(ri.bits_per_channel == 8);
+      result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
+      ri.bits_per_channel = 16;
+   }
+
+   // @TODO: move stbi__convert_format16 to here
+   // @TODO: special case RGB-to-Y for 8-bit-to-16-bit case to keep more precision (look at function, discards 8 bits)
+
+   if (stbi__vertically_flip_on_load) {
+      int w = *x, h = *y;
+      int channels = req_comp ? req_comp : *comp;
+      int row,col,z;
+      stbi__uint16 *image = (stbi__uint16 *) result;
+
+      // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
+      for (row = 0; row < (h>>1); row++) {
+         for (col = 0; col < w; col++) {
+            for (z = 0; z < channels; z++) {
+               stbi__uint16 temp = image[(row * w + col) * channels + z];
+               image[(row * w + col) * channels + z] = image[((h - row - 1) * w + col) * channels + z];
+               image[((h - row - 1) * w + col) * channels + z] = temp;
+            }
+         }
+      }
+   }
+
+   return (stbi__uint16 *) result;
 }
 
 #ifndef STBI_NO_HDR
@@ -1056,27 +1159,52 @@ STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req
    unsigned char *result;
    stbi__context s;
    stbi__start_file(&s,f);
-   result = stbi__load_flip(&s,x,y,comp,req_comp);
+   result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
    if (result) {
       // need to 'unget' all the characters in the IO buffer
       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
    }
    return result;
 }
+
+STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__uint16 *result;
+   stbi__context s;
+   stbi__start_file(&s,f);
+   result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
+   if (result) {
+      // need to 'unget' all the characters in the IO buffer
+      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
+   }
+   return result;
+}
+
+STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
+{
+   FILE *f = stbi__fopen(filename, "rb");
+   stbi__uint16 *result;
+   if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
+   result = stbi_load_from_file_16(f,x,y,comp,req_comp);
+   fclose(f);
+   return result;
+}
+
+
 #endif //!STBI_NO_STDIO
 
 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
 {
    stbi__context s;
    stbi__start_mem(&s,buffer,len);
-   return stbi__load_flip(&s,x,y,comp,req_comp);
+   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
 }
 
 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
 {
    stbi__context s;
    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
-   return stbi__load_flip(&s,x,y,comp,req_comp);
+   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
 }
 
 #ifndef STBI_NO_LINEAR
@@ -1085,13 +1213,14 @@ static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int
    unsigned char *data;
    #ifndef STBI_NO_HDR
    if (stbi__hdr_test(s)) {
-      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp);
+      stbi__result_info ri;
+      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
       if (hdr_data)
          stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
       return hdr_data;
    }
    #endif
-   data = stbi__load_flip(s, x, y, comp, req_comp);
+   data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
    if (data)
       return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
    return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
@@ -1359,26 +1488,75 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int r
       unsigned char *src  = data + j * x * img_n   ;
       unsigned char *dest = good + j * x * req_comp;
 
-      #define COMBO(a,b)  ((a)*8+(b))
-      #define CASE(a,b)   case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+      #define STBI__COMBO(a,b)  ((a)*8+(b))
+      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
       // convert source image with img_n components to one with req_comp components;
       // avoid switch per pixel, so use switch per scanline and massive macros
-      switch (COMBO(img_n, req_comp)) {
-         CASE(1,2) dest[0]=src[0], dest[1]=255; break;
-         CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
-         CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
-         CASE(2,1) dest[0]=src[0]; break;
-         CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
-         CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
-         CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
-         CASE(3,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
-         CASE(3,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
-         CASE(4,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
-         CASE(4,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
-         CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
+      switch (STBI__COMBO(img_n, req_comp)) {
+         STBI__CASE(1,2) dest[0]=src[0], dest[1]=255; break;
+         STBI__CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
+         STBI__CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
+         STBI__CASE(2,1) dest[0]=src[0]; break;
+         STBI__CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
+         STBI__CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
+         STBI__CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
+         STBI__CASE(3,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
+         STBI__CASE(3,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
+         STBI__CASE(4,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
+         STBI__CASE(4,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
+         STBI__CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
          default: STBI_ASSERT(0);
       }
-      #undef CASE
+      #undef STBI__CASE
+   }
+
+   STBI_FREE(data);
+   return good;
+}
+
+static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
+{
+   return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8);
+}
+
+static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
+{
+   int i,j;
+   stbi__uint16 *good;
+
+   if (req_comp == img_n) return data;
+   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+
+   good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2);
+   if (good == NULL) {
+      STBI_FREE(data);
+      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
+   }
+
+   for (j=0; j < (int) y; ++j) {
+      stbi__uint16 *src  = data + j * x * img_n   ;
+      stbi__uint16 *dest = good + j * x * req_comp;
+
+      #define STBI__COMBO(a,b)  ((a)*8+(b))
+      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+      // convert source image with img_n components to one with req_comp components;
+      // avoid switch per pixel, so use switch per scanline and massive macros
+      switch (STBI__COMBO(img_n, req_comp)) {
+         STBI__CASE(1,2) dest[0]=src[0], dest[1]=0xffff; break;
+         STBI__CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
+         STBI__CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; break;
+         STBI__CASE(2,1) dest[0]=src[0]; break;
+         STBI__CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
+         STBI__CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
+         STBI__CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; break;
+         STBI__CASE(3,1) dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); break;
+         STBI__CASE(3,2) dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; break;
+         STBI__CASE(4,1) dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); break;
+         STBI__CASE(4,2) dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; break;
+         STBI__CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
+         default: STBI_ASSERT(0);
+      }
+      #undef STBI__CASE
    }
 
    STBI_FREE(data);
@@ -3427,7 +3605,7 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
    }
 }
 
-static unsigned char *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    unsigned char* result;
    stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
@@ -4084,37 +4262,37 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r
       // this is a little gross, so that we don't switch per-pixel or per-component
       if (depth < 8 || img_n == out_n) {
          int nk = (width - 1)*filter_bytes;
-         #define CASE(f) \
+         #define STBI__CASE(f) \
              case f:     \
                 for (k=0; k < nk; ++k)
          switch (filter) {
             // "none" filter turns into a memcpy here; make that explicit.
             case STBI__F_none:         memcpy(cur, raw, nk); break;
-            CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); break;
-            CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
-            CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); break;
-            CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); break;
-            CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); break;
-            CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); break;
+            STBI__CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); break;
+            STBI__CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
+            STBI__CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); break;
+            STBI__CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); break;
+            STBI__CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); break;
+            STBI__CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); break;
          }
-         #undef CASE
+         #undef STBI__CASE
          raw += nk;
       } else {
          STBI_ASSERT(img_n+1 == out_n);
-         #define CASE(f) \
+         #define STBI__CASE(f) \
              case f:     \
                 for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
                    for (k=0; k < filter_bytes; ++k)
          switch (filter) {
-            CASE(STBI__F_none)         cur[k] = raw[k]; break;
-            CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); break;
-            CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
-            CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); break;
-            CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); break;
-            CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); break;
-            CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); break;
+            STBI__CASE(STBI__F_none)         cur[k] = raw[k]; break;
+            STBI__CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); break;
+            STBI__CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
+            STBI__CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); break;
+            STBI__CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); break;
+            STBI__CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); break;
+            STBI__CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); break;
          }
-         #undef CASE
+         #undef STBI__CASE
 
          // the loop above sets the high byte of the pixels' alpha, but for
          // 16 bit png files we also need the low byte set. we'll do that here.
@@ -4344,26 +4522,6 @@ static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int
    return 1;
 }
 
-static int stbi__reduce_png(stbi__png *p)
-{
-   int i;
-   int img_len = p->s->img_x * p->s->img_y * p->s->img_out_n;
-   stbi_uc *reduced;
-   stbi__uint16 *orig = (stbi__uint16*)p->out;
-
-   if (p->depth != 16) return 1; // don't need to do anything if not 16-bit data
-
-   reduced = (stbi_uc *)stbi__malloc(img_len);
-   if (p == NULL) return stbi__err("outofmem", "Out of memory");
-
-   for (i = 0; i < img_len; ++i) reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is a decent approx of 16->8 bit scaling
-
-   p->out = reduced;
-   STBI_FREE(orig);
-
-   return 1;
-}
-
 static int stbi__unpremultiply_on_load = 0;
 static int stbi__de_iphone_flag = 0;
 
@@ -4590,20 +4748,19 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
    }
 }
 
-static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp)
+static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
 {
-   unsigned char *result=NULL;
+   void *result=NULL;
    if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
    if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
-      if (p->depth == 16) {
-         if (!stbi__reduce_png(p)) {
-            return result;
-         }
-      }
+      ri->bits_per_channel = p->depth;
       result = p->out;
       p->out = NULL;
       if (req_comp && req_comp != p->s->img_out_n) {
-         result = stbi__convert_format(result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
+         if (ri->bits_per_channel == 8)
+            result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
+         else
+            result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
          p->s->img_out_n = req_comp;
          if (result == NULL) return result;
       }
@@ -4618,11 +4775,11 @@ static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req
    return result;
 }
 
-static unsigned char *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    stbi__png p;
    p.s = s;
-   return stbi__do_png(&p, x,y,comp,req_comp);
+   return stbi__do_png(&p, x,y,comp,req_comp, ri);
 }
 
 static int stbi__png_test(stbi__context *s)
@@ -4810,7 +4967,7 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
 }
 
 
-static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    stbi_uc *out;
    unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
@@ -4818,6 +4975,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
    int psize=0,i,j,width;
    int flip_vertically, pad, target;
    stbi__bmp_data info;
+   STBI_NOTUSED(ri);
 
    info.all_a = 255;   
    if (stbi__bmp_parse_header(s, &info) == NULL)
@@ -5099,7 +5257,7 @@ void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
    // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
 }
 
-static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    //   read in the TGA header stuff
    int tga_offset = stbi__get8(s);
@@ -5125,6 +5283,7 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
    int RLE_count = 0;
    int RLE_repeating = 0;
    int read_next_pixel = 1;
+   STBI_NOTUSED(ri);
 
    //   do a tiny bit of precessing
    if ( tga_image_type >= 8 )
@@ -5301,7 +5460,8 @@ static int stbi__psd_test(stbi__context *s)
    return r;
 }
 
-static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+// @TODO: return 16-bit PSD data to 16-bit interface
+static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    int   pixelCount;
    int channelCount, compression;
@@ -5309,6 +5469,7 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
    int bitdepth;
    int w,h;
    stbi_uc *out;
+   STBI_NOTUSED(ri);
 
    // Check identifier
    if (stbi__get32be(s) != 0x38425053)   // "8BPS"
@@ -5657,10 +5818,11 @@ static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *c
    return result;
 }
 
-static stbi_uc *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp)
+static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
 {
    stbi_uc *result;
    int i, x,y;
+   STBI_NOTUSED(ri);
 
    for (i=0; i<92; ++i)
       stbi__get8(s);
@@ -6042,11 +6204,12 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
    STBI_NOTUSED(req_comp);
 }
 
-static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    stbi_uc *u = 0;
    stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
    memset(g, 0, sizeof(*g));
+   STBI_NOTUSED(ri);
 
    u = stbi__gif_load_next(s, g, comp, req_comp);
    if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
@@ -6139,7 +6302,7 @@ static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
    }
 }
 
-static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    char buffer[STBI__HDR_BUFLEN];
    char *token;
@@ -6150,7 +6313,7 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
    int len;
    unsigned char count, value;
    int i, j, k, c1,c2, z;
-
+   STBI_NOTUSED(ri);
 
    // Check identifier
    if (strcmp(stbi__hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
@@ -6422,11 +6585,14 @@ static int      stbi__pnm_test(stbi__context *s)
    return 1;
 }
 
-static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    stbi_uc *out;
+   STBI_NOTUSED(ri);
+
    if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
       return 0;
+
    *x = s->img_x;
    *y = s->img_y;
    *comp = s->img_n;
@@ -6596,6 +6762,7 @@ STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int
 
 /*
    revision history:
+      2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
       2.11  (2016-04-02) allocate large structures on the stack
                          remove white matting for transparent PSD

From 77363995175908b304525f2cc2b17180f2fdad96 Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Sun, 4 Dec 2016 05:13:58 -0800
Subject: [PATCH 09/23] return 16-bit PSDs through 16-bit API (untested)

---
 stb_image.h | 100 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 68 insertions(+), 32 deletions(-)

diff --git a/stb_image.h b/stb_image.h
index 6c67db3..5a3febb 100644
--- a/stb_image.h
+++ b/stb_image.h
@@ -843,6 +843,7 @@ enum
 typedef struct
 {
    int bits_per_channel;
+   int num_channels;
    int channel_order;
 } stbi__result_info;
 
@@ -872,7 +873,7 @@ static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
 
 #ifndef STBI_NO_PSD
 static int      stbi__psd_test(stbi__context *s);
-static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
 static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
@@ -954,11 +955,12 @@ STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
     stbi__vertically_flip_on_load = flag_true_if_should_flip;
 }
 
-static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
 {
    memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
    ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
    ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
+   ri->num_channels = 0;
 
    #ifndef STBI_NO_JPEG
    if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
@@ -973,7 +975,7 @@ static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int re
    if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_PSD
-   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri);
+   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
    #endif
    #ifndef STBI_NO_PIC
    if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
@@ -1033,7 +1035,7 @@ static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int chan
 static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
 {
    stbi__result_info ri;
-   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri);
+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
 
    if (result == NULL)
       return NULL;
@@ -1045,7 +1047,6 @@ static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x,
    }
 
    // @TODO: move stbi__convert_format to here
-   // @TODO: special case RGB-to-Y for 8-bit-to-16-bit case in postprocess_16bit
 
    if (stbi__vertically_flip_on_load) {
       int w = *x, h = *y;
@@ -1071,7 +1072,7 @@ static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x,
 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
 {
    stbi__result_info ri;
-   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri);
+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
 
    if (result == NULL)
       return NULL;
@@ -1083,7 +1084,7 @@ static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x,
    }
 
    // @TODO: move stbi__convert_format16 to here
-   // @TODO: special case RGB-to-Y for 8-bit-to-16-bit case to keep more precision (look at function, discards 8 bits)
+   // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
 
    if (stbi__vertically_flip_on_load) {
       int w = *x, h = *y;
@@ -5461,7 +5462,7 @@ static int stbi__psd_test(stbi__context *s)
 }
 
 // @TODO: return 16-bit PSD data to 16-bit interface
-static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
 {
    int   pixelCount;
    int channelCount, compression;
@@ -5527,7 +5528,13 @@ static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req
       return stbi__errpuc("bad compression", "PSD has an unknown compression format");
 
    // Create the destination image.
-   out = (stbi_uc *) stbi__malloc(4 * w*h);
+
+   if (!compression && bitdepth == 16 && bpc == 16) {
+      out = (stbi_uc *) stbi__malloc(4 * w*h * 2);
+      ri->bits_per_channel = 16;
+   } else
+      out = (stbi_uc *) stbi__malloc(4 * w*h);
+
    if (!out) return stbi__errpuc("outofmem", "Out of memory");
    pixelCount = w*h;
 
@@ -5597,44 +5604,73 @@ static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req
 
       // Read the data by channel.
       for (channel = 0; channel < 4; channel++) {
-         stbi_uc *p;
-
-         p = out + channel;
          if (channel >= channelCount) {
             // Fill this channel with default data.
-            stbi_uc val = channel == 3 ? 255 : 0;
-            for (i = 0; i < pixelCount; i++, p += 4)
-               *p = val;
-         } else {
-            // Read the data.
-            if (bitdepth == 16) {
-               for (i = 0; i < pixelCount; i++, p += 4)
-                  *p = (stbi_uc) (stbi__get16be(s) >> 8);
+            if (bitdepth == 16 && bpc == 16) {
+               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
+               stbi_uc val = channel == 3 ? 65535 : 0;
+               for (i = 0; i < pixelCount; i++, q += 4)
+                  *q = val;
             } else {
+               stbi_uc *p = out+channel;
+               stbi_uc val = channel == 3 ? 255 : 0;
                for (i = 0; i < pixelCount; i++, p += 4)
-                  *p = stbi__get8(s);
+                  *p = val;
+            }
+         } else {
+            if (ri->bits_per_channel == 16) {    // output bpc
+               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
+               for (i = 0; i < pixelCount; i++, q += 4)
+                  *q = (stbi__uint16) stbi__get16be(s);
+            } else {
+               stbi_uc *p = out+channel;
+               if (bitdepth == 16) {  // input bpc
+                  for (i = 0; i < pixelCount; i++, p += 4)
+                     *p = (stbi_uc) (stbi__get16be(s) >> 8);
+               } else {
+                  for (i = 0; i < pixelCount; i++, p += 4)
+                     *p = stbi__get8(s);
+               }
             }
          }
       }
    }
 
+   // remove weird white matte from PSD
    if (channelCount >= 4) {
-      for (i=0; i < w*h; ++i) {
-         unsigned char *pixel = out + 4*i;
-         if (pixel[3] != 0 && pixel[3] != 255) {
-            // remove weird white matte from PSD
-            float a = pixel[3] / 255.0f;
-            float ra = 1.0f / a;
-            float inv_a = 255.0f * (1 - ra);
-            pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
-            pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
-            pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
+      if (ri->bits_per_channel == 16) {
+         for (i=0; i < w*h; ++i) {
+            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
+            if (pixel[3] != 0 && pixel[3] != 65535) {
+               float a = pixel[3] / 65535.0f;
+               float ra = 1.0f / a;
+               float inv_a = 65535.0f * (1 - ra);
+               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
+               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
+               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
+            }
+         }
+      } else {
+         for (i=0; i < w*h; ++i) {
+            unsigned char *pixel = out + 4*i;
+            if (pixel[3] != 0 && pixel[3] != 255) {
+               float a = pixel[3] / 255.0f;
+               float ra = 1.0f / a;
+               float inv_a = 255.0f * (1 - ra);
+               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
+               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
+               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
+            }
          }
       }
    }
 
+   // convert to desired output format
    if (req_comp && req_comp != 4) {
-      out = stbi__convert_format(out, 4, req_comp, w, h);
+      if (ri->bits_per_channel == 16)
+         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
+      else
+         out = stbi__convert_format(out, 4, req_comp, w, h);
       if (out == NULL) return out; // stbi__convert_format frees input on failure
    }
 

From 7759a2a93d192951b00804be1466b5e7296ca7c0 Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Sun, 4 Dec 2016 05:25:24 -0800
Subject: [PATCH 10/23] fix "misleading indentation" gcc warning

---
 stb_image.h | 74 ++++++++++++++++++++++++++---------------------------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/stb_image.h b/stb_image.h
index a2d8219..1e4c23c 100644
--- a/stb_image.h
+++ b/stb_image.h
@@ -1566,18 +1566,18 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int r
       // convert source image with img_n components to one with req_comp components;
       // avoid switch per pixel, so use switch per scanline and massive macros
       switch (STBI__COMBO(img_n, req_comp)) {
-         STBI__CASE(1,2) dest[0]=src[0], dest[1]=255; break;
-         STBI__CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
-         STBI__CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
-         STBI__CASE(2,1) dest[0]=src[0]; break;
-         STBI__CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
-         STBI__CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
-         STBI__CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
-         STBI__CASE(3,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
-         STBI__CASE(3,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
-         STBI__CASE(4,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
-         STBI__CASE(4,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
-         STBI__CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
+         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255;                                     } break;
+         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
+         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255;                     } break;
+         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
+         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
+         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                  } break;
+         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255;        } break;
+         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255;    } break;
+         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break;
+         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                    } break;
          default: STBI_ASSERT(0);
       }
       #undef STBI__CASE
@@ -1615,18 +1615,18 @@ static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int r
       // convert source image with img_n components to one with req_comp components;
       // avoid switch per pixel, so use switch per scanline and massive macros
       switch (STBI__COMBO(img_n, req_comp)) {
-         STBI__CASE(1,2) dest[0]=src[0], dest[1]=0xffff; break;
-         STBI__CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
-         STBI__CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; break;
-         STBI__CASE(2,1) dest[0]=src[0]; break;
-         STBI__CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
-         STBI__CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
-         STBI__CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; break;
-         STBI__CASE(3,1) dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); break;
-         STBI__CASE(3,2) dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; break;
-         STBI__CASE(4,1) dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); break;
-         STBI__CASE(4,2) dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; break;
-         STBI__CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
+         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff;                                     } break;
+         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
+         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff;                     } break;
+         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
+         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
+         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                     } break;
+         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff;        } break;
+         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break;
+         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break;
+         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                       } break;
          default: STBI_ASSERT(0);
       }
       #undef STBI__CASE
@@ -4354,12 +4354,12 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r
          switch (filter) {
             // "none" filter turns into a memcpy here; make that explicit.
             case STBI__F_none:         memcpy(cur, raw, nk); break;
-            STBI__CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); break;
-            STBI__CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
-            STBI__CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); break;
-            STBI__CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); break;
-            STBI__CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); break;
-            STBI__CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); break;
+            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
+            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
+            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
+            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
+            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
+            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
          }
          #undef STBI__CASE
          raw += nk;
@@ -4370,13 +4370,13 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r
                 for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
                    for (k=0; k < filter_bytes; ++k)
          switch (filter) {
-            STBI__CASE(STBI__F_none)         cur[k] = raw[k]; break;
-            STBI__CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); break;
-            STBI__CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
-            STBI__CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); break;
-            STBI__CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); break;
-            STBI__CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); break;
-            STBI__CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); break;
+            STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
+            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
+            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
+            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
+            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
+            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
+            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
          }
          #undef STBI__CASE
 

From 4a1523f60ab0b1a707e147c41dcfc2a421007a8c Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Sun, 4 Dec 2016 05:28:26 -0800
Subject: [PATCH 11/23] make tga load function static to avoid link errors on
 multiple instances

---
 stb_image.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stb_image.h b/stb_image.h
index 1e4c23c..6246eb0 100644
--- a/stb_image.h
+++ b/stb_image.h
@@ -5330,7 +5330,7 @@ errorEnd:
 }
 
 // read 16bit value and convert to 24bit RGB
-void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
+static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
 {
    stbi__uint16 px = stbi__get16le(s);
    stbi__uint16 fiveBitMask = 31;

From 0b2c06a7e17da43283c872c67f4679b92dcebbb6 Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Sun, 4 Dec 2016 05:39:35 -0800
Subject: [PATCH 12/23] more STB_IMAGE_STATIC fixes

---
 tests/stb.dsp       | 4 ----
 tests/test_vorbis.c | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/stb.dsp b/tests/stb.dsp
index 7fbb0a8..5b32abb 100644
--- a/tests/stb.dsp
+++ b/tests/stb.dsp
@@ -90,10 +90,6 @@ SOURCE=.\grid_reachability.c
 # End Source File
 # Begin Source File
 
-SOURCE=..\docs\other_libs.md
-# End Source File
-# Begin Source File
-
 SOURCE=.\stb.c
 # End Source File
 # Begin Source File
diff --git a/tests/test_vorbis.c b/tests/test_vorbis.c
index d54ed23..341a102 100644
--- a/tests/test_vorbis.c
+++ b/tests/test_vorbis.c
@@ -1,3 +1,7 @@
+#define STB_IMAGE_STATIC
+#define STB_IMAGE_IMPLEMENTATION
+#include "stb_image.h"
+
 #define STB_VORBIS_HEADER_ONLY
 #include "stb_vorbis.c"
 #include "stb.h"

From b61b7a74fa050db0679d69ac584312bc0300914c Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Sun, 4 Dec 2016 05:40:21 -0800
Subject: [PATCH 13/23] update version info

---
 stb_image.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stb_image.h b/stb_image.h
index 71debbf..345e406 100644
--- a/stb_image.h
+++ b/stb_image.h
@@ -146,7 +146,7 @@
 
 
    Latest revision history:
-      2.13  (2016-11-29) experimental 16-bit API, only for PNG so far
+      2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
       2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
                          RGB-format JPEG; remove white matting in PSD;

From a468fbda725c014af5ca2cef3b57ba271d1fb685 Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Mon, 5 Dec 2016 02:57:06 -0800
Subject: [PATCH 14/23] readme, add rrsprintf.h

---
 README.md   |    4 +-
 rrsprintf.h | 1055 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1057 insertions(+), 2 deletions(-)
 create mode 100644 rrsprintf.h

diff --git a/README.md b/README.md
index 840f061..2b17c3c 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ single-file public domain libraries for C/C++ <a name="stb_libs"></a>
 library    | lastest version | category | LoC | description
 --------------------- | ---- | -------- | --- | --------------------------------
 **[stb_vorbis.c](stb_vorbis.c)** | 1.09 | audio | 5399 | decode ogg vorbis files from file/memory to float/16-bit signed output
-**[stb_image.h](stb_image.h)** | 2.12 | graphics | 6755 | image loading/decoding from file/memory: JPG, PNG, TGA, BMP, PSD, GIF, HDR, PIC
+**[stb_image.h](stb_image.h)** | 2.13 | graphics | 7099 | image loading/decoding from file/memory: JPG, PNG, TGA, BMP, PSD, GIF, HDR, PIC
 **[stb_truetype.h](stb_truetype.h)** | 1.12 | graphics | 3287 | parse, decode, and rasterize characters from truetype fonts
 **[stb_image_write.h](stb_image_write.h)** | 1.02 | graphics | 1048 | image writing to disk: PNG, TGA, BMP
 **[stb_image_resize.h](stb_image_resize.h)** | 0.91 | graphics | 2578 | resize images larger/smaller with good quality
@@ -28,7 +28,7 @@ library    | lastest version | category | LoC | description
 **[stb_leakcheck.h](stb_leakcheck.h)** | 0.2 | misc | 124 | quick-and-dirty malloc/free leak-checking
 
 Total libraries: 19  
-Total lines of C code: 47970
+Total lines of C code: 48314
 
 
 FAQ
diff --git a/rrsprintf.h b/rrsprintf.h
new file mode 100644
index 0000000..62962e3
--- /dev/null
+++ b/rrsprintf.h
@@ -0,0 +1,1055 @@
+#ifndef RR_SPRINTF_H_INCLUDE
+#define RR_SPRINTF_H_INCLUDE
+
+/*
+Single file sprintf replacement.
+
+Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20. 
+Hereby placed in public domain.
+
+This is a full sprintf replacement that supports everything that
+the C runtime sprintfs support, including float/double, 64-bit integers,
+hex floats, field parameters (%*.*d stuff), length reads backs, etc.
+
+Why would you need this if sprintf already exists?  Well, first off,
+it's *much* faster (see below). It's also much smaller than the CRT
+versions code-space-wise. We've also added some simple improvements 
+that are super handy (commas in thousands, callbacks at buffer full,
+for example). Finally, the format strings for MSVC and GCC differ 
+for 64-bit integers (among other small things), so this lets you use 
+the same format strings in cross platform code.
+
+It uses the standard single file trick of being both the header file
+and the source itself. If you just include it normally, you just get 
+the header file function definitions. To get the code, you include
+it from a C or C++ file and define RR_SPRINTF_IMPLEMENTATION first.
+
+It only uses va_args macros from the C runtime to do it's work. It
+does cast doubles to S64s and shifts and divides U64s, which does 
+drag in CRT code on most platforms.
+
+It compiles to roughly 8K with float support, and 4K without.
+As a comparison, when using MSVC static libs, calling sprintf drags
+in 16K.
+
+API:
+====
+int rrsprintf( char * buf, char const * fmt, ... )
+int rrsnprintf( char * buf, int count, char const * fmt, ... )
+  Convert an arg list into a buffer.  rrsnprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int rrvsprintf( char * buf, char const * fmt, va_list va )
+int rrvsnprintf( char * buf, int count, char const * fmt, va_list va )
+  Convert a va_list arg list into a buffer.  rrvsnprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int rrvsprintfcb( RRSPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
+    typedef char * RRSPRINTFCB( char const * buf, void * user, int len );
+  Convert into a buffer, calling back every RR_SPRINTF_MIN chars.
+  Your callback can then copy the chars out, print them or whatever.
+  This function is actually the workhorse for everything else.
+  The buffer you pass in must hold at least RR_SPRINTF_MIN characters.
+    // you return the next buffer to use or 0 to stop converting
+
+void rrsetseparators( char comma, char period )
+  Set the comma and period characters to use.
+
+FLOATS/DOUBLES:
+===============
+This code uses a internal float->ascii conversion method that uses
+doubles with error correction (double-doubles, for ~105 bits of
+precision).  This conversion is round-trip perfect - that is, an atof
+of the values output here will give you the bit-exact double back.
+
+One difference is that our insignificant digits will be different than 
+with MSVC or GCC (but they don't match each other either).  We also 
+don't attempt to find the minimum length matching float (pre-MSVC15 
+doesn't either).
+
+If you don't need float or doubles at all, define RR_SPRINTF_NOFLOAT
+and you'll save 4K of code space.
+
+64-BIT INTS:
+============
+This library also supports 64-bit integers and you can use MSVC style or
+GCC style indicators (%I64d or %lld).  It supports the C99 specifiers
+for size_t and ptr_diff_t (%jd %zd) as well.
+
+EXTRAS:
+=======
+Like some GCCs, for integers and floats, you can use a ' (single quote)
+specifier and commas will be inserted on the thousands: "%'d" on 12345 
+would print 12,345.
+
+For integers and floats, you can use a "$" specifier and the number 
+will be converted to float and then divided to get kilo, mega, giga or
+tera and then printed, so "%$d" 1024 is "1.0 k", "%$.2d" 2536000 is 
+"2.42 m", etc.
+
+In addition to octal and hexadecimal conversions, you can print 
+integers in binary: "%b" for 256 would print 100.
+
+PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC):
+===================================================================
+"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC)
+"%24d" across all 32-bit ints (4.5x/4.2x faster)
+"%x" across all 32-bit ints (4.5x/3.8x faster)
+"%08x" across all 32-bit ints (4.3x/3.8x faster)
+"%f" across e-10 to e+10 floats (7.3x/6.0x faster)
+"%e" across e-10 to e+10 floats (8.1x/6.0x faster)
+"%g" across e-10 to e+10 floats (10.0x/7.1x faster)
+"%f" for values near e-300 (7.9x/6.5x faster)
+"%f" for values near e+300 (10.0x/9.1x faster)
+"%e" for values near e-300 (10.1x/7.0x faster)
+"%e" for values near e+300 (9.2x/6.0x faster)
+"%.320f" for values near e-300 (12.6x/11.2x faster)
+"%a" for random values (8.6x/4.3x faster)
+"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster)
+"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster)
+"%s%s%s" for 64 char strings (7.1x/7.3x faster)
+"...512 char string..." ( 35.0x/32.5x faster!)
+*/
+
+#ifdef RR_SPRINTF_STATIC
+#define RRPUBLIC_DEC static
+#define RRPUBLIC_DEF static
+#else
+#ifdef __cplusplus
+#define RRPUBLIC_DEC extern "C"
+#define RRPUBLIC_DEF extern "C"
+#else
+#define RRPUBLIC_DEC extern 
+#define RRPUBLIC_DEF
+#endif
+#endif
+
+#include <stdarg.h>  // for va_list()
+
+#ifndef RR_SPRINTF_MIN
+#define RR_SPRINTF_MIN 512 // how many characters per callback
+#endif
+typedef char * RRSPRINTFCB( char * buf, void * user, int len );
+
+#ifndef RR_SPRINTF_DECORATE
+#define RR_SPRINTF_DECORATE(name) rr##name  // define this before including if you want to change the names
+#endif
+
+#ifndef RR_SPRINTF_IMPLEMENTATION
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintf )( char * buf, char const * fmt, va_list va );
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va );
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( sprintf ) ( char * buf, char const * fmt, ... );
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( snprintf )( char * buf, int count, char const * fmt, ... );
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va );
+RRPUBLIC_DEF void RR_SPRINTF_DECORATE( setseparators )( char comma, char period );
+
+#else
+
+#include <stdlib.h>  // for va_arg()
+
+#define rU32 unsigned int
+#define rS32 signed int
+
+#ifdef _MSC_VER
+#define rU64 unsigned __int64
+#define rS64 signed __int64
+#else
+#define rU64 unsigned long long
+#define rS64 signed long long
+#endif
+#define rU16 unsigned short
+
+#ifndef rUINTa 
+#if defined(__ppc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64)
+#define rUINTa rU64
+#else
+#define rUINTa rU32
+#endif
+#endif
+
+#ifndef RR_SPRINTF_MSVC_MODE  // used for MSVC2013 and earlier (MSVC2015 matches GCC)
+#if defined(_MSC_VER) && (_MSC_VER<1900)
+#define RR_SPRINTF_MSVC_MODE
+#endif
+#endif
+
+#ifdef RR_SPRINTF_NOUNALIGNED  // define this before inclusion to force rrsprint to always use aligned accesses
+#define RR_UNALIGNED(code)
+#else
+#define RR_UNALIGNED(code) code
+#endif
+
+#ifndef RR_SPRINTF_NOFLOAT
+// internal float utility functions
+static rS32 rrreal_to_str( char const * * start, rU32 * len, char *out, rS32 * decimal_pos, double value, rU32 frac_digits );
+static rS32 rrreal_to_parts( rS64 * bits, rS32 * expo, double value );
+#define RRSPECIAL 0x7000
+#endif
+
+static char RRperiod='.';
+static char RRcomma=',';
+static char rrdiglookup[201]="00010203040506070809101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899";
+
+RRPUBLIC_DEF void RR_SPRINTF_DECORATE( setseparators )( char pcomma, char pperiod )
+{
+  RRperiod=pperiod;
+  RRcomma=pcomma;
+}
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
+{
+  static char hex[]="0123456789abcdefxp";
+  static char hexu[]="0123456789ABCDEFXP";
+  char * bf;
+  char const * f;
+  int tlen = 0;
+
+  bf = buf;
+  f = fmt;
+  for(;;)
+  {
+    rS32 fw,pr,tz; rU32 fl;
+
+    #define LJ 1
+    #define LP 2
+    #define LS 4
+    #define LX 8
+    #define LZ 16
+    #define BI 32
+    #define CS 64
+    #define NG 128
+    #define KI 256
+    #define HW 512
+ 
+    // macros for the callback buffer stuff
+    #define chk_cb_bufL(bytes) { int len = (int)(bf-buf); if ((len+(bytes))>=RR_SPRINTF_MIN) { tlen+=len; if (0==(bf=buf=callback(buf,user,len))) goto done; } }
+    #define chk_cb_buf(bytes) { if ( callback ) { chk_cb_bufL(bytes); } }
+    #define flush_cb() { chk_cb_bufL(RR_SPRINTF_MIN-1); } //flush if there is even one byte in the buffer
+    #define cb_buf_clamp(cl,v) cl = v; if ( callback ) { int lg = RR_SPRINTF_MIN-(int)(bf-buf); if (cl>lg) cl=lg; }
+
+    // fast copy everything up to the next % (or end of string)
+    for(;;)
+    { 
+      while (((rUINTa)f)&3)
+      {
+       schk1: if (f[0]=='%') goto scandd;
+       schk2: if (f[0]==0) goto endfmt;
+        chk_cb_buf(1); *bf++=f[0]; ++f;
+      } 
+      for(;;)
+      { 
+        rU32 v,c;
+        v=*(rU32*)f; c=(~v)&0x80808080;
+        if ((v-0x26262626)&c) goto schk1; 
+        if ((v-0x01010101)&c) goto schk2; 
+        if (callback) if ((RR_SPRINTF_MIN-(int)(bf-buf))<4) goto schk1;
+        *(rU32*)bf=v; bf+=4; f+=4;
+      }
+    } scandd:
+
+    ++f;
+
+    // ok, we have a percent, read the modifiers first
+    fw = 0; pr = -1; fl = 0; tz = 0;
+    
+    // flags
+    for(;;)
+    {
+      switch(f[0])
+      {
+        // if we have left just
+        case '-': fl|=LJ; ++f; continue;
+        // if we have leading plus
+        case '+': fl|=LP; ++f; continue; 
+        // if we have leading space
+        case ' ': fl|=LS; ++f; continue; 
+        // if we have leading 0x
+        case '#': fl|=LX; ++f; continue; 
+        // if we have thousand commas
+        case '\'': fl|=CS; ++f; continue; 
+        // if we have kilo marker
+        case '$': fl|=KI; ++f; continue; 
+        // if we have leading zero
+        case '0': fl|=LZ; ++f; goto flags_done; 
+        default: goto flags_done;
+      }
+    }
+    flags_done:
+   
+    // get the field width
+    if ( f[0] == '*' ) {fw = va_arg(va,rU32); ++f;} else { while (( f[0] >= '0' ) && ( f[0] <= '9' )) { fw = fw * 10 + f[0] - '0'; f++; } }
+    // get the precision
+    if ( f[0]=='.' ) { ++f; if ( f[0] == '*' ) {pr = va_arg(va,rU32); ++f;} else { pr = 0; while (( f[0] >= '0' ) && ( f[0] <= '9' )) { pr = pr * 10 + f[0] - '0'; f++; } } } 
+    
+    // handle integer size overrides
+    switch(f[0])
+    {
+      // are we halfwidth?
+      case 'h': fl|=HW; ++f; break;
+      // are we 64-bit (unix style)
+      case 'l': ++f; if ( f[0]=='l') { fl|=BI; ++f; } break;
+      // are we 64-bit on intmax? (c99)
+      case 'j': fl|=BI; ++f; break; 
+      // are we 64-bit on size_t or ptrdiff_t? (c99)
+      case 'z': case 't': fl|=((sizeof(char*)==8)?BI:0); ++f; break; 
+      // are we 64-bit (msft style)
+      case 'I': if ( ( f[1]=='6') && ( f[2]=='4') ) { fl|=BI; f+=3; } else if ( ( f[1]=='3') && ( f[2]=='2') ) { f+=3; } else { fl|=((sizeof(void*)==8)?BI:0); ++f; } break;
+      default: break;
+    }
+
+    // handle each replacement
+    switch( f[0] )
+    {
+      #define NUMSZ 512 // big enough for e308 (with commas) or e-307 
+      char num[NUMSZ]; 
+      char lead[8]; 
+      char tail[8]; 
+      char *s;
+      char const *h;
+      rU32 l,n,cs;
+      rU64 n64;
+      #ifndef RR_SPRINTF_NOFLOAT      
+      double fv; 
+      #endif
+      rS32 dp; char const * sn;
+
+      case 's':
+        // get the string
+        s = va_arg(va,char*); if (s==0) s = (char*)"null";
+        // get the length
+        sn = s;
+        for(;;)
+        { 
+          if ((((rUINTa)sn)&3)==0) break;
+         lchk:
+          if (sn[0]==0) goto ld;
+          ++sn;
+        }
+        n = 0xffffffff;
+        if (pr>=0) { n=(rU32)(sn-s); if (n>=(rU32)pr) goto ld; n=((rU32)(pr-n))>>2; }
+        while(n) 
+        { 
+          rU32 v=*(rU32*)sn;
+          if ((v-0x01010101)&(~v)&0x80808080UL) goto lchk; 
+          sn+=4; 
+          --n;
+        }
+        goto lchk;
+       ld:
+
+        l = (rU32) ( sn - s );
+        // clamp to precision
+        if ( l > (rU32)pr ) l = pr;
+        lead[0]=0; tail[0]=0; pr = 0; dp = 0; cs = 0;
+        // copy the string in
+        goto scopy;
+
+      case 'c': // char
+        // get the character
+        s = num + NUMSZ -1; *s = (char)va_arg(va,int);
+        l = 1;
+        lead[0]=0; tail[0]=0; pr = 0; dp = 0; cs = 0;
+        goto scopy;
+
+      case 'n': // weird write-bytes specifier
+        { int * d = va_arg(va,int*);
+        *d = tlen + (int)( bf - buf ); }
+        break;
+
+#ifdef RR_SPRINTF_NOFLOAT
+      case 'A': // float
+      case 'a': // hex float
+      case 'G': // float
+      case 'g': // float
+      case 'E': // float
+      case 'e': // float
+      case 'f': // float
+        va_arg(va,double); // eat it
+        s = (char*)"No float";
+        l = 8;
+        lead[0]=0; tail[0]=0; pr = 0; dp = 0; cs = 0;
+        goto scopy;
+#else
+      case 'A': // float
+        h=hexu;  
+        goto hexfloat;
+
+      case 'a': // hex float
+        h=hex;
+       hexfloat: 
+        fv = va_arg(va,double);
+        if (pr==-1) pr=6; // default is 6
+        // read the double into a string
+        if ( rrreal_to_parts( (rS64*)&n64, &dp, fv ) )
+          fl |= NG;
+  
+        s = num+64;
+
+        // sign
+        lead[0]=0; if (fl&NG) { lead[0]=1; lead[1]='-'; } else if (fl&LS) { lead[0]=1; lead[1]=' '; } else if (fl&LP) { lead[0]=1; lead[1]='+'; };
+
+        if (dp==-1023) dp=(n64)?-1022:0; else n64|=(((rU64)1)<<52);
+        n64<<=(64-56);
+        if (pr<15) n64+=((((rU64)8)<<56)>>(pr*4));
+        // add leading chars
+        
+        #ifdef RR_SPRINTF_MSVC_MODE
+        *s++='0';*s++='x';
+        #else
+        lead[1+lead[0]]='0'; lead[2+lead[0]]='x'; lead[0]+=2;
+        #endif
+        *s++=h[(n64>>60)&15]; n64<<=4;
+        if ( pr ) *s++=RRperiod;
+        sn = s;
+
+        // print the bits
+        n = pr; if (n>13) n = 13; if (pr>(rS32)n) tz=pr-n; pr = 0;
+        while(n--) { *s++=h[(n64>>60)&15]; n64<<=4; }
+
+        // print the expo
+        tail[1]=h[17];
+        if (dp<0) { tail[2]='-'; dp=-dp;} else tail[2]='+';
+        n = (dp>=1000)?6:((dp>=100)?5:((dp>=10)?4:3));
+        tail[0]=(char)n;
+        for(;;) { tail[n]='0'+dp%10; if (n<=3) break; --n; dp/=10; }
+
+        dp = (int)(s-sn);
+        l = (int)(s-(num+64));
+        s = num+64;
+        cs = 1 + (3<<24);
+        goto scopy;
+
+      case 'G': // float
+        h=hexu;
+        goto dosmallfloat;
+
+      case 'g': // float
+        h=hex;
+       dosmallfloat:   
+        fv = va_arg(va,double);
+        if (pr==-1) pr=6; else if (pr==0) pr = 1; // default is 6
+        // read the double into a string
+        if ( rrreal_to_str( &sn, &l, num, &dp, fv, (pr-1)|0x80000000 ) )
+          fl |= NG;
+
+        // clamp the precision and delete extra zeros after clamp
+        n = pr;
+        if ( l > (rU32)pr ) l = pr; while ((l>1)&&(pr)&&(sn[l-1]=='0')) { --pr; --l; }
+
+        // should we use %e
+        if ((dp<=-4)||(dp>(rS32)n))
+        {
+          if ( pr > (rS32)l ) pr = l-1; else if ( pr ) --pr; // when using %e, there is one digit before the decimal
+          goto doexpfromg;
+        }
+        // this is the insane action to get the pr to match %g sematics for %f
+        if(dp>0) { pr=(dp<(rS32)l)?l-dp:0; } else { pr = -dp+((pr>(rS32)l)?l:pr); }
+        goto dofloatfromg;
+
+      case 'E': // float
+        h=hexu;  
+        goto doexp;
+
+      case 'e': // float
+        h=hex;
+       doexp:   
+        fv = va_arg(va,double);
+        if (pr==-1) pr=6; // default is 6
+        // read the double into a string
+        if ( rrreal_to_str( &sn, &l, num, &dp, fv, pr|0x80000000 ) )
+          fl |= NG;
+       doexpfromg: 
+        tail[0]=0; 
+        lead[0]=0; if (fl&NG) { lead[0]=1; lead[1]='-'; } else if (fl&LS) { lead[0]=1; lead[1]=' '; } else if (fl&LP) { lead[0]=1; lead[1]='+'; };
+        if ( dp == RRSPECIAL ) { s=(char*)sn; cs=0; pr=0; goto scopy; }
+        s=num+64; 
+        // handle leading chars
+        *s++=sn[0];
+
+        if (pr) *s++=RRperiod;
+
+        // handle after decimal
+        if ((l-1)>(rU32)pr) l=pr+1;
+        for(n=1;n<l;n++) *s++=sn[n];
+        // trailing zeros
+        tz = pr-(l-1); pr=0;
+        // dump expo
+        tail[1]=h[0xe];
+        dp -= 1;
+        if (dp<0) { tail[2]='-'; dp=-dp;} else tail[2]='+';
+        #ifdef RR_SPRINTF_MSVC_MODE
+        n = 5;
+        #else
+        n = (dp>=100)?5:4;
+        #endif
+        tail[0]=(char)n;
+        for(;;) { tail[n]='0'+dp%10; if (n<=3) break; --n; dp/=10; }
+        cs = 1 + (3<<24); // how many tens
+        goto flt_lead;   
+
+      case 'f': // float
+        fv = va_arg(va,double);
+       doafloat: 
+        // do kilos
+        if (fl&KI) {while(fl<0x4000000) { if ((fv<1024.0) && (fv>-1024.0)) break; fv/=1024.0; fl+=0x1000000; }} 
+        if (pr==-1) pr=6; // default is 6
+        // read the double into a string
+        if ( rrreal_to_str( &sn, &l, num, &dp, fv, pr ) )
+          fl |= NG;
+        dofloatfromg:
+        tail[0]=0;
+        // sign
+        lead[0]=0; if (fl&NG) { lead[0]=1; lead[1]='-'; } else if (fl&LS) { lead[0]=1; lead[1]=' '; } else if (fl&LP) { lead[0]=1; lead[1]='+'; };
+        if ( dp == RRSPECIAL ) { s=(char*)sn; cs=0; pr=0; goto scopy; }
+        s=num+64; 
+
+        // handle the three decimal varieties
+        if (dp<=0) 
+        { 
+          rS32 i;
+          // handle 0.000*000xxxx
+          *s++='0'; if (pr) *s++=RRperiod; 
+          n=-dp; if((rS32)n>pr) n=pr; i=n; while(i) { if ((((rUINTa)s)&3)==0) break; *s++='0'; --i; } while(i>=4) { *(rU32*)s=0x30303030; s+=4; i-=4; } while(i) { *s++='0'; --i; }
+          if ((rS32)(l+n)>pr) l=pr-n; i=l; while(i) { *s++=*sn++; --i; }
+          tz = pr-(n+l);
+          cs = 1 + (3<<24); // how many tens did we write (for commas below)
+        }
+        else
+        {
+          cs = (fl&CS)?((600-(rU32)dp)%3):0;
+          if ((rU32)dp>=l)
+          {
+            // handle xxxx000*000.0
+            n=0; for(;;) { if ((fl&CS) && (++cs==4)) { cs = 0; *s++=RRcomma; } else { *s++=sn[n]; ++n; if (n>=l) break; } }
+            if (n<(rU32)dp)
+            {
+              n = dp - n;
+              if ((fl&CS)==0) { while(n) { if ((((rUINTa)s)&3)==0) break; *s++='0'; --n; }  while(n>=4) { *(rU32*)s=0x30303030; s+=4; n-=4; } }
+              while(n) { if ((fl&CS) && (++cs==4)) { cs = 0; *s++=RRcomma; } else { *s++='0'; --n; } }
+            }
+            cs = (int)(s-(num+64)) + (3<<24); // cs is how many tens
+            if (pr) { *s++=RRperiod; tz=pr;}
+          }
+          else
+          {
+            // handle xxxxx.xxxx000*000
+            n=0; for(;;) { if ((fl&CS) && (++cs==4)) { cs = 0; *s++=RRcomma; } else { *s++=sn[n]; ++n; if (n>=(rU32)dp) break; } }
+            cs = (int)(s-(num+64)) + (3<<24); // cs is how many tens
+            if (pr) *s++=RRperiod;
+            if ((l-dp)>(rU32)pr) l=pr+dp;
+            while(n<l) { *s++=sn[n]; ++n; }
+            tz = pr-(l-dp);
+          }
+        }
+        pr = 0;
+        
+        // handle k,m,g,t
+        if (fl&KI) { tail[0]=1; tail[1]=' '; { if (fl>>24) { tail[2]="_kmgt"[fl>>24]; tail[0]=2; } } };
+
+        flt_lead:
+        // get the length that we copied
+        l = (rU32) ( s-(num+64) );
+        s=num+64; 
+        goto scopy;
+#endif
+
+      case 'B': // upper binary
+        h = hexu;
+        goto binary;
+
+      case 'b': // lower binary
+        h = hex;
+        binary:
+        lead[0]=0;
+        if (fl&LX) { lead[0]=2;lead[1]='0';lead[2]=h[0xb]; }
+        l=(8<<4)|(1<<8);
+        goto radixnum;
+
+      case 'o': // octal
+        h = hexu;
+        lead[0]=0;
+        if (fl&LX) { lead[0]=1;lead[1]='0'; }
+        l=(3<<4)|(3<<8);
+        goto radixnum;
+
+      case 'p': // pointer
+        fl |= (sizeof(void*)==8)?BI:0;
+        pr = sizeof(void*)*2;
+        fl &= ~LZ; // 'p' only prints the pointer with zeros
+        // drop through to X
+      
+      case 'X': // upper binary
+        h = hexu;
+        goto dohexb;
+
+      case 'x': // lower binary
+        h = hex; dohexb:
+        l=(4<<4)|(4<<8);
+        lead[0]=0;
+        if (fl&LX) { lead[0]=2;lead[1]='0';lead[2]=h[16]; }
+       radixnum: 
+        // get the number
+        if ( fl&BI )
+          n64 = va_arg(va,rU64);
+        else
+          n64 = va_arg(va,rU32);
+
+        s = num + NUMSZ; dp = 0;
+        // clear tail, and clear leading if value is zero
+        tail[0]=0; if (n64==0) { lead[0]=0; if (pr==0) { l=0; cs = ( ((l>>4)&15)) << 24; goto scopy; } }
+        // convert to string
+        for(;;) { *--s = h[n64&((1<<(l>>8))-1)]; n64>>=(l>>8); if ( ! ( (n64) || ((rS32) ( (num+NUMSZ) - s ) < pr ) ) ) break; if ( fl&CS) { ++l; if ((l&15)==((l>>4)&15)) { l&=~15; *--s=RRcomma; } } };
+        // get the tens and the comma pos
+        cs = (rU32) ( (num+NUMSZ) - s ) + ( ( ((l>>4)&15)) << 24 );
+        // get the length that we copied
+        l = (rU32) ( (num+NUMSZ) - s );
+        // copy it
+        goto scopy;
+
+      case 'u': // unsigned
+      case 'i':
+      case 'd': // integer
+        // get the integer and abs it
+        if ( fl&BI )
+        {
+          rS64 i64 = va_arg(va,rS64); n64 = (rU64)i64; if ((f[0]!='u') && (i64<0)) { n64=(rU64)-i64; fl|=NG; }
+        }
+        else
+        {
+          rS32 i = va_arg(va,rS32); n64 = (rU32)i; if ((f[0]!='u') && (i<0)) { n64=(rU32)-i; fl|=NG; }
+        }
+
+        #ifndef RR_SPRINTF_NOFLOAT
+        if (fl&KI) { if (n64<1024) pr=0; else if (pr==-1) pr=1; fv=(double)(rS64)n64; goto doafloat; } 
+        #endif
+
+        // convert to string
+        s = num+NUMSZ; l=0; 
+        
+        for(;;)
+        {
+          // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators)
+          char * o=s-8;
+          if (n64>=100000000) { n = (rU32)( n64 % 100000000);  n64 /= 100000000; } else {n = (rU32)n64; n64 = 0; }
+          if((fl&CS)==0) { while(n) { s-=2; *(rU16*)s=*(rU16*)&rrdiglookup[(n%100)*2]; n/=100; } }
+          while (n) { if ( ( fl&CS) && (l++==3) ) { l=0; *--s=RRcomma; --o; } else { *--s=(char)(n%10)+'0'; n/=10; } }
+          if (n64==0) { if ((s[0]=='0') && (s!=(num+NUMSZ))) ++s; break; }
+          while (s!=o) if ( ( fl&CS) && (l++==3) ) { l=0; *--s=RRcomma; --o; } else { *--s='0'; }
+        }
+
+        tail[0]=0;
+        // sign
+        lead[0]=0; if (fl&NG) { lead[0]=1; lead[1]='-'; } else if (fl&LS) { lead[0]=1; lead[1]=' '; } else if (fl&LP) { lead[0]=1; lead[1]='+'; };
+
+        // get the length that we copied
+        l = (rU32) ( (num+NUMSZ) - s ); if ( l == 0 ) { *--s='0'; l = 1; }
+        cs = l + (3<<24);
+        if (pr<0) pr = 0;
+
+       scopy: 
+        // get fw=leading/trailing space, pr=leading zeros
+        if (pr<(rS32)l) pr = l;
+        n = pr + lead[0] + tail[0] + tz;
+        if (fw<(rS32)n) fw = n;
+        fw -= n;
+        pr -= l;
+
+        // handle right justify and leading zeros
+        if ( (fl&LJ)==0 )
+        {
+          if (fl&LZ) // if leading zeros, everything is in pr
+          { 
+            pr = (fw>pr)?fw:pr;
+            fw = 0;
+          }
+          else
+          {
+            fl &= ~CS; // if no leading zeros, then no commas
+          }
+        }
+
+        // copy the spaces and/or zeros
+        if (fw+pr)
+        {
+          rS32 i; rU32 c; 
+
+          // copy leading spaces (or when doing %8.4d stuff)
+          if ( (fl&LJ)==0 ) while(fw>0) { cb_buf_clamp(i,fw); fw -= i; while(i) { if ((((rUINTa)bf)&3)==0) break; *bf++=' '; --i; } while(i>=4) { *(rU32*)bf=0x20202020; bf+=4; i-=4; } while (i) {*bf++=' '; --i;} chk_cb_buf(1); }
+        
+          // copy leader
+          sn=lead+1; while(lead[0]) { cb_buf_clamp(i,lead[0]); lead[0] -= (char)i; while (i) {*bf++=*sn++; --i;} chk_cb_buf(1); }
+          
+          // copy leading zeros
+          c = cs >> 24; cs &= 0xffffff;
+          cs = (fl&CS)?((rU32)(c-((pr+cs)%(c+1)))):0;
+          while(pr>0) { cb_buf_clamp(i,pr); pr -= i; if((fl&CS)==0) { while(i) { if ((((rUINTa)bf)&3)==0) break; *bf++='0'; --i; } while(i>=4) { *(rU32*)bf=0x30303030; bf+=4; i-=4; } } while (i) { if((fl&CS) && (cs++==c)) { cs = 0; *bf++=RRcomma; } else *bf++='0'; --i; } chk_cb_buf(1); }
+        }
+
+        // copy leader if there is still one
+        sn=lead+1; while(lead[0]) { rS32 i; cb_buf_clamp(i,lead[0]); lead[0] -= (char)i; while (i) {*bf++=*sn++; --i;} chk_cb_buf(1); }
+
+        // copy the string
+        n = l; while (n) { rS32 i; cb_buf_clamp(i,n); n-=i; RR_UNALIGNED( while(i>=4) { *(rU32*)bf=*(rU32*)s; bf+=4; s+=4; i-=4; } ) while (i) {*bf++=*s++; --i;} chk_cb_buf(1); }
+
+        // copy trailing zeros
+        while(tz) { rS32 i; cb_buf_clamp(i,tz); tz -= i; while(i) { if ((((rUINTa)bf)&3)==0) break; *bf++='0'; --i; } while(i>=4) { *(rU32*)bf=0x30303030; bf+=4; i-=4; } while (i) {*bf++='0'; --i;} chk_cb_buf(1); }
+
+        // copy tail if there is one
+        sn=tail+1; while(tail[0]) { rS32 i; cb_buf_clamp(i,tail[0]); tail[0] -= (char)i; while (i) {*bf++=*sn++; --i;} chk_cb_buf(1); }
+
+        // handle the left justify
+        if (fl&LJ) if (fw>0) { while (fw) { rS32 i; cb_buf_clamp(i,fw); fw-=i; while(i) { if ((((rUINTa)bf)&3)==0) break; *bf++=' '; --i; } while(i>=4) { *(rU32*)bf=0x20202020; bf+=4; i-=4; } while (i--) *bf++=' '; chk_cb_buf(1); } }
+        break;
+
+      default: // unknown, just copy code
+        s = num + NUMSZ -1; *s = f[0];
+        l = 1;
+        fw=pr=fl=0;
+        lead[0]=0; tail[0]=0; pr = 0; dp = 0; cs = 0;
+        goto scopy;
+    }
+    ++f;
+  }
+ endfmt:
+
+  if (!callback) 
+    *bf = 0;
+  else
+    flush_cb();
+ 
+ done:
+  return tlen + (int)(bf-buf);
+}
+
+// cleanup
+#undef LJ
+#undef LP
+#undef LS
+#undef LX
+#undef LZ
+#undef BI
+#undef CS
+#undef NG
+#undef KI
+#undef NUMSZ
+#undef chk_cb_bufL
+#undef chk_cb_buf
+#undef flush_cb
+#undef cb_buf_clamp
+
+// ============================================================================
+//   wrapper functions
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( sprintf )( char * buf, char const * fmt, ... )
+{
+  va_list va;
+  va_start( va, fmt );
+  return RR_SPRINTF_DECORATE( vsprintfcb )( 0, 0, buf, fmt, va );
+}
+
+typedef struct RRCCS
+{
+  char * buf;
+  int count;
+  char tmp[ RR_SPRINTF_MIN ];
+} RRCCS;
+
+static char * rrclampcallback( char * buf, void * user, int len )
+{
+  RRCCS * c = (RRCCS*)user;
+
+  if ( len > c->count ) len = c->count;
+
+  if (len)
+  {
+    if ( buf != c->buf )
+    {
+      char * s, * d, * se;
+      d = c->buf; s = buf; se = buf+len;
+      do{ *d++ = *s++; } while (s<se);
+    }
+    c->buf += len;
+    c->count -= len;
+  }
+  
+  if ( c->count <= 0 ) return 0;
+  return ( c->count >= RR_SPRINTF_MIN ) ? c->buf : c->tmp; // go direct into buffer if you can
+}
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va )
+{
+  RRCCS c;
+  int l;
+
+  if ( count == 0 )
+    return 0;
+
+  c.buf = buf;
+  c.count = count;
+
+  RR_SPRINTF_DECORATE( vsprintfcb )( rrclampcallback, &c, rrclampcallback(0,&c,0), fmt, va );
+  
+  // zero-terminate
+  l = (int)( c.buf - buf );
+  if ( l >= count ) // should never be greater, only equal (or less) than count
+    l = count - 1;
+  buf[l] = 0;
+
+  return l;
+}
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( snprintf )( char * buf, int count, char const * fmt, ... )
+{
+  va_list va;
+  va_start( va, fmt );
+
+  return RR_SPRINTF_DECORATE( vsnprintf )( buf, count, fmt, va );
+}
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintf )( char * buf, char const * fmt, va_list va )
+{
+  return RR_SPRINTF_DECORATE( vsprintfcb )( 0, 0, buf, fmt, va );
+}
+
+// =======================================================================
+//   low level float utility functions
+
+#ifndef RR_SPRINTF_NOFLOAT
+
+ // copies d to bits w/ strict aliasing (this compiles to nothing on /Ox)
+ #define RRCOPYFP(dest,src) { int cn; for(cn=0;cn<8;cn++) ((char*)&dest)[cn]=((char*)&src)[cn]; }
+ 
+// get float info
+static rS32 rrreal_to_parts( rS64 * bits, rS32 * expo, double value )
+{
+  double d;
+  rS64 b = 0;
+
+  // load value and round at the frac_digits
+  d = value;
+
+  RRCOPYFP( b, d );
+
+  *bits = b & ((((rU64)1)<<52)-1);
+  *expo = ((b >> 52) & 2047)-1023;
+    
+  return (rS32)(b >> 63);
+}
+
+static double const rrbot[23]={1e+000,1e+001,1e+002,1e+003,1e+004,1e+005,1e+006,1e+007,1e+008,1e+009,1e+010,1e+011,1e+012,1e+013,1e+014,1e+015,1e+016,1e+017,1e+018,1e+019,1e+020,1e+021,1e+022};
+static double const rrnegbot[22]={1e-001,1e-002,1e-003,1e-004,1e-005,1e-006,1e-007,1e-008,1e-009,1e-010,1e-011,1e-012,1e-013,1e-014,1e-015,1e-016,1e-017,1e-018,1e-019,1e-020,1e-021,1e-022};
+static double const rrnegboterr[22]={-5.551115123125783e-018,-2.0816681711721684e-019,-2.0816681711721686e-020,-4.7921736023859299e-021,-8.1803053914031305e-022,4.5251888174113741e-023,4.5251888174113739e-024,-2.0922560830128471e-025,-6.2281591457779853e-026,-3.6432197315497743e-027,6.0503030718060191e-028,2.0113352370744385e-029,-3.0373745563400371e-030,1.1806906454401013e-032,-7.7705399876661076e-032,2.0902213275965398e-033,-7.1542424054621921e-034,-7.1542424054621926e-035,2.4754073164739869e-036,5.4846728545790429e-037,9.2462547772103625e-038,-4.8596774326570872e-039};
+static double const rrtop[13]={1e+023,1e+046,1e+069,1e+092,1e+115,1e+138,1e+161,1e+184,1e+207,1e+230,1e+253,1e+276,1e+299};
+static double const rrnegtop[13]={1e-023,1e-046,1e-069,1e-092,1e-115,1e-138,1e-161,1e-184,1e-207,1e-230,1e-253,1e-276,1e-299};
+static double const rrtoperr[13]={8388608,6.8601809640529717e+028,-7.253143638152921e+052,-4.3377296974619174e+075,-1.5559416129466825e+098,-3.2841562489204913e+121,-3.7745893248228135e+144,-1.7356668416969134e+167,-3.8893577551088374e+190,-9.9566444326005119e+213,6.3641293062232429e+236,-5.2069140800249813e+259,-5.2504760255204387e+282};
+static double const rrnegtoperr[13]={3.9565301985100693e-040,-2.299904345391321e-063,3.6506201437945798e-086,1.1875228833981544e-109,-5.0644902316928607e-132,-6.7156837247865426e-155,-2.812077463003139e-178,-5.7778912386589953e-201,7.4997100559334532e-224,-4.6439668915134491e-247,-6.3691100762962136e-270,-9.436808465446358e-293,8.0970921678014997e-317};
+
+#if defined(_MSC_VER) && (_MSC_VER<=1200)                                                                                                                                                                                       
+static rU64 const rrpot[20]={1,10,100,1000, 10000,100000,1000000,10000000, 100000000,1000000000,10000000000,100000000000,  1000000000000,10000000000000,100000000000000,1000000000000000,  10000000000000000,100000000000000000,1000000000000000000,10000000000000000000U };
+#define rrtento19th ((rU64)1000000000000000000)
+#else
+static rU64 const rrpot[20]={1,10,100,1000, 10000,100000,1000000,10000000, 100000000,1000000000,10000000000ULL,100000000000ULL,  1000000000000ULL,10000000000000ULL,100000000000000ULL,1000000000000000ULL,  10000000000000000ULL,100000000000000000ULL,1000000000000000000ULL,10000000000000000000ULL };
+#define rrtento19th (1000000000000000000ULL)
+#endif
+
+#define rrddmulthi(oh,ol,xh,yh) \
+{ \
+  double ahi=0,alo,bhi=0,blo; \
+  rS64 bt; \
+  oh = xh * yh; \
+  RRCOPYFP(bt,xh); bt&=((~(rU64)0)<<27); RRCOPYFP(ahi,bt); alo = xh-ahi; \
+  RRCOPYFP(bt,yh); bt&=((~(rU64)0)<<27); RRCOPYFP(bhi,bt); blo = yh-bhi; \
+  ol = ((ahi*bhi-oh)+ahi*blo+alo*bhi)+alo*blo; \
+}
+
+#define rrddtoS64(ob,xh,xl) \
+{ \
+  double ahi=0,alo,vh,t;\
+  ob = (rS64)ph;\
+  vh=(double)ob;\
+  ahi = ( xh - vh );\
+  t = ( ahi - xh );\
+  alo = (xh-(ahi-t))-(vh+t);\
+  ob += (rS64)(ahi+alo+xl);\
+}
+
+
+#define rrddrenorm(oh,ol) { double s; s=oh+ol; ol=ol-(s-oh); oh=s; }
+
+#define rrddmultlo(oh,ol,xh,xl,yh,yl) \
+  ol = ol + ( xh*yl + xl*yh ); \
+
+#define rrddmultlos(oh,ol,xh,yl) \
+  ol = ol + ( xh*yl ); \
+
+static void rrraise_to_power10( double *ohi, double *olo, double d, rS32 power )  // power can be -323 to +350
+{
+  double ph, pl;
+  if ((power>=0) && (power<=22))
+  {
+    rrddmulthi(ph,pl,d,rrbot[power]);
+  }
+  else
+  {
+    rS32 e,et,eb;
+    double p2h,p2l;
+
+    e=power; if (power<0) e=-e; 
+    et = (e*0x2c9)>>14;/* %23 */ if (et>13) et=13; eb = e-(et*23);
+
+    ph = d; pl = 0.0;
+    if (power<0)
+    {
+      if (eb) { --eb; rrddmulthi(ph,pl,d,rrnegbot[eb]); rrddmultlos(ph,pl,d,rrnegboterr[eb]); }
+      if (et)
+      { 
+        rrddrenorm(ph,pl);
+        --et; rrddmulthi(p2h,p2l,ph,rrnegtop[et]); rrddmultlo(p2h,p2l,ph,pl,rrnegtop[et],rrnegtoperr[et]); ph=p2h;pl=p2l;
+      }
+    }
+    else
+    {
+      if (eb) 
+      { 
+        e = eb; if (eb>22) eb=22; e -= eb;
+        rrddmulthi(ph,pl,d,rrbot[eb]); 
+        if ( e ) { rrddrenorm(ph,pl); rrddmulthi(p2h,p2l,ph,rrbot[e]); rrddmultlos(p2h,p2l,rrbot[e],pl); ph=p2h;pl=p2l; }
+      }
+      if (et)
+      {
+        rrddrenorm(ph,pl);
+        --et; rrddmulthi(p2h,p2l,ph,rrtop[et]); rrddmultlo(p2h,p2l,ph,pl,rrtop[et],rrtoperr[et]); ph=p2h;pl=p2l;
+      }
+    }
+  }
+  rrddrenorm(ph,pl);
+  *ohi = ph; *olo = pl;
+}
+
+// given a float value, returns the significant bits in bits, and the position of the
+//   decimal point in decimal_pos.  +/-INF and NAN are specified by special values
+//   returned in the decimal_pos parameter.
+// frac_digits is absolute normally, but if you want from first significant digits (got %g and %e), or in 0x80000000
+static rS32 rrreal_to_str( char const * * start, rU32 * len, char *out, rS32 * decimal_pos, double value, rU32 frac_digits )
+{
+  double d;
+  rS64 bits = 0;
+  rS32 expo, e, ng, tens;
+
+  d = value;
+  RRCOPYFP(bits,d);
+  expo = (bits >> 52) & 2047;
+  ng = (rS32)(bits >> 63);
+  if (ng) d=-d;
+
+  if ( expo == 2047 ) // is nan or inf?
+  {
+    *start = (bits&((((rU64)1)<<52)-1)) ? "NaN" : "Inf";
+    *decimal_pos =  RRSPECIAL;
+    *len = 3;
+    return ng;
+  } 
+
+  if ( expo == 0 ) // is zero or denormal
+  {
+    if ((bits<<1)==0) // do zero
+    {
+      *decimal_pos = 1; 
+      *start = out;
+      out[0] = '0'; *len = 1;
+      return ng;
+    }
+    // find the right expo for denormals
+    {
+      rS64 v = ((rU64)1)<<51;
+      while ((bits&v)==0) { --expo; v >>= 1; }
+    }
+  }
+
+  // find the decimal exponent as well as the decimal bits of the value
+  {
+    double ph,pl;
+
+    // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046
+    tens=expo-1023; tens = (tens<0)?((tens*617)/2048):(((tens*1233)/4096)+1);
+
+    // move the significant bits into position and stick them into an int 
+    rrraise_to_power10( &ph, &pl, d, 18-tens );
+
+    // get full as much precision from double-double as possible
+    rrddtoS64( bits, ph,pl );
+
+    // check if we undershot
+    if ( ((rU64)bits) >= rrtento19th ) ++tens; 
+  }
+
+  // now do the rounding in integer land
+  frac_digits = ( frac_digits & 0x80000000 ) ? ( (frac_digits&0x7ffffff) + 1 ) : ( tens + frac_digits );
+  if ( ( frac_digits < 24 ) )
+  {
+    rU32 dg = 1; if ((rU64)bits >= rrpot[9] ) dg=10; while( (rU64)bits >= rrpot[dg] ) { ++dg; if (dg==20) goto noround; }
+    if ( frac_digits < dg )
+    {
+      rU64 r;
+      // add 0.5 at the right position and round
+      e = dg - frac_digits;
+      if ( (rU32)e >= 24 ) goto noround;
+      r = rrpot[e];
+      bits = bits + (r/2);
+      if ( (rU64)bits >= rrpot[dg] ) ++tens;
+      bits /= r;
+    } 
+    noround:;
+  }
+
+  // kill long trailing runs of zeros
+  if ( bits )
+  {
+    rU32 n; for(;;) { if ( bits<=0xffffffff ) break; if (bits%1000) goto donez; bits/=1000; } n = (rU32)bits; while ((n%1000)==0) n/=1000; bits=n; donez:;
+  }
+
+  // convert to string
+  out += 64;
+  e = 0; 
+  for(;;)
+  {
+    rU32 n;
+    char * o = out-8;
+    // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denomiators be damned)
+    if (bits>=100000000) { n = (rU32)( bits % 100000000);  bits /= 100000000; } else {n = (rU32)bits; bits = 0; }
+    while(n) { out-=2; *(rU16*)out=*(rU16*)&rrdiglookup[(n%100)*2]; n/=100; e+=2; }
+    if (bits==0) { if ((e) && (out[0]=='0')) { ++out; --e; } break; }
+    while( out!=o ) { *--out ='0'; ++e; }
+  }
+  
+  *decimal_pos = tens;
+  *start = out;
+  *len = e;
+  return ng;
+}
+
+#undef rrddmulthi
+#undef rrddrenorm
+#undef rrddmultlo
+#undef rrddmultlos
+#undef RRSPECIAL 
+#undef RRCOPYFP
+ 
+#endif
+
+// clean up
+#undef rU16
+#undef rU32 
+#undef rS32 
+#undef rU64
+#undef rS64
+#undef RRPUBLIC_DEC
+#undef RRPUBLIC_DEF
+#undef RR_SPRINTF_DECORATE
+#undef RR_UNALIGNED
+
+#endif
+
+#endif

From 454ed822a7a3c278cfc63bd0168bc54d1015b120 Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Mon, 5 Dec 2016 02:57:49 -0800
Subject: [PATCH 15/23] deprecate rrsprintf

---
 deprecated/rrsprintf.h | 1055 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1055 insertions(+)
 create mode 100644 deprecated/rrsprintf.h

diff --git a/deprecated/rrsprintf.h b/deprecated/rrsprintf.h
new file mode 100644
index 0000000..62962e3
--- /dev/null
+++ b/deprecated/rrsprintf.h
@@ -0,0 +1,1055 @@
+#ifndef RR_SPRINTF_H_INCLUDE
+#define RR_SPRINTF_H_INCLUDE
+
+/*
+Single file sprintf replacement.
+
+Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20. 
+Hereby placed in public domain.
+
+This is a full sprintf replacement that supports everything that
+the C runtime sprintfs support, including float/double, 64-bit integers,
+hex floats, field parameters (%*.*d stuff), length reads backs, etc.
+
+Why would you need this if sprintf already exists?  Well, first off,
+it's *much* faster (see below). It's also much smaller than the CRT
+versions code-space-wise. We've also added some simple improvements 
+that are super handy (commas in thousands, callbacks at buffer full,
+for example). Finally, the format strings for MSVC and GCC differ 
+for 64-bit integers (among other small things), so this lets you use 
+the same format strings in cross platform code.
+
+It uses the standard single file trick of being both the header file
+and the source itself. If you just include it normally, you just get 
+the header file function definitions. To get the code, you include
+it from a C or C++ file and define RR_SPRINTF_IMPLEMENTATION first.
+
+It only uses va_args macros from the C runtime to do it's work. It
+does cast doubles to S64s and shifts and divides U64s, which does 
+drag in CRT code on most platforms.
+
+It compiles to roughly 8K with float support, and 4K without.
+As a comparison, when using MSVC static libs, calling sprintf drags
+in 16K.
+
+API:
+====
+int rrsprintf( char * buf, char const * fmt, ... )
+int rrsnprintf( char * buf, int count, char const * fmt, ... )
+  Convert an arg list into a buffer.  rrsnprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int rrvsprintf( char * buf, char const * fmt, va_list va )
+int rrvsnprintf( char * buf, int count, char const * fmt, va_list va )
+  Convert a va_list arg list into a buffer.  rrvsnprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int rrvsprintfcb( RRSPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
+    typedef char * RRSPRINTFCB( char const * buf, void * user, int len );
+  Convert into a buffer, calling back every RR_SPRINTF_MIN chars.
+  Your callback can then copy the chars out, print them or whatever.
+  This function is actually the workhorse for everything else.
+  The buffer you pass in must hold at least RR_SPRINTF_MIN characters.
+    // you return the next buffer to use or 0 to stop converting
+
+void rrsetseparators( char comma, char period )
+  Set the comma and period characters to use.
+
+FLOATS/DOUBLES:
+===============
+This code uses a internal float->ascii conversion method that uses
+doubles with error correction (double-doubles, for ~105 bits of
+precision).  This conversion is round-trip perfect - that is, an atof
+of the values output here will give you the bit-exact double back.
+
+One difference is that our insignificant digits will be different than 
+with MSVC or GCC (but they don't match each other either).  We also 
+don't attempt to find the minimum length matching float (pre-MSVC15 
+doesn't either).
+
+If you don't need float or doubles at all, define RR_SPRINTF_NOFLOAT
+and you'll save 4K of code space.
+
+64-BIT INTS:
+============
+This library also supports 64-bit integers and you can use MSVC style or
+GCC style indicators (%I64d or %lld).  It supports the C99 specifiers
+for size_t and ptr_diff_t (%jd %zd) as well.
+
+EXTRAS:
+=======
+Like some GCCs, for integers and floats, you can use a ' (single quote)
+specifier and commas will be inserted on the thousands: "%'d" on 12345 
+would print 12,345.
+
+For integers and floats, you can use a "$" specifier and the number 
+will be converted to float and then divided to get kilo, mega, giga or
+tera and then printed, so "%$d" 1024 is "1.0 k", "%$.2d" 2536000 is 
+"2.42 m", etc.
+
+In addition to octal and hexadecimal conversions, you can print 
+integers in binary: "%b" for 256 would print 100.
+
+PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC):
+===================================================================
+"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC)
+"%24d" across all 32-bit ints (4.5x/4.2x faster)
+"%x" across all 32-bit ints (4.5x/3.8x faster)
+"%08x" across all 32-bit ints (4.3x/3.8x faster)
+"%f" across e-10 to e+10 floats (7.3x/6.0x faster)
+"%e" across e-10 to e+10 floats (8.1x/6.0x faster)
+"%g" across e-10 to e+10 floats (10.0x/7.1x faster)
+"%f" for values near e-300 (7.9x/6.5x faster)
+"%f" for values near e+300 (10.0x/9.1x faster)
+"%e" for values near e-300 (10.1x/7.0x faster)
+"%e" for values near e+300 (9.2x/6.0x faster)
+"%.320f" for values near e-300 (12.6x/11.2x faster)
+"%a" for random values (8.6x/4.3x faster)
+"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster)
+"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster)
+"%s%s%s" for 64 char strings (7.1x/7.3x faster)
+"...512 char string..." ( 35.0x/32.5x faster!)
+*/
+
+#ifdef RR_SPRINTF_STATIC
+#define RRPUBLIC_DEC static
+#define RRPUBLIC_DEF static
+#else
+#ifdef __cplusplus
+#define RRPUBLIC_DEC extern "C"
+#define RRPUBLIC_DEF extern "C"
+#else
+#define RRPUBLIC_DEC extern 
+#define RRPUBLIC_DEF
+#endif
+#endif
+
+#include <stdarg.h>  // for va_list()
+
+#ifndef RR_SPRINTF_MIN
+#define RR_SPRINTF_MIN 512 // how many characters per callback
+#endif
+typedef char * RRSPRINTFCB( char * buf, void * user, int len );
+
+#ifndef RR_SPRINTF_DECORATE
+#define RR_SPRINTF_DECORATE(name) rr##name  // define this before including if you want to change the names
+#endif
+
+#ifndef RR_SPRINTF_IMPLEMENTATION
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintf )( char * buf, char const * fmt, va_list va );
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va );
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( sprintf ) ( char * buf, char const * fmt, ... );
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( snprintf )( char * buf, int count, char const * fmt, ... );
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va );
+RRPUBLIC_DEF void RR_SPRINTF_DECORATE( setseparators )( char comma, char period );
+
+#else
+
+#include <stdlib.h>  // for va_arg()
+
+#define rU32 unsigned int
+#define rS32 signed int
+
+#ifdef _MSC_VER
+#define rU64 unsigned __int64
+#define rS64 signed __int64
+#else
+#define rU64 unsigned long long
+#define rS64 signed long long
+#endif
+#define rU16 unsigned short
+
+#ifndef rUINTa 
+#if defined(__ppc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64)
+#define rUINTa rU64
+#else
+#define rUINTa rU32
+#endif
+#endif
+
+#ifndef RR_SPRINTF_MSVC_MODE  // used for MSVC2013 and earlier (MSVC2015 matches GCC)
+#if defined(_MSC_VER) && (_MSC_VER<1900)
+#define RR_SPRINTF_MSVC_MODE
+#endif
+#endif
+
+#ifdef RR_SPRINTF_NOUNALIGNED  // define this before inclusion to force rrsprint to always use aligned accesses
+#define RR_UNALIGNED(code)
+#else
+#define RR_UNALIGNED(code) code
+#endif
+
+#ifndef RR_SPRINTF_NOFLOAT
+// internal float utility functions
+static rS32 rrreal_to_str( char const * * start, rU32 * len, char *out, rS32 * decimal_pos, double value, rU32 frac_digits );
+static rS32 rrreal_to_parts( rS64 * bits, rS32 * expo, double value );
+#define RRSPECIAL 0x7000
+#endif
+
+static char RRperiod='.';
+static char RRcomma=',';
+static char rrdiglookup[201]="00010203040506070809101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899";
+
+RRPUBLIC_DEF void RR_SPRINTF_DECORATE( setseparators )( char pcomma, char pperiod )
+{
+  RRperiod=pperiod;
+  RRcomma=pcomma;
+}
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
+{
+  static char hex[]="0123456789abcdefxp";
+  static char hexu[]="0123456789ABCDEFXP";
+  char * bf;
+  char const * f;
+  int tlen = 0;
+
+  bf = buf;
+  f = fmt;
+  for(;;)
+  {
+    rS32 fw,pr,tz; rU32 fl;
+
+    #define LJ 1
+    #define LP 2
+    #define LS 4
+    #define LX 8
+    #define LZ 16
+    #define BI 32
+    #define CS 64
+    #define NG 128
+    #define KI 256
+    #define HW 512
+ 
+    // macros for the callback buffer stuff
+    #define chk_cb_bufL(bytes) { int len = (int)(bf-buf); if ((len+(bytes))>=RR_SPRINTF_MIN) { tlen+=len; if (0==(bf=buf=callback(buf,user,len))) goto done; } }
+    #define chk_cb_buf(bytes) { if ( callback ) { chk_cb_bufL(bytes); } }
+    #define flush_cb() { chk_cb_bufL(RR_SPRINTF_MIN-1); } //flush if there is even one byte in the buffer
+    #define cb_buf_clamp(cl,v) cl = v; if ( callback ) { int lg = RR_SPRINTF_MIN-(int)(bf-buf); if (cl>lg) cl=lg; }
+
+    // fast copy everything up to the next % (or end of string)
+    for(;;)
+    { 
+      while (((rUINTa)f)&3)
+      {
+       schk1: if (f[0]=='%') goto scandd;
+       schk2: if (f[0]==0) goto endfmt;
+        chk_cb_buf(1); *bf++=f[0]; ++f;
+      } 
+      for(;;)
+      { 
+        rU32 v,c;
+        v=*(rU32*)f; c=(~v)&0x80808080;
+        if ((v-0x26262626)&c) goto schk1; 
+        if ((v-0x01010101)&c) goto schk2; 
+        if (callback) if ((RR_SPRINTF_MIN-(int)(bf-buf))<4) goto schk1;
+        *(rU32*)bf=v; bf+=4; f+=4;
+      }
+    } scandd:
+
+    ++f;
+
+    // ok, we have a percent, read the modifiers first
+    fw = 0; pr = -1; fl = 0; tz = 0;
+    
+    // flags
+    for(;;)
+    {
+      switch(f[0])
+      {
+        // if we have left just
+        case '-': fl|=LJ; ++f; continue;
+        // if we have leading plus
+        case '+': fl|=LP; ++f; continue; 
+        // if we have leading space
+        case ' ': fl|=LS; ++f; continue; 
+        // if we have leading 0x
+        case '#': fl|=LX; ++f; continue; 
+        // if we have thousand commas
+        case '\'': fl|=CS; ++f; continue; 
+        // if we have kilo marker
+        case '$': fl|=KI; ++f; continue; 
+        // if we have leading zero
+        case '0': fl|=LZ; ++f; goto flags_done; 
+        default: goto flags_done;
+      }
+    }
+    flags_done:
+   
+    // get the field width
+    if ( f[0] == '*' ) {fw = va_arg(va,rU32); ++f;} else { while (( f[0] >= '0' ) && ( f[0] <= '9' )) { fw = fw * 10 + f[0] - '0'; f++; } }
+    // get the precision
+    if ( f[0]=='.' ) { ++f; if ( f[0] == '*' ) {pr = va_arg(va,rU32); ++f;} else { pr = 0; while (( f[0] >= '0' ) && ( f[0] <= '9' )) { pr = pr * 10 + f[0] - '0'; f++; } } } 
+    
+    // handle integer size overrides
+    switch(f[0])
+    {
+      // are we halfwidth?
+      case 'h': fl|=HW; ++f; break;
+      // are we 64-bit (unix style)
+      case 'l': ++f; if ( f[0]=='l') { fl|=BI; ++f; } break;
+      // are we 64-bit on intmax? (c99)
+      case 'j': fl|=BI; ++f; break; 
+      // are we 64-bit on size_t or ptrdiff_t? (c99)
+      case 'z': case 't': fl|=((sizeof(char*)==8)?BI:0); ++f; break; 
+      // are we 64-bit (msft style)
+      case 'I': if ( ( f[1]=='6') && ( f[2]=='4') ) { fl|=BI; f+=3; } else if ( ( f[1]=='3') && ( f[2]=='2') ) { f+=3; } else { fl|=((sizeof(void*)==8)?BI:0); ++f; } break;
+      default: break;
+    }
+
+    // handle each replacement
+    switch( f[0] )
+    {
+      #define NUMSZ 512 // big enough for e308 (with commas) or e-307 
+      char num[NUMSZ]; 
+      char lead[8]; 
+      char tail[8]; 
+      char *s;
+      char const *h;
+      rU32 l,n,cs;
+      rU64 n64;
+      #ifndef RR_SPRINTF_NOFLOAT      
+      double fv; 
+      #endif
+      rS32 dp; char const * sn;
+
+      case 's':
+        // get the string
+        s = va_arg(va,char*); if (s==0) s = (char*)"null";
+        // get the length
+        sn = s;
+        for(;;)
+        { 
+          if ((((rUINTa)sn)&3)==0) break;
+         lchk:
+          if (sn[0]==0) goto ld;
+          ++sn;
+        }
+        n = 0xffffffff;
+        if (pr>=0) { n=(rU32)(sn-s); if (n>=(rU32)pr) goto ld; n=((rU32)(pr-n))>>2; }
+        while(n) 
+        { 
+          rU32 v=*(rU32*)sn;
+          if ((v-0x01010101)&(~v)&0x80808080UL) goto lchk; 
+          sn+=4; 
+          --n;
+        }
+        goto lchk;
+       ld:
+
+        l = (rU32) ( sn - s );
+        // clamp to precision
+        if ( l > (rU32)pr ) l = pr;
+        lead[0]=0; tail[0]=0; pr = 0; dp = 0; cs = 0;
+        // copy the string in
+        goto scopy;
+
+      case 'c': // char
+        // get the character
+        s = num + NUMSZ -1; *s = (char)va_arg(va,int);
+        l = 1;
+        lead[0]=0; tail[0]=0; pr = 0; dp = 0; cs = 0;
+        goto scopy;
+
+      case 'n': // weird write-bytes specifier
+        { int * d = va_arg(va,int*);
+        *d = tlen + (int)( bf - buf ); }
+        break;
+
+#ifdef RR_SPRINTF_NOFLOAT
+      case 'A': // float
+      case 'a': // hex float
+      case 'G': // float
+      case 'g': // float
+      case 'E': // float
+      case 'e': // float
+      case 'f': // float
+        va_arg(va,double); // eat it
+        s = (char*)"No float";
+        l = 8;
+        lead[0]=0; tail[0]=0; pr = 0; dp = 0; cs = 0;
+        goto scopy;
+#else
+      case 'A': // float
+        h=hexu;  
+        goto hexfloat;
+
+      case 'a': // hex float
+        h=hex;
+       hexfloat: 
+        fv = va_arg(va,double);
+        if (pr==-1) pr=6; // default is 6
+        // read the double into a string
+        if ( rrreal_to_parts( (rS64*)&n64, &dp, fv ) )
+          fl |= NG;
+  
+        s = num+64;
+
+        // sign
+        lead[0]=0; if (fl&NG) { lead[0]=1; lead[1]='-'; } else if (fl&LS) { lead[0]=1; lead[1]=' '; } else if (fl&LP) { lead[0]=1; lead[1]='+'; };
+
+        if (dp==-1023) dp=(n64)?-1022:0; else n64|=(((rU64)1)<<52);
+        n64<<=(64-56);
+        if (pr<15) n64+=((((rU64)8)<<56)>>(pr*4));
+        // add leading chars
+        
+        #ifdef RR_SPRINTF_MSVC_MODE
+        *s++='0';*s++='x';
+        #else
+        lead[1+lead[0]]='0'; lead[2+lead[0]]='x'; lead[0]+=2;
+        #endif
+        *s++=h[(n64>>60)&15]; n64<<=4;
+        if ( pr ) *s++=RRperiod;
+        sn = s;
+
+        // print the bits
+        n = pr; if (n>13) n = 13; if (pr>(rS32)n) tz=pr-n; pr = 0;
+        while(n--) { *s++=h[(n64>>60)&15]; n64<<=4; }
+
+        // print the expo
+        tail[1]=h[17];
+        if (dp<0) { tail[2]='-'; dp=-dp;} else tail[2]='+';
+        n = (dp>=1000)?6:((dp>=100)?5:((dp>=10)?4:3));
+        tail[0]=(char)n;
+        for(;;) { tail[n]='0'+dp%10; if (n<=3) break; --n; dp/=10; }
+
+        dp = (int)(s-sn);
+        l = (int)(s-(num+64));
+        s = num+64;
+        cs = 1 + (3<<24);
+        goto scopy;
+
+      case 'G': // float
+        h=hexu;
+        goto dosmallfloat;
+
+      case 'g': // float
+        h=hex;
+       dosmallfloat:   
+        fv = va_arg(va,double);
+        if (pr==-1) pr=6; else if (pr==0) pr = 1; // default is 6
+        // read the double into a string
+        if ( rrreal_to_str( &sn, &l, num, &dp, fv, (pr-1)|0x80000000 ) )
+          fl |= NG;
+
+        // clamp the precision and delete extra zeros after clamp
+        n = pr;
+        if ( l > (rU32)pr ) l = pr; while ((l>1)&&(pr)&&(sn[l-1]=='0')) { --pr; --l; }
+
+        // should we use %e
+        if ((dp<=-4)||(dp>(rS32)n))
+        {
+          if ( pr > (rS32)l ) pr = l-1; else if ( pr ) --pr; // when using %e, there is one digit before the decimal
+          goto doexpfromg;
+        }
+        // this is the insane action to get the pr to match %g sematics for %f
+        if(dp>0) { pr=(dp<(rS32)l)?l-dp:0; } else { pr = -dp+((pr>(rS32)l)?l:pr); }
+        goto dofloatfromg;
+
+      case 'E': // float
+        h=hexu;  
+        goto doexp;
+
+      case 'e': // float
+        h=hex;
+       doexp:   
+        fv = va_arg(va,double);
+        if (pr==-1) pr=6; // default is 6
+        // read the double into a string
+        if ( rrreal_to_str( &sn, &l, num, &dp, fv, pr|0x80000000 ) )
+          fl |= NG;
+       doexpfromg: 
+        tail[0]=0; 
+        lead[0]=0; if (fl&NG) { lead[0]=1; lead[1]='-'; } else if (fl&LS) { lead[0]=1; lead[1]=' '; } else if (fl&LP) { lead[0]=1; lead[1]='+'; };
+        if ( dp == RRSPECIAL ) { s=(char*)sn; cs=0; pr=0; goto scopy; }
+        s=num+64; 
+        // handle leading chars
+        *s++=sn[0];
+
+        if (pr) *s++=RRperiod;
+
+        // handle after decimal
+        if ((l-1)>(rU32)pr) l=pr+1;
+        for(n=1;n<l;n++) *s++=sn[n];
+        // trailing zeros
+        tz = pr-(l-1); pr=0;
+        // dump expo
+        tail[1]=h[0xe];
+        dp -= 1;
+        if (dp<0) { tail[2]='-'; dp=-dp;} else tail[2]='+';
+        #ifdef RR_SPRINTF_MSVC_MODE
+        n = 5;
+        #else
+        n = (dp>=100)?5:4;
+        #endif
+        tail[0]=(char)n;
+        for(;;) { tail[n]='0'+dp%10; if (n<=3) break; --n; dp/=10; }
+        cs = 1 + (3<<24); // how many tens
+        goto flt_lead;   
+
+      case 'f': // float
+        fv = va_arg(va,double);
+       doafloat: 
+        // do kilos
+        if (fl&KI) {while(fl<0x4000000) { if ((fv<1024.0) && (fv>-1024.0)) break; fv/=1024.0; fl+=0x1000000; }} 
+        if (pr==-1) pr=6; // default is 6
+        // read the double into a string
+        if ( rrreal_to_str( &sn, &l, num, &dp, fv, pr ) )
+          fl |= NG;
+        dofloatfromg:
+        tail[0]=0;
+        // sign
+        lead[0]=0; if (fl&NG) { lead[0]=1; lead[1]='-'; } else if (fl&LS) { lead[0]=1; lead[1]=' '; } else if (fl&LP) { lead[0]=1; lead[1]='+'; };
+        if ( dp == RRSPECIAL ) { s=(char*)sn; cs=0; pr=0; goto scopy; }
+        s=num+64; 
+
+        // handle the three decimal varieties
+        if (dp<=0) 
+        { 
+          rS32 i;
+          // handle 0.000*000xxxx
+          *s++='0'; if (pr) *s++=RRperiod; 
+          n=-dp; if((rS32)n>pr) n=pr; i=n; while(i) { if ((((rUINTa)s)&3)==0) break; *s++='0'; --i; } while(i>=4) { *(rU32*)s=0x30303030; s+=4; i-=4; } while(i) { *s++='0'; --i; }
+          if ((rS32)(l+n)>pr) l=pr-n; i=l; while(i) { *s++=*sn++; --i; }
+          tz = pr-(n+l);
+          cs = 1 + (3<<24); // how many tens did we write (for commas below)
+        }
+        else
+        {
+          cs = (fl&CS)?((600-(rU32)dp)%3):0;
+          if ((rU32)dp>=l)
+          {
+            // handle xxxx000*000.0
+            n=0; for(;;) { if ((fl&CS) && (++cs==4)) { cs = 0; *s++=RRcomma; } else { *s++=sn[n]; ++n; if (n>=l) break; } }
+            if (n<(rU32)dp)
+            {
+              n = dp - n;
+              if ((fl&CS)==0) { while(n) { if ((((rUINTa)s)&3)==0) break; *s++='0'; --n; }  while(n>=4) { *(rU32*)s=0x30303030; s+=4; n-=4; } }
+              while(n) { if ((fl&CS) && (++cs==4)) { cs = 0; *s++=RRcomma; } else { *s++='0'; --n; } }
+            }
+            cs = (int)(s-(num+64)) + (3<<24); // cs is how many tens
+            if (pr) { *s++=RRperiod; tz=pr;}
+          }
+          else
+          {
+            // handle xxxxx.xxxx000*000
+            n=0; for(;;) { if ((fl&CS) && (++cs==4)) { cs = 0; *s++=RRcomma; } else { *s++=sn[n]; ++n; if (n>=(rU32)dp) break; } }
+            cs = (int)(s-(num+64)) + (3<<24); // cs is how many tens
+            if (pr) *s++=RRperiod;
+            if ((l-dp)>(rU32)pr) l=pr+dp;
+            while(n<l) { *s++=sn[n]; ++n; }
+            tz = pr-(l-dp);
+          }
+        }
+        pr = 0;
+        
+        // handle k,m,g,t
+        if (fl&KI) { tail[0]=1; tail[1]=' '; { if (fl>>24) { tail[2]="_kmgt"[fl>>24]; tail[0]=2; } } };
+
+        flt_lead:
+        // get the length that we copied
+        l = (rU32) ( s-(num+64) );
+        s=num+64; 
+        goto scopy;
+#endif
+
+      case 'B': // upper binary
+        h = hexu;
+        goto binary;
+
+      case 'b': // lower binary
+        h = hex;
+        binary:
+        lead[0]=0;
+        if (fl&LX) { lead[0]=2;lead[1]='0';lead[2]=h[0xb]; }
+        l=(8<<4)|(1<<8);
+        goto radixnum;
+
+      case 'o': // octal
+        h = hexu;
+        lead[0]=0;
+        if (fl&LX) { lead[0]=1;lead[1]='0'; }
+        l=(3<<4)|(3<<8);
+        goto radixnum;
+
+      case 'p': // pointer
+        fl |= (sizeof(void*)==8)?BI:0;
+        pr = sizeof(void*)*2;
+        fl &= ~LZ; // 'p' only prints the pointer with zeros
+        // drop through to X
+      
+      case 'X': // upper binary
+        h = hexu;
+        goto dohexb;
+
+      case 'x': // lower binary
+        h = hex; dohexb:
+        l=(4<<4)|(4<<8);
+        lead[0]=0;
+        if (fl&LX) { lead[0]=2;lead[1]='0';lead[2]=h[16]; }
+       radixnum: 
+        // get the number
+        if ( fl&BI )
+          n64 = va_arg(va,rU64);
+        else
+          n64 = va_arg(va,rU32);
+
+        s = num + NUMSZ; dp = 0;
+        // clear tail, and clear leading if value is zero
+        tail[0]=0; if (n64==0) { lead[0]=0; if (pr==0) { l=0; cs = ( ((l>>4)&15)) << 24; goto scopy; } }
+        // convert to string
+        for(;;) { *--s = h[n64&((1<<(l>>8))-1)]; n64>>=(l>>8); if ( ! ( (n64) || ((rS32) ( (num+NUMSZ) - s ) < pr ) ) ) break; if ( fl&CS) { ++l; if ((l&15)==((l>>4)&15)) { l&=~15; *--s=RRcomma; } } };
+        // get the tens and the comma pos
+        cs = (rU32) ( (num+NUMSZ) - s ) + ( ( ((l>>4)&15)) << 24 );
+        // get the length that we copied
+        l = (rU32) ( (num+NUMSZ) - s );
+        // copy it
+        goto scopy;
+
+      case 'u': // unsigned
+      case 'i':
+      case 'd': // integer
+        // get the integer and abs it
+        if ( fl&BI )
+        {
+          rS64 i64 = va_arg(va,rS64); n64 = (rU64)i64; if ((f[0]!='u') && (i64<0)) { n64=(rU64)-i64; fl|=NG; }
+        }
+        else
+        {
+          rS32 i = va_arg(va,rS32); n64 = (rU32)i; if ((f[0]!='u') && (i<0)) { n64=(rU32)-i; fl|=NG; }
+        }
+
+        #ifndef RR_SPRINTF_NOFLOAT
+        if (fl&KI) { if (n64<1024) pr=0; else if (pr==-1) pr=1; fv=(double)(rS64)n64; goto doafloat; } 
+        #endif
+
+        // convert to string
+        s = num+NUMSZ; l=0; 
+        
+        for(;;)
+        {
+          // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators)
+          char * o=s-8;
+          if (n64>=100000000) { n = (rU32)( n64 % 100000000);  n64 /= 100000000; } else {n = (rU32)n64; n64 = 0; }
+          if((fl&CS)==0) { while(n) { s-=2; *(rU16*)s=*(rU16*)&rrdiglookup[(n%100)*2]; n/=100; } }
+          while (n) { if ( ( fl&CS) && (l++==3) ) { l=0; *--s=RRcomma; --o; } else { *--s=(char)(n%10)+'0'; n/=10; } }
+          if (n64==0) { if ((s[0]=='0') && (s!=(num+NUMSZ))) ++s; break; }
+          while (s!=o) if ( ( fl&CS) && (l++==3) ) { l=0; *--s=RRcomma; --o; } else { *--s='0'; }
+        }
+
+        tail[0]=0;
+        // sign
+        lead[0]=0; if (fl&NG) { lead[0]=1; lead[1]='-'; } else if (fl&LS) { lead[0]=1; lead[1]=' '; } else if (fl&LP) { lead[0]=1; lead[1]='+'; };
+
+        // get the length that we copied
+        l = (rU32) ( (num+NUMSZ) - s ); if ( l == 0 ) { *--s='0'; l = 1; }
+        cs = l + (3<<24);
+        if (pr<0) pr = 0;
+
+       scopy: 
+        // get fw=leading/trailing space, pr=leading zeros
+        if (pr<(rS32)l) pr = l;
+        n = pr + lead[0] + tail[0] + tz;
+        if (fw<(rS32)n) fw = n;
+        fw -= n;
+        pr -= l;
+
+        // handle right justify and leading zeros
+        if ( (fl&LJ)==0 )
+        {
+          if (fl&LZ) // if leading zeros, everything is in pr
+          { 
+            pr = (fw>pr)?fw:pr;
+            fw = 0;
+          }
+          else
+          {
+            fl &= ~CS; // if no leading zeros, then no commas
+          }
+        }
+
+        // copy the spaces and/or zeros
+        if (fw+pr)
+        {
+          rS32 i; rU32 c; 
+
+          // copy leading spaces (or when doing %8.4d stuff)
+          if ( (fl&LJ)==0 ) while(fw>0) { cb_buf_clamp(i,fw); fw -= i; while(i) { if ((((rUINTa)bf)&3)==0) break; *bf++=' '; --i; } while(i>=4) { *(rU32*)bf=0x20202020; bf+=4; i-=4; } while (i) {*bf++=' '; --i;} chk_cb_buf(1); }
+        
+          // copy leader
+          sn=lead+1; while(lead[0]) { cb_buf_clamp(i,lead[0]); lead[0] -= (char)i; while (i) {*bf++=*sn++; --i;} chk_cb_buf(1); }
+          
+          // copy leading zeros
+          c = cs >> 24; cs &= 0xffffff;
+          cs = (fl&CS)?((rU32)(c-((pr+cs)%(c+1)))):0;
+          while(pr>0) { cb_buf_clamp(i,pr); pr -= i; if((fl&CS)==0) { while(i) { if ((((rUINTa)bf)&3)==0) break; *bf++='0'; --i; } while(i>=4) { *(rU32*)bf=0x30303030; bf+=4; i-=4; } } while (i) { if((fl&CS) && (cs++==c)) { cs = 0; *bf++=RRcomma; } else *bf++='0'; --i; } chk_cb_buf(1); }
+        }
+
+        // copy leader if there is still one
+        sn=lead+1; while(lead[0]) { rS32 i; cb_buf_clamp(i,lead[0]); lead[0] -= (char)i; while (i) {*bf++=*sn++; --i;} chk_cb_buf(1); }
+
+        // copy the string
+        n = l; while (n) { rS32 i; cb_buf_clamp(i,n); n-=i; RR_UNALIGNED( while(i>=4) { *(rU32*)bf=*(rU32*)s; bf+=4; s+=4; i-=4; } ) while (i) {*bf++=*s++; --i;} chk_cb_buf(1); }
+
+        // copy trailing zeros
+        while(tz) { rS32 i; cb_buf_clamp(i,tz); tz -= i; while(i) { if ((((rUINTa)bf)&3)==0) break; *bf++='0'; --i; } while(i>=4) { *(rU32*)bf=0x30303030; bf+=4; i-=4; } while (i) {*bf++='0'; --i;} chk_cb_buf(1); }
+
+        // copy tail if there is one
+        sn=tail+1; while(tail[0]) { rS32 i; cb_buf_clamp(i,tail[0]); tail[0] -= (char)i; while (i) {*bf++=*sn++; --i;} chk_cb_buf(1); }
+
+        // handle the left justify
+        if (fl&LJ) if (fw>0) { while (fw) { rS32 i; cb_buf_clamp(i,fw); fw-=i; while(i) { if ((((rUINTa)bf)&3)==0) break; *bf++=' '; --i; } while(i>=4) { *(rU32*)bf=0x20202020; bf+=4; i-=4; } while (i--) *bf++=' '; chk_cb_buf(1); } }
+        break;
+
+      default: // unknown, just copy code
+        s = num + NUMSZ -1; *s = f[0];
+        l = 1;
+        fw=pr=fl=0;
+        lead[0]=0; tail[0]=0; pr = 0; dp = 0; cs = 0;
+        goto scopy;
+    }
+    ++f;
+  }
+ endfmt:
+
+  if (!callback) 
+    *bf = 0;
+  else
+    flush_cb();
+ 
+ done:
+  return tlen + (int)(bf-buf);
+}
+
+// cleanup
+#undef LJ
+#undef LP
+#undef LS
+#undef LX
+#undef LZ
+#undef BI
+#undef CS
+#undef NG
+#undef KI
+#undef NUMSZ
+#undef chk_cb_bufL
+#undef chk_cb_buf
+#undef flush_cb
+#undef cb_buf_clamp
+
+// ============================================================================
+//   wrapper functions
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( sprintf )( char * buf, char const * fmt, ... )
+{
+  va_list va;
+  va_start( va, fmt );
+  return RR_SPRINTF_DECORATE( vsprintfcb )( 0, 0, buf, fmt, va );
+}
+
+typedef struct RRCCS
+{
+  char * buf;
+  int count;
+  char tmp[ RR_SPRINTF_MIN ];
+} RRCCS;
+
+static char * rrclampcallback( char * buf, void * user, int len )
+{
+  RRCCS * c = (RRCCS*)user;
+
+  if ( len > c->count ) len = c->count;
+
+  if (len)
+  {
+    if ( buf != c->buf )
+    {
+      char * s, * d, * se;
+      d = c->buf; s = buf; se = buf+len;
+      do{ *d++ = *s++; } while (s<se);
+    }
+    c->buf += len;
+    c->count -= len;
+  }
+  
+  if ( c->count <= 0 ) return 0;
+  return ( c->count >= RR_SPRINTF_MIN ) ? c->buf : c->tmp; // go direct into buffer if you can
+}
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va )
+{
+  RRCCS c;
+  int l;
+
+  if ( count == 0 )
+    return 0;
+
+  c.buf = buf;
+  c.count = count;
+
+  RR_SPRINTF_DECORATE( vsprintfcb )( rrclampcallback, &c, rrclampcallback(0,&c,0), fmt, va );
+  
+  // zero-terminate
+  l = (int)( c.buf - buf );
+  if ( l >= count ) // should never be greater, only equal (or less) than count
+    l = count - 1;
+  buf[l] = 0;
+
+  return l;
+}
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( snprintf )( char * buf, int count, char const * fmt, ... )
+{
+  va_list va;
+  va_start( va, fmt );
+
+  return RR_SPRINTF_DECORATE( vsnprintf )( buf, count, fmt, va );
+}
+
+RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintf )( char * buf, char const * fmt, va_list va )
+{
+  return RR_SPRINTF_DECORATE( vsprintfcb )( 0, 0, buf, fmt, va );
+}
+
+// =======================================================================
+//   low level float utility functions
+
+#ifndef RR_SPRINTF_NOFLOAT
+
+ // copies d to bits w/ strict aliasing (this compiles to nothing on /Ox)
+ #define RRCOPYFP(dest,src) { int cn; for(cn=0;cn<8;cn++) ((char*)&dest)[cn]=((char*)&src)[cn]; }
+ 
+// get float info
+static rS32 rrreal_to_parts( rS64 * bits, rS32 * expo, double value )
+{
+  double d;
+  rS64 b = 0;
+
+  // load value and round at the frac_digits
+  d = value;
+
+  RRCOPYFP( b, d );
+
+  *bits = b & ((((rU64)1)<<52)-1);
+  *expo = ((b >> 52) & 2047)-1023;
+    
+  return (rS32)(b >> 63);
+}
+
+static double const rrbot[23]={1e+000,1e+001,1e+002,1e+003,1e+004,1e+005,1e+006,1e+007,1e+008,1e+009,1e+010,1e+011,1e+012,1e+013,1e+014,1e+015,1e+016,1e+017,1e+018,1e+019,1e+020,1e+021,1e+022};
+static double const rrnegbot[22]={1e-001,1e-002,1e-003,1e-004,1e-005,1e-006,1e-007,1e-008,1e-009,1e-010,1e-011,1e-012,1e-013,1e-014,1e-015,1e-016,1e-017,1e-018,1e-019,1e-020,1e-021,1e-022};
+static double const rrnegboterr[22]={-5.551115123125783e-018,-2.0816681711721684e-019,-2.0816681711721686e-020,-4.7921736023859299e-021,-8.1803053914031305e-022,4.5251888174113741e-023,4.5251888174113739e-024,-2.0922560830128471e-025,-6.2281591457779853e-026,-3.6432197315497743e-027,6.0503030718060191e-028,2.0113352370744385e-029,-3.0373745563400371e-030,1.1806906454401013e-032,-7.7705399876661076e-032,2.0902213275965398e-033,-7.1542424054621921e-034,-7.1542424054621926e-035,2.4754073164739869e-036,5.4846728545790429e-037,9.2462547772103625e-038,-4.8596774326570872e-039};
+static double const rrtop[13]={1e+023,1e+046,1e+069,1e+092,1e+115,1e+138,1e+161,1e+184,1e+207,1e+230,1e+253,1e+276,1e+299};
+static double const rrnegtop[13]={1e-023,1e-046,1e-069,1e-092,1e-115,1e-138,1e-161,1e-184,1e-207,1e-230,1e-253,1e-276,1e-299};
+static double const rrtoperr[13]={8388608,6.8601809640529717e+028,-7.253143638152921e+052,-4.3377296974619174e+075,-1.5559416129466825e+098,-3.2841562489204913e+121,-3.7745893248228135e+144,-1.7356668416969134e+167,-3.8893577551088374e+190,-9.9566444326005119e+213,6.3641293062232429e+236,-5.2069140800249813e+259,-5.2504760255204387e+282};
+static double const rrnegtoperr[13]={3.9565301985100693e-040,-2.299904345391321e-063,3.6506201437945798e-086,1.1875228833981544e-109,-5.0644902316928607e-132,-6.7156837247865426e-155,-2.812077463003139e-178,-5.7778912386589953e-201,7.4997100559334532e-224,-4.6439668915134491e-247,-6.3691100762962136e-270,-9.436808465446358e-293,8.0970921678014997e-317};
+
+#if defined(_MSC_VER) && (_MSC_VER<=1200)                                                                                                                                                                                       
+static rU64 const rrpot[20]={1,10,100,1000, 10000,100000,1000000,10000000, 100000000,1000000000,10000000000,100000000000,  1000000000000,10000000000000,100000000000000,1000000000000000,  10000000000000000,100000000000000000,1000000000000000000,10000000000000000000U };
+#define rrtento19th ((rU64)1000000000000000000)
+#else
+static rU64 const rrpot[20]={1,10,100,1000, 10000,100000,1000000,10000000, 100000000,1000000000,10000000000ULL,100000000000ULL,  1000000000000ULL,10000000000000ULL,100000000000000ULL,1000000000000000ULL,  10000000000000000ULL,100000000000000000ULL,1000000000000000000ULL,10000000000000000000ULL };
+#define rrtento19th (1000000000000000000ULL)
+#endif
+
+#define rrddmulthi(oh,ol,xh,yh) \
+{ \
+  double ahi=0,alo,bhi=0,blo; \
+  rS64 bt; \
+  oh = xh * yh; \
+  RRCOPYFP(bt,xh); bt&=((~(rU64)0)<<27); RRCOPYFP(ahi,bt); alo = xh-ahi; \
+  RRCOPYFP(bt,yh); bt&=((~(rU64)0)<<27); RRCOPYFP(bhi,bt); blo = yh-bhi; \
+  ol = ((ahi*bhi-oh)+ahi*blo+alo*bhi)+alo*blo; \
+}
+
+#define rrddtoS64(ob,xh,xl) \
+{ \
+  double ahi=0,alo,vh,t;\
+  ob = (rS64)ph;\
+  vh=(double)ob;\
+  ahi = ( xh - vh );\
+  t = ( ahi - xh );\
+  alo = (xh-(ahi-t))-(vh+t);\
+  ob += (rS64)(ahi+alo+xl);\
+}
+
+
+#define rrddrenorm(oh,ol) { double s; s=oh+ol; ol=ol-(s-oh); oh=s; }
+
+#define rrddmultlo(oh,ol,xh,xl,yh,yl) \
+  ol = ol + ( xh*yl + xl*yh ); \
+
+#define rrddmultlos(oh,ol,xh,yl) \
+  ol = ol + ( xh*yl ); \
+
+static void rrraise_to_power10( double *ohi, double *olo, double d, rS32 power )  // power can be -323 to +350
+{
+  double ph, pl;
+  if ((power>=0) && (power<=22))
+  {
+    rrddmulthi(ph,pl,d,rrbot[power]);
+  }
+  else
+  {
+    rS32 e,et,eb;
+    double p2h,p2l;
+
+    e=power; if (power<0) e=-e; 
+    et = (e*0x2c9)>>14;/* %23 */ if (et>13) et=13; eb = e-(et*23);
+
+    ph = d; pl = 0.0;
+    if (power<0)
+    {
+      if (eb) { --eb; rrddmulthi(ph,pl,d,rrnegbot[eb]); rrddmultlos(ph,pl,d,rrnegboterr[eb]); }
+      if (et)
+      { 
+        rrddrenorm(ph,pl);
+        --et; rrddmulthi(p2h,p2l,ph,rrnegtop[et]); rrddmultlo(p2h,p2l,ph,pl,rrnegtop[et],rrnegtoperr[et]); ph=p2h;pl=p2l;
+      }
+    }
+    else
+    {
+      if (eb) 
+      { 
+        e = eb; if (eb>22) eb=22; e -= eb;
+        rrddmulthi(ph,pl,d,rrbot[eb]); 
+        if ( e ) { rrddrenorm(ph,pl); rrddmulthi(p2h,p2l,ph,rrbot[e]); rrddmultlos(p2h,p2l,rrbot[e],pl); ph=p2h;pl=p2l; }
+      }
+      if (et)
+      {
+        rrddrenorm(ph,pl);
+        --et; rrddmulthi(p2h,p2l,ph,rrtop[et]); rrddmultlo(p2h,p2l,ph,pl,rrtop[et],rrtoperr[et]); ph=p2h;pl=p2l;
+      }
+    }
+  }
+  rrddrenorm(ph,pl);
+  *ohi = ph; *olo = pl;
+}
+
+// given a float value, returns the significant bits in bits, and the position of the
+//   decimal point in decimal_pos.  +/-INF and NAN are specified by special values
+//   returned in the decimal_pos parameter.
+// frac_digits is absolute normally, but if you want from first significant digits (got %g and %e), or in 0x80000000
+static rS32 rrreal_to_str( char const * * start, rU32 * len, char *out, rS32 * decimal_pos, double value, rU32 frac_digits )
+{
+  double d;
+  rS64 bits = 0;
+  rS32 expo, e, ng, tens;
+
+  d = value;
+  RRCOPYFP(bits,d);
+  expo = (bits >> 52) & 2047;
+  ng = (rS32)(bits >> 63);
+  if (ng) d=-d;
+
+  if ( expo == 2047 ) // is nan or inf?
+  {
+    *start = (bits&((((rU64)1)<<52)-1)) ? "NaN" : "Inf";
+    *decimal_pos =  RRSPECIAL;
+    *len = 3;
+    return ng;
+  } 
+
+  if ( expo == 0 ) // is zero or denormal
+  {
+    if ((bits<<1)==0) // do zero
+    {
+      *decimal_pos = 1; 
+      *start = out;
+      out[0] = '0'; *len = 1;
+      return ng;
+    }
+    // find the right expo for denormals
+    {
+      rS64 v = ((rU64)1)<<51;
+      while ((bits&v)==0) { --expo; v >>= 1; }
+    }
+  }
+
+  // find the decimal exponent as well as the decimal bits of the value
+  {
+    double ph,pl;
+
+    // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046
+    tens=expo-1023; tens = (tens<0)?((tens*617)/2048):(((tens*1233)/4096)+1);
+
+    // move the significant bits into position and stick them into an int 
+    rrraise_to_power10( &ph, &pl, d, 18-tens );
+
+    // get full as much precision from double-double as possible
+    rrddtoS64( bits, ph,pl );
+
+    // check if we undershot
+    if ( ((rU64)bits) >= rrtento19th ) ++tens; 
+  }
+
+  // now do the rounding in integer land
+  frac_digits = ( frac_digits & 0x80000000 ) ? ( (frac_digits&0x7ffffff) + 1 ) : ( tens + frac_digits );
+  if ( ( frac_digits < 24 ) )
+  {
+    rU32 dg = 1; if ((rU64)bits >= rrpot[9] ) dg=10; while( (rU64)bits >= rrpot[dg] ) { ++dg; if (dg==20) goto noround; }
+    if ( frac_digits < dg )
+    {
+      rU64 r;
+      // add 0.5 at the right position and round
+      e = dg - frac_digits;
+      if ( (rU32)e >= 24 ) goto noround;
+      r = rrpot[e];
+      bits = bits + (r/2);
+      if ( (rU64)bits >= rrpot[dg] ) ++tens;
+      bits /= r;
+    } 
+    noround:;
+  }
+
+  // kill long trailing runs of zeros
+  if ( bits )
+  {
+    rU32 n; for(;;) { if ( bits<=0xffffffff ) break; if (bits%1000) goto donez; bits/=1000; } n = (rU32)bits; while ((n%1000)==0) n/=1000; bits=n; donez:;
+  }
+
+  // convert to string
+  out += 64;
+  e = 0; 
+  for(;;)
+  {
+    rU32 n;
+    char * o = out-8;
+    // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denomiators be damned)
+    if (bits>=100000000) { n = (rU32)( bits % 100000000);  bits /= 100000000; } else {n = (rU32)bits; bits = 0; }
+    while(n) { out-=2; *(rU16*)out=*(rU16*)&rrdiglookup[(n%100)*2]; n/=100; e+=2; }
+    if (bits==0) { if ((e) && (out[0]=='0')) { ++out; --e; } break; }
+    while( out!=o ) { *--out ='0'; ++e; }
+  }
+  
+  *decimal_pos = tens;
+  *start = out;
+  *len = e;
+  return ng;
+}
+
+#undef rrddmulthi
+#undef rrddrenorm
+#undef rrddmultlo
+#undef rrddmultlos
+#undef RRSPECIAL 
+#undef RRCOPYFP
+ 
+#endif
+
+// clean up
+#undef rU16
+#undef rU32 
+#undef rS32 
+#undef rU64
+#undef rS64
+#undef RRPUBLIC_DEC
+#undef RRPUBLIC_DEF
+#undef RR_SPRINTF_DECORATE
+#undef RR_UNALIGNED
+
+#endif
+
+#endif

From fd23d7097d8ef8a0060d3096729e5ca4906d1f35 Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Mon, 5 Dec 2016 02:58:16 -0800
Subject: [PATCH 16/23] rename rrsprintf to stb_sprintf

---
 rrsprintf.h => stb_sprintf.h | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename rrsprintf.h => stb_sprintf.h (100%)

diff --git a/rrsprintf.h b/stb_sprintf.h
similarity index 100%
rename from rrsprintf.h
rename to stb_sprintf.h

From c9fe5bac480e666fab8fd10f899f4700bc3fb3dd Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Mon, 5 Dec 2016 03:48:37 -0800
Subject: [PATCH 17/23] rename all stb_sprintf identifiers to follow stb
 conventions

---
 stb.h                          |   1 +
 stb_sprintf.h                  | 617 +++++++++++++++++----------------
 tests/grid_reachability.c      |   3 +
 tests/stb.dsp                  |   4 +
 tests/test_c_compilation.c     |  12 +
 tests/test_cpp_compilation.cpp |   4 +
 6 files changed, 338 insertions(+), 303 deletions(-)

diff --git a/stb.h b/stb.h
index 6831476..4b2933c 100644
--- a/stb.h
+++ b/stb.h
@@ -730,6 +730,7 @@ STB_EXTERN int  stb_snprintf(char *s, size_t n, const char *fmt, ...);
 STB_EXTERN int  stb_vsnprintf(char *s, size_t n, const char *fmt, va_list v);
 
 #ifdef STB_DEFINE
+
 int stb_vsnprintf(char *s, size_t n, const char *fmt, va_list v)
 {
    int res;
diff --git a/stb_sprintf.h b/stb_sprintf.h
index 62962e3..8fdc70e 100644
--- a/stb_sprintf.h
+++ b/stb_sprintf.h
@@ -1,5 +1,14 @@
-#ifndef RR_SPRINTF_H_INCLUDE
-#define RR_SPRINTF_H_INCLUDE
+// stb_sprintf - v1.00 - public domain snprintf() implementation
+// originally by Jeff Roberts / RAD Game Tools, 2015/10/20
+// http://github.com/nothings/stb
+//
+// allowed types:  sc uidBboXx p AaGgEef n
+// lengths      :  h ll j z t I64 I32 I
+
+
+
+#ifndef STB_SPRINTF_H_INCLUDE
+#define STB_SPRINTF_H_INCLUDE
 
 /*
 Single file sprintf replacement.
@@ -22,7 +31,7 @@ the same format strings in cross platform code.
 It uses the standard single file trick of being both the header file
 and the source itself. If you just include it normally, you just get 
 the header file function definitions. To get the code, you include
-it from a C or C++ file and define RR_SPRINTF_IMPLEMENTATION first.
+it from a C or C++ file and define STB_SPRINTF_IMPLEMENTATION first.
 
 It only uses va_args macros from the C runtime to do it's work. It
 does cast doubles to S64s and shifts and divides U64s, which does 
@@ -34,25 +43,25 @@ in 16K.
 
 API:
 ====
-int rrsprintf( char * buf, char const * fmt, ... )
-int rrsnprintf( char * buf, int count, char const * fmt, ... )
+int stbsp_sprintf( char * buf, char const * fmt, ... )
+int stbsp_snprintf( char * buf, int count, char const * fmt, ... )
   Convert an arg list into a buffer.  rrsnprintf always returns
   a zero-terminated string (unlike regular snprintf).
 
-int rrvsprintf( char * buf, char const * fmt, va_list va )
-int rrvsnprintf( char * buf, int count, char const * fmt, va_list va )
+int stbsp_vsprintf( char * buf, char const * fmt, va_list va )
+int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va )
   Convert a va_list arg list into a buffer.  rrvsnprintf always returns
   a zero-terminated string (unlike regular snprintf).
 
-int rrvsprintfcb( RRSPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
-    typedef char * RRSPRINTFCB( char const * buf, void * user, int len );
-  Convert into a buffer, calling back every RR_SPRINTF_MIN chars.
+int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
+    typedef char * STBSP_SPRINTFCB( char const * buf, void * user, int len );
+  Convert into a buffer, calling back every STB_SPRINTF_MIN chars.
   Your callback can then copy the chars out, print them or whatever.
   This function is actually the workhorse for everything else.
-  The buffer you pass in must hold at least RR_SPRINTF_MIN characters.
+  The buffer you pass in must hold at least STB_SPRINTF_MIN characters.
     // you return the next buffer to use or 0 to stop converting
 
-void rrsetseparators( char comma, char period )
+void stbsp_set_seperators( char comma, char period )
   Set the comma and period characters to use.
 
 FLOATS/DOUBLES:
@@ -67,10 +76,10 @@ with MSVC or GCC (but they don't match each other either).  We also
 don't attempt to find the minimum length matching float (pre-MSVC15 
 doesn't either).
 
-If you don't need float or doubles at all, define RR_SPRINTF_NOFLOAT
+If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT
 and you'll save 4K of code space.
 
-64-BIT INTS:
+64-STBSP__INTMAXT INTS:
 ============
 This library also supports 64-bit integers and you can use MSVC style or
 GCC style indicators (%I64d or %lld).  It supports the C99 specifiers
@@ -111,94 +120,94 @@ PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC):
 "...512 char string..." ( 35.0x/32.5x faster!)
 */
 
-#ifdef RR_SPRINTF_STATIC
-#define RRPUBLIC_DEC static
-#define RRPUBLIC_DEF static
+#ifdef STB_SPRINTF_STATIC
+#define STBSP__PUBLICDEC static
+#define STBSP__PUBLICDEF static
 #else
 #ifdef __cplusplus
-#define RRPUBLIC_DEC extern "C"
-#define RRPUBLIC_DEF extern "C"
+#define STBSP__PUBLICDEC extern "C"
+#define STBSP__PUBLICDEF extern "C"
 #else
-#define RRPUBLIC_DEC extern 
-#define RRPUBLIC_DEF
+#define STBSP__PUBLICDEC extern 
+#define STBSP__PUBLICDEF
 #endif
 #endif
 
 #include <stdarg.h>  // for va_list()
 
-#ifndef RR_SPRINTF_MIN
-#define RR_SPRINTF_MIN 512 // how many characters per callback
+#ifndef STB_SPRINTF_MIN
+#define STB_SPRINTF_MIN 512 // how many characters per callback
 #endif
-typedef char * RRSPRINTFCB( char * buf, void * user, int len );
+typedef char * STBSP_SPRINTFCB( char * buf, void * user, int len );
 
-#ifndef RR_SPRINTF_DECORATE
-#define RR_SPRINTF_DECORATE(name) rr##name  // define this before including if you want to change the names
+#ifndef STB_SPRINTF_DECORATE
+#define STB_SPRINTF_DECORATE(name) stbsp_##name  // define this before including if you want to change the names
 #endif
 
-#ifndef RR_SPRINTF_IMPLEMENTATION
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsprintf )( char * buf, char const * fmt, va_list va );
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va );
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( sprintf ) ( char * buf, char const * fmt, ... );
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( snprintf )( char * buf, int count, char const * fmt, ... );
 
-RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintf )( char * buf, char const * fmt, va_list va );
-RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va );
-RRPUBLIC_DEF int RR_SPRINTF_DECORATE( sprintf ) ( char * buf, char const * fmt, ... );
-RRPUBLIC_DEF int RR_SPRINTF_DECORATE( snprintf )( char * buf, int count, char const * fmt, ... );
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsprintfcb )( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va );
+STBSP__PUBLICDEF void STB_SPRINTF_DECORATE( set_separators )( char comma, char period );
 
-RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va );
-RRPUBLIC_DEF void RR_SPRINTF_DECORATE( setseparators )( char comma, char period );
+#endif // STB_SPRINTF_H_INCLUDE
 
-#else
+#ifndef STB_SPRINTF_IMPLEMENTATION
 
 #include <stdlib.h>  // for va_arg()
 
-#define rU32 unsigned int
-#define rS32 signed int
+#define stbsp__uint32 unsigned int
+#define stbsp__int32 signed int
 
 #ifdef _MSC_VER
-#define rU64 unsigned __int64
-#define rS64 signed __int64
+#define stbsp__uint64 unsigned __int64
+#define stbsp__int64 signed __int64
 #else
-#define rU64 unsigned long long
-#define rS64 signed long long
+#define stbsp__uint64 unsigned long long
+#define stbsp__int64 signed long long
 #endif
-#define rU16 unsigned short
+#define stbsp__uint16 unsigned short
 
-#ifndef rUINTa 
+#ifndef stbsp__uintptr 
 #if defined(__ppc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64)
-#define rUINTa rU64
+#define stbsp__uintptr stbsp__uint64
 #else
-#define rUINTa rU32
+#define stbsp__uintptr stbsp__uint32
 #endif
 #endif
 
-#ifndef RR_SPRINTF_MSVC_MODE  // used for MSVC2013 and earlier (MSVC2015 matches GCC)
+#ifndef STB_SPRINTF_MSVC_MODE  // used for MSVC2013 and earlier (MSVC2015 matches GCC)
 #if defined(_MSC_VER) && (_MSC_VER<1900)
-#define RR_SPRINTF_MSVC_MODE
+#define STB_SPRINTF_MSVC_MODE
 #endif
 #endif
 
-#ifdef RR_SPRINTF_NOUNALIGNED  // define this before inclusion to force rrsprint to always use aligned accesses
-#define RR_UNALIGNED(code)
+#ifdef STB_SPRINTF_NOUNALIGNED  // define this before inclusion to force rrsprint to always use aligned accesses
+#define STBSP__UNALIGNED(code)
 #else
-#define RR_UNALIGNED(code) code
+#define STBSP__UNALIGNED(code) code
 #endif
 
-#ifndef RR_SPRINTF_NOFLOAT
+#ifndef STB_SPRINTF_NOFLOAT
 // internal float utility functions
-static rS32 rrreal_to_str( char const * * start, rU32 * len, char *out, rS32 * decimal_pos, double value, rU32 frac_digits );
-static rS32 rrreal_to_parts( rS64 * bits, rS32 * expo, double value );
-#define RRSPECIAL 0x7000
+static stbsp__int32 stbsp__real_to_str( char const * * start, stbsp__uint32 * len, char *out, stbsp__int32 * decimal_pos, double value, stbsp__uint32 frac_digits );
+static stbsp__int32 stbsp__real_to_parts( stbsp__int64 * bits, stbsp__int32 * expo, double value );
+#define STBSP__SPECIAL 0x7000
 #endif
 
-static char RRperiod='.';
-static char RRcomma=',';
-static char rrdiglookup[201]="00010203040506070809101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899";
+static char stbsp__period='.';
+static char stbsp__comma=',';
+static char stbsp__digitpair[201]="00010203040506070809101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899";
 
-RRPUBLIC_DEF void RR_SPRINTF_DECORATE( setseparators )( char pcomma, char pperiod )
+STBSP__PUBLICDEF void STB_SPRINTF_DECORATE( set_separators )( char pcomma, char pperiod )
 {
-  RRperiod=pperiod;
-  RRcomma=pcomma;
+  stbsp__period=pperiod;
+  stbsp__comma=pcomma;
 }
 
-RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsprintfcb )( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
 {
   static char hex[]="0123456789abcdefxp";
   static char hexu[]="0123456789ABCDEFXP";
@@ -210,42 +219,42 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
   f = fmt;
   for(;;)
   {
-    rS32 fw,pr,tz; rU32 fl;
+    stbsp__int32 fw,pr,tz; stbsp__uint32 fl;
 
-    #define LJ 1
-    #define LP 2
-    #define LS 4
-    #define LX 8
-    #define LZ 16
-    #define BI 32
-    #define CS 64
-    #define NG 128
-    #define KI 256
-    #define HW 512
+    #define STBSP__LEFTJUST 1
+    #define STBSP__LEADINGPLUS 2
+    #define STBSP__LEADINGSPACE 4
+    #define STBSP__LEADING_0X 8
+    #define STBSP__LEADINGZERO 16
+    #define STBSP__INTMAX 32
+    #define STBSP__TRIPLET_COMMA 64
+    #define STBSP__NEGATIVE 128
+    #define STBSP__METRIC_SUFFIX 256
+    #define STBSP__HALFWIDTH 512
  
     // macros for the callback buffer stuff
-    #define chk_cb_bufL(bytes) { int len = (int)(bf-buf); if ((len+(bytes))>=RR_SPRINTF_MIN) { tlen+=len; if (0==(bf=buf=callback(buf,user,len))) goto done; } }
-    #define chk_cb_buf(bytes) { if ( callback ) { chk_cb_bufL(bytes); } }
-    #define flush_cb() { chk_cb_bufL(RR_SPRINTF_MIN-1); } //flush if there is even one byte in the buffer
-    #define cb_buf_clamp(cl,v) cl = v; if ( callback ) { int lg = RR_SPRINTF_MIN-(int)(bf-buf); if (cl>lg) cl=lg; }
+    #define stbsp__chk_cb_bufL(bytes) { int len = (int)(bf-buf); if ((len+(bytes))>=STB_SPRINTF_MIN) { tlen+=len; if (0==(bf=buf=callback(buf,user,len))) goto done; } }
+    #define stbsp__chk_cb_buf(bytes) { if ( callback ) { stbsp__chk_cb_bufL(bytes); } }
+    #define stbsp__flush_cb() { stbsp__chk_cb_bufL(STB_SPRINTF_MIN-1); } //flush if there is even one byte in the buffer
+    #define stbsp__cb_buf_clamp(cl,v) cl = v; if ( callback ) { int lg = STB_SPRINTF_MIN-(int)(bf-buf); if (cl>lg) cl=lg; }
 
     // fast copy everything up to the next % (or end of string)
     for(;;)
     { 
-      while (((rUINTa)f)&3)
+      while (((stbsp__uintptr)f)&3)
       {
        schk1: if (f[0]=='%') goto scandd;
        schk2: if (f[0]==0) goto endfmt;
-        chk_cb_buf(1); *bf++=f[0]; ++f;
+        stbsp__chk_cb_buf(1); *bf++=f[0]; ++f;
       } 
       for(;;)
       { 
-        rU32 v,c;
-        v=*(rU32*)f; c=(~v)&0x80808080;
+        stbsp__uint32 v,c;
+        v=*(stbsp__uint32*)f; c=(~v)&0x80808080;
         if ((v-0x26262626)&c) goto schk1; 
         if ((v-0x01010101)&c) goto schk2; 
-        if (callback) if ((RR_SPRINTF_MIN-(int)(bf-buf))<4) goto schk1;
-        *(rU32*)bf=v; bf+=4; f+=4;
+        if (callback) if ((STB_SPRINTF_MIN-(int)(bf-buf))<4) goto schk1;
+        *(stbsp__uint32*)bf=v; bf+=4; f+=4;
       }
     } scandd:
 
@@ -259,61 +268,63 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
     {
       switch(f[0])
       {
-        // if we have left just
-        case '-': fl|=LJ; ++f; continue;
+        // if we have left justify
+        case '-': fl|=STBSP__LEFTJUST; ++f; continue;
         // if we have leading plus
-        case '+': fl|=LP; ++f; continue; 
+        case '+': fl|=STBSP__LEADINGPLUS; ++f; continue; 
         // if we have leading space
-        case ' ': fl|=LS; ++f; continue; 
+        case ' ': fl|=STBSP__LEADINGSPACE; ++f; continue; 
         // if we have leading 0x
-        case '#': fl|=LX; ++f; continue; 
+        case '#': fl|=STBSP__LEADING_0X; ++f; continue; 
         // if we have thousand commas
-        case '\'': fl|=CS; ++f; continue; 
+        case '\'': fl|=STBSP__TRIPLET_COMMA; ++f; continue; 
         // if we have kilo marker
-        case '$': fl|=KI; ++f; continue; 
+        case '$': fl|=STBSP__METRIC_SUFFIX; ++f; continue; 
         // if we have leading zero
-        case '0': fl|=LZ; ++f; goto flags_done; 
+        case '0': fl|=STBSP__LEADINGZERO; ++f; goto flags_done; 
         default: goto flags_done;
       }
     }
     flags_done:
    
     // get the field width
-    if ( f[0] == '*' ) {fw = va_arg(va,rU32); ++f;} else { while (( f[0] >= '0' ) && ( f[0] <= '9' )) { fw = fw * 10 + f[0] - '0'; f++; } }
+    if ( f[0] == '*' ) {fw = va_arg(va,stbsp__uint32); ++f;} else { while (( f[0] >= '0' ) && ( f[0] <= '9' )) { fw = fw * 10 + f[0] - '0'; f++; } }
     // get the precision
-    if ( f[0]=='.' ) { ++f; if ( f[0] == '*' ) {pr = va_arg(va,rU32); ++f;} else { pr = 0; while (( f[0] >= '0' ) && ( f[0] <= '9' )) { pr = pr * 10 + f[0] - '0'; f++; } } } 
+    if ( f[0]=='.' ) { ++f; if ( f[0] == '*' ) {pr = va_arg(va,stbsp__uint32); ++f;} else { pr = 0; while (( f[0] >= '0' ) && ( f[0] <= '9' )) { pr = pr * 10 + f[0] - '0'; f++; } } } 
     
     // handle integer size overrides
     switch(f[0])
     {
       // are we halfwidth?
-      case 'h': fl|=HW; ++f; break;
+      case 'h': fl|=STBSP__HALFWIDTH; ++f; break;
       // are we 64-bit (unix style)
-      case 'l': ++f; if ( f[0]=='l') { fl|=BI; ++f; } break;
+      case 'l': ++f; if ( f[0]=='l') { fl|=STBSP__INTMAX; ++f; } break;
       // are we 64-bit on intmax? (c99)
-      case 'j': fl|=BI; ++f; break; 
+      case 'j': fl|=STBSP__INTMAX; ++f; break; 
       // are we 64-bit on size_t or ptrdiff_t? (c99)
-      case 'z': case 't': fl|=((sizeof(char*)==8)?BI:0); ++f; break; 
+      case 'z': case 't': fl|=((sizeof(char*)==8)?STBSP__INTMAX:0); ++f; break; 
       // are we 64-bit (msft style)
-      case 'I': if ( ( f[1]=='6') && ( f[2]=='4') ) { fl|=BI; f+=3; } else if ( ( f[1]=='3') && ( f[2]=='2') ) { f+=3; } else { fl|=((sizeof(void*)==8)?BI:0); ++f; } break;
+      case 'I': if ( ( f[1]=='6') && ( f[2]=='4') ) { fl|=STBSP__INTMAX; f+=3; }
+                else if ( ( f[1]=='3') && ( f[2]=='2') ) { f+=3; }
+                else { fl|=((sizeof(void*)==8)?STBSP__INTMAX:0); ++f; } break;
       default: break;
     }
 
     // handle each replacement
     switch( f[0] )
     {
-      #define NUMSZ 512 // big enough for e308 (with commas) or e-307 
-      char num[NUMSZ]; 
+      #define STBSP__NUMSZ   512 // big enough for e308 (with commas) or e-307 
+      char num[STBSP__NUMSZ]; 
       char lead[8]; 
       char tail[8]; 
       char *s;
       char const *h;
-      rU32 l,n,cs;
-      rU64 n64;
-      #ifndef RR_SPRINTF_NOFLOAT      
+      stbsp__uint32 l,n,cs;
+      stbsp__uint64 n64;
+      #ifndef STB_SPRINTF_NOFLOAT      
       double fv; 
       #endif
-      rS32 dp; char const * sn;
+      stbsp__int32 dp; char const * sn;
 
       case 's':
         // get the string
@@ -322,16 +333,16 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
         sn = s;
         for(;;)
         { 
-          if ((((rUINTa)sn)&3)==0) break;
+          if ((((stbsp__uintptr)sn)&3)==0) break;
          lchk:
           if (sn[0]==0) goto ld;
           ++sn;
         }
         n = 0xffffffff;
-        if (pr>=0) { n=(rU32)(sn-s); if (n>=(rU32)pr) goto ld; n=((rU32)(pr-n))>>2; }
+        if (pr>=0) { n=(stbsp__uint32)(sn-s); if (n>=(stbsp__uint32)pr) goto ld; n=((stbsp__uint32)(pr-n))>>2; }
         while(n) 
         { 
-          rU32 v=*(rU32*)sn;
+          stbsp__uint32 v=*(stbsp__uint32*)sn;
           if ((v-0x01010101)&(~v)&0x80808080UL) goto lchk; 
           sn+=4; 
           --n;
@@ -339,16 +350,16 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
         goto lchk;
        ld:
 
-        l = (rU32) ( sn - s );
+        l = (stbsp__uint32) ( sn - s );
         // clamp to precision
-        if ( l > (rU32)pr ) l = pr;
+        if ( l > (stbsp__uint32)pr ) l = pr;
         lead[0]=0; tail[0]=0; pr = 0; dp = 0; cs = 0;
         // copy the string in
         goto scopy;
 
       case 'c': // char
         // get the character
-        s = num + NUMSZ -1; *s = (char)va_arg(va,int);
+        s = num + STBSP__NUMSZ -1; *s = (char)va_arg(va,int);
         l = 1;
         lead[0]=0; tail[0]=0; pr = 0; dp = 0; cs = 0;
         goto scopy;
@@ -358,7 +369,7 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
         *d = tlen + (int)( bf - buf ); }
         break;
 
-#ifdef RR_SPRINTF_NOFLOAT
+#ifdef STB_SPRINTF_NOFLOAT
       case 'A': // float
       case 'a': // hex float
       case 'G': // float
@@ -382,30 +393,30 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
         fv = va_arg(va,double);
         if (pr==-1) pr=6; // default is 6
         // read the double into a string
-        if ( rrreal_to_parts( (rS64*)&n64, &dp, fv ) )
-          fl |= NG;
+        if ( stbsp__real_to_parts( (stbsp__int64*)&n64, &dp, fv ) )
+          fl |= STBSP__NEGATIVE;
   
         s = num+64;
 
         // sign
-        lead[0]=0; if (fl&NG) { lead[0]=1; lead[1]='-'; } else if (fl&LS) { lead[0]=1; lead[1]=' '; } else if (fl&LP) { lead[0]=1; lead[1]='+'; };
+        lead[0]=0; if (fl&STBSP__NEGATIVE) { lead[0]=1; lead[1]='-'; } else if (fl&STBSP__LEADINGSPACE) { lead[0]=1; lead[1]=' '; } else if (fl&STBSP__LEADINGPLUS) { lead[0]=1; lead[1]='+'; };
 
-        if (dp==-1023) dp=(n64)?-1022:0; else n64|=(((rU64)1)<<52);
+        if (dp==-1023) dp=(n64)?-1022:0; else n64|=(((stbsp__uint64)1)<<52);
         n64<<=(64-56);
-        if (pr<15) n64+=((((rU64)8)<<56)>>(pr*4));
+        if (pr<15) n64+=((((stbsp__uint64)8)<<56)>>(pr*4));
         // add leading chars
         
-        #ifdef RR_SPRINTF_MSVC_MODE
+        #ifdef STB_SPRINTF_MSVC_MODE
         *s++='0';*s++='x';
         #else
         lead[1+lead[0]]='0'; lead[2+lead[0]]='x'; lead[0]+=2;
         #endif
         *s++=h[(n64>>60)&15]; n64<<=4;
-        if ( pr ) *s++=RRperiod;
+        if ( pr ) *s++=stbsp__period;
         sn = s;
 
         // print the bits
-        n = pr; if (n>13) n = 13; if (pr>(rS32)n) tz=pr-n; pr = 0;
+        n = pr; if (n>13) n = 13; if (pr>(stbsp__int32)n) tz=pr-n; pr = 0;
         while(n--) { *s++=h[(n64>>60)&15]; n64<<=4; }
 
         // print the expo
@@ -431,21 +442,21 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
         fv = va_arg(va,double);
         if (pr==-1) pr=6; else if (pr==0) pr = 1; // default is 6
         // read the double into a string
-        if ( rrreal_to_str( &sn, &l, num, &dp, fv, (pr-1)|0x80000000 ) )
-          fl |= NG;
+        if ( stbsp__real_to_str( &sn, &l, num, &dp, fv, (pr-1)|0x80000000 ) )
+          fl |= STBSP__NEGATIVE;
 
         // clamp the precision and delete extra zeros after clamp
         n = pr;
-        if ( l > (rU32)pr ) l = pr; while ((l>1)&&(pr)&&(sn[l-1]=='0')) { --pr; --l; }
+        if ( l > (stbsp__uint32)pr ) l = pr; while ((l>1)&&(pr)&&(sn[l-1]=='0')) { --pr; --l; }
 
         // should we use %e
-        if ((dp<=-4)||(dp>(rS32)n))
+        if ((dp<=-4)||(dp>(stbsp__int32)n))
         {
-          if ( pr > (rS32)l ) pr = l-1; else if ( pr ) --pr; // when using %e, there is one digit before the decimal
+          if ( pr > (stbsp__int32)l ) pr = l-1; else if ( pr ) --pr; // when using %e, there is one digit before the decimal
           goto doexpfromg;
         }
         // this is the insane action to get the pr to match %g sematics for %f
-        if(dp>0) { pr=(dp<(rS32)l)?l-dp:0; } else { pr = -dp+((pr>(rS32)l)?l:pr); }
+        if(dp>0) { pr=(dp<(stbsp__int32)l)?l-dp:0; } else { pr = -dp+((pr>(stbsp__int32)l)?l:pr); }
         goto dofloatfromg;
 
       case 'E': // float
@@ -458,20 +469,20 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
         fv = va_arg(va,double);
         if (pr==-1) pr=6; // default is 6
         // read the double into a string
-        if ( rrreal_to_str( &sn, &l, num, &dp, fv, pr|0x80000000 ) )
-          fl |= NG;
+        if ( stbsp__real_to_str( &sn, &l, num, &dp, fv, pr|0x80000000 ) )
+          fl |= STBSP__NEGATIVE;
        doexpfromg: 
         tail[0]=0; 
-        lead[0]=0; if (fl&NG) { lead[0]=1; lead[1]='-'; } else if (fl&LS) { lead[0]=1; lead[1]=' '; } else if (fl&LP) { lead[0]=1; lead[1]='+'; };
-        if ( dp == RRSPECIAL ) { s=(char*)sn; cs=0; pr=0; goto scopy; }
+        lead[0]=0; if (fl&STBSP__NEGATIVE) { lead[0]=1; lead[1]='-'; } else if (fl&STBSP__LEADINGSPACE) { lead[0]=1; lead[1]=' '; } else if (fl&STBSP__LEADINGPLUS) { lead[0]=1; lead[1]='+'; };
+        if ( dp == STBSP__SPECIAL ) { s=(char*)sn; cs=0; pr=0; goto scopy; }
         s=num+64; 
         // handle leading chars
         *s++=sn[0];
 
-        if (pr) *s++=RRperiod;
+        if (pr) *s++=stbsp__period;
 
         // handle after decimal
-        if ((l-1)>(rU32)pr) l=pr+1;
+        if ((l-1)>(stbsp__uint32)pr) l=pr+1;
         for(n=1;n<l;n++) *s++=sn[n];
         // trailing zeros
         tz = pr-(l-1); pr=0;
@@ -479,7 +490,7 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
         tail[1]=h[0xe];
         dp -= 1;
         if (dp<0) { tail[2]='-'; dp=-dp;} else tail[2]='+';
-        #ifdef RR_SPRINTF_MSVC_MODE
+        #ifdef STB_SPRINTF_MSVC_MODE
         n = 5;
         #else
         n = (dp>=100)?5:4;
@@ -493,52 +504,52 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
         fv = va_arg(va,double);
        doafloat: 
         // do kilos
-        if (fl&KI) {while(fl<0x4000000) { if ((fv<1024.0) && (fv>-1024.0)) break; fv/=1024.0; fl+=0x1000000; }} 
+        if (fl&STBSP__METRIC_SUFFIX) {while(fl<0x4000000) { if ((fv<1024.0) && (fv>-1024.0)) break; fv/=1024.0; fl+=0x1000000; }} 
         if (pr==-1) pr=6; // default is 6
         // read the double into a string
-        if ( rrreal_to_str( &sn, &l, num, &dp, fv, pr ) )
-          fl |= NG;
+        if ( stbsp__real_to_str( &sn, &l, num, &dp, fv, pr ) )
+          fl |= STBSP__NEGATIVE;
         dofloatfromg:
         tail[0]=0;
         // sign
-        lead[0]=0; if (fl&NG) { lead[0]=1; lead[1]='-'; } else if (fl&LS) { lead[0]=1; lead[1]=' '; } else if (fl&LP) { lead[0]=1; lead[1]='+'; };
-        if ( dp == RRSPECIAL ) { s=(char*)sn; cs=0; pr=0; goto scopy; }
+        lead[0]=0; if (fl&STBSP__NEGATIVE) { lead[0]=1; lead[1]='-'; } else if (fl&STBSP__LEADINGSPACE) { lead[0]=1; lead[1]=' '; } else if (fl&STBSP__LEADINGPLUS) { lead[0]=1; lead[1]='+'; };
+        if ( dp == STBSP__SPECIAL ) { s=(char*)sn; cs=0; pr=0; goto scopy; }
         s=num+64; 
 
         // handle the three decimal varieties
         if (dp<=0) 
         { 
-          rS32 i;
+          stbsp__int32 i;
           // handle 0.000*000xxxx
-          *s++='0'; if (pr) *s++=RRperiod; 
-          n=-dp; if((rS32)n>pr) n=pr; i=n; while(i) { if ((((rUINTa)s)&3)==0) break; *s++='0'; --i; } while(i>=4) { *(rU32*)s=0x30303030; s+=4; i-=4; } while(i) { *s++='0'; --i; }
-          if ((rS32)(l+n)>pr) l=pr-n; i=l; while(i) { *s++=*sn++; --i; }
+          *s++='0'; if (pr) *s++=stbsp__period; 
+          n=-dp; if((stbsp__int32)n>pr) n=pr; i=n; while(i) { if ((((stbsp__uintptr)s)&3)==0) break; *s++='0'; --i; } while(i>=4) { *(stbsp__uint32*)s=0x30303030; s+=4; i-=4; } while(i) { *s++='0'; --i; }
+          if ((stbsp__int32)(l+n)>pr) l=pr-n; i=l; while(i) { *s++=*sn++; --i; }
           tz = pr-(n+l);
           cs = 1 + (3<<24); // how many tens did we write (for commas below)
         }
         else
         {
-          cs = (fl&CS)?((600-(rU32)dp)%3):0;
-          if ((rU32)dp>=l)
+          cs = (fl&STBSP__TRIPLET_COMMA)?((600-(stbsp__uint32)dp)%3):0;
+          if ((stbsp__uint32)dp>=l)
           {
             // handle xxxx000*000.0
-            n=0; for(;;) { if ((fl&CS) && (++cs==4)) { cs = 0; *s++=RRcomma; } else { *s++=sn[n]; ++n; if (n>=l) break; } }
-            if (n<(rU32)dp)
+            n=0; for(;;) { if ((fl&STBSP__TRIPLET_COMMA) && (++cs==4)) { cs = 0; *s++=stbsp__comma; } else { *s++=sn[n]; ++n; if (n>=l) break; } }
+            if (n<(stbsp__uint32)dp)
             {
               n = dp - n;
-              if ((fl&CS)==0) { while(n) { if ((((rUINTa)s)&3)==0) break; *s++='0'; --n; }  while(n>=4) { *(rU32*)s=0x30303030; s+=4; n-=4; } }
-              while(n) { if ((fl&CS) && (++cs==4)) { cs = 0; *s++=RRcomma; } else { *s++='0'; --n; } }
+              if ((fl&STBSP__TRIPLET_COMMA)==0) { while(n) { if ((((stbsp__uintptr)s)&3)==0) break; *s++='0'; --n; }  while(n>=4) { *(stbsp__uint32*)s=0x30303030; s+=4; n-=4; } }
+              while(n) { if ((fl&STBSP__TRIPLET_COMMA) && (++cs==4)) { cs = 0; *s++=stbsp__comma; } else { *s++='0'; --n; } }
             }
             cs = (int)(s-(num+64)) + (3<<24); // cs is how many tens
-            if (pr) { *s++=RRperiod; tz=pr;}
+            if (pr) { *s++=stbsp__period; tz=pr;}
           }
           else
           {
             // handle xxxxx.xxxx000*000
-            n=0; for(;;) { if ((fl&CS) && (++cs==4)) { cs = 0; *s++=RRcomma; } else { *s++=sn[n]; ++n; if (n>=(rU32)dp) break; } }
+            n=0; for(;;) { if ((fl&STBSP__TRIPLET_COMMA) && (++cs==4)) { cs = 0; *s++=stbsp__comma; } else { *s++=sn[n]; ++n; if (n>=(stbsp__uint32)dp) break; } }
             cs = (int)(s-(num+64)) + (3<<24); // cs is how many tens
-            if (pr) *s++=RRperiod;
-            if ((l-dp)>(rU32)pr) l=pr+dp;
+            if (pr) *s++=stbsp__period;
+            if ((l-dp)>(stbsp__uint32)pr) l=pr+dp;
             while(n<l) { *s++=sn[n]; ++n; }
             tz = pr-(l-dp);
           }
@@ -546,11 +557,11 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
         pr = 0;
         
         // handle k,m,g,t
-        if (fl&KI) { tail[0]=1; tail[1]=' '; { if (fl>>24) { tail[2]="_kmgt"[fl>>24]; tail[0]=2; } } };
+        if (fl&STBSP__METRIC_SUFFIX) { tail[0]=1; tail[1]=' '; { if (fl>>24) { tail[2]="_kmgt"[fl>>24]; tail[0]=2; } } };
 
         flt_lead:
         // get the length that we copied
-        l = (rU32) ( s-(num+64) );
+        l = (stbsp__uint32) ( s-(num+64) );
         s=num+64; 
         goto scopy;
 #endif
@@ -563,21 +574,21 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
         h = hex;
         binary:
         lead[0]=0;
-        if (fl&LX) { lead[0]=2;lead[1]='0';lead[2]=h[0xb]; }
+        if (fl&STBSP__LEADING_0X) { lead[0]=2;lead[1]='0';lead[2]=h[0xb]; }
         l=(8<<4)|(1<<8);
         goto radixnum;
 
       case 'o': // octal
         h = hexu;
         lead[0]=0;
-        if (fl&LX) { lead[0]=1;lead[1]='0'; }
+        if (fl&STBSP__LEADING_0X) { lead[0]=1;lead[1]='0'; }
         l=(3<<4)|(3<<8);
         goto radixnum;
 
       case 'p': // pointer
-        fl |= (sizeof(void*)==8)?BI:0;
+        fl |= (sizeof(void*)==8)?STBSP__INTMAX:0;
         pr = sizeof(void*)*2;
-        fl &= ~LZ; // 'p' only prints the pointer with zeros
+        fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros
         // drop through to X
       
       case 'X': // upper binary
@@ -588,23 +599,23 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
         h = hex; dohexb:
         l=(4<<4)|(4<<8);
         lead[0]=0;
-        if (fl&LX) { lead[0]=2;lead[1]='0';lead[2]=h[16]; }
+        if (fl&STBSP__LEADING_0X) { lead[0]=2;lead[1]='0';lead[2]=h[16]; }
        radixnum: 
         // get the number
-        if ( fl&BI )
-          n64 = va_arg(va,rU64);
+        if ( fl&STBSP__INTMAX )
+          n64 = va_arg(va,stbsp__uint64);
         else
-          n64 = va_arg(va,rU32);
+          n64 = va_arg(va,stbsp__uint32);
 
-        s = num + NUMSZ; dp = 0;
+        s = num + STBSP__NUMSZ; dp = 0;
         // clear tail, and clear leading if value is zero
         tail[0]=0; if (n64==0) { lead[0]=0; if (pr==0) { l=0; cs = ( ((l>>4)&15)) << 24; goto scopy; } }
         // convert to string
-        for(;;) { *--s = h[n64&((1<<(l>>8))-1)]; n64>>=(l>>8); if ( ! ( (n64) || ((rS32) ( (num+NUMSZ) - s ) < pr ) ) ) break; if ( fl&CS) { ++l; if ((l&15)==((l>>4)&15)) { l&=~15; *--s=RRcomma; } } };
+        for(;;) { *--s = h[n64&((1<<(l>>8))-1)]; n64>>=(l>>8); if ( ! ( (n64) || ((stbsp__int32) ( (num+STBSP__NUMSZ) - s ) < pr ) ) ) break; if ( fl&STBSP__TRIPLET_COMMA) { ++l; if ((l&15)==((l>>4)&15)) { l&=~15; *--s=stbsp__comma; } } };
         // get the tens and the comma pos
-        cs = (rU32) ( (num+NUMSZ) - s ) + ( ( ((l>>4)&15)) << 24 );
+        cs = (stbsp__uint32) ( (num+STBSP__NUMSZ) - s ) + ( ( ((l>>4)&15)) << 24 );
         // get the length that we copied
-        l = (rU32) ( (num+NUMSZ) - s );
+        l = (stbsp__uint32) ( (num+STBSP__NUMSZ) - s );
         // copy it
         goto scopy;
 
@@ -612,99 +623,99 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
       case 'i':
       case 'd': // integer
         // get the integer and abs it
-        if ( fl&BI )
+        if ( fl&STBSP__INTMAX )
         {
-          rS64 i64 = va_arg(va,rS64); n64 = (rU64)i64; if ((f[0]!='u') && (i64<0)) { n64=(rU64)-i64; fl|=NG; }
+          stbsp__int64 i64 = va_arg(va,stbsp__int64); n64 = (stbsp__uint64)i64; if ((f[0]!='u') && (i64<0)) { n64=(stbsp__uint64)-i64; fl|=STBSP__NEGATIVE; }
         }
         else
         {
-          rS32 i = va_arg(va,rS32); n64 = (rU32)i; if ((f[0]!='u') && (i<0)) { n64=(rU32)-i; fl|=NG; }
+          stbsp__int32 i = va_arg(va,stbsp__int32); n64 = (stbsp__uint32)i; if ((f[0]!='u') && (i<0)) { n64=(stbsp__uint32)-i; fl|=STBSP__NEGATIVE; }
         }
 
-        #ifndef RR_SPRINTF_NOFLOAT
-        if (fl&KI) { if (n64<1024) pr=0; else if (pr==-1) pr=1; fv=(double)(rS64)n64; goto doafloat; } 
+        #ifndef STB_SPRINTF_NOFLOAT
+        if (fl&STBSP__METRIC_SUFFIX) { if (n64<1024) pr=0; else if (pr==-1) pr=1; fv=(double)(stbsp__int64)n64; goto doafloat; } 
         #endif
 
         // convert to string
-        s = num+NUMSZ; l=0; 
+        s = num+STBSP__NUMSZ; l=0; 
         
         for(;;)
         {
           // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators)
           char * o=s-8;
-          if (n64>=100000000) { n = (rU32)( n64 % 100000000);  n64 /= 100000000; } else {n = (rU32)n64; n64 = 0; }
-          if((fl&CS)==0) { while(n) { s-=2; *(rU16*)s=*(rU16*)&rrdiglookup[(n%100)*2]; n/=100; } }
-          while (n) { if ( ( fl&CS) && (l++==3) ) { l=0; *--s=RRcomma; --o; } else { *--s=(char)(n%10)+'0'; n/=10; } }
-          if (n64==0) { if ((s[0]=='0') && (s!=(num+NUMSZ))) ++s; break; }
-          while (s!=o) if ( ( fl&CS) && (l++==3) ) { l=0; *--s=RRcomma; --o; } else { *--s='0'; }
+          if (n64>=100000000) { n = (stbsp__uint32)( n64 % 100000000);  n64 /= 100000000; } else {n = (stbsp__uint32)n64; n64 = 0; }
+          if((fl&STBSP__TRIPLET_COMMA)==0) { while(n) { s-=2; *(stbsp__uint16*)s=*(stbsp__uint16*)&stbsp__digitpair[(n%100)*2]; n/=100; } }
+          while (n) { if ( ( fl&STBSP__TRIPLET_COMMA) && (l++==3) ) { l=0; *--s=stbsp__comma; --o; } else { *--s=(char)(n%10)+'0'; n/=10; } }
+          if (n64==0) { if ((s[0]=='0') && (s!=(num+STBSP__NUMSZ))) ++s; break; }
+          while (s!=o) if ( ( fl&STBSP__TRIPLET_COMMA) && (l++==3) ) { l=0; *--s=stbsp__comma; --o; } else { *--s='0'; }
         }
 
         tail[0]=0;
         // sign
-        lead[0]=0; if (fl&NG) { lead[0]=1; lead[1]='-'; } else if (fl&LS) { lead[0]=1; lead[1]=' '; } else if (fl&LP) { lead[0]=1; lead[1]='+'; };
+        lead[0]=0; if (fl&STBSP__NEGATIVE) { lead[0]=1; lead[1]='-'; } else if (fl&STBSP__LEADINGSPACE) { lead[0]=1; lead[1]=' '; } else if (fl&STBSP__LEADINGPLUS) { lead[0]=1; lead[1]='+'; };
 
         // get the length that we copied
-        l = (rU32) ( (num+NUMSZ) - s ); if ( l == 0 ) { *--s='0'; l = 1; }
+        l = (stbsp__uint32) ( (num+STBSP__NUMSZ) - s ); if ( l == 0 ) { *--s='0'; l = 1; }
         cs = l + (3<<24);
         if (pr<0) pr = 0;
 
        scopy: 
         // get fw=leading/trailing space, pr=leading zeros
-        if (pr<(rS32)l) pr = l;
+        if (pr<(stbsp__int32)l) pr = l;
         n = pr + lead[0] + tail[0] + tz;
-        if (fw<(rS32)n) fw = n;
+        if (fw<(stbsp__int32)n) fw = n;
         fw -= n;
         pr -= l;
 
         // handle right justify and leading zeros
-        if ( (fl&LJ)==0 )
+        if ( (fl&STBSP__LEFTJUST)==0 )
         {
-          if (fl&LZ) // if leading zeros, everything is in pr
+          if (fl&STBSP__LEADINGZERO) // if leading zeros, everything is in pr
           { 
             pr = (fw>pr)?fw:pr;
             fw = 0;
           }
           else
           {
-            fl &= ~CS; // if no leading zeros, then no commas
+            fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas
           }
         }
 
         // copy the spaces and/or zeros
         if (fw+pr)
         {
-          rS32 i; rU32 c; 
+          stbsp__int32 i; stbsp__uint32 c; 
 
           // copy leading spaces (or when doing %8.4d stuff)
-          if ( (fl&LJ)==0 ) while(fw>0) { cb_buf_clamp(i,fw); fw -= i; while(i) { if ((((rUINTa)bf)&3)==0) break; *bf++=' '; --i; } while(i>=4) { *(rU32*)bf=0x20202020; bf+=4; i-=4; } while (i) {*bf++=' '; --i;} chk_cb_buf(1); }
+          if ( (fl&STBSP__LEFTJUST)==0 ) while(fw>0) { stbsp__cb_buf_clamp(i,fw); fw -= i; while(i) { if ((((stbsp__uintptr)bf)&3)==0) break; *bf++=' '; --i; } while(i>=4) { *(stbsp__uint32*)bf=0x20202020; bf+=4; i-=4; } while (i) {*bf++=' '; --i;} stbsp__chk_cb_buf(1); }
         
           // copy leader
-          sn=lead+1; while(lead[0]) { cb_buf_clamp(i,lead[0]); lead[0] -= (char)i; while (i) {*bf++=*sn++; --i;} chk_cb_buf(1); }
+          sn=lead+1; while(lead[0]) { stbsp__cb_buf_clamp(i,lead[0]); lead[0] -= (char)i; while (i) {*bf++=*sn++; --i;} stbsp__chk_cb_buf(1); }
           
           // copy leading zeros
           c = cs >> 24; cs &= 0xffffff;
-          cs = (fl&CS)?((rU32)(c-((pr+cs)%(c+1)))):0;
-          while(pr>0) { cb_buf_clamp(i,pr); pr -= i; if((fl&CS)==0) { while(i) { if ((((rUINTa)bf)&3)==0) break; *bf++='0'; --i; } while(i>=4) { *(rU32*)bf=0x30303030; bf+=4; i-=4; } } while (i) { if((fl&CS) && (cs++==c)) { cs = 0; *bf++=RRcomma; } else *bf++='0'; --i; } chk_cb_buf(1); }
+          cs = (fl&STBSP__TRIPLET_COMMA)?((stbsp__uint32)(c-((pr+cs)%(c+1)))):0;
+          while(pr>0) { stbsp__cb_buf_clamp(i,pr); pr -= i; if((fl&STBSP__TRIPLET_COMMA)==0) { while(i) { if ((((stbsp__uintptr)bf)&3)==0) break; *bf++='0'; --i; } while(i>=4) { *(stbsp__uint32*)bf=0x30303030; bf+=4; i-=4; } } while (i) { if((fl&STBSP__TRIPLET_COMMA) && (cs++==c)) { cs = 0; *bf++=stbsp__comma; } else *bf++='0'; --i; } stbsp__chk_cb_buf(1); }
         }
 
         // copy leader if there is still one
-        sn=lead+1; while(lead[0]) { rS32 i; cb_buf_clamp(i,lead[0]); lead[0] -= (char)i; while (i) {*bf++=*sn++; --i;} chk_cb_buf(1); }
+        sn=lead+1; while(lead[0]) { stbsp__int32 i; stbsp__cb_buf_clamp(i,lead[0]); lead[0] -= (char)i; while (i) {*bf++=*sn++; --i;} stbsp__chk_cb_buf(1); }
 
         // copy the string
-        n = l; while (n) { rS32 i; cb_buf_clamp(i,n); n-=i; RR_UNALIGNED( while(i>=4) { *(rU32*)bf=*(rU32*)s; bf+=4; s+=4; i-=4; } ) while (i) {*bf++=*s++; --i;} chk_cb_buf(1); }
+        n = l; while (n) { stbsp__int32 i; stbsp__cb_buf_clamp(i,n); n-=i; STBSP__UNALIGNED( while(i>=4) { *(stbsp__uint32*)bf=*(stbsp__uint32*)s; bf+=4; s+=4; i-=4; } ) while (i) {*bf++=*s++; --i;} stbsp__chk_cb_buf(1); }
 
         // copy trailing zeros
-        while(tz) { rS32 i; cb_buf_clamp(i,tz); tz -= i; while(i) { if ((((rUINTa)bf)&3)==0) break; *bf++='0'; --i; } while(i>=4) { *(rU32*)bf=0x30303030; bf+=4; i-=4; } while (i) {*bf++='0'; --i;} chk_cb_buf(1); }
+        while(tz) { stbsp__int32 i; stbsp__cb_buf_clamp(i,tz); tz -= i; while(i) { if ((((stbsp__uintptr)bf)&3)==0) break; *bf++='0'; --i; } while(i>=4) { *(stbsp__uint32*)bf=0x30303030; bf+=4; i-=4; } while (i) {*bf++='0'; --i;} stbsp__chk_cb_buf(1); }
 
         // copy tail if there is one
-        sn=tail+1; while(tail[0]) { rS32 i; cb_buf_clamp(i,tail[0]); tail[0] -= (char)i; while (i) {*bf++=*sn++; --i;} chk_cb_buf(1); }
+        sn=tail+1; while(tail[0]) { stbsp__int32 i; stbsp__cb_buf_clamp(i,tail[0]); tail[0] -= (char)i; while (i) {*bf++=*sn++; --i;} stbsp__chk_cb_buf(1); }
 
         // handle the left justify
-        if (fl&LJ) if (fw>0) { while (fw) { rS32 i; cb_buf_clamp(i,fw); fw-=i; while(i) { if ((((rUINTa)bf)&3)==0) break; *bf++=' '; --i; } while(i>=4) { *(rU32*)bf=0x20202020; bf+=4; i-=4; } while (i--) *bf++=' '; chk_cb_buf(1); } }
+        if (fl&STBSP__LEFTJUST) if (fw>0) { while (fw) { stbsp__int32 i; stbsp__cb_buf_clamp(i,fw); fw-=i; while(i) { if ((((stbsp__uintptr)bf)&3)==0) break; *bf++=' '; --i; } while(i>=4) { *(stbsp__uint32*)bf=0x20202020; bf+=4; i-=4; } while (i--) *bf++=' '; stbsp__chk_cb_buf(1); } }
         break;
 
       default: // unknown, just copy code
-        s = num + NUMSZ -1; *s = f[0];
+        s = num + STBSP__NUMSZ -1; *s = f[0];
         l = 1;
         fw=pr=fl=0;
         lead[0]=0; tail[0]=0; pr = 0; dp = 0; cs = 0;
@@ -717,48 +728,48 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintfcb )( RRSPRINTFCB * callback, void
   if (!callback) 
     *bf = 0;
   else
-    flush_cb();
+    stbsp__flush_cb();
  
  done:
   return tlen + (int)(bf-buf);
 }
 
 // cleanup
-#undef LJ
-#undef LP
-#undef LS
-#undef LX
-#undef LZ
-#undef BI
-#undef CS
-#undef NG
-#undef KI
-#undef NUMSZ
-#undef chk_cb_bufL
-#undef chk_cb_buf
-#undef flush_cb
-#undef cb_buf_clamp
+#undef STBSP__LEFTJUST
+#undef STBSP__LEADINGPLUS
+#undef STBSP__LEADINGSPACE
+#undef STBSP__LEADING_0X
+#undef STBSP__LEADINGZERO
+#undef STBSP__INTMAX
+#undef STBSP__TRIPLET_COMMA
+#undef STBSP__NEGATIVE
+#undef STBSP__METRIC_SUFFIX
+#undef STBSP__NUMSZ
+#undef stbsp__chk_cb_bufL
+#undef stbsp__chk_cb_buf
+#undef stbsp__flush_cb
+#undef stbsp__cb_buf_clamp
 
 // ============================================================================
 //   wrapper functions
 
-RRPUBLIC_DEF int RR_SPRINTF_DECORATE( sprintf )( char * buf, char const * fmt, ... )
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( sprintf )( char * buf, char const * fmt, ... )
 {
   va_list va;
   va_start( va, fmt );
-  return RR_SPRINTF_DECORATE( vsprintfcb )( 0, 0, buf, fmt, va );
+  return STB_SPRINTF_DECORATE( vsprintfcb )( 0, 0, buf, fmt, va );
 }
 
-typedef struct RRCCS
+typedef struct stbsp__context
 {
   char * buf;
   int count;
-  char tmp[ RR_SPRINTF_MIN ];
-} RRCCS;
+  char tmp[ STB_SPRINTF_MIN ];
+} stbsp__context;
 
-static char * rrclampcallback( char * buf, void * user, int len )
+static char * stbsp__clamp_callback( char * buf, void * user, int len )
 {
-  RRCCS * c = (RRCCS*)user;
+  stbsp__context * c = (stbsp__context*)user;
 
   if ( len > c->count ) len = c->count;
 
@@ -775,12 +786,12 @@ static char * rrclampcallback( char * buf, void * user, int len )
   }
   
   if ( c->count <= 0 ) return 0;
-  return ( c->count >= RR_SPRINTF_MIN ) ? c->buf : c->tmp; // go direct into buffer if you can
+  return ( c->count >= STB_SPRINTF_MIN ) ? c->buf : c->tmp; // go direct into buffer if you can
 }
 
-RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va )
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va )
 {
-  RRCCS c;
+  stbsp__context c;
   int l;
 
   if ( count == 0 )
@@ -789,7 +800,7 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char c
   c.buf = buf;
   c.count = count;
 
-  RR_SPRINTF_DECORATE( vsprintfcb )( rrclampcallback, &c, rrclampcallback(0,&c,0), fmt, va );
+  STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__clamp_callback, &c, stbsp__clamp_callback(0,&c,0), fmt, va );
   
   // zero-terminate
   l = (int)( c.buf - buf );
@@ -800,100 +811,100 @@ RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char c
   return l;
 }
 
-RRPUBLIC_DEF int RR_SPRINTF_DECORATE( snprintf )( char * buf, int count, char const * fmt, ... )
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( snprintf )( char * buf, int count, char const * fmt, ... )
 {
   va_list va;
   va_start( va, fmt );
 
-  return RR_SPRINTF_DECORATE( vsnprintf )( buf, count, fmt, va );
+  return STB_SPRINTF_DECORATE( vsnprintf )( buf, count, fmt, va );
 }
 
-RRPUBLIC_DEF int RR_SPRINTF_DECORATE( vsprintf )( char * buf, char const * fmt, va_list va )
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsprintf )( char * buf, char const * fmt, va_list va )
 {
-  return RR_SPRINTF_DECORATE( vsprintfcb )( 0, 0, buf, fmt, va );
+  return STB_SPRINTF_DECORATE( vsprintfcb )( 0, 0, buf, fmt, va );
 }
 
 // =======================================================================
 //   low level float utility functions
 
-#ifndef RR_SPRINTF_NOFLOAT
+#ifndef STB_SPRINTF_NOFLOAT
 
- // copies d to bits w/ strict aliasing (this compiles to nothing on /Ox)
- #define RRCOPYFP(dest,src) { int cn; for(cn=0;cn<8;cn++) ((char*)&dest)[cn]=((char*)&src)[cn]; }
+// copies d to bits w/ strict aliasing (this compiles to nothing on /Ox)
+#define STBSP__COPYFP(dest,src) { int cn; for(cn=0;cn<8;cn++) ((char*)&dest)[cn]=((char*)&src)[cn]; }
  
 // get float info
-static rS32 rrreal_to_parts( rS64 * bits, rS32 * expo, double value )
+static stbsp__int32 stbsp__real_to_parts( stbsp__int64 * bits, stbsp__int32 * expo, double value )
 {
   double d;
-  rS64 b = 0;
+  stbsp__int64 b = 0;
 
   // load value and round at the frac_digits
   d = value;
 
-  RRCOPYFP( b, d );
+  STBSP__COPYFP( b, d );
 
-  *bits = b & ((((rU64)1)<<52)-1);
-  *expo = ((b >> 52) & 2047)-1023;
+  *bits = b & ((((stbsp__uint64)1)<<52)-1);
+  *expo = (stbsp__int32) (((b >> 52) & 2047)-1023);
     
-  return (rS32)(b >> 63);
+  return (stbsp__int32)(b >> 63);
 }
 
-static double const rrbot[23]={1e+000,1e+001,1e+002,1e+003,1e+004,1e+005,1e+006,1e+007,1e+008,1e+009,1e+010,1e+011,1e+012,1e+013,1e+014,1e+015,1e+016,1e+017,1e+018,1e+019,1e+020,1e+021,1e+022};
-static double const rrnegbot[22]={1e-001,1e-002,1e-003,1e-004,1e-005,1e-006,1e-007,1e-008,1e-009,1e-010,1e-011,1e-012,1e-013,1e-014,1e-015,1e-016,1e-017,1e-018,1e-019,1e-020,1e-021,1e-022};
-static double const rrnegboterr[22]={-5.551115123125783e-018,-2.0816681711721684e-019,-2.0816681711721686e-020,-4.7921736023859299e-021,-8.1803053914031305e-022,4.5251888174113741e-023,4.5251888174113739e-024,-2.0922560830128471e-025,-6.2281591457779853e-026,-3.6432197315497743e-027,6.0503030718060191e-028,2.0113352370744385e-029,-3.0373745563400371e-030,1.1806906454401013e-032,-7.7705399876661076e-032,2.0902213275965398e-033,-7.1542424054621921e-034,-7.1542424054621926e-035,2.4754073164739869e-036,5.4846728545790429e-037,9.2462547772103625e-038,-4.8596774326570872e-039};
-static double const rrtop[13]={1e+023,1e+046,1e+069,1e+092,1e+115,1e+138,1e+161,1e+184,1e+207,1e+230,1e+253,1e+276,1e+299};
-static double const rrnegtop[13]={1e-023,1e-046,1e-069,1e-092,1e-115,1e-138,1e-161,1e-184,1e-207,1e-230,1e-253,1e-276,1e-299};
-static double const rrtoperr[13]={8388608,6.8601809640529717e+028,-7.253143638152921e+052,-4.3377296974619174e+075,-1.5559416129466825e+098,-3.2841562489204913e+121,-3.7745893248228135e+144,-1.7356668416969134e+167,-3.8893577551088374e+190,-9.9566444326005119e+213,6.3641293062232429e+236,-5.2069140800249813e+259,-5.2504760255204387e+282};
-static double const rrnegtoperr[13]={3.9565301985100693e-040,-2.299904345391321e-063,3.6506201437945798e-086,1.1875228833981544e-109,-5.0644902316928607e-132,-6.7156837247865426e-155,-2.812077463003139e-178,-5.7778912386589953e-201,7.4997100559334532e-224,-4.6439668915134491e-247,-6.3691100762962136e-270,-9.436808465446358e-293,8.0970921678014997e-317};
+static double const stbsp__bot[23]={1e+000,1e+001,1e+002,1e+003,1e+004,1e+005,1e+006,1e+007,1e+008,1e+009,1e+010,1e+011,1e+012,1e+013,1e+014,1e+015,1e+016,1e+017,1e+018,1e+019,1e+020,1e+021,1e+022};
+static double const stbsp__negbot[22]={1e-001,1e-002,1e-003,1e-004,1e-005,1e-006,1e-007,1e-008,1e-009,1e-010,1e-011,1e-012,1e-013,1e-014,1e-015,1e-016,1e-017,1e-018,1e-019,1e-020,1e-021,1e-022};
+static double const stbsp__negboterr[22]={-5.551115123125783e-018,-2.0816681711721684e-019,-2.0816681711721686e-020,-4.7921736023859299e-021,-8.1803053914031305e-022,4.5251888174113741e-023,4.5251888174113739e-024,-2.0922560830128471e-025,-6.2281591457779853e-026,-3.6432197315497743e-027,6.0503030718060191e-028,2.0113352370744385e-029,-3.0373745563400371e-030,1.1806906454401013e-032,-7.7705399876661076e-032,2.0902213275965398e-033,-7.1542424054621921e-034,-7.1542424054621926e-035,2.4754073164739869e-036,5.4846728545790429e-037,9.2462547772103625e-038,-4.8596774326570872e-039};
+static double const stbsp__top[13]={1e+023,1e+046,1e+069,1e+092,1e+115,1e+138,1e+161,1e+184,1e+207,1e+230,1e+253,1e+276,1e+299};
+static double const stbsp__negtop[13]={1e-023,1e-046,1e-069,1e-092,1e-115,1e-138,1e-161,1e-184,1e-207,1e-230,1e-253,1e-276,1e-299};
+static double const stbsp__toperr[13]={8388608,6.8601809640529717e+028,-7.253143638152921e+052,-4.3377296974619174e+075,-1.5559416129466825e+098,-3.2841562489204913e+121,-3.7745893248228135e+144,-1.7356668416969134e+167,-3.8893577551088374e+190,-9.9566444326005119e+213,6.3641293062232429e+236,-5.2069140800249813e+259,-5.2504760255204387e+282};
+static double const stbsp__negtoperr[13]={3.9565301985100693e-040,-2.299904345391321e-063,3.6506201437945798e-086,1.1875228833981544e-109,-5.0644902316928607e-132,-6.7156837247865426e-155,-2.812077463003139e-178,-5.7778912386589953e-201,7.4997100559334532e-224,-4.6439668915134491e-247,-6.3691100762962136e-270,-9.436808465446358e-293,8.0970921678014997e-317};
 
 #if defined(_MSC_VER) && (_MSC_VER<=1200)                                                                                                                                                                                       
-static rU64 const rrpot[20]={1,10,100,1000, 10000,100000,1000000,10000000, 100000000,1000000000,10000000000,100000000000,  1000000000000,10000000000000,100000000000000,1000000000000000,  10000000000000000,100000000000000000,1000000000000000000,10000000000000000000U };
-#define rrtento19th ((rU64)1000000000000000000)
+static stbsp__uint64 const stbsp__powten[20]={1,10,100,1000, 10000,100000,1000000,10000000, 100000000,1000000000,10000000000,100000000000,  1000000000000,10000000000000,100000000000000,1000000000000000,  10000000000000000,100000000000000000,1000000000000000000,10000000000000000000U };
+#define stbsp__tento19th ((stbsp__uint64)1000000000000000000)
 #else
-static rU64 const rrpot[20]={1,10,100,1000, 10000,100000,1000000,10000000, 100000000,1000000000,10000000000ULL,100000000000ULL,  1000000000000ULL,10000000000000ULL,100000000000000ULL,1000000000000000ULL,  10000000000000000ULL,100000000000000000ULL,1000000000000000000ULL,10000000000000000000ULL };
-#define rrtento19th (1000000000000000000ULL)
+static stbsp__uint64 const stbsp__powten[20]={1,10,100,1000, 10000,100000,1000000,10000000, 100000000,1000000000,10000000000ULL,100000000000ULL,  1000000000000ULL,10000000000000ULL,100000000000000ULL,1000000000000000ULL,  10000000000000000ULL,100000000000000000ULL,1000000000000000000ULL,10000000000000000000ULL };
+#define stbsp__tento19th (1000000000000000000ULL)
 #endif
 
-#define rrddmulthi(oh,ol,xh,yh) \
+#define stbsp__ddmulthi(oh,ol,xh,yh) \
 { \
   double ahi=0,alo,bhi=0,blo; \
-  rS64 bt; \
+  stbsp__int64 bt; \
   oh = xh * yh; \
-  RRCOPYFP(bt,xh); bt&=((~(rU64)0)<<27); RRCOPYFP(ahi,bt); alo = xh-ahi; \
-  RRCOPYFP(bt,yh); bt&=((~(rU64)0)<<27); RRCOPYFP(bhi,bt); blo = yh-bhi; \
+  STBSP__COPYFP(bt,xh); bt&=((~(stbsp__uint64)0)<<27); STBSP__COPYFP(ahi,bt); alo = xh-ahi; \
+  STBSP__COPYFP(bt,yh); bt&=((~(stbsp__uint64)0)<<27); STBSP__COPYFP(bhi,bt); blo = yh-bhi; \
   ol = ((ahi*bhi-oh)+ahi*blo+alo*bhi)+alo*blo; \
 }
 
-#define rrddtoS64(ob,xh,xl) \
+#define stbsp__ddtoS64(ob,xh,xl) \
 { \
   double ahi=0,alo,vh,t;\
-  ob = (rS64)ph;\
+  ob = (stbsp__int64)ph;\
   vh=(double)ob;\
   ahi = ( xh - vh );\
   t = ( ahi - xh );\
   alo = (xh-(ahi-t))-(vh+t);\
-  ob += (rS64)(ahi+alo+xl);\
+  ob += (stbsp__int64)(ahi+alo+xl);\
 }
 
 
-#define rrddrenorm(oh,ol) { double s; s=oh+ol; ol=ol-(s-oh); oh=s; }
+#define stbsp__ddrenorm(oh,ol) { double s; s=oh+ol; ol=ol-(s-oh); oh=s; }
 
-#define rrddmultlo(oh,ol,xh,xl,yh,yl) \
+#define stbsp__ddmultlo(oh,ol,xh,xl,yh,yl) \
   ol = ol + ( xh*yl + xl*yh ); \
 
-#define rrddmultlos(oh,ol,xh,yl) \
+#define stbsp__ddmultlos(oh,ol,xh,yl) \
   ol = ol + ( xh*yl ); \
 
-static void rrraise_to_power10( double *ohi, double *olo, double d, rS32 power )  // power can be -323 to +350
+static void stbsp__raise_to_power10( double *ohi, double *olo, double d, stbsp__int32 power )  // power can be -323 to +350
 {
   double ph, pl;
   if ((power>=0) && (power<=22))
   {
-    rrddmulthi(ph,pl,d,rrbot[power]);
+    stbsp__ddmulthi(ph,pl,d,stbsp__bot[power]);
   }
   else
   {
-    rS32 e,et,eb;
+    stbsp__int32 e,et,eb;
     double p2h,p2l;
 
     e=power; if (power<0) e=-e; 
@@ -902,11 +913,11 @@ static void rrraise_to_power10( double *ohi, double *olo, double d, rS32 power )
     ph = d; pl = 0.0;
     if (power<0)
     {
-      if (eb) { --eb; rrddmulthi(ph,pl,d,rrnegbot[eb]); rrddmultlos(ph,pl,d,rrnegboterr[eb]); }
+      if (eb) { --eb; stbsp__ddmulthi(ph,pl,d,stbsp__negbot[eb]); stbsp__ddmultlos(ph,pl,d,stbsp__negboterr[eb]); }
       if (et)
       { 
-        rrddrenorm(ph,pl);
-        --et; rrddmulthi(p2h,p2l,ph,rrnegtop[et]); rrddmultlo(p2h,p2l,ph,pl,rrnegtop[et],rrnegtoperr[et]); ph=p2h;pl=p2l;
+        stbsp__ddrenorm(ph,pl);
+        --et; stbsp__ddmulthi(p2h,p2l,ph,stbsp__negtop[et]); stbsp__ddmultlo(p2h,p2l,ph,pl,stbsp__negtop[et],stbsp__negtoperr[et]); ph=p2h;pl=p2l;
       }
     }
     else
@@ -914,17 +925,17 @@ static void rrraise_to_power10( double *ohi, double *olo, double d, rS32 power )
       if (eb) 
       { 
         e = eb; if (eb>22) eb=22; e -= eb;
-        rrddmulthi(ph,pl,d,rrbot[eb]); 
-        if ( e ) { rrddrenorm(ph,pl); rrddmulthi(p2h,p2l,ph,rrbot[e]); rrddmultlos(p2h,p2l,rrbot[e],pl); ph=p2h;pl=p2l; }
+        stbsp__ddmulthi(ph,pl,d,stbsp__bot[eb]); 
+        if ( e ) { stbsp__ddrenorm(ph,pl); stbsp__ddmulthi(p2h,p2l,ph,stbsp__bot[e]); stbsp__ddmultlos(p2h,p2l,stbsp__bot[e],pl); ph=p2h;pl=p2l; }
       }
       if (et)
       {
-        rrddrenorm(ph,pl);
-        --et; rrddmulthi(p2h,p2l,ph,rrtop[et]); rrddmultlo(p2h,p2l,ph,pl,rrtop[et],rrtoperr[et]); ph=p2h;pl=p2l;
+        stbsp__ddrenorm(ph,pl);
+        --et; stbsp__ddmulthi(p2h,p2l,ph,stbsp__top[et]); stbsp__ddmultlo(p2h,p2l,ph,pl,stbsp__top[et],stbsp__toperr[et]); ph=p2h;pl=p2l;
       }
     }
   }
-  rrddrenorm(ph,pl);
+  stbsp__ddrenorm(ph,pl);
   *ohi = ph; *olo = pl;
 }
 
@@ -932,22 +943,22 @@ static void rrraise_to_power10( double *ohi, double *olo, double d, rS32 power )
 //   decimal point in decimal_pos.  +/-INF and NAN are specified by special values
 //   returned in the decimal_pos parameter.
 // frac_digits is absolute normally, but if you want from first significant digits (got %g and %e), or in 0x80000000
-static rS32 rrreal_to_str( char const * * start, rU32 * len, char *out, rS32 * decimal_pos, double value, rU32 frac_digits )
+static stbsp__int32 stbsp__real_to_str( char const * * start, stbsp__uint32 * len, char *out, stbsp__int32 * decimal_pos, double value, stbsp__uint32 frac_digits )
 {
   double d;
-  rS64 bits = 0;
-  rS32 expo, e, ng, tens;
+  stbsp__int64 bits = 0;
+  stbsp__int32 expo, e, ng, tens;
 
   d = value;
-  RRCOPYFP(bits,d);
-  expo = (bits >> 52) & 2047;
-  ng = (rS32)(bits >> 63);
+  STBSP__COPYFP(bits,d);
+  expo = (stbsp__int32) ((bits >> 52) & 2047);
+  ng = (stbsp__int32)(bits >> 63);
   if (ng) d=-d;
 
   if ( expo == 2047 ) // is nan or inf?
   {
-    *start = (bits&((((rU64)1)<<52)-1)) ? "NaN" : "Inf";
-    *decimal_pos =  RRSPECIAL;
+    *start = (bits&((((stbsp__uint64)1)<<52)-1)) ? "NaN" : "Inf";
+    *decimal_pos =  STBSP__SPECIAL;
     *len = 3;
     return ng;
   } 
@@ -963,7 +974,7 @@ static rS32 rrreal_to_str( char const * * start, rU32 * len, char *out, rS32 * d
     }
     // find the right expo for denormals
     {
-      rS64 v = ((rU64)1)<<51;
+      stbsp__int64 v = ((stbsp__uint64)1)<<51;
       while ((bits&v)==0) { --expo; v >>= 1; }
     }
   }
@@ -976,29 +987,29 @@ static rS32 rrreal_to_str( char const * * start, rU32 * len, char *out, rS32 * d
     tens=expo-1023; tens = (tens<0)?((tens*617)/2048):(((tens*1233)/4096)+1);
 
     // move the significant bits into position and stick them into an int 
-    rrraise_to_power10( &ph, &pl, d, 18-tens );
+    stbsp__raise_to_power10( &ph, &pl, d, 18-tens );
 
     // get full as much precision from double-double as possible
-    rrddtoS64( bits, ph,pl );
+    stbsp__ddtoS64( bits, ph,pl );
 
     // check if we undershot
-    if ( ((rU64)bits) >= rrtento19th ) ++tens; 
+    if ( ((stbsp__uint64)bits) >= stbsp__tento19th ) ++tens; 
   }
 
   // now do the rounding in integer land
   frac_digits = ( frac_digits & 0x80000000 ) ? ( (frac_digits&0x7ffffff) + 1 ) : ( tens + frac_digits );
   if ( ( frac_digits < 24 ) )
   {
-    rU32 dg = 1; if ((rU64)bits >= rrpot[9] ) dg=10; while( (rU64)bits >= rrpot[dg] ) { ++dg; if (dg==20) goto noround; }
+    stbsp__uint32 dg = 1; if ((stbsp__uint64)bits >= stbsp__powten[9] ) dg=10; while( (stbsp__uint64)bits >= stbsp__powten[dg] ) { ++dg; if (dg==20) goto noround; }
     if ( frac_digits < dg )
     {
-      rU64 r;
+      stbsp__uint64 r;
       // add 0.5 at the right position and round
       e = dg - frac_digits;
-      if ( (rU32)e >= 24 ) goto noround;
-      r = rrpot[e];
+      if ( (stbsp__uint32)e >= 24 ) goto noround;
+      r = stbsp__powten[e];
       bits = bits + (r/2);
-      if ( (rU64)bits >= rrpot[dg] ) ++tens;
+      if ( (stbsp__uint64)bits >= stbsp__powten[dg] ) ++tens;
       bits /= r;
     } 
     noround:;
@@ -1007,7 +1018,12 @@ static rS32 rrreal_to_str( char const * * start, rU32 * len, char *out, rS32 * d
   // kill long trailing runs of zeros
   if ( bits )
   {
-    rU32 n; for(;;) { if ( bits<=0xffffffff ) break; if (bits%1000) goto donez; bits/=1000; } n = (rU32)bits; while ((n%1000)==0) n/=1000; bits=n; donez:;
+    stbsp__uint32 n;
+    for(;;) { if ( bits<=0xffffffff ) break; if (bits%1000) goto donez; bits/=1000; }
+    n = (stbsp__uint32)bits;
+    while ((n%1000)==0) n/=1000;
+    bits=n;
+    donez:;
   }
 
   // convert to string
@@ -1015,11 +1031,11 @@ static rS32 rrreal_to_str( char const * * start, rU32 * len, char *out, rS32 * d
   e = 0; 
   for(;;)
   {
-    rU32 n;
+    stbsp__uint32 n;
     char * o = out-8;
     // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denomiators be damned)
-    if (bits>=100000000) { n = (rU32)( bits % 100000000);  bits /= 100000000; } else {n = (rU32)bits; bits = 0; }
-    while(n) { out-=2; *(rU16*)out=*(rU16*)&rrdiglookup[(n%100)*2]; n/=100; e+=2; }
+    if (bits>=100000000) { n = (stbsp__uint32)( bits % 100000000);  bits /= 100000000; } else {n = (stbsp__uint32)bits; bits = 0; }
+    while(n) { out-=2; *(stbsp__uint16*)out=*(stbsp__uint16*)&stbsp__digitpair[(n%100)*2]; n/=100; e+=2; }
     if (bits==0) { if ((e) && (out[0]=='0')) { ++out; --e; } break; }
     while( out!=o ) { *--out ='0'; ++e; }
   }
@@ -1030,26 +1046,21 @@ static rS32 rrreal_to_str( char const * * start, rU32 * len, char *out, rS32 * d
   return ng;
 }
 
-#undef rrddmulthi
-#undef rrddrenorm
-#undef rrddmultlo
-#undef rrddmultlos
-#undef RRSPECIAL 
-#undef RRCOPYFP
+#undef stbsp__ddmulthi
+#undef stbsp__ddrenorm
+#undef stbsp__ddmultlo
+#undef stbsp__ddmultlos
+#undef STBSP__SPECIAL 
+#undef STBSP__COPYFP
  
-#endif
+#endif // STB_SPRINTF_NOFLOAT
 
 // clean up
-#undef rU16
-#undef rU32 
-#undef rS32 
-#undef rU64
-#undef rS64
-#undef RRPUBLIC_DEC
-#undef RRPUBLIC_DEF
-#undef RR_SPRINTF_DECORATE
-#undef RR_UNALIGNED
+#undef stbsp__uint16
+#undef stbsp__uint32 
+#undef stbsp__int32 
+#undef stbsp__uint64
+#undef stbsp__int64
+#undef STBSP__UNALIGNED
 
-#endif
-
-#endif
+#endif // STB_SPRINTF_IMPLEMENTATION
diff --git a/tests/grid_reachability.c b/tests/grid_reachability.c
index 2bb1cd8..905f2c2 100644
--- a/tests/grid_reachability.c
+++ b/tests/grid_reachability.c
@@ -143,6 +143,8 @@ void end_timer(void)
    printf("%6.4lf ms: %s\n", tm * 1000, message);
 }
 
+extern void quicktest(void);
+
 int loc[5000][2];
 int main(int argc, char **argv)
 {
@@ -152,6 +154,7 @@ int main(int argc, char **argv)
    uint8 *map = stbi_load("data/map_03.png", &w, &h, 0, 1);
 
    assert(map);
+   quicktest();
 
    for (j=0; j < h; ++j)
       for (i=0; i < w; ++i)
diff --git a/tests/stb.dsp b/tests/stb.dsp
index 5b32abb..05f0a7f 100644
--- a/tests/stb.dsp
+++ b/tests/stb.dsp
@@ -154,6 +154,10 @@ SOURCE=..\stb_rect_pack.h
 # End Source File
 # Begin Source File
 
+SOURCE=..\stb_sprintf.h
+# End Source File
+# Begin Source File
+
 SOURCE=..\stb_textedit.h
 # End Source File
 # Begin Source File
diff --git a/tests/test_c_compilation.c b/tests/test_c_compilation.c
index de25330..2a54df5 100644
--- a/tests/test_c_compilation.c
+++ b/tests/test_c_compilation.c
@@ -1,3 +1,7 @@
+#include "stb_sprintf.h"
+#define STB_SPRINTF_IMPLEMENTATION
+#include "stb_sprintf.h"
+
 #define STB_PERLIN_IMPLEMENTATION
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #define STB_DXT_IMPLEMENATION
@@ -28,3 +32,11 @@
 #define STBTE_DRAW_TILE(x,y,id,highlight,data)  0
 #define STB_TILEMAP_EDITOR_IMPLEMENTATION
 #include "stb_tilemap_editor.h"
+
+
+int quicktest(void)
+{
+   char buffer[999];
+   stbsp_sprintf(buffer, "test%%test");
+   return 0;
+}
\ No newline at end of file
diff --git a/tests/test_cpp_compilation.cpp b/tests/test_cpp_compilation.cpp
index 4e77da5..138948b 100644
--- a/tests/test_cpp_compilation.cpp
+++ b/tests/test_cpp_compilation.cpp
@@ -1,3 +1,7 @@
+#include "stb_sprintf.h"
+#define STB_SPRINTF_IMPLEMENTATION
+#include "stb_sprintf.h"
+
 #define STB_TRUETYPE_IMPLEMENTATION
 #define STB_PERLIN_IMPLEMENTATION
 #define STB_IMAGE_WRITE_IMPLEMENTATION

From 3f2716ace46a46e50960dfbd2ad96a71ceda4837 Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Mon, 5 Dec 2016 03:50:06 -0800
Subject: [PATCH 18/23] add stb_sprintf to readme

---
 README.md         | 7 ++++---
 tools/README.list | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 2b17c3c..df79931 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ library    | lastest version | category | LoC | description
 **[stb_image_write.h](stb_image_write.h)** | 1.02 | graphics | 1048 | image writing to disk: PNG, TGA, BMP
 **[stb_image_resize.h](stb_image_resize.h)** | 0.91 | graphics | 2578 | resize images larger/smaller with good quality
 **[stb_rect_pack.h](stb_rect_pack.h)** | 0.10 | graphics | 583 | simple 2D rectangle packer with decent quality
+**[stb_sprintf.h](stb_sprintf.h)** | 1.00 | utility | 1066 | fast sprintf, snprintf for C/C++
 **[stretchy_buffer.h](stretchy_buffer.h)** | 1.02 | utility | 216 | typesafe dynamic array for C (i.e. approximation to vector<>), doesn't compile as C++
 **[stb_textedit.h](stb_textedit.h)** | 1.10 | user&nbsp;interface | 1330 | guts of a text editor for games etc implementing them from scratch
 **[stb_voxel_render.h](stb_voxel_render.h)** | 0.84 | 3D&nbsp;graphics | 3752 | Minecraft-esque voxel rendering "engine" with many more features
@@ -24,11 +25,11 @@ library    | lastest version | category | LoC | description
 **[stb_c_lexer.h](stb_c_lexer.h)** | 0.07 | parsing | 816 | simplify writing parsers for C-like languages
 **[stb_divide.h](stb_divide.h)** | 0.91 | math | 379 | more useful 32-bit modulus e.g. "euclidean divide"
 **[stb_connected_comp...](stb_connected_components.h)** | 0.95 | misc | 1006 | incrementally compute reachability on grids
-**[stb.h](stb.h)** | 2.28 | misc | 14276 | helper functions for C, mostly redundant in C++; basically author's personal stuff
+**[stb.h](stb.h)** | 2.28 | misc | 14277 | helper functions for C, mostly redundant in C++; basically author's personal stuff
 **[stb_leakcheck.h](stb_leakcheck.h)** | 0.2 | misc | 124 | quick-and-dirty malloc/free leak-checking
 
-Total libraries: 19  
-Total lines of C code: 48314
+Total libraries: 20  
+Total lines of C code: 49381
 
 
 FAQ
diff --git a/tools/README.list b/tools/README.list
index b5c3e5b..cc59037 100644
--- a/tools/README.list
+++ b/tools/README.list
@@ -4,6 +4,7 @@ stb_truetype.h              | graphics         | parse, decode, and rasterize ch
 stb_image_write.h           | graphics         | image writing to disk: PNG, TGA, BMP
 stb_image_resize.h          | graphics         | resize images larger/smaller with good quality
 stb_rect_pack.h             | graphics         | simple 2D rectangle packer with decent quality
+stb_sprintf.h               | utility          | fast sprintf, snprintf for C/C++
 stretchy_buffer.h           | utility          | typesafe dynamic array for C (i.e. approximation to vector<>), doesn't compile as C++
 stb_textedit.h              | user interface   | guts of a text editor for games etc implementing them from scratch
 stb_voxel_render.h          | 3D graphics      | Minecraft-esque voxel rendering "engine" with many more features

From 9953803d0cacba488c4ede50fe9d757a8da36d51 Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Mon, 5 Dec 2016 03:53:54 -0800
Subject: [PATCH 19/23] fix bad search-replace in comment

---
 stb_sprintf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stb_sprintf.h b/stb_sprintf.h
index 8fdc70e..4be7b14 100644
--- a/stb_sprintf.h
+++ b/stb_sprintf.h
@@ -79,7 +79,7 @@ doesn't either).
 If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT
 and you'll save 4K of code space.
 
-64-STBSP__INTMAXT INTS:
+64-BIT INTS:
 ============
 This library also supports 64-bit integers and you can use MSVC style or
 GCC style indicators (%I64d or %lld).  It supports the C99 specifiers

From ca3b8d74263aae9d6299166b3f4b6864df95f79a Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Mon, 5 Dec 2016 04:19:53 -0800
Subject: [PATCH 20/23] add credits to readme

---
 README.md              | 4 ++++
 stb_sprintf.h          | 2 --
 tools/README.header.md | 4 ++++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index df79931..bb2b31c 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,10 @@ stb
 
 single-file public domain libraries for C/C++ <a name="stb_libs"></a>
 
+Most libraries by stb, except: stb_dxt by Fabian "ryg" Giesen, stb_image_resize
+by Jorge L. "VinoBS" Rodriguez, and stb_sprintf by Jeff Roberts.
+
+
 library    | lastest version | category | LoC | description
 --------------------- | ---- | -------- | --- | --------------------------------
 **[stb_vorbis.c](stb_vorbis.c)** | 1.09 | audio | 5399 | decode ogg vorbis files from file/memory to float/16-bit signed output
diff --git a/stb_sprintf.h b/stb_sprintf.h
index 4be7b14..23de7e4 100644
--- a/stb_sprintf.h
+++ b/stb_sprintf.h
@@ -5,8 +5,6 @@
 // allowed types:  sc uidBboXx p AaGgEef n
 // lengths      :  h ll j z t I64 I32 I
 
-
-
 #ifndef STB_SPRINTF_H_INCLUDE
 #define STB_SPRINTF_H_INCLUDE
 
diff --git a/tools/README.header.md b/tools/README.header.md
index 71e765b..847fe62 100644
--- a/tools/README.header.md
+++ b/tools/README.header.md
@@ -3,5 +3,9 @@ stb
 
 single-file public domain libraries for C/C++ <a name="stb_libs"></a>
 
+Most libraries by stb, except: stb_dxt by Fabian "ryg" Giesen, stb_image_resize
+by Jorge L. "VinoBS" Rodriguez, and stb_sprintf by Jeff Roberts.
+
+
 library    | lastest version | category | LoC | description
 --------------------- | ---- | -------- | --- | --------------------------------

From e6e20b43dbc26ee838c45b740f4f47159a2f6b2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aras=20Pranckevi=C4=8Dius?= <nearaz@gmail.com>
Date: Mon, 5 Dec 2016 14:49:39 +0200
Subject: [PATCH 21/23] stb_sprintf: seperators -> separators typo in comment

---
 stb_sprintf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stb_sprintf.h b/stb_sprintf.h
index 23de7e4..a75475b 100644
--- a/stb_sprintf.h
+++ b/stb_sprintf.h
@@ -59,7 +59,7 @@ int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char
   The buffer you pass in must hold at least STB_SPRINTF_MIN characters.
     // you return the next buffer to use or 0 to stop converting
 
-void stbsp_set_seperators( char comma, char period )
+void stbsp_set_separators( char comma, char period )
   Set the comma and period characters to use.
 
 FLOATS/DOUBLES:

From 554e0727429cc65e187b740a484d83c7216ccce0 Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Mon, 5 Dec 2016 06:53:36 -0800
Subject: [PATCH 22/23] fix #ifdef mistake in stb_sprintf

---
 README.md     | 4 ++--
 stb_sprintf.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index bb2b31c..e1e8f5f 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ library    | lastest version | category | LoC | description
 **[stb_image_write.h](stb_image_write.h)** | 1.02 | graphics | 1048 | image writing to disk: PNG, TGA, BMP
 **[stb_image_resize.h](stb_image_resize.h)** | 0.91 | graphics | 2578 | resize images larger/smaller with good quality
 **[stb_rect_pack.h](stb_rect_pack.h)** | 0.10 | graphics | 583 | simple 2D rectangle packer with decent quality
-**[stb_sprintf.h](stb_sprintf.h)** | 1.00 | utility | 1066 | fast sprintf, snprintf for C/C++
+**[stb_sprintf.h](stb_sprintf.h)** | 1.01 | utility | 1064 | fast sprintf, snprintf for C/C++
 **[stretchy_buffer.h](stretchy_buffer.h)** | 1.02 | utility | 216 | typesafe dynamic array for C (i.e. approximation to vector<>), doesn't compile as C++
 **[stb_textedit.h](stb_textedit.h)** | 1.10 | user&nbsp;interface | 1330 | guts of a text editor for games etc implementing them from scratch
 **[stb_voxel_render.h](stb_voxel_render.h)** | 0.84 | 3D&nbsp;graphics | 3752 | Minecraft-esque voxel rendering "engine" with many more features
@@ -33,7 +33,7 @@ library    | lastest version | category | LoC | description
 **[stb_leakcheck.h](stb_leakcheck.h)** | 0.2 | misc | 124 | quick-and-dirty malloc/free leak-checking
 
 Total libraries: 20  
-Total lines of C code: 49381
+Total lines of C code: 49379
 
 
 FAQ
diff --git a/stb_sprintf.h b/stb_sprintf.h
index 23de7e4..fc32543 100644
--- a/stb_sprintf.h
+++ b/stb_sprintf.h
@@ -1,4 +1,4 @@
-// stb_sprintf - v1.00 - public domain snprintf() implementation
+// stb_sprintf - v1.01 - public domain snprintf() implementation
 // originally by Jeff Roberts / RAD Game Tools, 2015/10/20
 // http://github.com/nothings/stb
 //
@@ -152,7 +152,7 @@ STBSP__PUBLICDEF void STB_SPRINTF_DECORATE( set_separators )( char comma, char p
 
 #endif // STB_SPRINTF_H_INCLUDE
 
-#ifndef STB_SPRINTF_IMPLEMENTATION
+#ifdef STB_SPRINTF_IMPLEMENTATION
 
 #include <stdlib.h>  // for va_arg()
 

From 3e7f2d6ebddfe54bd9a9491db1349513a357bfe5 Mon Sep 17 00:00:00 2001
From: Sean Barrett <sean@nothings.org>
Date: Mon, 5 Dec 2016 06:58:30 -0800
Subject: [PATCH 23/23] fix missing renames

---
 stb_sprintf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/stb_sprintf.h b/stb_sprintf.h
index 4e3b3c0..7521f7e 100644
--- a/stb_sprintf.h
+++ b/stb_sprintf.h
@@ -43,12 +43,12 @@ API:
 ====
 int stbsp_sprintf( char * buf, char const * fmt, ... )
 int stbsp_snprintf( char * buf, int count, char const * fmt, ... )
-  Convert an arg list into a buffer.  rrsnprintf always returns
+  Convert an arg list into a buffer.  stbsp_snprintf always returns
   a zero-terminated string (unlike regular snprintf).
 
 int stbsp_vsprintf( char * buf, char const * fmt, va_list va )
 int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va )
-  Convert a va_list arg list into a buffer.  rrvsnprintf always returns
+  Convert a va_list arg list into a buffer.  stbsp_vsnprintf always returns
   a zero-terminated string (unlike regular snprintf).
 
 int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
@@ -182,7 +182,7 @@ STBSP__PUBLICDEF void STB_SPRINTF_DECORATE( set_separators )( char comma, char p
 #endif
 #endif
 
-#ifdef STB_SPRINTF_NOUNALIGNED  // define this before inclusion to force rrsprint to always use aligned accesses
+#ifdef STB_SPRINTF_NOUNALIGNED  // define this before inclusion to force stbsp_sprintf to always use aligned accesses
 #define STBSP__UNALIGNED(code)
 #else
 #define STBSP__UNALIGNED(code) code