From 4ea60463f5a5cc5c30bf3f20be0dd5141f48aa3c Mon Sep 17 00:00:00 2001
From: Roberto Ierusalimschy
Date: Thu, 6 Feb 2014 13:59:24 -0200
Subject: [PATCH] UTF-8 encoding exported as format '%U' in 'lua_pushfstring'

---
 llex.c    | 25 +++++++------------------
 lobject.c | 28 ++++++++++++++++++++++++++--
 lobject.h |  5 ++++-
 3 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/llex.c b/llex.c
index 818c0812..514a8150 100644
--- a/llex.c
+++ b/llex.c
@@ -1,5 +1,5 @@
 /*
-** $Id: llex.c,v 2.71 2014/01/31 15:14:22 roberto Exp roberto $
+** $Id: llex.c,v 2.72 2014/02/04 18:57:34 roberto Exp roberto $
 ** Lexical Analyzer
 ** See Copyright Notice in lua.h
 */
@@ -359,22 +359,11 @@ static unsigned int readutf8esc (LexState *ls) {
 }
 
 
-static void utf8esc (LexState *ls, unsigned int r) {
-  if (r < 0x80)  /* ascii? */
-    save(ls, r);
-  else {  /* need continuation bytes */
-    int buff[4];  /* to store continuation bytes */
-    int n = 0;  /* number of continuation bytes */
-    unsigned int mfb = 0x3f;  /* maximum that fits in first byte */
-    do {
-      buff[n++] = 0x80 | (r & 0x3f);  /* add continuation byte */
-      r >>= 6;  /* remove added bits */
-      mfb >>= 1;  /* now there is one less bit in first byte */
-    } while (r > mfb);  /* needs continuation byte? */
-    save(ls, (~mfb << 1) | r);  /* add first byte */
-    while (n-- > 0)  /* add 'buff' to string, reversed */
-      save(ls, buff[n]);
-  }
+static void utf8esc (LexState *ls) {
+  char buff[UTF8BUFFSZ];
+  int n = luaO_utf8esc(buff, readutf8esc(ls));
+  for (; n > 0; n--)  /* add 'buff' to string */
+    save(ls, buff[UTF8BUFFSZ - n]);
 }
 
 
@@ -414,7 +403,7 @@ static void read_string (LexState *ls, int del, SemInfo *seminfo) {
           case 't': c = '\t'; goto read_save;
           case 'v': c = '\v'; goto read_save;
           case 'x': c = readhexaesc(ls); goto read_save;
-          case 'u': utf8esc(ls, readutf8esc(ls)); goto no_save;
+          case 'u': utf8esc(ls); goto no_save;
           case '\n': case '\r':
             inclinenumber(ls); c = '\n'; goto only_save;
           case '\\': case '\"': case '\'':
diff --git a/lobject.c b/lobject.c
index 90a1e443..90e7d71a 100644
--- a/lobject.c
+++ b/lobject.c
@@ -1,5 +1,5 @@
 /*
-** $Id: lobject.c,v 2.71 2013/12/30 20:47:58 roberto Exp roberto $
+** $Id: lobject.c,v 2.72 2014/01/27 13:34:32 roberto Exp roberto $
 ** Some generic functions over Lua objects
 ** See Copyright Notice in lua.h
 */
@@ -284,12 +284,30 @@ int luaO_str2int (const char *s, size_t len, lua_Integer *result) {
 }
 
 
+int luaO_utf8esc (char *buff, unsigned int x) {
+  int n = 1;  /* number of bytes put in buffer (backwards) */
+  if (x < 0x80)  /* ascii? */
+    buff[UTF8BUFFSZ - 1] = x;
+  else {  /* need continuation bytes */
+    unsigned int mfb = 0x3f;  /* maximum that fits in first byte */
+    do {
+      buff[UTF8BUFFSZ - (n++)] = 0x80 | (x & 0x3f);  /* add continuation byte */
+      x >>= 6;  /* remove added bits */
+      mfb >>= 1;  /* now there is one less bit available in first byte */
+    } while (x > mfb);  /* still needs continuation byte? */
+    buff[UTF8BUFFSZ - n] = (~mfb << 1) | x;  /* add first byte */
+  }
+  return n;
+}
+
+
 static void pushstr (lua_State *L, const char *str, size_t l) {
   setsvalue2s(L, L->top++, luaS_newlstr(L, str, l));
 }
 
 
-/* this function handles only `%d', `%c', %f, %p, and `%s' formats */
+/* this function handles only '%d', '%c', '%f', '%p', and '%s'
+   conventional formats, plus Lua-specific '%L' and '%U' */
 const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) {
   int n = 0;
   for (;;) {
@@ -328,6 +346,12 @@ const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) {
         pushstr(L, buff, l);
         break;
       }
+      case 'U': {
+        char buff[UTF8BUFFSZ];
+        int l = luaO_utf8esc(buff, va_arg(argp, int));
+        pushstr(L, buff + UTF8BUFFSZ - l, l);
+        break;
+      }
       case '%': {
         pushstr(L, "%", 1);
         break;
diff --git a/lobject.h b/lobject.h
index ca92fc56..23bbe74f 100644
--- a/lobject.h
+++ b/lobject.h
@@ -1,5 +1,5 @@
 /*
-** $Id: lobject.h,v 2.82 2013/09/05 19:31:49 roberto Exp roberto $
+** $Id: lobject.h,v 2.83 2013/12/04 12:15:22 roberto Exp roberto $
 ** Type definitions for Lua objects
 ** See Copyright Notice in lua.h
 */
@@ -479,9 +479,12 @@ typedef struct Table {
 
 LUAI_DDEC const TValue luaO_nilobject_;
 
+/* size of buffer for 'luaO_utf8esc' function */
+#define UTF8BUFFSZ 8
 
 LUAI_FUNC int luaO_int2fb (unsigned int x);
 LUAI_FUNC int luaO_fb2int (int x);
+LUAI_FUNC int luaO_utf8esc (char *buff, unsigned int x);
 LUAI_FUNC int luaO_ceillog2 (unsigned int x);
 LUAI_FUNC void luaO_arith (lua_State *L, int op, const TValue *p1,
                            const TValue *p2, TValue *res);