Enhance console_io to permit emits limited in various ways, such as valid UTF-8, up to control chars, or with counted limits, all getting away from 0-termination as the sole limit. In the CLI, use this capability to avoid certain emit-chars-singly procedures that were breaking up UTF-8 characters. This fixes broken JSON mode output (on Windows) and (maybe) C-literal-like emits.
FossilOrigin-Name: 906c5c4082e30b7a0d07df89a42566461e2113507c5a0b339827ca0822b8fe84
This commit is contained in:
parent
fdbd9119d4
commit
14a08730de
@ -278,22 +278,20 @@ SQLITE_INTERNAL_LINKAGE void setTextMode(FILE *pf, short bFlush){
|
||||
#undef setModeFlushQ
|
||||
|
||||
#if SHELL_CON_TRANSLATE
|
||||
/* Write plain 0-terminated output to stream known as reaching console. */
|
||||
static int conioZstrOut(PerStreamTags *ppst, const char *z){
|
||||
/* Write buffer z as output to stream known to reach console,
|
||||
** limited to ncTake char's. Return ncTake on success, else 0. */
|
||||
static int conZstrEmit(PerStreamTags *ppst, const char *z, int ncTake){
|
||||
int rv = 0;
|
||||
if( z!=NULL && *z!=0 ){
|
||||
int nc;
|
||||
int nwc;
|
||||
nc = (int)strlen(z);
|
||||
nwc = MultiByteToWideChar(CP_UTF8,0, z,nc, 0,0);
|
||||
if( z!=NULL ){
|
||||
int nwc = MultiByteToWideChar(CP_UTF8,0, z,ncTake, 0,0);
|
||||
if( nwc > 0 ){
|
||||
WCHAR *zw = sqlite3_malloc64(nwc*sizeof(WCHAR));
|
||||
if( zw!=NULL ){
|
||||
nwc = MultiByteToWideChar(CP_UTF8,0, z,nc, zw,nwc);
|
||||
nwc = MultiByteToWideChar(CP_UTF8,0, z,ncTake, zw,nwc);
|
||||
if( nwc > 0 ){
|
||||
/* Translation from UTF-8 to UTF-16, then WCHARs out. */
|
||||
if( WriteConsoleW(ppst->hx, zw,nwc, 0, NULL) ){
|
||||
rv = nc;
|
||||
rv = ncTake;
|
||||
}
|
||||
}
|
||||
sqlite3_free(zw);
|
||||
@ -306,9 +304,11 @@ static int conioZstrOut(PerStreamTags *ppst, const char *z){
|
||||
/* For {f,o,e}PrintfUtf8() when stream is known to reach console. */
|
||||
static int conioVmPrintf(PerStreamTags *ppst, const char *zFormat, va_list ap){
|
||||
char *z = sqlite3_vmprintf(zFormat, ap);
|
||||
int rv = conioZstrOut(ppst, z);
|
||||
sqlite3_free(z);
|
||||
return rv;
|
||||
if( z ){
|
||||
int rv = conZstrEmit(ppst, z, (int)strlen(z));
|
||||
sqlite3_free(z);
|
||||
return rv;
|
||||
}else return 0;
|
||||
}
|
||||
#endif /* SHELL_CON_TRANSLATE */
|
||||
|
||||
@ -428,7 +428,7 @@ SQLITE_INTERNAL_LINKAGE int fPutsUtf8(const char *z, FILE *pfO){
|
||||
if( pstReachesConsole(ppst) ){
|
||||
int rv;
|
||||
maybeSetupAsConsole(ppst, 1);
|
||||
rv = conioZstrOut(ppst, z);
|
||||
rv = conZstrEmit(ppst, z, (int)strlen(z));
|
||||
if( 0 == isKnownWritable(ppst->pf) ) restoreConsoleArb(ppst);
|
||||
return rv;
|
||||
}else {
|
||||
@ -444,7 +444,7 @@ SQLITE_INTERNAL_LINKAGE int ePutsUtf8(const char *z){
|
||||
PerStreamTags pst; /* Needed only for heretofore unknown streams. */
|
||||
PerStreamTags *ppst = getEmitStreamInfo(2, &pst, &pfErr);
|
||||
#if SHELL_CON_TRANSLATE
|
||||
if( pstReachesConsole(ppst) ) return conioZstrOut(ppst, z);
|
||||
if( pstReachesConsole(ppst) ) return conZstrEmit(ppst, z, (int)strlen(z));
|
||||
else {
|
||||
#endif
|
||||
return (fputs(z, pfErr)<0)? 0 : (int)strlen(z);
|
||||
@ -458,7 +458,7 @@ SQLITE_INTERNAL_LINKAGE int oPutsUtf8(const char *z){
|
||||
PerStreamTags pst; /* Needed only for heretofore unknown streams. */
|
||||
PerStreamTags *ppst = getEmitStreamInfo(1, &pst, &pfOut);
|
||||
#if SHELL_CON_TRANSLATE
|
||||
if( pstReachesConsole(ppst) ) return conioZstrOut(ppst, z);
|
||||
if( pstReachesConsole(ppst) ) return conZstrEmit(ppst, z, (int)strlen(z));
|
||||
else {
|
||||
#endif
|
||||
return (fputs(z, pfOut)<0)? 0 : (int)strlen(z);
|
||||
@ -467,33 +467,96 @@ SQLITE_INTERNAL_LINKAGE int oPutsUtf8(const char *z){
|
||||
#endif
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* Next 3 functions could be optimized to avoid console mode futzing. */
|
||||
SQLITE_INTERNAL_LINKAGE int fPutcUtf8(int ch, FILE *pfO){
|
||||
if( (ch & ~0x7f) != 0 ) return 0;
|
||||
else{
|
||||
char ac[2] = "?";
|
||||
ac[0] = (char)ch;
|
||||
return (fPutsUtf8(ac, pfO) > 0);
|
||||
}
|
||||
}
|
||||
SQLITE_INTERNAL_LINKAGE int oPutcUtf8(int ch){
|
||||
if( (ch & ~0x7f) != 0 ) return 0;
|
||||
else{
|
||||
char ac[2] = "?";
|
||||
ac[0] = (char)ch;
|
||||
return (oPutsUtf8(ac) > 0);
|
||||
}
|
||||
}
|
||||
SQLITE_INTERNAL_LINKAGE int ePutcUtf8(int ch){
|
||||
if( (ch & ~0x7f) != 0 ) return 0;
|
||||
else{
|
||||
char ac[2] = "?";
|
||||
ac[0] = (char)ch;
|
||||
return (ePutsUtf8(ac) > 0);
|
||||
/* Skip over as much of the z[] input char sequence as is structurally
** valid UTF-8, stopping early at a limit or at an excluded control char.
**
**   z        Start of the char's to be scanned.
**   nAccept  Scan limit. If nAccept>=0, at most nAccept char's (bytes)
**            are accepted, and a multi-byte character which would
**            straddle that limit is not accepted at all. If nAccept<0,
**            at most -nAccept whole characters are accepted.
**   ccm      Control char mask. A char cn in the range 0..31 ends the
**            scan, unaccepted, when bit (1<<cn) of ccm is set. When
**            ccm==0 nothing is excluded, so even a NUL is accepted.
**
** Returns a pointer just past the last accepted char, so that the
** sequence z:return (inclusive:exclusive) is validated UTF-8.
**
** Validation covers byte structure only: a lead byte followed by the
** number of 10xxxxxx trail bytes it announces, 3 at most. A stray
** trail byte, an over-long lead byte, or a missing or malformed trail
** byte ends the scan at the start of the offending character.
**
** Bytes are handled as unsigned and the byte limit is kept as a count
** rather than as a pointer. This avoids left-shifting negative values,
** shifting into the sign bit of a 32-bit long, and forming a pointer
** far beyond the buffer when nAccept is as large as INT_MAX.
*/
static const char* zSkipValidUtf8(const char *z, int nAccept, long ccm){
  const unsigned char *pz = (const unsigned char*)z;
  const unsigned long ulMask = (unsigned long)ccm;
  const int bByteLimit = (nAccept>=0);
  size_t nbLeft = bByteLimit ? (size_t)nAccept : 0;   /* bytes allowed */
  unsigned int ncLeft = bByteLimit ? 0u : 0u-(unsigned int)nAccept;
  while( bByteLimit ? (nbLeft > 0) : (ncLeft-- > 0) ){
    unsigned char c = *pz;
    if( c < 0x80 ){
      /* ASCII. Only the 32 lowest codes are subject to the mask. */
      if( c < 0x20 && ((ulMask >> c) & 1UL) != 0 ) break;
      ++pz;
      if( bByteLimit ) --nbLeft;
    }else if( (c & 0xC0) != 0xC0 ){
      break; /* not a lead byte */
    }else{
      size_t nb = 1; /* Bytes of this character examined, lead included */
      do{
        unsigned char ct;
        /* The character would run past the byte limit; reject it whole. */
        if( bByteLimit && nb >= nbLeft ) return (const char*)pz;
        ct = pz[nb++];
        if( nb > 4 || (ct & 0xC0) != 0x80 ){
          /* Trailing bytes are too few, too many, or invalid. */
          return (const char*)pz;
        }
        c = (unsigned char)(c << 1); /* Eat lead byte's count. */
      }while( (c & 0x40) != 0 );
      pz += nb;
      if( bByteLimit ) nbLeft -= nb;
    }
  }
  return (const char*)pz;
}
|
||||
|
||||
SQLITE_INTERNAL_LINKAGE int
|
||||
fPutbUtf8(FILE *pfO, const char *cBuf, int nAccept, long ctrlMask){
|
||||
const char *zPast = zSkipValidUtf8(cBuf, nAccept, ctrlMask);
|
||||
int ncConsume = (int)(zPast - cBuf);
|
||||
if( pfO == 0 ) return ncConsume;
|
||||
#if SHELL_CON_TRANSLATE
|
||||
PerStreamTags pst; /* Needed only for heretofore unknown streams. */
|
||||
PerStreamTags *ppst = getEmitStreamInfo(0, &pst, &pfO);
|
||||
if( pstReachesConsole(ppst) ){
|
||||
int rv;
|
||||
maybeSetupAsConsole(ppst, 1);
|
||||
rv = conZstrEmit(ppst, cBuf, ncConsume);
|
||||
if( 0 == isKnownWritable(ppst->pf) ) restoreConsoleArb(ppst);
|
||||
return rv;
|
||||
}else {
|
||||
#endif
|
||||
return (int)fwrite(cBuf, 1, ncConsume, pfO);
|
||||
#if SHELL_CON_TRANSLATE
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
SQLITE_INTERNAL_LINKAGE int
|
||||
oPutbUtf8(const char *cBuf, int nAccept, long ctrlMask){
|
||||
FILE *pfOut;
|
||||
const char *zPast = zSkipValidUtf8(cBuf, nAccept, ctrlMask);
|
||||
int ncConsume = (int)(zPast - cBuf);
|
||||
PerStreamTags pst; /* Needed only for heretofore unknown streams. */
|
||||
PerStreamTags *ppst = getEmitStreamInfo(1, &pst, &pfOut);
|
||||
#if SHELL_CON_TRANSLATE
|
||||
if( pstReachesConsole(ppst) ){
|
||||
return conZstrEmit(ppst, cBuf, ncConsume);
|
||||
}else {
|
||||
#endif
|
||||
return (int)fwrite(cBuf, 1, ncConsume, pfOut);
|
||||
#if SHELL_CON_TRANSLATE
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
SQLITE_INTERNAL_LINKAGE int
|
||||
ePutbUtf8(const char *cBuf, int nAccept, long ctrlMask){
|
||||
FILE *pfErr;
|
||||
const char *zPast = zSkipValidUtf8(cBuf, nAccept, ctrlMask);
|
||||
int ncConsume = (int)(zPast - cBuf);
|
||||
PerStreamTags pst; /* Needed only for heretofore unknown streams. */
|
||||
PerStreamTags *ppst = getEmitStreamInfo(2, &pst, &pfErr);
|
||||
#if SHELL_CON_TRANSLATE
|
||||
if( pstReachesConsole(ppst) ){
|
||||
return conZstrEmit(ppst, cBuf, ncConsume);
|
||||
}else {
|
||||
#endif
|
||||
return (int)fwrite(cBuf, 1, ncConsume, pfErr);
|
||||
#if SHELL_CON_TRANSLATE
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
SQLITE_INTERNAL_LINKAGE char* fGetsUtf8(char *cBuf, int ncMax, FILE *pfIn){
|
||||
if( pfIn==0 ) pfIn = stdin;
|
||||
|
@ -136,20 +136,39 @@ SQLITE_INTERNAL_LINKAGE int oPutsUtf8(const char *z);
|
||||
/* Like fPutsUtf8 except stream is always the designated error. */
|
||||
SQLITE_INTERNAL_LINKAGE int ePutsUtf8(const char *z);
|
||||
|
||||
#if 0
|
||||
/*
|
||||
** Emit output like fputc(), with appropriate translation(s).
|
||||
** This is not strictly needed on fully UTF-8-aware platforms.
|
||||
** It exists for sake of orthogonality and output designation.
|
||||
** Emit output like fPutsUtf8(), except that the length of the
|
||||
** accepted char or character sequence may be limited by nAccept.
|
||||
**
|
||||
** The routine returns an error for non-ASCII character input.
|
||||
** The magnitude and sign of nAccept control what nAccept limits.
|
||||
** If positive, nAccept limits the number of char's accepted.
|
||||
** If negative, it limits the number of valid input characters.
|
||||
** Obtain the behavior of {f,o,e}PutsUtf8 with nAccept==INT_MAX.
|
||||
**
|
||||
** Returns the number of accepted char values.
|
||||
**
|
||||
** When ctrlMask!=0, it specifies a set of control characters not
|
||||
** accepted as input, so that cBuf[abs(N)] on return will be one
|
||||
** of the non-accepted characters unless nAccept limited the scan.
|
||||
** Each bit in ctrlMask, 1<<cn, directs cn to not be accepted.
|
||||
**
|
||||
** The cBuf content will only be accessed up to the lesser of the
|
||||
** limits specified by nAccept or a terminator char. It need not
|
||||
** have a sentinel unless the nAccept limit exceeds the content.
|
||||
** A common sentinel is '\x00', selected with ctrlMask == 1L .
|
||||
**
|
||||
** Special-case treatment occurs when fPutbUtf8() is given a NULL
|
||||
** pfOut argument; No output is attempted, but the return value
|
||||
** will still reflect the above conditions.
|
||||
*/
|
||||
SQLITE_INTERNAL_LINKAGE int fPutcUtf8(int ch, FILE *pfO);
|
||||
/* Like fPutcUtf8 except stream is always the designated output. */
|
||||
SQLITE_INTERNAL_LINKAGE int oPutcUtf8(int ch);
|
||||
/* Like fPutcUtf8 except stream is always the designated error. */
|
||||
SQLITE_INTERNAL_LINKAGE int ePutcUtf8(int ch);
|
||||
#endif
|
||||
SQLITE_INTERNAL_LINKAGE int
|
||||
fPutbUtf8(FILE *pfOut, const char *cBuf, int nAccept, long ctrlMask);
|
||||
/* Like fPutbUtf8 except stream is always the designated output. */
|
||||
SQLITE_INTERNAL_LINKAGE int
|
||||
oPutbUtf8(const char *cBuf, int nAccept, long ctrlMask);
|
||||
/* Like fPutbUtf8 except stream is always the designated error. */
|
||||
SQLITE_INTERNAL_LINKAGE int
|
||||
ePutbUtf8(const char *cBuf, int nAccept, long ctrlMask);
|
||||
|
||||
/*
|
||||
** Collect input like fgets(...) with special provisions for input
|
||||
|
16
manifest
16
manifest
@ -1,5 +1,5 @@
|
||||
C Add\ssqlite3_x86.exe\sunversioned\sbinary.
|
||||
D 2023-11-12T19:57:23.673
|
||||
C Enhance\sconsole_io\sto\spermit\semits\slimited\sin\svarious\sways,\ssuch\sas\svalid\sUTF-8,\supto\scontrol\schars,\sor\swith\scounted\slimits,\sall\sgetting\saway\sfrom\s0-termination\sas\sthe\ssole\slimit.\sIn\sCLI,\suse\sthis\scapability\sto\savoid\scertain\semit-chars-singly\sprocedures\sthat\swere\sbreaking\sup\sUTF-8\scharacters.\sThis\sfixes\sbroken\sjson\smode\soutput\s(on\sWindows)\sand\s(maybe)\sC-literal-like\semits.
|
||||
D 2023-11-13T05:24:00.316
|
||||
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
|
||||
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
|
||||
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
|
||||
@ -50,8 +50,8 @@ F ext/README.md fd5f78013b0a2bc6f0067afb19e6ad040e89a10179b4f6f03eee58fac5f169bd
|
||||
F ext/async/README.txt e12275968f6fde133a80e04387d0e839b0c51f91
|
||||
F ext/async/sqlite3async.c 6f247666b495c477628dd19364d279c78ea48cd90c72d9f9b98ad1aff3294f94
|
||||
F ext/async/sqlite3async.h 46b47c79357b97ad85d20d2795942c0020dc20c532114a49808287f04aa5309a
|
||||
F ext/consio/console_io.c d1f45b1380877d546adc24b339929b7d667117637e18a203c21aacf5d94ba430 x
|
||||
F ext/consio/console_io.h ec611fe8f08645d69cb18d46ab2a09c4653f2fc13ecb04c18e6012d8ea89c463
|
||||
F ext/consio/console_io.c 41fd57d99340dd51efa3a61af12eedbb90a62ff5db31c5f9d3e665134bc09353 x
|
||||
F ext/consio/console_io.h 3a2ad14e8adcca431fe9bd12e8ebb5ecfe30002cfa190b2be1479048a441e6ec
|
||||
F ext/expert/README.md b321c2762bb93c18ea102d5a5f7753a4b8bac646cb392b3b437f633caf2020c3
|
||||
F ext/expert/expert.c d548d603a4cc9e61f446cc179c120c6713511c413f82a4a32b1e1e69d3f086a4
|
||||
F ext/expert/expert1.test 0dd5cb096d66bed593e33053a3b364f6ef52ed72064bf5cf298364636dbf3cd6
|
||||
@ -728,7 +728,7 @@ F src/random.c 606b00941a1d7dd09c381d3279a058d771f406c5213c9932bbd93d5587be4b9c
|
||||
F src/resolve.c d017bad7ba8e778617701a0e986fdeb393d67d6afa84fb28ef4e8b8ad2acf916
|
||||
F src/rowset.c 8432130e6c344b3401a8874c3cb49fefe6873fec593294de077afea2dce5ec97
|
||||
F src/select.c 503331aca8785254a7bf3d74ab338a99118fa297e1184a4dde33b3cdf7a9d341
|
||||
F src/shell.c.in 3169bab2f1f67ec01912b6b5e4ed8d629e5b8aece9a1a6fd7e88643b6c3abe18
|
||||
F src/shell.c.in adb9a11d6c7c0d2260ad381468d22f4d0d68240a061e5553fce73e7e1ad40410
|
||||
F src/sqlite.h.in 4f841d3d117b830ee5ee45e8d89ceff1195f3ebb72d041ace8d116ba4c103b35
|
||||
F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8
|
||||
F src/sqlite3ext.h 3f046c04ea3595d6bfda99b781926b17e672fd6d27da2ba6d8d8fc39981dcb54
|
||||
@ -2142,8 +2142,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
|
||||
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
|
||||
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
|
||||
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
|
||||
P 7850fb98a19d0ae3535367de3bca9e50408a2c21504c5772947fc39f287aa830
|
||||
R 1cd3ad3b17c1d449dd66c6c243573a41
|
||||
P a731cdddbb99dbd3f9d1875cad5094239b78969c84fe4c56ecd63e33a5874e3f
|
||||
R 1643036426e25a342daef2af41a2624b
|
||||
U larrybr
|
||||
Z 1619f1a730674def90d8e58700e55e2d
|
||||
Z 98b08e37ed82f681bcbba6df6ae3a4d3
|
||||
# Remove this line to create a well-formed Fossil manifest.
|
||||
|
@ -1 +1 @@
|
||||
a731cdddbb99dbd3f9d1875cad5094239b78969c84fe4c56ecd63e33a5874e3f
|
||||
906c5c4082e30b7a0d07df89a42566461e2113507c5a0b339827ca0822b8fe84
|
@ -1819,16 +1819,37 @@ static void output_quoted_escaped_string(const char *z){
|
||||
setTextMode(pfO, 1);
|
||||
}
|
||||
|
||||
/* Like strpbrk, but with an optional limit on search length. */
|
||||
static const char *anyOfInStr(const char *s, const char *zAny, i64 n){
|
||||
if( n<0 ) return strpbrk(s, zAny);
|
||||
else{
|
||||
void *pcFirst = (void*)(s+(size_t)n);
|
||||
while(*zAny){
|
||||
void *pc = memchr(s, *zAny&0xff, n);
|
||||
if( pc && pc < pcFirst ) pcFirst = pc;
|
||||
++zAny;
|
||||
}
|
||||
return (const char*)(((const char*)pcFirst < s+(size_t)n)? pcFirst : 0);
|
||||
}
|
||||
}
|
||||
/*
|
||||
** Output the given string as a quoted string according to C or TCL quoting rules.
|
||||
*/
|
||||
static void output_c_string(const char *z){
|
||||
unsigned int c;
|
||||
char c;
|
||||
static const char *zq = "\"";
|
||||
static long ctrlMask = ~0L;
|
||||
static const char *zDQBSRO = "\"\\\x7f"; /* double-quote, backslash, rubout */
|
||||
char ace[3] = "\\?";
|
||||
char cbsSay;
|
||||
oputz(zq);
|
||||
while( (c = *(z++))!= 0 ){
|
||||
while( *z!=0 ){
|
||||
char *pcDQBSRO = strpbrk(z, zDQBSRO);
|
||||
const char *pcPast = z + fPutbUtf8(0, z, INT_MAX, ctrlMask);
|
||||
const char *pcEnd = (pcDQBSRO && pcDQBSRO < pcPast)? pcDQBSRO : pcPast;
|
||||
if( pcEnd > z ) oPutbUtf8(z, (int)(pcEnd-z), 0);
|
||||
if( (c = *pcEnd)==0 ) break;
|
||||
++pcEnd;
|
||||
switch( c ){
|
||||
case '\\': case '"':
|
||||
cbsSay = (char)c;
|
||||
@ -1848,6 +1869,7 @@ static void output_c_string(const char *z){
|
||||
ace[1] = (char)c;
|
||||
oputz(ace+1);
|
||||
}
|
||||
z = pcEnd;
|
||||
}
|
||||
oputz(zq);
|
||||
}
|
||||
@ -1856,14 +1878,26 @@ static void output_c_string(const char *z){
|
||||
** Output the given string as a quoted string according to JSON quoting rules.
|
||||
*/
|
||||
static void output_json_string(const char *z, i64 n){
|
||||
unsigned int c;
|
||||
char c;
|
||||
static const char *zq = "\"";
|
||||
static long ctrlMask = ~0L;
|
||||
static const char *zDQBS = "\"\\";
|
||||
const char *pcLimit;
|
||||
char ace[3] = "\\?";
|
||||
char cbsSay;
|
||||
|
||||
if( z==0 ) z = "";
|
||||
if( n<0 ) n = strlen(z);
|
||||
pcLimit = z + ((n<0)? strlen(z) : (size_t)n);
|
||||
oputz(zq);
|
||||
while( n-- ){
|
||||
while( z < pcLimit ){
|
||||
const char *pcDQBS = anyOfInStr(z, zDQBS, pcLimit-z);
|
||||
const char *pcPast = z + fPutbUtf8(0, z, (int)(pcLimit-z), ctrlMask);
|
||||
const char *pcEnd = (pcDQBS && pcDQBS < pcPast)? pcDQBS : pcPast;
|
||||
if( pcEnd > z ){
|
||||
oPutbUtf8(z, (int)(pcEnd-z), 0);
|
||||
z = pcEnd;
|
||||
}
|
||||
if( z >= pcLimit ) break;
|
||||
c = *(z++);
|
||||
switch( c ){
|
||||
case '"': case '\\':
|
||||
|
Loading…
Reference in New Issue
Block a user