From 73f03276d918a5184463095b3fd6bbd9a042cece Mon Sep 17 00:00:00 2001 From: larrybr Date: Tue, 7 Nov 2023 19:30:14 +0000 Subject: [PATCH] Refactor MBCS/UTF-8 translation to avoid extra allocations, supporting non-formatted (faster) output. Some code cleanup. Wrap .system/.shell command exection with restoration of startup console mode and renewing mode setup. Changes to make legacy MBCS build work better (than legacy did, even with --no-utf8.) FossilOrigin-Name: d5e88fcde53ca7ba05bb164943a9f57bd92080bb7e5eebbbed64b9886ac97338 --- ext/consio/console_io.c | 141 +++++++++++++++++++++++----------------- manifest | 14 ++-- manifest.uuid | 2 +- src/shell.c.in | 2 + 4 files changed, 93 insertions(+), 66 deletions(-) diff --git a/ext/consio/console_io.c b/ext/consio/console_io.c index c60e2b0a49..35016b0c4a 100755 --- a/ext/consio/console_io.c +++ b/ext/consio/console_io.c @@ -119,7 +119,6 @@ typedef struct ConsoleInfo { #endif static ConsoleInfo consoleInfo = { - /* {0,0,0}, // iInitialFmode */ { /* pst */ CI_INITIALIZER, CI_INITIALIZER, CI_INITIALIZER }, #if SHELL_CON_TRANSLATE 0, 0, 1, /* haveInput, outputIx, stdinEof */ @@ -150,7 +149,6 @@ consoleClassifySetup( FILE *pfIn, FILE *pfOut, FILE *pfErr ){ } if( ix > 0 ) fflush(apf[ix]); #if SHELL_CON_TRANSLATE == 2 - // _setmode(_fileno(apf[ix]), _O_U8TEXT); _setmode(_fileno(apf[ix]), _O_TEXT); #endif } @@ -165,14 +163,13 @@ SQLITE_INTERNAL_LINKAGE void SQLITE_CDECL consoleRestore( void ){ if( consoleInfo.cscs & (CSCS_InConsole<hx, ppst->consMode); @@ -213,43 +210,59 @@ SQLITE_INTERNAL_LINKAGE void setTextMode(FILE *pf, short bFlush){ #undef setModeFlushQ #if SHELL_CON_TRANSLATE +/* Write plain 0-terminated output to stream known as console. */ +static int conioZstrOut(int rch, const char *z){ + int rv = 0; + if( z!=NULL && *z!=0 ){ + int nc; + int nwc; +# if SHELL_CON_TRANSLATE == 2 + UINT cocp = GetConsoleOutputCP(); + FILE *pfO = consoleInfo.pst[rch].pf; + if( cocp == CP_UTF8 ){ + /* This is not legacy action. But it can work better, + ** when the console putatively can handle UTF-8. */ + return fputs(z, pfO)<0 ? 0 : (int)strlen(z); + } +# endif + nc = (int)strlen(z); + nwc = MultiByteToWideChar(CP_UTF8,0, z,nc, 0,0); + if( nwc > 0 ){ + WCHAR *zw = sqlite3_malloc64(nwc*sizeof(WCHAR)); + if( zw!=NULL ){ + nwc = MultiByteToWideChar(CP_UTF8,0, z,nc, zw,nwc); + if( nwc > 0 ){ +# if SHELL_CON_TRANSLATE == 2 + /* Legacy translation to active code page, then MBCS out. */ + rv = WideCharToMultiByte(cocp,0, zw,nwc, 0,0, 0,0); + if( rv != 0 ){ + char *zmb = sqlite3_malloc64(rv+1); + if( zmb != NULL ){ + rv = WideCharToMultiByte(cocp,0, zw,nwc, zmb,rv, 0,0); + zmb[rv] = 0; + if( fputs(zmb, pfO)<0 ) rv = 0; + sqlite3_free(zmb); + } + } +# elif SHELL_CON_TRANSLATE == 1 + /* Translation from UTF-8 to UTF-16, then WCHARs out. */ + if( WriteConsoleW(consoleInfo.pst[rch].hx, zw,nwc, 0, NULL) ){ + rv = nc; + } +# endif + } + sqlite3_free(zw); + } + } + } + return rv; +} + /* For fprintfUtf8() and printfUtf8() when stream is known as console. */ static int conioVmPrintf(int rch, const char *zFormat, va_list ap){ - int rv = 0; - char *z1 = sqlite3_vmprintf(zFormat, ap); -# if SHELL_CON_TRANSLATE == 2 - if( z1!=NULL ){ - UINT ccp = GetConsoleOutputCP(); - FILE *pfO = consoleInfo.pst[rch].pf; - /* Legacy translation to active code page, then MBCS chars out. */ - char *z2 = sqlite3_win32_utf8_to_mbcs_v2(z1, 0); - if( z2!=NULL ){ - rv = (int)strlen(z2); - fputs(z2, pfO); - sqlite3_free(z2); - } - sqlite3_free(z1); - } -# elif SHELL_CON_TRANSLATE == 1 - /* Translation from UTF-8 to UTF-16, then WCHAR characters out. */ - if( z1!=NULL ){ - int nwc; - WCHAR *zw2 = 0; - rv = (int)strlen(z1); - nwc = MultiByteToWideChar(CP_UTF8,0,z1,rv,0,0); - if( nwc>0 ){ - zw2 = sqlite3_malloc64((nwc+1)*sizeof(WCHAR)); - if( zw2!=NULL ){ - HANDLE ho = consoleInfo.pst[rch].hx; - nwc = MultiByteToWideChar(CP_UTF8,0,z1,rv,zw2,nwc); - zw2[nwc] = 0; - WriteConsoleW(ho, zw2, nwc, 0, NULL); - sqlite3_free(zw2); - }else rv = 0; - } - sqlite3_free(z1); - } -# endif + char *z = sqlite3_vmprintf(zFormat, ap); + int rv = conioZstrOut(rch, z); + sqlite3_free(z); return rv; } #endif @@ -292,9 +305,14 @@ SQLITE_INTERNAL_LINKAGE int fprintfUtf8(FILE *pfO, const char *zFormat, ...){ SQLITE_INTERNAL_LINKAGE int fputsUtf8(const char *z, FILE *pfO){ #if SHELL_CON_TRANSLATE - return fprintfUtf8(pfO, "%s", z); -#else - return fputs(z, pfO); + short rch = isConOut(pfO); + if( rch > 0 ){ + return conioZstrOut(rch, z); + }else { +#endif + return (fputs(z, pfO)<0)? 0 : (int)strlen(z); +#if SHELL_CON_TRANSLATE + } #endif } @@ -374,8 +392,11 @@ SQLITE_INTERNAL_LINKAGE char* fgetsUtf8(char *cBuf, int ncMax, FILE *pfIn){ ** Also, it is interactive input so it need not be fast. */ int nco = 0; /* For converstion to WCHAR, or pre-test of same. */ - UINT ccp = GetConsoleCP(); /* For translation from mbcs. */ - if( ccp == CP_UTF8 ) return fgets(cBuf, ncMax, pfIn); + UINT cicp = GetConsoleCP(); /* For translation from mbcs. */ + /* If input code page is CP_UTF8, must bypass MBCS input + ** collection because getc() returns 0 for non-ASCII byte + ** Instead, use fgets() which repects character boundaries. */ + if( cicp == CP_UTF8 ) return fgets(cBuf, ncMax, pfIn); while( ncMax-nco >= 5 ){ /* Have space for max UTF-8 group and 0-term. */ int nug = 0; @@ -391,25 +412,29 @@ SQLITE_INTERNAL_LINKAGE char* fgetsUtf8(char *cBuf, int ncMax, FILE *pfIn){ continue; } /* Deal with possible mbcs lead byte. */ - nug = mbcsToUtf8InPlaceIfValid(cBuf+nco, 1, ncMax-nco-1, ccp); + nug = mbcsToUtf8InPlaceIfValid(cBuf+nco, 1, ncMax-nco-1, cicp); if( nug > 0 ){ nco += nug; }else{ - /* Must have just mbcs lead byte; get the trail byte. */ - int ct = getc(pfIn); - if( ct < 0 || ct == '\n' ){ - /* Just drop whatever garbage preceded the newline or. - ** EOF. It's not valid, should not happen, and there - ** is no good way to deal with it, short of bailing. */ - if( ct > 0 ){ - cBuf[nco++] = (int)ct; + /* Must have just mbcs lead byte; get the trail byte(s). */ + int ntb = 1, ct; + while( ntb <= 3 ){ /* No more under any multi-byte code. */ + ct = getc(pfIn); + if( ct < 0 || ct == '\n' ){ + /* Just drop whatever garbage preceded the newline or. + ** EOF. It's not valid, should not happen, and there + ** is no good way to deal with it, short of bailing. */ + if( ct > 0 ){ + cBuf[nco++] = (int)ct; + } + break; } - break; + /* Treat ct as bona fide MBCS trailing byte, if valid. */ + cBuf[nco+ntb] = ct; + nug = mbcsToUtf8InPlaceIfValid(cBuf+nco, 1+ntb, ncMax-nco-1, cicp); + nco += nug; } - /* Treat ct as bona fide MBCS trailing byte, if valid. */ - cBuf[nco+1] = ct; - nug = mbcsToUtf8InPlaceIfValid(cBuf+nco, 2, ncMax-nco-1, ccp); - nco += nug; + if( ct < 0 ) break; } } cBuf[nco] = 0; diff --git a/manifest b/manifest index a2db5a7373..98578565a7 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Get\sdependencies\sinto\smake\srecipes.\sGet\slegacy\sconsole\sI/O\s(-DSHELL_LEGACY_CONSOLE_IO)\sworking.\sDue\sto\smovement\sof\sMBCS/UTF-8\stranslation\sinto\straditional\sstream\sI/O\ssimulacra,\sthe\sinput\stranslation\sdoes\snot\shappen\sthe\ssame\sway.\s(It\sworks\sthe\ssame,\sbut\sfails\sdifferently\sand\sa\sbit\sbetter.)\sAdded\sprintf()\sand\sfputs()\slook-alikes,\sand\smade\sCLI\suse\sthem. -D 2023-11-07T02:41:46.723 +C Refactor\sMBCS/UTF-8\stranslation\sto\savoid\sextra\sallocations,\ssupporting\snon-formatted\s(faster)\soutput.\sSome\scode\scleanup.\sWrap\s.system/.shell\scommand\sexection\swith\srestoration\sof\sstartup\sconsole\smode\sand\srenewing\smode\ssetup.\sChanges\sto\smake\slegacy\sMBCS\sbuild\swork\sbetter\s(than\slegacy\sdid,\seven\swith\s--no-utf8.) +D 2023-11-07T19:30:14.998 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -50,7 +50,7 @@ F ext/README.md fd5f78013b0a2bc6f0067afb19e6ad040e89a10179b4f6f03eee58fac5f169bd F ext/async/README.txt e12275968f6fde133a80e04387d0e839b0c51f91 F ext/async/sqlite3async.c 6f247666b495c477628dd19364d279c78ea48cd90c72d9f9b98ad1aff3294f94 F ext/async/sqlite3async.h 46b47c79357b97ad85d20d2795942c0020dc20c532114a49808287f04aa5309a -F ext/consio/console_io.c 5011c039c6224831ebfa7f6522cf4bce72229f50c45c9aa66df0a45acd4690bf x +F ext/consio/console_io.c adb7da4947a5dc661f0106a7a6962c7528653bf95709bcddae22f3422cde25f7 x F ext/consio/console_io.h e6055b6a13a2a9f237e1672f9ef861126a37a61db0e6218a137832557f10ea25 F ext/expert/README.md b321c2762bb93c18ea102d5a5f7753a4b8bac646cb392b3b437f633caf2020c3 F ext/expert/expert.c d548d603a4cc9e61f446cc179c120c6713511c413f82a4a32b1e1e69d3f086a4 @@ -727,7 +727,7 @@ F src/random.c 606b00941a1d7dd09c381d3279a058d771f406c5213c9932bbd93d5587be4b9c F src/resolve.c d017bad7ba8e778617701a0e986fdeb393d67d6afa84fb28ef4e8b8ad2acf916 F src/rowset.c 8432130e6c344b3401a8874c3cb49fefe6873fec593294de077afea2dce5ec97 F src/select.c a19daa26e95f7245106a31f288b2f50c72d1f2cc156703f04c8c91450e111515 -F src/shell.c.in b651e2c297bfef8bd063159765b4ffab14f27816cb373b4995a4b411c33ecd51 +F src/shell.c.in 5afd6ba7c0144e2a55df1c24732d88e4ae860459970b25ee2b4a3812af53c358 F src/sqlite.h.in ef0e41e83ad1ac0dcc9ec9939bf541a44b1c5de821bee2d6c61754c3252f3276 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h 3f046c04ea3595d6bfda99b781926b17e672fd6d27da2ba6d8d8fc39981dcb54 @@ -2144,8 +2144,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 1d0583f2eb69fdca1cbc55763c0e86a7e32cb7771bfbc2cdf02da4e3fedbfa23 -R 8fe955210f7825f8cd02e07383e28c60 +P 1721dc6a434361c4e2b87c6e677b6dc223432b3cdd5b9eecabaa258889fb2d2a +R c19fe6cbae4ae273f253c34b1a4e1156 U larrybr -Z 407fd2a55e54d86f679485ba9f27b473 +Z 24f475e52cde2b6dcf8641f1d924498c # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index d95739e422..4d85fa6de7 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1721dc6a434361c4e2b87c6e677b6dc223432b3cdd5b9eecabaa258889fb2d2a \ No newline at end of file +d5e88fcde53ca7ba05bb164943a9f57bd92080bb7e5eebbbed64b9886ac97338 \ No newline at end of file diff --git a/src/shell.c.in b/src/shell.c.in index 426401e671..a32c4f5d57 100644 --- a/src/shell.c.in +++ b/src/shell.c.in @@ -10500,7 +10500,9 @@ static int do_meta_command(char *zLine, ShellState *p){ zCmd = sqlite3_mprintf(strchr(azArg[i],' ')==0?"%z %s":"%z \"%s\"", zCmd, azArg[i]); } + consoleRestore(); x = zCmd!=0 ? system(zCmd) : 1; + consoleClassifySetup(stdin, stdout, stderr); sqlite3_free(zCmd); if( x ) utf8_printf(stderr, "System command returns %d\n", x); }else