From 67a0bf383f00d4f25d9ca27864caabf2a0f572b2 Mon Sep 17 00:00:00 2001 From: drh <> Date: Mon, 18 Jul 2022 11:44:16 +0000 Subject: [PATCH] Enhance the ext/misc/regexp.c code so that when it is compiled with SQLITE_DEBUG, a new function named regexp_bytecode() is available that prints out the compiled NFA as human-readable text, for debugging purposes. FossilOrigin-Name: cb5c08978fe8f074e6ae16953575213709e98b8bbae4359e0d2e6de67a7ea9e5 --- ext/misc/regexp.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++- manifest | 14 +++---- manifest.uuid | 2 +- 3 files changed, 102 insertions(+), 9 deletions(-) diff --git a/ext/misc/regexp.c b/ext/misc/regexp.c index d5b3872058..54c505233d 100644 --- a/ext/misc/regexp.c +++ b/ext/misc/regexp.c @@ -94,6 +94,31 @@ SQLITE_EXTENSION_INIT1 #define RE_OP_NOTSPACE 16 /* Not a digit */ #define RE_OP_BOUNDARY 17 /* Boundary between word and non-word */ +#if defined(SQLITE_DEBUG) +/* Opcode names used for symbolic debugging */ +static const char *ReOpName[] = { + "EOF", + "MATCH", + "ANY", + "ANYSTAR", + "FORK", + "GOTO", + "ACCEPT", + "CC_INC", + "CC_EXC", + "CC_VALUE", + "CC_RANGE", + "WORD", + "NOTWORD", + "DIGIT", + "NOTDIGIT", + "SPACE", + "NOTSPACE", + "BOUNDARY", +}; +#endif /* SQLITE_DEBUG */ + + /* Each opcode is a "state" in the NFA */ typedef unsigned short ReStateNumber; @@ -127,7 +152,7 @@ struct ReCompiled { int *aArg; /* Arguments to each operator */ unsigned (*xNextChar)(ReInput*); /* Next character function */ unsigned char zInit[12]; /* Initial text to match */ - int nInit; /* Number of characters in zInit */ + int nInit; /* Number of bytes in zInit */ unsigned nState; /* Number of entries in aOp[] and aArg[] */ unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */ }; @@ -745,6 +770,67 @@ static void re_sql_func( } } +#if defined(SQLITE_DEBUG) +/* +** This function is used for testing and debugging only. It is only available +** if the SQLITE_DEBUG compile-time option is used. +** +** Compile a regular expression and then convert the compiled expression into +** text and return that text. +*/ +static void re_bytecode_func( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const char *zPattern; + const char *zErr; + ReCompiled *pRe; + sqlite3_str *pStr; + int i; + int n; + char *z; + + zPattern = (const char*)sqlite3_value_text(argv[0]); + if( zPattern==0 ) return; + zErr = re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0); + if( zErr ){ + re_free(pRe); + sqlite3_result_error(context, zErr, -1); + return; + } + if( pRe==0 ){ + sqlite3_result_error_nomem(context); + return; + } + pStr = sqlite3_str_new(0); + if( pStr==0 ) goto re_bytecode_func_err; + if( pRe->nInit>0 ){ + sqlite3_str_appendf(pStr, "INIT "); + for(i=0; inInit; i++){ + sqlite3_str_appendf(pStr, "%02x", pRe->zInit[i]); + } + sqlite3_str_appendf(pStr, "\n"); + } + for(i=0; inState; i++){ + sqlite3_str_appendf(pStr, "%-8s %4d\n", + ReOpName[(unsigned char)pRe->aOp[i]], pRe->aArg[i]); + } + n = sqlite3_str_length(pStr); + z = sqlite3_str_finish(pStr); + if( n==0 ){ + sqlite3_free(z); + }else{ + sqlite3_result_text(context, z, n-1, sqlite3_free); + } + +re_bytecode_func_err: + re_free(pRe); +} + +#endif /* SQLITE_DEBUG */ + + /* ** Invoke this routine to register the regexp() function with the ** SQLite database connection. @@ -769,6 +855,13 @@ int sqlite3_regexp_init( rc = sqlite3_create_function(db, "regexpi", 2, SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC, (void*)db, re_sql_func, 0, 0); +#if defined(SQLITE_DEBUG) + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "regexp_bytecode", 1, + SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC, + 0, re_bytecode_func, 0, 0); + } +#endif /* SQLITE_DEBUG */ } return rc; } diff --git a/manifest b/manifest index 7922441c62..e2ba6ba112 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fixes\sfor\sthe\sgenerated\s"mallocs.tcl"\sand\s"leaks.tcl"\sscripts\sgenerated\sby\srunning\stcl\stests\swith\sthe\s--malloctrace=1\soption. -D 2022-07-16T18:08:48.116 +C Enhance\sthe\sext/misc/regexp.c\scode\sso\sthat\swhen\sit\sis\scompiled\swith\nSQLITE_DEBUG,\sa\snew\sfunction\snamed\sregexp_bytecode()\sis\savailable\sthat\nprints\sout\sthe\scompiled\sNFA\sas\shuman-readable\stext,\sfor\sdebugging\spurposes. +D 2022-07-18T11:44:16.316 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -334,7 +334,7 @@ F ext/misc/normalize.c bd84355c118e297522aba74de34a4fd286fc775524e0499b14473918d F ext/misc/percentile.c b9086e223d583bdaf8cb73c98a6539d501a2fc4282654adbfea576453d82e691 F ext/misc/prefixes.c 0f4f8cff5aebc00a7e3ac4021fd59cfe1a8e17c800ceaf592859ecb9cbc38196 F ext/misc/qpvtab.c 09738419e25f603a35c0ac8bd0a04daab794f48d08a9bc07a6085b9057b99009 -F ext/misc/regexp.c c1fb4f0639ad70f180fa7ebca3b9830c1c648cdcd072e28747f727699f3a5071 +F ext/misc/regexp.c 554bbbc06980fac640618da2401b9b1a5da722f60fbc9ecd0de0244c96173a23 F ext/misc/remember.c add730f0f7e7436cd15ea3fd6a90fd83c3f706ab44169f7f048438b7d6baa69c F ext/misc/rot13.c 51ac5f51e9d5fd811db58a9c23c628ad5f333c173f1fc53c8491a3603d38556c F ext/misc/scrub.c 2a44b0d44c69584c0580ad2553f6290a307a49df4668941d2812135bfb96a946 @@ -1980,8 +1980,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 92d60b64ebfc2d1f0a9cabaa88e7bf0d11737ed01a77b627af10dd1b96a5321c -R c2826cbb8b9c28cc73030d164bbfa10e -U dan -Z 0beb54d1e8fdf448050ee0b11342583e +P 449799e2d5902464540e8fda53ab429e0518278dab3b17c86911759114cddea0 +R c5fe187a7b0d702467f87ef01664ca12 +U drh +Z c7627cc664c2932a41026727705de830 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 496a3b3050..5a0612c077 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -449799e2d5902464540e8fda53ab429e0518278dab3b17c86911759114cddea0 \ No newline at end of file +cb5c08978fe8f074e6ae16953575213709e98b8bbae4359e0d2e6de67a7ea9e5 \ No newline at end of file