Fix all known problems with JSONB json_extract().

FossilOrigin-Name: d5f48c57e975ac468cf29a43a5d0b56ef6d06cf35a8b0bddf87ec1c0fc7ae028
This commit is contained in:
drh 2023-11-28 23:26:55 +00:00
commit 748178a9fb
5 changed files with 114 additions and 316 deletions

View File

@ -1,5 +1,5 @@
C The\sjson_remove()\sfunction\snow\suses\sonly\sJSONB,\snever\sJsonNodes,\sinternally.
D 2023-11-28T18:16:02.028
C Fix\sall\sknown\sproblems\swith\sJSONB\sjson_extract().
D 2023-11-28T23:26:55.773
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -688,7 +688,7 @@ F src/hash.h 3340ab6e1d13e725571d7cee6d3e3135f0779a7d8e76a9ce0a85971fa3953c51
F src/hwtime.h f9c2dfb84dce7acf95ce6d289e46f5f9d3d1afd328e53da8f8e9008e3b3caae6
F src/in-operator.md 10cd8f4bcd225a32518407c2fb2484089112fd71
F src/insert.c 3f0a94082d978bbdd33c38fefea15346c6c6bffb70bc645a71dc0f1f87dd3276
F src/json.c 442ff1ce9ebb82728aca5ddfcab21af46c3230e9f672307b91489f43cd6d6fdc
F src/json.c dd5c3f52877448cc9644887e83a2ff33ab0be87e158de7d5a86db12f90b28045
F src/legacy.c d7874bc885906868cd51e6c2156698f2754f02d9eee1bae2d687323c3ca8e5aa
F src/loadext.c 7432c944ff197046d67a1207790a1b13eec4548c85a9457eb0896bb3641dfb36
F src/main.c 1b89f3de98d1b59fec5bac1d66d6ece21f703821b8eaa0d53d9604c35309f6f9
@ -1325,11 +1325,11 @@ F test/json/json-generator.tcl dc0dd0f393800c98658fc4c47eaa6af29d4e17527380cd286
F test/json/json-q1-b.txt 606818a5fba6d9e418c9f4ea7d8418af026775042dad81439b72447a147a462c
F test/json/json-q1.txt 65f9d1cdcc4cffa9823fb73ed936aae5658700cd001fde448f68bfb91c807307
F test/json/json-speed-check.sh b060a9a6c696c0a807d8929400fa11bd7113edc58b0d66b9795f424f8d0db326 x
F test/json101.test 643936557daeb2eeeb839e9d87a682c7d7c6f6aee66045eedd8613cb8471d8d8
F test/json101.test 8f5d1a3350c36945c8b58f538e1c92cfd87fd50ab6f5e3d5f4cf3cdb03b9546d
F test/json102.test 557a46e16df1aa9bdbc4076a71a45814ea0e7503d6621d87d42a8c04cbc2b0ef
F test/json103.test 53df87f83a4e5fa0c0a56eb29ff6c94055c6eb919f33316d62161a8880112dbe
F test/json104.test 1b844a70cddcfa2e4cd81a5db0657b2e61e7f00868310f24f56a9ba0114348c1
F test/json105.test 11670a4387f4308ae0318cadcbd6a918ea7edcd19fbafde020720a073952675d
F test/json105.test e64a8d73677fbae67886642cd5076e2ef3efe89f8483b87595cf9c030216c9bd
F test/json501.test ab168a12eb6eb14d479f8c1cdae3ac062fd5a4679f17f976e96f1af518408330
F test/json502.test 98c38e3c4573841028a1381dfb81d4c3f9b105d39668167da10d055e503f6d0b
F test/jsonb01.test cace70765b36a36aec9a85a41ea65667d3bbf647d4400ddc3ac76f8fe7d94f90
@ -2145,8 +2145,9 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 11aba347ff7c639500eec904e212eabe889b077351b946cfeac2b74b9703672a
R 82f8a4e62603e32381a0117063382c7a
P b69786e746ae2b927b64d9871fd120b7f8f06cc53739fd46a4da51aa16cf8576 4f106b64fe8988435872806bd0a6c223b61f53af0dd1c47c847bb4eec4e03e27
R e440543ab70160e0cdb3ca23863ac507
T +closed 4f106b64fe8988435872806bd0a6c223b61f53af0dd1c47c847bb4eec4e03e27
U drh
Z 37d5559550d9d10448cc0117446ead8c
Z c2c6abd51345cdb21ffc83ec27ff2d5d
# Remove this line to create a well-formed Fossil manifest.

View File

@ -1 +1 @@
b69786e746ae2b927b64d9871fd120b7f8f06cc53739fd46a4da51aa16cf8576
d5f48c57e975ac468cf29a43a5d0b56ef6d06cf35a8b0bddf87ec1c0fc7ae028

View File

@ -1110,180 +1110,6 @@ static u32 jsonHexToInt4(const char *z){
return v;
}
/*
** Make the return value from an SQL function be the SQL value of
** JsonNode pNode.
**
** If pNode is an atom (not an array or object) then the value returned
** is a pure SQL value - an SQLITE_INTEGER, SQLITE_REAL, SQLITE_TEXT, or
** SQLITE_NULL. However, if pNode is a JSON array or object, then the
** value returned is either RFC-8259 JSON text or a BLOB in the JSONB
** format, depending on the JSON_BLOB flag of the function user-data.
*/
static void jsonReturnFromNode(
JsonParse *pParse, /* Complete JSON parse tree */
JsonNode *pNode, /* Node to return */
sqlite3_context *pCtx, /* Return value for this function */
int omitSubtype /* Do not call sqlite3_result_subtype() */
){
switch( pNode->eType ){
default: {
assert( pNode->eType==JSON_NULL );
sqlite3_result_null(pCtx);
break;
}
case JSON_TRUE: {
sqlite3_result_int(pCtx, 1);
break;
}
case JSON_FALSE: {
sqlite3_result_int(pCtx, 0);
break;
}
case JSON_INT: {
sqlite3_int64 i = 0;
int rc;
int bNeg = 0;
const char *z;
char *zz;
sqlite3 *db = sqlite3_context_db_handle(pCtx);
assert( pNode->eU==1 );
zz = sqlite3DbStrNDup(db, pNode->u.zJContent, pNode->n);
if( zz==0 ){
sqlite3_result_error_nomem(pCtx);
return;
}
z = zz;
if( z[0]=='-' ){ z++; bNeg = 1; }
else if( z[0]=='+' ){ z++; }
rc = sqlite3DecOrHexToI64(z, &i);
sqlite3DbFree(db, zz);
if( rc<=1 ){
sqlite3_result_int64(pCtx, bNeg ? -i : i);
}else if( rc==3 && bNeg ){
sqlite3_result_int64(pCtx, SMALLEST_INT64);
}else{
goto to_double;
}
break;
}
case JSON_REAL: {
double r;
const char *z;
assert( pNode->eU==1 );
to_double:
z = pNode->u.zJContent;
sqlite3AtoF(z, &r, pNode->n, SQLITE_UTF8);
sqlite3_result_double(pCtx, r);
break;
}
case JSON_STRING: {
if( pNode->jnFlags & JNODE_RAW ){
assert( pNode->eU==1 );
sqlite3_result_text(pCtx, pNode->u.zJContent, pNode->n,
SQLITE_TRANSIENT);
}else if( (pNode->jnFlags & JNODE_ESCAPE)==0 ){
/* JSON formatted without any backslash-escapes */
assert( pNode->eU==1 );
sqlite3_result_text(pCtx, pNode->u.zJContent, pNode->n,
SQLITE_TRANSIENT);
}else{
/* Translate JSON formatted string into raw text */
u32 i;
u32 n = pNode->n;
const char *z;
char *zOut;
u32 j;
u32 nOut = n;
assert( pNode->eU==1 );
z = pNode->u.zJContent;
zOut = sqlite3_malloc( nOut+1 );
if( zOut==0 ){
sqlite3_result_error_nomem(pCtx);
break;
}
for(i=0, j=0; i<n; i++){
char c = z[i];
if( c=='\\' ){
c = z[++i];
if( c=='u' ){
u32 v = jsonHexToInt4(z+i+1);
i += 4;
if( v==0 ) break;
if( v<=0x7f ){
zOut[j++] = (char)v;
}else if( v<=0x7ff ){
zOut[j++] = (char)(0xc0 | (v>>6));
zOut[j++] = 0x80 | (v&0x3f);
}else{
u32 vlo;
if( (v&0xfc00)==0xd800
&& i<n-6
&& z[i+1]=='\\'
&& z[i+2]=='u'
&& ((vlo = jsonHexToInt4(z+i+3))&0xfc00)==0xdc00
){
/* We have a surrogate pair */
v = ((v&0x3ff)<<10) + (vlo&0x3ff) + 0x10000;
i += 6;
zOut[j++] = 0xf0 | (v>>18);
zOut[j++] = 0x80 | ((v>>12)&0x3f);
zOut[j++] = 0x80 | ((v>>6)&0x3f);
zOut[j++] = 0x80 | (v&0x3f);
}else{
zOut[j++] = 0xe0 | (v>>12);
zOut[j++] = 0x80 | ((v>>6)&0x3f);
zOut[j++] = 0x80 | (v&0x3f);
}
}
continue;
}else if( c=='b' ){
c = '\b';
}else if( c=='f' ){
c = '\f';
}else if( c=='n' ){
c = '\n';
}else if( c=='r' ){
c = '\r';
}else if( c=='t' ){
c = '\t';
}else if( c=='v' ){
c = '\v';
}else if( c=='\'' || c=='"' || c=='/' || c=='\\' ){
/* pass through unchanged */
}else if( c=='0' ){
c = 0;
}else if( c=='x' ){
c = (jsonHexToInt(z[i+1])<<4) | jsonHexToInt(z[i+2]);
i += 2;
}else if( c=='\r' && z[i+1]=='\n' ){
i++;
continue;
}else if( 0xe2==(u8)c ){
assert( 0x80==(u8)z[i+1] );
assert( 0xa8==(u8)z[i+2] || 0xa9==(u8)z[i+2] );
i += 2;
continue;
}else{
continue;
}
} /* end if( c=='\\' ) */
zOut[j++] = c;
} /* end for() */
zOut[j] = 0;
sqlite3_result_text(pCtx, zOut, j, sqlite3_free);
}
break;
}
case JSON_ARRAY:
case JSON_OBJECT: {
jsonReturnNodeAsJson(pParse, pNode, pCtx, 0, omitSubtype);
break;
}
}
}
/*
** A macro to hint to the compiler that a function should not be
** inlined.
@ -4217,67 +4043,6 @@ static void jsonReturnFromBlob(
}
}
/* Do a JSON_EXTRACT(JSON, PATH) on a when JSON is a BLOB.
*/
static void jsonExtractFromBlob(
sqlite3_context *ctx,
sqlite3_value *pJson,
sqlite3_value *pPath,
int flags
){
const char *zPath = (const char*)sqlite3_value_text(pPath);
u32 i = 0;
JsonParse px;
if( zPath==0 ) return;
memset(&px, 0, sizeof(px));
px.nBlob = sqlite3_value_bytes(pJson);
px.aBlob = (u8*)sqlite3_value_blob(pJson);
if( px.aBlob==0 ) return;
if( zPath[0]=='$' ){
zPath++;
i = jsonLookupBlobStep(&px, 0, zPath, 0);
}else if( (flags & JSON_ABPATH) ){
/* The -> and ->> operators accept abbreviated PATH arguments. This
** is mostly for compatibility with PostgreSQL, but also for
** convenience.
**
** NUMBER ==> $[NUMBER] // PG compatible
** LABEL ==> $.LABEL // PG compatible
** [NUMBER] ==> $[NUMBER] // Not PG. Purely for convenience
*/
JsonString jx;
jsonStringInit(&jx, ctx);
if( sqlite3Isdigit(zPath[0]) ){
jsonAppendRawNZ(&jx, "[", 1);
jsonAppendRaw(&jx, zPath, (int)strlen(zPath));
jsonAppendRawNZ(&jx, "]", 2);
zPath = jx.zBuf;
}else if( zPath[0]!='[' ){
jsonAppendRawNZ(&jx, ".", 1);
jsonAppendRaw(&jx, zPath, (int)strlen(zPath));
jsonAppendChar(&jx, 0);
zPath = jx.zBuf;
}
i = jsonLookupBlobStep(&px, 0, zPath, 0);
jsonStringReset(&jx);
}else{
sqlite3_result_error(ctx, "bad path", -1);
return;
}
if( i<px.nBlob ){
jsonReturnFromBlob(&px, i, ctx, 0);
}else if( i==JSON_BLOB_NOTFOUND ){
return; /* Return NULL if not found */
}else if( i==JSON_BLOB_ERROR ){
sqlite3_result_error(ctx, "malformed JSON", -1);
}else{
char *zMsg = sqlite3_mprintf("bad path syntax: %s",
sqlite3_value_text(pPath));
sqlite3_result_error(ctx, zMsg, -1);
sqlite3_free(zMsg);
}
}
/*
** pArg is a function argument that might be an SQL value or a JSON
** value. Figure out what it is and encode it as a JSONB blob.
@ -4819,6 +4584,19 @@ static void jsonArrayLengthFunc(
sqlite3_result_int64(ctx, n);
}
/*
** Generate a bad path error for json_extract()
*/
static void jsonExtractBadPathError(
sqlite3_context *ctx, /* The function call containing the error */
const char *zPath /* The path with the problem */
){
sqlite3 *db = sqlite3_context_db_handle(ctx);
char *zMsg = sqlite3MPrintf(db, "bad JSON path: %Q", zPath);
sqlite3_result_error(ctx, zMsg, -1);
sqlite3DbFree(db, zMsg);
}
/*
** json_extract(JSON, PATH, ...)
** "->"(JSON,PATH)
@ -4844,83 +4622,102 @@ static void jsonExtractFunc(
int argc,
sqlite3_value **argv
){
JsonParse *p; /* The parse */
JsonNode *pNode;
const char *zPath;
int flags = SQLITE_PTR_TO_INT(sqlite3_user_data(ctx));
JsonString jx;
JsonParse *p = 0; /* The parse */
int flags; /* Flags associated with the function */
int i; /* Loop counter */
JsonString jx; /* String for array result */
if( argc<2 ) return;
if( jsonFuncArgMightBeBinary(argv[0]) && argc==2 ){
jsonExtractFromBlob(ctx, argv[0], argv[1], flags);
return;
}
p = jsonParseCached(ctx, argv[0], ctx, 0);
p = jsonParseFuncArg(ctx, argv[0], 0);
if( p==0 ) return;
if( argc==2 ){
/* With a single PATH argument */
zPath = (const char*)sqlite3_value_text(argv[1]);
if( zPath==0 ) return;
if( flags & JSON_ABPATH ){
if( zPath[0]!='$' || (zPath[1]!='.' && zPath[1]!='[' && zPath[1]!=0) ){
/* The -> and ->> operators accept abbreviated PATH arguments. This
** is mostly for compatibility with PostgreSQL, but also for
** convenience.
**
** NUMBER ==> $[NUMBER] // PG compatible
** LABEL ==> $.LABEL // PG compatible
** [NUMBER] ==> $[NUMBER] // Not PG. Purely for convenience
*/
jsonStringInit(&jx, ctx);
if( sqlite3Isdigit(zPath[0]) ){
jsonAppendRawNZ(&jx, "$[", 2);
jsonAppendRaw(&jx, zPath, (int)strlen(zPath));
jsonAppendRawNZ(&jx, "]", 2);
}else{
jsonAppendRawNZ(&jx, "$.", 1 + (zPath[0]!='['));
jsonAppendRaw(&jx, zPath, (int)strlen(zPath));
jsonAppendChar(&jx, 0);
}
pNode = jx.eErr ? 0 : jsonLookup(p, jx.zBuf, 0, ctx);
jsonStringReset(&jx);
}else{
pNode = jsonLookup(p, zPath, 0, ctx);
}
if( pNode ){
if( flags & JSON_JSON ){
jsonReturnNodeAsJson(p, pNode, ctx, 0, 0);
}else{
jsonReturnFromNode(p, pNode, ctx, 1);
}
}
}else{
pNode = jsonLookup(p, zPath, 0, ctx);
if( p->nErr==0 && pNode ) jsonReturnFromNode(p, pNode, ctx, 0);
}
}else{
/* Two or more PATH arguments results in a JSON array with each
** element of the array being the value selected by one of the PATHs */
int i;
jsonStringInit(&jx, ctx);
flags = SQLITE_PTR_TO_INT(sqlite3_user_data(ctx));
jsonStringInit(&jx, ctx);
if( argc>2 ){
jsonAppendChar(&jx, '[');
for(i=1; i<argc; i++){
zPath = (const char*)sqlite3_value_text(argv[i]);
pNode = jsonLookup(p, zPath, 0, ctx);
if( p->nErr ) break;
jsonAppendSeparator(&jx);
if( pNode ){
jsonXlateNodeToText(p, pNode, &jx);
}
for(i=1; i<argc; i++){
/* With a single PATH argument */
const char *zPath = (const char*)sqlite3_value_text(argv[i]);
int nPath = sqlite3_value_bytes(argv[i]);
u32 j;
if( zPath==0 ) goto json_extract_error;
if( zPath[0]=='$' ){
j = jsonLookupBlobStep(p, 0, zPath+1, 0);
}else if( (flags & JSON_ABPATH) ){
/* The -> and ->> operators accept abbreviated PATH arguments. This
** is mostly for compatibility with PostgreSQL, but also for
** convenience.
**
** NUMBER ==> $[NUMBER] // PG compatible
** LABEL ==> $.LABEL // PG compatible
** [NUMBER] ==> $[NUMBER] // Not PG. Purely for convenience
*/
jsonStringInit(&jx, ctx);
if( sqlite3Isdigit(zPath[0]) ){
jsonAppendRawNZ(&jx, "[", 1);
jsonAppendRaw(&jx, zPath, nPath);
jsonAppendRawNZ(&jx, "]", 2);
}else if( zPath[0]!='[' ){
jsonAppendRawNZ(&jx, ".", 1);
jsonAppendRaw(&jx, zPath, nPath);
jsonAppendChar(&jx, 0);
}else{
jsonAppendRaw(&jx, zPath, nPath);
}
jsonStringTerminate(&jx);
j = jsonLookupBlobStep(p, 0, jx.zBuf, 0);
jsonStringReset(&jx);
}else{
jsonExtractBadPathError(ctx, zPath);
goto json_extract_error;
}
if( j<p->nBlob ){
if( argc==2 ){
if( flags & JSON_JSON ){
jsonStringInit(&jx, ctx);
jsonXlateBlobToText(p, j, &jx);
jsonReturnString(&jx);
jsonStringReset(&jx);
assert( (flags & JSON_BLOB)==0 );
sqlite3_result_subtype(ctx, JSON_SUBTYPE);
}else{
jsonReturnFromBlob(p, j, ctx, 0);
if( (flags & (JSON_SQL|JSON_BLOB))==0
&& (p->aBlob[j]&0x0f)>=JSONB_ARRAY
){
sqlite3_result_subtype(ctx, JSON_SUBTYPE);
}
}
}else{
jsonAppendSeparator(&jx);
jsonXlateBlobToText(p, j, &jx);
}
}else if( j==JSON_BLOB_NOTFOUND ){
if( argc==2 ){
goto json_extract_error; /* Return NULL if not found */
}else{
jsonAppendSeparator(&jx);
jsonAppendRawNZ(&jx, "null", 4);
}
}else if( j==JSON_BLOB_ERROR ){
sqlite3_result_error(ctx, "malformed JSON", -1);
goto json_extract_error;
}else{
jsonExtractBadPathError(ctx, zPath);
goto json_extract_error;
}
if( i==argc ){
jsonAppendChar(&jx, ']');
jsonReturnString(&jx);
}
if( argc>2 ){
jsonAppendChar(&jx, ']');
jsonReturnString(&jx);
if( (flags & JSON_BLOB)==0 ){
sqlite3_result_subtype(ctx, JSON_SUBTYPE);
}
jsonStringReset(&jx);
}
json_extract_error:
jsonStringReset(&jx);
jsonParseFree(p);
return;
}
/* This is the RFC 7396 MergePatch algorithm.

View File

@ -974,7 +974,7 @@ do_execsql_test json101-18.4 {
} {6}
do_catchsql_test json101-18.5 {
SELECT json_extract('{"":8}', '$.');
} {1 {JSON path error near ''}}
} {1 {bad JSON path: '$.'}}
# 2022-08-29 https://sqlite.org/forum/forumpost/9b9e4716c0d7bbd1
# This is not a problem specifically with JSON functions. It is

View File

@ -96,18 +96,18 @@ json_replace_test 80 {'$.b[#-1]','AAA','$.b[#-1]','BBB'} \
do_catchsql_test json105-6.10 {
SELECT json_extract(j, '$.b[#-]') FROM t1;
} {1 {JSON path error near '[#-]'}}
} {1 {bad JSON path: '$.b[#-]'}}
do_catchsql_test json105-6.20 {
SELECT json_extract(j, '$.b[#9]') FROM t1;
} {1 {JSON path error near '[#9]'}}
} {1 {bad JSON path: '$.b[#9]'}}
do_catchsql_test json105-6.30 {
SELECT json_extract(j, '$.b[#+2]') FROM t1;
} {1 {JSON path error near '[#+2]'}}
} {1 {bad JSON path: '$.b[#+2]'}}
do_catchsql_test json105-6.40 {
SELECT json_extract(j, '$.b[#-1') FROM t1;
} {1 {JSON path error near '[#-1'}}
} {1 {bad JSON path: '$.b[#-1'}}
do_catchsql_test json105-6.50 {
SELECT json_extract(j, '$.b[#-1x]') FROM t1;
} {1 {JSON path error near '[#-1x]'}}
} {1 {bad JSON path: '$.b[#-1x]'}}
finish_test