Do correct comparisons between object labels in JSON even when the two labels

contain different JSON escapes.

FossilOrigin-Name: bda2e30cc22e180b19a7a05824dd345880eb402ae5450b2d2dd954946c3ae135
This commit is contained in:
drh 2023-12-06 17:50:16 +00:00
commit a9c8469d03
4 changed files with 285 additions and 115 deletions

View File

@ -1,5 +1,5 @@
C Correctly\shandle\s8-byte\ssizes\sin\sthe\sJSONB\sformat.\n[forum:/forumpost/283daf08e91183fc|Forum\spost\s283daf08e91183fc].
D 2023-12-06T17:39:31.569
C Do\scorrect\scomparisons\sbetween\sobject\slabels\sin\sJSON\seven\swhen\sthe\stwo\slabels\ncontain\sdifferent\sJSON\sescapes.
D 2023-12-06T17:50:16.616
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -696,7 +696,7 @@ F src/hash.h 3340ab6e1d13e725571d7cee6d3e3135f0779a7d8e76a9ce0a85971fa3953c51
F src/hwtime.h f9c2dfb84dce7acf95ce6d289e46f5f9d3d1afd328e53da8f8e9008e3b3caae6
F src/in-operator.md 10cd8f4bcd225a32518407c2fb2484089112fd71
F src/insert.c 3f0a94082d978bbdd33c38fefea15346c6c6bffb70bc645a71dc0f1f87dd3276
F src/json.c 07247c969e80e0a70241235ea4d00bb823830ce6a48f101fbcb1c72e8abf6f91
F src/json.c c2e0fea06f40fb0319ed132fc181a25623585c943e08c690b522f216886ba316
F src/legacy.c d7874bc885906868cd51e6c2156698f2754f02d9eee1bae2d687323c3ca8e5aa
F src/loadext.c 7432c944ff197046d67a1207790a1b13eec4548c85a9457eb0896bb3641dfb36
F src/main.c 1b89f3de98d1b59fec5bac1d66d6ece21f703821b8eaa0d53d9604c35309f6f9
@ -1339,7 +1339,7 @@ F test/json103.test 53df87f83a4e5fa0c0a56eb29ff6c94055c6eb919f33316d62161a888011
F test/json104.test 1b844a70cddcfa2e4cd81a5db0657b2e61e7f00868310f24f56a9ba0114348c1
F test/json105.test e64a8d73677fbae67886642cd5076e2ef3efe89f8483b87595cf9c030216c9bd
F test/json501.test ab168a12eb6eb14d479f8c1cdae3ac062fd5a4679f17f976e96f1af518408330
F test/json502.test 98c38e3c4573841028a1381dfb81d4c3f9b105d39668167da10d055e503f6d0b
F test/json502.test 3c697e506fc38ccb455b49660b21b6e62e08ede0f2d0c869a7d171e17809093c
F test/jsonb01.test cace70765b36a36aec9a85a41ea65667d3bbf647d4400ddc3ac76f8fe7d94f90
F test/keyword1.test 37ef6bba5d2ed5b07ecdd6810571de2956599dff
F test/kvtest.c 6e0228409ea7ca0497dad503fbd109badb5e59545d131014b6aaac68b56f484a
@ -2153,8 +2153,9 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 63cb05a862532d2d56e9e81fe32ced09bf58f03146587a118f11c2a84e195e69
R 3207d50f882d9851bd1114b702910d8f
P 73d390f39c0bbbc017e01544e4d43c76761f2599bd57f900131c706270dfd202 b9243ee8a37c62eb8848e765bd4af83bc1b3d3eb24fb4268a1357ad1f8b2e1fb
R fd697485adcd3130674f1a92f5c02f3d
T +closed b9243ee8a37c62eb8848e765bd4af83bc1b3d3eb24fb4268a1357ad1f8b2e1fb
U drh
Z 9fdc41964fbd72df8cb3bfd54d04d4f1
Z 9804c6a78387d2d6c50275c2413cc6ef
# Remove this line to create a well-formed Fossil manifest.

View File

@ -1 +1 @@
73d390f39c0bbbc017e01544e4d43c76761f2599bd57f900131c706270dfd202
bda2e30cc22e180b19a7a05824dd345880eb402ae5450b2d2dd954946c3ae135

View File

@ -2132,6 +2132,188 @@ static void jsonBlobEdit(
if( nIns && aIns ) memcpy(&pParse->aBlob[iDel], aIns, nIns);
}
/*
** Return the number of bytes at the start of z[0..n-1] that consist
** entirely of escaped newlines and can therefore be ignored.  The
** result is a byte count, not a count of escape sequences.
**
** An escaped newline is one of the following byte sequences:
**
**    0x5c 0x0a
**    0x5c 0x0d
**    0x5c 0x0d 0x0a
**    0x5c 0xe2 0x80 0xa8
**    0x5c 0xe2 0x80 0xa9
**
** Scanning stops at the first position that does not begin one of these
** sequences, or when fewer than two bytes remain.  The return value is
** never greater than n.
*/
static u32 jsonBytesToBypass(const char *z, u32 n){
  const u8 *a = (const u8*)z;    /* Unsigned view of the input bytes */
  u32 nDone = 0;                 /* Bytes accepted so far */
  for(;;){
    u32 nStep = 0;               /* Size of the escaped newline at a[nDone] */
    if( nDone+1>=n || a[nDone]!='\\' ) break;
    switch( a[nDone+1] ){
      case '\n': {
        nStep = 2;
        break;
      }
      case '\r': {
        /* A CR may optionally be followed by LF as part of the same escape */
        nStep = (nDone+2<n && a[nDone+2]=='\n') ? 3 : 2;
        break;
      }
      case 0xe2: {
        /* Three-byte sequences 0xe2 0x80 0xa8 and 0xe2 0x80 0xa9 */
        if( nDone+3<n
         && a[nDone+2]==0x80
         && (a[nDone+3]==0xa8 || a[nDone+3]==0xa9)
        ){
          nStep = 4;
        }
        break;
      }
      default: {
        break;
      }
    }
    if( nStep==0 ) break;        /* Not an escaped newline */
    nDone += nStep;
  }
  return nDone;
}
/*
** Input z[0..n-1] holds a JSON escape sequence including the leading '\\'.
** Decode that escape sequence into a single character.  Write that
** character into *piOut.  Return the number of input bytes consumed,
** which is never more than n.
**
** Escapes recognized:
**
**    \uXXXX           Four hex digits.  A high surrogate that is followed
**                     immediately by a \uXXXX low surrogate is combined
**                     with it into one code point (12 bytes consumed).
**    \b \f \n \r \t \v \0
**    \' \" \/ \\      The character itself
**    \xHH             Two hex digits
**    escaped newline  See jsonBytesToBypass().  The escaped newlines are
**                     skipped and the character that follows is returned.
**                     If nothing follows, *piOut is set to 0.
**
** If the escape is truncated or not recognized, *piOut is set to 0xFFFD
** (the Unicode replacement character).
*/
static u32 jsonUnescapeOneChar(const char *z, u32 n, u32 *piOut){
  assert( n>0 );
  assert( z[0]=='\\' );
  if( n<2 ){
    /* A lone backslash at the end of the input */
    *piOut = 0xFFFD;
    return n;
  }
  switch( (u8)z[1] ){
    case 'u': {
      u32 v, vlo;
      if( n<6 ){
        *piOut = 0xFFFD;
        return n;
      }
      v = jsonHexToInt4(&z[2]);
      if( (v & 0xfc00)==0xd800
       && n>=12
       && z[6]=='\\'
       && z[7]=='u'
       && ((vlo = jsonHexToInt4(&z[8]))&0xfc00)==0xdc00
      ){
        /* Surrogate pair: combine both halves into one code point */
        *piOut = ((v&0x3ff)<<10) + (vlo&0x3ff) + 0x10000;
        return 12;
      }else{
        *piOut = v;
        return 6;
      }
    }
    case 'b': {  *piOut = '\b';  return 2; }
    case 'f': {  *piOut = '\f';  return 2; }
    case 'n': {  *piOut = '\n';  return 2; }
    case 'r': {  *piOut = '\r';  return 2; }
    case 't': {  *piOut = '\t';  return 2; }
    case 'v': {  *piOut = '\v';  return 2; }
    case '0': {  *piOut = 0;     return 2; }
    case '\'':
    case '"':
    case '/':
    case '\\':{  *piOut = (u8)z[1];  return 2; }
    case 'x': {
      if( n<4 ){
        *piOut = 0xFFFD;
        return n;
      }
      *piOut = (jsonHexToInt(z[2])<<4) | jsonHexToInt(z[3]);
      return 4;
    }
    case 0xe2:
    case '\r':
    case '\n': {
      u32 nSkip = jsonBytesToBypass(z, n);
      if( nSkip==0 ){
        /* Began with 0xe2 but is not one of the recognized sequences */
        *piOut = 0xFFFD;
        return n;
      }else if( nSkip==n ){
        /* Nothing but escaped newlines up to the end of the input */
        *piOut = 0;
        return n;
      }else if( z[nSkip]=='\\' ){
        /* Some other escape follows the escaped newlines; decode it */
        return nSkip + jsonUnescapeOneChar(&z[nSkip], n-nSkip, piOut);
      }else{
        /* Read the byte as unsigned.  Where plain "char" is signed, a
        ** byte >=0x80 would otherwise be sign-extended into a huge u32
        ** that can never match the same byte read through a u8 pointer. */
        *piOut = (u8)z[nSkip];
        return nSkip+1;
      }
    }
    default: {
      *piOut = 0xFFFD;
      return 2;
    }
  }
}
/*
** Compare two object labels.  Return 1 if they are equal and
** 0 if they differ.
**
** In this version, we know that one or the other or both of the
** two comparands may contain escape sequences (rawLeft and rawRight
** are never both true).
**
** Each side is consumed one character at a time.  A side flagged as
** raw, or a byte that is not a backslash, contributes that single byte.
** Otherwise the escape is decoded by jsonUnescapeOneChar().
**
** Escaped newlines contribute no character to a label.  They are
** stripped before each character is read so that a label which ends in
** an escaped newline still matches the same label without it, and so
** that a trailing escaped newline is never confused with an escaped
** NUL character.
*/
static SQLITE_NOINLINE int jsonLabelCompareEscaped(
  const char *zLeft,          /* The left label */
  u32 nLeft,                  /* Size of the left label in bytes */
  int rawLeft,                /* True if zLeft contains no escapes */
  const char *zRight,         /* The right label */
  u32 nRight,                 /* Size of the right label in bytes */
  int rawRight                /* True if zRight is escape-free */
){
  u32 cLeft, cRight;
  assert( rawLeft==0 || rawRight==0 );
  for(;;){
    u32 n;

    /* Discard escaped newlines at the current position of each side.
    ** jsonBytesToBypass() returns 0 if there are none and never returns
    ** more than the number of bytes it was given. */
    if( !rawLeft ){
      n = jsonBytesToBypass(zLeft, nLeft);
      zLeft += n;
      nLeft -= n;
    }
    if( !rawRight ){
      n = jsonBytesToBypass(zRight, nRight);
      zRight += n;
      nRight -= n;
    }
    if( nLeft==0 || nRight==0 ) break;

    /* Next character of the left label */
    if( rawLeft || zLeft[0]!='\\' ){
      cLeft = ((u8*)zLeft)[0];
      zLeft++;
      nLeft--;
    }else{
      n = jsonUnescapeOneChar(zLeft, nLeft, &cLeft);
      zLeft += n;
      assert( n<=nLeft );
      nLeft -= n;
    }

    /* Next character of the right label */
    if( rawRight || zRight[0]!='\\' ){
      cRight = ((u8*)zRight)[0];
      zRight++;
      nRight--;
    }else{
      n = jsonUnescapeOneChar(zRight, nRight, &cRight);
      zRight += n;
      assert( n<=nRight );
      nRight -= n;
    }

    if( cLeft!=cRight ) return 0;
  }
  /* Equal only if both labels were fully consumed */
  return nLeft==0 && nRight==0;
}
/*
** Compare two object labels.  Return 1 if they are equal and
** 0 if they differ.
**
** When either label might contain escape sequences the work is handed
** to jsonLabelCompareEscaped().  When both labels are escape-free they
** are equal exactly when they have the same length and the same bytes.
*/
static int jsonLabelCompare(
  const char *zLeft,          /* The left label */
  u32 nLeft,                  /* Size of the left label in bytes */
  int rawLeft,                /* True if zLeft contains no escapes */
  const char *zRight,         /* The right label */
  u32 nRight,                 /* Size of the right label in bytes */
  int rawRight                /* True if zRight is escape-free */
){
  if( !rawLeft || !rawRight ){
    /* At least one side may hold escapes: compare character by character */
    return jsonLabelCompareEscaped(zLeft, nLeft, rawLeft,
                                   zRight, nRight, rawRight);
  }
  /* Simplest case: neither label contains escapes, so a length check
  ** followed by memcmp() is sufficient. */
  if( nLeft!=nRight ) return 0;
  return memcmp(zLeft, zRight, nLeft)==0;
}
/*
** Error returns from jsonLookupStep()
*/
@ -2237,6 +2419,7 @@ static u32 jsonLookupStep(
return iRoot;
}
if( zPath[0]=='.' ){
int rawKey = 1;
x = pParse->aBlob[iRoot];
zPath++;
if( zPath[0]=='"' ){
@ -2249,6 +2432,7 @@ static u32 jsonLookupStep(
return JSON_LOOKUP_PATHERROR;
}
testcase( nKey==0 );
rawKey = memchr(zKey, '\\', nKey)==0;
}else{
zKey = zPath;
for(i=0; zPath[i] && zPath[i]!='.' && zPath[i]!='['; i++){}
@ -2262,13 +2446,17 @@ static u32 jsonLookupStep(
j = iRoot + n; /* j is the index of a label */
iEnd = j+sz;
while( j<iEnd ){
int rawLabel;
const char *zLabel;
x = pParse->aBlob[j] & 0x0f;
if( x<JSONB_TEXT || x>JSONB_TEXTRAW ) return JSON_LOOKUP_ERROR;
n = jsonbPayloadSize(pParse, j, &sz);
if( n==0 ) return JSON_LOOKUP_ERROR;
k = j+n; /* k is the index of the label text */
if( k+sz>=iEnd ) return JSON_LOOKUP_ERROR;
if( sz==nKey && memcmp(&pParse->aBlob[k], zKey, nKey)==0 ){
zLabel = (const char*)&pParse->aBlob[k];
rawLabel = x==JSONB_TEXT || x==JSONB_TEXTRAW;
if( jsonLabelCompare(zKey, nKey, rawKey, zLabel, sz, rawLabel) ){
u32 v = k+sz; /* v is the index of the value */
if( ((pParse->aBlob[v])&0x0f)>JSONB_OBJECT ) return JSON_LOOKUP_ERROR;
n = jsonbPayloadSize(pParse, v, &sz);
@ -2292,7 +2480,7 @@ static u32 jsonLookupStep(
testcase( pParse->eEdit==JEDIT_INS );
testcase( pParse->eEdit==JEDIT_SET );
memset(&ix, 0, sizeof(ix));
jsonBlobAppendNode(&ix,JSONB_TEXTRAW, nKey, 0);
jsonBlobAppendNode(&ix, rawKey?JSONB_TEXTRAW:JSONB_TEXT5, nKey, 0);
pParse->oom |= ix.oom;
rc = jsonCreateEditSubstructure(pParse, &v, &zPath[i]);
if( !JSON_LOOKUP_ISERROR(rc)
@ -2496,72 +2684,27 @@ static void jsonReturnFromBlob(
for(iIn=iOut=0; iIn<sz; iIn++){
char c = z[iIn];
if( c=='\\' ){
c = z[++iIn];
if( c=='u' ){
u32 v = jsonHexToInt4(z+iIn+1);
iIn += 4;
if( v==0 ) break;
if( v<=0x7f ){
zOut[iOut++] = (char)v;
}else if( v<=0x7ff ){
zOut[iOut++] = (char)(0xc0 | (v>>6));
zOut[iOut++] = 0x80 | (v&0x3f);
}else{
u32 vlo;
if( (v&0xfc00)==0xd800
&& iIn<sz-6
&& z[iIn+1]=='\\'
&& z[iIn+2]=='u'
&& ((vlo = jsonHexToInt4(z+iIn+3))&0xfc00)==0xdc00
){
/* We have a surrogate pair */
v = ((v&0x3ff)<<10) + (vlo&0x3ff) + 0x10000;
iIn += 6;
zOut[iOut++] = 0xf0 | (v>>18);
zOut[iOut++] = 0x80 | ((v>>12)&0x3f);
zOut[iOut++] = 0x80 | ((v>>6)&0x3f);
zOut[iOut++] = 0x80 | (v&0x3f);
}else{
zOut[iOut++] = 0xe0 | (v>>12);
zOut[iOut++] = 0x80 | ((v>>6)&0x3f);
zOut[iOut++] = 0x80 | (v&0x3f);
}
}
continue;
}else if( c=='b' ){
c = '\b';
}else if( c=='f' ){
c = '\f';
}else if( c=='n' ){
c = '\n';
}else if( c=='r' ){
c = '\r';
}else if( c=='t' ){
c = '\t';
}else if( c=='v' ){
c = '\v';
}else if( c=='\'' || c=='"' || c=='/' || c=='\\' ){
/* pass through unchanged */
}else if( c=='0' ){
c = 0;
}else if( c=='x' ){
c = (jsonHexToInt(z[iIn+1])<<4) | jsonHexToInt(z[iIn+2]);
iIn += 2;
}else if( c=='\r' && z[i+1]=='\n' ){
iIn++;
continue;
}else if( 0xe2==(u8)c
&& iIn<sz-2
&& 0x80==(u8)z[iIn+1]
&& (0xa8==(u8)z[iIn+2] || 0xa9==(u8)z[iIn+2])
){
iIn += 2;
continue;
u32 v;
u32 szEscape = jsonUnescapeOneChar(&z[iIn], sz-iIn, &v);
if( v<=0x7f ){
zOut[iOut++] = (char)v;
}else if( v<=0x7ff ){
zOut[iOut++] = (char)(0xc0 | (v>>6));
zOut[iOut++] = 0x80 | (v&0x3f);
}else if( v<0x10000 ){
zOut[iOut++] = 0xe0 | (v>>12);
zOut[iOut++] = 0x80 | ((v>>6)&0x3f);
zOut[iOut++] = 0x80 | (v&0x3f);
}else{
continue;
zOut[iOut++] = 0xf0 | (v>>18);
zOut[iOut++] = 0x80 | ((v>>12)&0x3f);
zOut[iOut++] = 0x80 | ((v>>6)&0x3f);
zOut[iOut++] = 0x80 | (v&0x3f);
}
} /* end if( c=='\\' ) */
zOut[iOut++] = c;
iIn += szEscape - 1;
}else{
zOut[iOut++] = c;
}
} /* end for() */
zOut[iOut] = 0;
sqlite3_result_text(pCtx, zOut, iOut, sqlite3_free);
@ -3142,6 +3285,20 @@ static void jsonArrayLengthFunc(
jsonParseFree(p);
}
/*
** Return true if every one of the n bytes of z[] satisfies
** sqlite3Isdigit().  An empty string (n==0) yields true.  A negative
** n yields false.
*/
static int jsonAllDigits(const char *z, int n){
  int k = 0;
  while( k<n ){
    if( !sqlite3Isdigit(z[k]) ) break;
    k++;
  }
  return k==n;
}
/*
** Return true if every one of the n bytes of z[] is either an
** underscore or satisfies sqlite3Isalnum().  An empty string (n==0)
** yields true.  A negative n yields false.
*/
static int jsonAllAlphanum(const char *z, int n){
  int k = 0;
  while( k<n ){
    if( z[k]!='_' && !sqlite3Isalnum(z[k]) ) break;
    k++;
  }
  return k==n;
}
/*
** json_extract(JSON, PATH, ...)
** "->"(JSON,PATH)
@ -3199,15 +3356,19 @@ static void jsonExtractFunc(
** [NUMBER] ==> $[NUMBER] // Not PG. Purely for convenience
*/
jsonStringInit(&jx, ctx);
if( sqlite3Isdigit(zPath[0]) ){
if( jsonAllDigits(zPath, nPath) ){
jsonAppendRawNZ(&jx, "[", 1);
jsonAppendRaw(&jx, zPath, nPath);
jsonAppendRawNZ(&jx, "]", 2);
}else if( zPath[0]!='[' ){
}else if( jsonAllAlphanum(zPath, nPath) ){
jsonAppendRawNZ(&jx, ".", 1);
jsonAppendRaw(&jx, zPath, nPath);
}else{
}else if( zPath[0]=='[' && nPath>=3 && zPath[nPath-1]==']' ){
jsonAppendRaw(&jx, zPath, nPath);
}else{
jsonAppendRawNZ(&jx, ".\"", 2);
jsonAppendRaw(&jx, zPath, nPath);
jsonAppendRawNZ(&jx, "\"", 1);
}
jsonStringTerminate(&jx);
j = jsonLookupStep(p, 0, jx.zBuf, 0);
@ -3397,6 +3558,7 @@ static int jsonMergePatch(
iTCursor = iTStart;
iTEnd = iTEndBE + pTarget->delta;
while( iTCursor<iTEnd ){
int isEqual; /* true if the patch and target labels match */
iTLabel = iTCursor;
eTLabel = pTarget->aBlob[iTCursor] & 0x0f;
if( eTLabel<JSONB_TEXT || eTLabel>JSONB_TEXTRAW ){
@ -3409,33 +3571,14 @@ static int jsonMergePatch(
nTValue = jsonbPayloadSize(pTarget, iTValue, &szTValue);
if( nTValue==0 ) return JSON_MERGE_BADTARGET;
if( iTValue + nTValue + szTValue > iTEnd ) return JSON_MERGE_BADTARGET;
if( eTLabel==ePLabel ){
/* Common case */
if( szTLabel==szPLabel
&& memcmp(&pTarget->aBlob[iTLabel+nTLabel],
&pPatch->aBlob[iPLabel+nPLabel], szTLabel)==0
){
break; /* Labels match. */
}
}else{
/* Should rarely happen */
JsonString s1, s2;
int isEqual, isOom;
jsonStringInit(&s1, 0);
jsonXlateBlobToText(pTarget, iTLabel, &s1);
jsonStringInit(&s2, 0);
jsonXlateBlobToText(pPatch, iPLabel, &s2);
isOom = s1.eErr || s2.eErr;
if( s1.nUsed==s2.nUsed && memcmp(s1.zBuf, s2.zBuf, s1.nUsed)==0 ){
isEqual = 1;
}else{
isEqual = 0;
}
jsonStringReset(&s1);
jsonStringReset(&s2);
if( isOom ) return JSON_MERGE_OOM;
if( isEqual ) break;
}
isEqual = jsonLabelCompare(
(const char*)&pPatch->aBlob[iPLabel+nPLabel],
szPLabel,
(ePLabel==JSONB_TEXT || ePLabel==JSONB_TEXTRAW),
(const char*)&pTarget->aBlob[iTLabel+nTLabel],
szTLabel,
(eTLabel==JSONB_TEXT || eTLabel==JSONB_TEXTRAW));
if( isEqual ) break;
iTCursor = iTValue + nTValue + szTValue;
}
x = pPatch->aBlob[iPValue] & 0x0f;
@ -4359,22 +4502,23 @@ static int jsonEachNext(sqlite3_vtab_cursor *cur){
*/
static int jsonEachPathLength(JsonEachCursor *p){
u32 n = p->path.nUsed;
const char *z = p->path.zBuf;
if( p->iRowid==0 && p->bRecursive && n>1 ){
if( p->path.zBuf[n-1]==']' ){
if( z[n-1]==']' ){
do{
assert( n>0 );
assert( n>1 );
n--;
}while( p->path.zBuf[n]!='[' );
}while( z[n]!='[' );
}else if( z[n-1]=='"' ){
do{
assert( n>1 );
n--;
}while( z[n]!='.' || z[n+1]!='"' );
}else{
u32 sz = 0;
jsonbPayloadSize(&p->sParse, p->i, &sz);
if( p->path.zBuf[n-1]=='"' ) sz += 2;
assert( sz<n );
n -= sz;
while( p->path.zBuf[n]!='.' && ALWAYS(n>0) ){
do{
assert( n>1 );
n--;
assert( n>0 );
}
}while( z[n]!='.' );
}
}
return n;

View File

@ -36,5 +36,30 @@ do_catchsql_test 2.3 {
SELECT '{a:null,{"h":[1,[1,2,3]],"j":"abc"}:true}'->'$h[#-1]';
} {1 {malformed JSON}}
# Verify that escaped label names are compared correctly.
#
do_execsql_test 3.1 {
SELECT '{"a\x62c":123}' ->> 'abc';
} 123
do_execsql_test 3.2 {
SELECT '{"abc":123}' ->> 'a\x62c';
} 123
db null null
do_execsql_test 3.3 {
DROP TABLE t1;
CREATE TABLE t1(x);
INSERT INTO t1 VALUES(json_insert('{}','$.a\',111,'$."b\\"',222));
INSERT INTO t1 VALUES(jsonb_insert('{}','$.a\',111,'$."b\\"',222));
SELECT x->'$.a\', x->'$.a\\', x->'$."a\\"', x->'$."b\\"' FROM t1;
} {111 null 111 222 111 null 111 222}
do_execsql_test 3.4 {
SELECT json_patch('{"a\x62c":123}','{"ab\x63":456}') ->> 'abc';
} 456
do_execsql_test 4.1 {
SELECT * FROM json_tree('{"\u0017":1}','$."\x17"');
} {{\x17} 1 integer 1 1 null {$."\x17"} {$}}
finish_test