From 04624990514ae7c593ad224dd87cd9037cec752f Mon Sep 17 00:00:00 2001 From: drh <> Date: Sat, 18 May 2024 20:00:08 +0000 Subject: [PATCH 1/2] Fix the definition of sqlite3_vtab_distinct() such that return codes 2 and 3 mean that all rows must be distinct over "colUsed" which is a superset of "aOrderBy". Also, disallow return codes 2 and 3 if the rowid of the virtual table is accessed. FossilOrigin-Name: 922731ce98c0ce7837784ff7966049e59fa73da2aa04abf3506503b6fc4aa048 --- manifest | 25 ++++++++++++++----------- manifest.uuid | 2 +- src/resolve.c | 8 ++++++-- src/sqlite.h.in | 16 +++++++++------- src/sqliteInt.h | 1 + src/treeview.c | 6 ++++-- src/where.c | 2 +- 7 files changed, 36 insertions(+), 24 deletions(-) diff --git a/manifest b/manifest index acf63d973b..401fc0177e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C JS\serror\smessage\sand\sdoc\stypos\sreported\sin\sthe\sforum.\sNo\scode\schanges. -D 2024-05-18T15:21:45.610 +C Fix\sthe\sdefinition\sof\ssqlite3_vtab_distinct()\ssuch\sthat\sreturn\scodes\s2\sand\n3\smean\sthat\sall\srows\smust\sbe\sdistinct\sover\s"colUsed"\swhich\sis\sa\ssuperset\sof\n"aOrderBy".\s\sAlso,\sdisallow\sreturn\scodes\s2\sand\s3\sif\sthe\srowid\sof\sthe\svirtual\ntable\sis\saccessed. 
+D 2024-05-18T20:00:08.988 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -752,14 +752,14 @@ F src/pragma.h e690a356c18e98414d2e870ea791c1be1545a714ba623719deb63f7f226d8bb7 F src/prepare.c d99931f45416652895e502328ca49fe782cfc4e1ebdcda13b3736d991ebf42ce F src/printf.c 8b250972305e14b365561be5117ed0fd364e4fd58968776df1ce64c6280b90f9 F src/random.c 606b00941a1d7dd09c381d3279a058d771f406c5213c9932bbd93d5587be4b9c -F src/resolve.c 6adf4bf10151bca046f3206ae8ff07dbf90b49de20bea49197f010276f5f494d +F src/resolve.c 22f1fa3423b377c02ae78d451cfeb1c2d96dcf0389c0642cbdcd19d3bfd7ae01 F src/rowset.c 8432130e6c344b3401a8874c3cb49fefe6873fec593294de077afea2dce5ec97 F src/select.c 1a841c38974d45cf15a7611398479182b61ad4c187423c380741d8b1688fe607 F src/shell.c.in 8f2406e4e8d726452e48058d117f52e86b789f47435157b0418fb06c631349b6 -F src/sqlite.h.in 32389e0d584551b300d0157881336162c14315a424cbf385c0d65eb7c2e31f7b +F src/sqlite.h.in 6d645f9c2da3b64ea44944024df0c733bba186ee200d849a72a25bf8ae9beac4 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h 3f046c04ea3595d6bfda99b781926b17e672fd6d27da2ba6d8d8fc39981dcb54 -F src/sqliteInt.h adcf1cebb6fc00fc1f1329c00bb65ac36655736080acfd2071d84aa83a104afe +F src/sqliteInt.h 6a9fa3902c9faca2b57060e822f2afadfbf96d64c4ede81e201f0e0c42d7e4aa F src/sqliteLimit.h 6878ab64bdeb8c24a1d762d45635e34b96da21132179023338c93f820eee6728 F src/status.c cb11f8589a6912af2da3bb1ec509a94dd8ef27df4d4c1a97e0bcf2309ece972b F src/table.c 0f141b58a16de7e2fbe81c308379e7279f4c6b50eb08efeec5892794a0ba30d1 @@ -817,7 +817,7 @@ F src/test_window.c cdae419fdcea5bad6dcd9368c685abdad6deb59e9fc8b84b153de513d394 F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9 F src/threads.c 4ae07fa022a3dc7c5beb373cf744a85d3c5c6c3c F src/tokenize.c 
3f703cacdab728d7741e5a6ac242006d74fe1c2754d4f03ed889d7253259bd68 -F src/treeview.c 4f9ba6c1c7c9893fc046fdb0bc1f6bdec7660122b1ae37e51fb9b64c286caafd +F src/treeview.c 38eefdc85d2793c4059ae651a611b30eb034389fb428f69e572bbea565da6c78 F src/trigger.c 0858f75818ed1580332db274f1032bcc5effe567cb132df5c5be8b1d800ca97f F src/update.c 732404a04d1737ef14bb6ec6b84f74edf28b3c102a92ae46b4855438a710efe7 F src/upsert.c 2e60567a0e9e8520c18671b30712a88dc73534474304af94f32bb5f3ef65ac65 @@ -839,7 +839,7 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c 887fc4ca3f020ebb2e376f222069570834ac63bf50111ef0cbf3ae417048ed89 F src/wal.h ba252daaa94f889f4b2c17c027e823d9be47ce39da1d3799886bbd51f0490452 F src/walker.c 7c7ea0115345851c3da4e04e2e239a29983b61fb5b038b94eede6aba462640e2 -F src/where.c d235ba520b0147f60732b3bd411e119b43be33d348251edaa6e304a8ad52c511 +F src/where.c 6f02c3936d1f9a637d8d7b5ad7362371af3e4434b0ec1eb950189a83de560d59 F src/whereInt.h 82a13766f13d1a53b05387c2e60726289ef26404bc7b9b1f7770204d97357fb8 F src/wherecode.c f5255f49d1f42b6e7e6b0362ff3522fa88cbcaa7213e52f9374744027ecdebca F src/whereexpr.c 67d15caf88a1a9528283d68ff578e024cf9fe810b517bb0343e5aaf695ad97dd @@ -2191,8 +2191,11 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P c2188da860a1debd7c5d28b223deeef15035c33c2c5aefae9d9b620b6a2b4677 -R 54dd93ee4a8dfed3331a7d693fe52477 -U stephan -Z f16374626a15dcefe372431613b7ed3c +P 1a073f9acfb691eebf4a8cc78a72ff47ebbb6aba4acede6755fa3faefae48f2b +R be7ef6a3dcb331720f0112bf3c3e9c8c +T *branch * vtab-distinct-fix +T *sym-vtab-distinct-fix * +T -sym-trunk * +U drh +Z 3a71e5f761d3443220a5a5a181b974f3 # Remove this line to create a well-formed Fossil manifest. 
diff --git a/manifest.uuid b/manifest.uuid index 1518cbf1b1..6968f2e6b3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1a073f9acfb691eebf4a8cc78a72ff47ebbb6aba4acede6755fa3faefae48f2b \ No newline at end of file +922731ce98c0ce7837784ff7966049e59fa73da2aa04abf3506503b6fc4aa048 \ No newline at end of file diff --git a/src/resolve.c b/src/resolve.c index 546731afe5..bf8326aa64 100644 --- a/src/resolve.c +++ b/src/resolve.c @@ -828,8 +828,12 @@ static int lookupName( ** If a generated column is referenced, set bits for every column ** of the table. */ - if( pExpr->iColumn>=0 && cnt==1 && pMatch!=0 ){ - pMatch->colUsed |= sqlite3ExprColUsed(pExpr); + if( pMatch ){ + if( pExpr->iColumn>=0 ){ + pMatch->colUsed |= sqlite3ExprColUsed(pExpr); + }else{ + pMatch->fg.rowidUsed = 1; + } } pExpr->op = eNewExprOp; diff --git a/src/sqlite.h.in b/src/sqlite.h.in index d553abb7bf..459af929b1 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -9942,20 +9942,22 @@ const char *sqlite3_vtab_collation(sqlite3_index_info*,int); **
** ^(If the sqlite3_vtab_distinct() interface returns 2, that means ** that the query planner does not need the rows returned in any particular -** order, as long as rows with the same values in all "aOrderBy" columns +** order, as long as rows with the same values in all "colUsed" columns ** are adjacent.)^ ^(Furthermore, only a single row for each particular -** combination of values in the columns identified by the "aOrderBy" field +** combination of values in the columns identified by the "colUsed" field ** needs to be returned.)^ ^It is always ok for two or more rows with the same -** values in all "aOrderBy" columns to be returned, as long as all such rows +** values in all "colUsed" columns to be returned, as long as all such rows ** are adjacent. ^The virtual table may, if it chooses, omit extra rows -** that have the same value for all columns identified by "aOrderBy". +** that have the same value for all columns identified by "colUsed". ** ^However omitting the extra rows is optional. ** This mode is used for a DISTINCT query. **
** ^(If the sqlite3_vtab_distinct() interface returns 3, that means -** that the query planner needs only distinct rows but it does need the -** rows to be sorted.)^ ^The virtual table implementation is free to omit -** rows that are identical in all aOrderBy columns, if it wants to, but +** that the query planner needs only rows that are distinct over the +** set of columns defined by "colUsed" but it does need the +** rows to be sorted according to the order defined by "aOrderBy".)^ +** ^The virtual table implementation is free to omit +** rows that are identical in all "colUsed" columns, if it wants to, but ** it is not required to omit any rows. This mode is used for queries ** that have both DISTINCT and ORDER BY clauses. ** diff --git a/src/sqliteInt.h b/src/sqliteInt.h index aa8bfc4b7b..d98a4f7f06 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -3312,6 +3312,7 @@ struct SrcItem { unsigned isOn :1; /* u3.pOn was once valid and non-NULL */ unsigned isSynthUsing :1; /* u3.pUsing is synthesized from NATURAL */ unsigned isNestedFrom :1; /* pSelect is a SF_NestedFrom subquery */ + unsigned rowidUsed :1; /* The ROWID of this table is referenced */ } fg; int iCursor; /* The VDBE cursor number used to access this table */ union { diff --git a/src/treeview.c b/src/treeview.c index d3346b4686..fa9eac6142 100644 --- a/src/treeview.c +++ b/src/treeview.c @@ -194,8 +194,10 @@ void sqlite3TreeViewSrcList(TreeView *pView, const SrcList *pSrc){ x.printfFlags |= SQLITE_PRINTF_INTERNAL; sqlite3_str_appendf(&x, "{%d:*} %!S", pItem->iCursor, pItem); if( pItem->pTab ){ - sqlite3_str_appendf(&x, " tab=%Q nCol=%d ptr=%p used=%llx", - pItem->pTab->zName, pItem->pTab->nCol, pItem->pTab, pItem->colUsed); + sqlite3_str_appendf(&x, " tab=%Q nCol=%d ptr=%p used=%llx%s", + pItem->pTab->zName, pItem->pTab->nCol, pItem->pTab, + pItem->colUsed, + pItem->fg.rowidUsed ? 
"+rowid" : ""); } if( (pItem->fg.jointype & (JT_LEFT|JT_RIGHT))==(JT_LEFT|JT_RIGHT) ){ sqlite3_str_appendf(&x, " FULL-OUTER-JOIN"); diff --git a/src/where.c b/src/where.c index 13a362dcca..fac0f6c5eb 100644 --- a/src/where.c +++ b/src/where.c @@ -1416,7 +1416,7 @@ static sqlite3_index_info *allocateIndexInfo( } if( i==n ){ nOrderBy = n; - if( (pWInfo->wctrlFlags & WHERE_DISTINCTBY) ){ + if( (pWInfo->wctrlFlags & WHERE_DISTINCTBY) && !pSrc->fg.rowidUsed ){ eDistinct = 2 + ((pWInfo->wctrlFlags & WHERE_SORTBYGROUP)!=0); }else if( pWInfo->wctrlFlags & WHERE_GROUPBY ){ eDistinct = 1; From df903fe0565e3793888bbd8f8a329245eec4d9ea Mon Sep 17 00:00:00 2001 From: drh <> Date: Mon, 20 May 2024 16:00:27 +0000 Subject: [PATCH 2/2] Improvements to the sqlite3_vtab_distinct() documentation. FossilOrigin-Name: 6ee041d34f292b94701919f51bbb9e12bcb9e0c4f45e4c0b83f30ff328070637 --- manifest | 15 ++++++--------- manifest.uuid | 2 +- src/sqlite.h.in | 49 ++++++++++++++++++++++++++++++++++--------------- 3 files changed, 41 insertions(+), 25 deletions(-) diff --git a/manifest b/manifest index 401fc0177e..ce10a0198d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sthe\sdefinition\sof\ssqlite3_vtab_distinct()\ssuch\sthat\sreturn\scodes\s2\sand\n3\smean\sthat\sall\srows\smust\sbe\sdistinct\sover\s"colUsed"\swhich\sis\sa\ssuperset\sof\n"aOrderBy".\s\sAlso,\sdisallow\sreturn\scodes\s2\sand\s3\sif\sthe\srowid\sof\sthe\svirtual\ntable\sis\saccessed. -D 2024-05-18T20:00:08.988 +C Improvements\sto\sthe\ssqlite3_vtab_distinct()\sdocumentation. 
+D 2024-05-20T16:00:27.117 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -756,7 +756,7 @@ F src/resolve.c 22f1fa3423b377c02ae78d451cfeb1c2d96dcf0389c0642cbdcd19d3bfd7ae01 F src/rowset.c 8432130e6c344b3401a8874c3cb49fefe6873fec593294de077afea2dce5ec97 F src/select.c 1a841c38974d45cf15a7611398479182b61ad4c187423c380741d8b1688fe607 F src/shell.c.in 8f2406e4e8d726452e48058d117f52e86b789f47435157b0418fb06c631349b6 -F src/sqlite.h.in 6d645f9c2da3b64ea44944024df0c733bba186ee200d849a72a25bf8ae9beac4 +F src/sqlite.h.in c71d9ef76a6d32dc7ff2d373f2e57ce09056af26c1457bcadae5358b7628c7c3 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h 3f046c04ea3595d6bfda99b781926b17e672fd6d27da2ba6d8d8fc39981dcb54 F src/sqliteInt.h 6a9fa3902c9faca2b57060e822f2afadfbf96d64c4ede81e201f0e0c42d7e4aa @@ -2191,11 +2191,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 1a073f9acfb691eebf4a8cc78a72ff47ebbb6aba4acede6755fa3faefae48f2b -R be7ef6a3dcb331720f0112bf3c3e9c8c -T *branch * vtab-distinct-fix -T *sym-vtab-distinct-fix * -T -sym-trunk * +P 922731ce98c0ce7837784ff7966049e59fa73da2aa04abf3506503b6fc4aa048 +R 65c04e4dfc1793594341c544753e5e70 U drh -Z 3a71e5f761d3443220a5a5a181b974f3 +Z 4b4899e535e10a58b98edaa878f294f3 # Remove this line to create a well-formed Fossil manifest. 
diff --git a/manifest.uuid b/manifest.uuid index 6968f2e6b3..1200d1128c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -922731ce98c0ce7837784ff7966049e59fa73da2aa04abf3506503b6fc4aa048 \ No newline at end of file +6ee041d34f292b94701919f51bbb9e12bcb9e0c4f45e4c0b83f30ff328070637 \ No newline at end of file diff --git a/src/sqlite.h.in b/src/sqlite.h.in index 459af929b1..549b52a153 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -9942,26 +9942,45 @@ const char *sqlite3_vtab_collation(sqlite3_index_info*,int); **
** ^(If the sqlite3_vtab_distinct() interface returns 2, that means ** that the query planner does not need the rows returned in any particular -** order, as long as rows with the same values in all "colUsed" columns -** are adjacent.)^ ^(Furthermore, only a single row for each particular -** combination of values in the columns identified by the "colUsed" field -** needs to be returned.)^ ^It is always ok for two or more rows with the same -** values in all "colUsed" columns to be returned, as long as all such rows -** are adjacent. ^The virtual table may, if it chooses, omit extra rows -** that have the same value for all columns identified by "colUsed". -** ^However omitting the extra rows is optional. +** order, as long as rows with the same values in all columns identified +** by "aOrderBy" are adjacent.)^ ^(Furthermore, when two or more rows +** contain the same values for all columns identified by "colUsed", all but +** one such row may optionally be omitted from the result.)^ +** The virtual table is not required to omit rows that are duplicates +** over the "colUsed" columns, but if the virtual table can do that without +** too much extra effort, it could potentially help the query to run faster. ** This mode is used for a DISTINCT query. **
-** ^(If the sqlite3_vtab_distinct() interface returns 3, that means -** that the query planner needs only rows that are distinct over the -** set of columns defined by "colUsed" but it does need the -** rows to be sorted according to the order defined by "aOrderBy".)^ -** ^The virtual table implementation is free to omit -** rows that are identical in all "colUsed" columns, if it wants to, but -** it is not required to omit any rows. This mode is used for queries +** ^(If the sqlite3_vtab_distinct() interface returns 3, that means the +** virtual table must return rows in the order defined by "aOrderBy" as +** if the sqlite3_vtab_distinct() interface had returned 0. However, if +** two or more rows in the result have the same values for all columns +** identified by "colUsed", then all but one such row may optionally be +** omitted.)^ As when the return value is 2, the virtual table +** is not required to omit rows that are duplicates over the "colUsed" +** columns, but if the virtual table can do that without +** too much extra effort, it could potentially help the query to run faster. +** This mode is used for queries ** that have both DISTINCT and ORDER BY clauses. ** ** +**
The following table summarizes the conditions under which the +** virtual table is allowed to set the "orderByConsumed" flag based on +** the value returned by sqlite3_vtab_distinct(). This table is a +** restatement of the previous four paragraphs: +** +**
sqlite3_vtab_distinct() return value +** | Rows are returned in aOrderBy order +** | Rows with the same value in all aOrderBy columns are adjacent +** | Duplicates over all colUsed columns may be omitted +** |
0 | yes | yes | no +** |
1 | no | yes | no +** |
2 | no | yes | yes +** |
3 | yes | yes | yes +** |