Further improvements to the estimated cost of sorting. Take into account the number of columns to be sorted.

FossilOrigin-Name: f3290cf83b7c02d17d85d8942954f052b486c370cd5ec732969da9061dc1d19a
This commit is contained in:
drh 2022-12-03 17:09:15 +00:00
parent 1edd0a089c
commit e4fa4794be
3 changed files with 38 additions and 28 deletions

View File

@ -1,5 +1,5 @@
C Tuning\sthe\squery\splanner\sby\sadjusting\sthe\sweights\sthat\spredict\sthe\srelative\nperformance\sof\ssorting\sand\sindex\slookup.
D 2022-12-03T00:52:21.776
C Further\simprovements\sto\sthe\sestimated\scost\sof\ssorting.\s\sTake\sinto\saccount\nthe\snumber\sof\scolumns\sto\sbe\ssorted.
D 2022-12-03T17:09:15.127
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -732,7 +732,7 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9
F src/wal.c b9df133a705093da8977da5eb202eaadb844839f1c7297c08d33471f5491843d
F src/wal.h c3aa7825bfa2fe0d85bef2db94655f99870a285778baa36307c0a16da32b226a
F src/walker.c f890a3298418d7cba3b69b8803594fdc484ea241206a8dfa99db6dd36f8cbb3b
F src/where.c 5826b62ddcfc92979669cb5fb80f73d0df86bbfeefa1d757f5dc1f857cd628e7
F src/where.c 32875f4f738b1b32f648e8fd52df23a5ba28744a2b4209b262cac09efc2a8569
F src/whereInt.h e25203e5bfee149f5f1225ae0166cfb4f1e65490c998a024249e98bb0647377c
F src/wherecode.c ee52c2781c36004d23c85bf111063b78fc16e5e1b6a0d424326af8bf90babb0b
F src/whereexpr.c 05295b44b54eea76d1ba766f0908928d0e20e990c249344c9521454d3d09c7ae
@ -2065,11 +2065,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 57dd593ef0efa17dfb3a9f4eac36d5b8b879e271de817d8cd94a8c8b56d31870
R e47495a3cfe52da04033a615c7da71c2
T *branch * qp-tuning
T *sym-qp-tuning *
T -sym-trunk *
P 9f2806da4d88beceac2e81e05421f00481dd3dd100b096cd2ae6c828adb42ca7
R 7460b69c2cad54c6cd480cae6fbd9d9a
U drh
Z bdcd0fcd9740989c0fc73b883156ab70
Z b965371360d6f8e63aa93b345d42429e
# Remove this line to create a well-formed Fossil manifest.

View File

@ -1 +1 @@
9f2806da4d88beceac2e81e05421f00481dd3dd100b096cd2ae6c828adb42ca7
f3290cf83b7c02d17d85d8942954f052b486c370cd5ec732969da9061dc1d19a

View File

@ -4803,12 +4803,12 @@ static const char *wherePathName(WherePath *pPath, int nLoop, WhereLoop *pLast){
** order.
*/
static LogEst whereSortingCost(
WhereInfo *pWInfo,
LogEst nRow,
int nOrderBy,
int nSorted
WhereInfo *pWInfo, /* Query planning context */
LogEst nRow, /* Estimated number of rows to sort */
int nOrderBy, /* Number of ORDER BY clause terms */
int nSorted /* Number of initial ORDER BY terms naturally in order */
){
/* TUNING: Estimated cost of a full external sort, where N is
/* Estimated cost of a full external sort, where N is
** the number of rows to sort, is:
**
** cost = (K * N * log(N)).
@ -4819,27 +4819,40 @@ static LogEst whereSortingCost(
**
** cost = (K * N * log(N)) * (Y/X)
**
** The constant K is 2.0 for an external sort that is built around
** the OP_SorterInsert, OP_SorterSort, and OP_SorterData opcodes.
** For a sort built using OP_IdxInsert and OP_Sort (which is slower
** by a constant factor), the constant K is 4.0.
** The constant K is at least 2.0 but will be larger if there are a
** large number of columns to be sorted, as the sorting time is
** proportional to the amount of content to be sorted. The algorithm
** does not currently distinguish between fat columns (BLOBs and TEXTs)
** and skinny columns (INTs). It just uses the number of columns as
** an approximation for the row width.
**
** The (Y/X) term is implemented using stack variable rScale
** below.
** An extra factor of 2.0 or 3.0 is added to the sorting cost if the sort
** is built using OP_IdxInsert and OP_Sort rather than with OP_SorterInsert.
*/
LogEst rScale, rSortCost;
assert( nOrderBy>0 && 66==sqlite3LogEst(100) );
rScale = sqlite3LogEst((nOrderBy-nSorted)*100/nOrderBy) - 66;
rSortCost = nRow + rScale + 10;
if( pWInfo->wctrlFlags & WHERE_USE_LIMIT ) rSortCost += 10;
LogEst rSortCost, nCol;
assert( pWInfo->pSelect!=0 );
assert( pWInfo->pSelect->pEList!=0 );
/* TUNING: sorting cost proportional to the number of output columns: */
nCol = sqlite3LogEst((pWInfo->pSelect->pEList->nExpr+59)/30);
rSortCost = nRow + nCol;
if( nSorted>0 ){
/* Scale the result by (Y/X) */
rSortCost += sqlite3LogEst((nOrderBy-nSorted)*100/nOrderBy) - 66;
}
/* Multiply by log(M) where M is the number of output rows.
** Use the LIMIT for M if it is smaller. Or if this sort is for
** a DISTINCT operator, M will be the number of distinct output
** rows, so fudge it downwards a bit.
*/
if( (pWInfo->wctrlFlags & WHERE_USE_LIMIT)!=0 && pWInfo->iLimit<nRow ){
nRow = pWInfo->iLimit;
if( (pWInfo->wctrlFlags & WHERE_USE_LIMIT)!=0 ){
rSortCost += 10; /* TUNING: Extra 2.0x if using LIMIT */
if( nSorted!=0 ){
rSortCost += 6; /* TUNING: Extra 1.5x if also using partial sort */
}
if( pWInfo->iLimit<nRow ){
nRow = pWInfo->iLimit;
}
}else if( (pWInfo->wctrlFlags & WHERE_WANT_DISTINCT) ){
/* TUNING: In the sort for a DISTINCT operator, assume that the DISTINCT
** reduces the number of output rows by a factor of 2 */