Performance improvements to the external merge-sorter. Keep content on an
in-memory linked list rather than in an ephemeral table prior to spilling to
disk.  Use the external merge-sorter to implement ORDER BY and GROUP BY
in addition to CREATE INDEX.

FossilOrigin-Name: 4c43e8b2d2c1d8dcba3cd1c3f2ec4e19ab419430
This commit is contained in:
drh 2011-09-03 17:07:26 +00:00
commit d40807ddef
17 changed files with 560 additions and 296 deletions

View File

@ -1,5 +1,5 @@
C Remove\sunused\slocal\svariable.
D 2011-09-02T15:08:28.662
C Performance\simprovements\sto\sthe\sexternal\smerge-sorter.\s\sKeep\scontent\son\san\nin-memory\slinked\slists\srather\sthan\san\sephemeral\stable\sprior\sto\sspilling\sto\ndisk.\s\sUse\sthe\sexternal\smerge-sorter\sto\simplement\sORDER\sBY\sand\sGROUP\sBY\nin\saddition\sto\sCREATE\sINDEX.
D 2011-09-03T17:07:26.674
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in d314143fa6be24828021d3f583ad37d9afdce505
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -124,16 +124,16 @@ F src/auth.c 523da7fb4979469955d822ff9298352d6b31de34
F src/backup.c 28a4fe55327ff708bfaf9d4326d02686f7a553c3
F src/bitvec.c af50f1c8c0ff54d6bdb7a80e2fceca5a93670bef
F src/btmutex.c 976f45a12e37293e32cae0281b15a21d48a8aaa7
F src/btree.c 4a2856b3bde9959986a7b9327841b3ff94023784
F src/btree.h 9ddf04226eac592d4cc3709c5a8b33b2351ff5f7
F src/btree.c 4d46fe30b8bc920f68b7d58a5f45316fa5d023ec
F src/btree.h f5d775cd6cfc7ac32a2535b70e8d2af48ef5f2ce
F src/btreeInt.h 67978c014fa4f7cc874032dd3aacadd8db656bc3
F src/build.c 2d5de52df616a3bf5a659cbca85211c46e2ba9bd
F src/build.c 851e81f26a75abbb98bd99a7c5f10e8670d867bb
F src/callback.c 0425c6320730e6d3981acfb9202c1bed9016ad1a
F src/complete.c dc1d136c0feee03c2f7550bafc0d29075e36deac
F src/ctime.c e3132ec65240b2e2f3d50831021eac387f27584d
F src/date.c a3c6842bad7ae632281811de112a8ba63ff08ab3
F src/delete.c ff68e5ef23aee08c0ff528f699a19397ed8bbed8
F src/expr.c 4bbdfaf66bc614be9254ce0c26a17429067a3e07
F src/expr.c cbcd8c2f1588a9862291a081699854c5e1cb28ab
F src/fault.c 160a0c015b6c2629d3899ed2daf63d75754a32bb
F src/fkey.c 9f00ea98f6b360d477b5a78b5b59a1fbde82431c
F src/func.c 59bb046d7e3df1ab512ac339ccb0a6f996a17cb7
@ -167,8 +167,8 @@ F src/os_common.h 92815ed65f805560b66166e3583470ff94478f04
F src/os_os2.c 4a75888ba3dfc820ad5e8177025972d74d7f2440
F src/os_unix.c 10e0c4dcdbec8d4189890fdf3e71b32efae194e3
F src/os_win.c 33b7b7b48939af5cef2305f5ded19d45c025e2c7
F src/pager.c 817f7f7140c9fa2641f28e6330e924708ddd870d
F src/pager.h 2bab1b2ea4eac58663b5833e3522e36b5ff63447
F src/pager.c 5545863e4e246e1744cfb6993821c6e4b63ffb64
F src/pager.h 6bea8d1949db33768de1c5b4133b267b40845f8b
F src/parse.y 12b7ebd61ea54f0e1b1083ff69cc2c8ce9353d58
F src/pcache.c 49e718c095810c6b3334e3a6d89970aceaddefce
F src/pcache.h c683390d50f856d4cd8e24342ae62027d1bb6050
@ -179,11 +179,11 @@ F src/printf.c 585a36b6a963df832cfb69505afa3a34ed5ef8a1
F src/random.c cd4a67b3953b88019f8cd4ccd81394a8ddfaba50
F src/resolve.c 36368f44569208fa074e61f4dd0b6c4fb60ca2b4
F src/rowset.c 69afa95a97c524ba6faf3805e717b5b7ae85a697
F src/select.c 14552e9ff4b27ec027a43fafb62ea5d049cd2809
F src/select.c 32d0f4e5513362706b8973e7f1b87cd0885dfbf5
F src/shell.c bbe7818ff5bc8614105ceb81ad67b8bdc0b671dd
F src/sqlite.h.in 0a6c9c23337fd1352c5c75a613ff9533aa7d91cb
F src/sqlite3ext.h 1a1a4f784aa9c3b00edd287940197de52487cd93
F src/sqliteInt.h 86a4fdb3ba9ab31d98b266797606f30fefe5b8a9
F src/sqliteInt.h c7e37ee49b1a922ddcd18fa98dd750efa4d2db14
F src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d
F src/status.c 7ac64842c86cec2fc1a1d0e5c16d3beb8ad332bf
F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e
@ -238,14 +238,14 @@ F src/update.c 74a6cfb34e9732c1e2a86278b229913b4b51eeec
F src/utf.c c53eb7404b3eb5c1cbb5655c6a7a0e0ce6bd50f0
F src/util.c 06302ffd2b80408d4f6c7af71f7090e0cf8d8ff7
F src/vacuum.c 05513dca036a1e7848fe18d5ed1265ac0b32365e
F src/vdbe.c 9165b35da939f4a66c7e68b0c6d3f017ca982cb1
F src/vdbe.c d4c8224cc931c6082557501d7f822fb12f273922
F src/vdbe.h c1eeedacab6bcf1e7c2cf8203ba9763a616f9a86
F src/vdbeInt.h 70767f6504aac4f0057ec2a55738470a890789ac
F src/vdbeInt.h 693d6ac6810298fc6b4c503cfbe3f99a240f40af
F src/vdbeapi.c 11dc47987abacb76ad016dcf5abc0dc422482a98
F src/vdbeaux.c de1e4cab060a45df9ebee68dd63543d14559f0e7
F src/vdbeaux.c e58acbc5ea3823922a0cd8fa21f94f39af51ee88
F src/vdbeblob.c f024f0bf420f36b070143c32b15cc7287341ffd3
F src/vdbemem.c 5e6effb96dd53d233361cbfaa3f0a43b9af689e9
F src/vdbesort.c f3d043a1bab7409d4a23cd7a35287c3ac440a167
F src/vdbesort.c e6d6f0c2aa003f7cbdea8c9be47a15a8e854fb97
F src/vdbetrace.c 5d0dc3d5fd54878cc8d6d28eb41deb8d5885b114
F src/vtab.c 901791a47318c0562cd0c676a2c6ff1bc530e582
F src/wal.c 3154756177d6219e233d84291d5b05f4e06ff5e9
@ -369,7 +369,7 @@ F test/descidx1.test 533dcbda614b0463b0ea029527fd27e5a9ab2d66
F test/descidx2.test 9f1a0c83fd57f8667c82310ca21b30a350888b5d
F test/descidx3.test fe720e8b37d59f4cef808b0bf4e1b391c2e56b6f
F test/diskfull.test 106391384780753ea6896b7b4f005d10e9866b6e
F test/distinct.test 8c4d951fc40aba84421060e07b16099d2f4c2fdf
F test/distinct.test df5b11ad606439129c88720a86787bc9ca181f31
F test/distinctagg.test 1a6ef9c87a58669438fc771450d7a72577417376
F test/e_createtable.test 4771686a586b6ae414f927c389b2c101cc05c028
F test/e_delete.test e2ae0d3fce5efd70fef99025e932afffc5616fab
@ -511,7 +511,7 @@ F test/incrvacuum_ioerr.test 22f208d01c528403240e05beecc41dc98ed01637
F test/index.test b5429732b3b983fa810e3ac867d7ca85dae35097
F test/index2.test ee83c6b5e3173a3d7137140d945d9a5d4fdfb9d6
F test/index3.test 423a25c789fc8cc51aaf2a4370bbdde2d9e9eed7
F test/index4.test c82a59c9ae2ac01804bdb100162dca057318f40f
F test/index4.test 2983216eb8c86ee62d9ed7cb206b5cc3331c0026
F test/indexedby.test be501e381b82b2f8ab406309ba7aac46e221f4ad
F test/indexfault.test 31d4ab9a7d2f6e9616933eb079722362a883eb1d
F test/init.test 15c823093fdabbf7b531fe22cf037134d09587a7
@ -592,7 +592,7 @@ F test/minmax2.test 33504c01a03bd99226144e4b03f7631a274d66e0
F test/minmax3.test cc1e8b010136db0d01a6f2a29ba5a9f321034354
F test/misc1.test 55cb2bfbf4a8cd61f4be1effc30426ad41696bff
F test/misc2.test 00d7de54eda90e237fc9a38b9e5ccc769ebf6d4d
F test/misc3.test 72c5dc87a78e7865c5ec7a969fc572913dbe96b6
F test/misc3.test 8e42d54b772a23b3c573672d3e0894d15b05221d
F test/misc4.test 9c078510fbfff05a9869a0b6d8b86a623ad2c4f6
F test/misc5.test 528468b26d03303b1f047146e5eefc941b9069f5
F test/misc6.test 953cc693924d88e6117aeba16f46f0bf5abede91
@ -961,7 +961,7 @@ F tool/symbols.sh caaf6ccc7300fd43353318b44524853e222557d5
F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
F tool/warnings.sh b7fdb2cc525f5ef4fa43c80e771636dd3690f9d2
P 2869ed28299b1c9f355ecc24635830f7f1249126
R d7407cbd16401c8c8a7c8d014c2baaa2
U mistachkin
Z 3bc82a48805c63310ead923d005627b4
P 61bda876af6df3170263d41d2933168305de58d2 99e34bdce4ccca15b79159b03b96787e7a7ff85b
R da1f20303c5b87a4ef556300a531567d
U drh
Z 01e7a916345d0e3901e7a472c5d2f441

View File

@ -1 +1 @@
61bda876af6df3170263d41d2933168305de58d2
4c43e8b2d2c1d8dcba3cd1c3f2ec4e19ab419430

View File

@ -1734,22 +1734,11 @@ int sqlite3BtreeOpen(
/* A BTREE_SINGLE database is always a temporary and/or ephemeral */
assert( (flags & BTREE_SINGLE)==0 || isTempDb );
/* The BTREE_SORTER flag is only used if SQLITE_OMIT_MERGE_SORT is undef */
#ifdef SQLITE_OMIT_MERGE_SORT
assert( (flags & BTREE_SORTER)==0 );
#endif
/* BTREE_SORTER is always on a BTREE_SINGLE, BTREE_OMIT_JOURNAL */
assert( (flags & BTREE_SORTER)==0 ||
(flags & (BTREE_SINGLE|BTREE_OMIT_JOURNAL))
==(BTREE_SINGLE|BTREE_OMIT_JOURNAL) );
if( db->flags & SQLITE_NoReadlock ){
flags |= BTREE_NO_READLOCK;
}
if( isMemdb ){
flags |= BTREE_MEMORY;
flags &= ~BTREE_SORTER;
}
if( (vfsFlags & SQLITE_OPEN_MAIN_DB)!=0 && (isMemdb || isTempDb) ){
vfsFlags = (vfsFlags & ~SQLITE_OPEN_MAIN_DB) | SQLITE_OPEN_TEMP_DB;
@ -7296,16 +7285,9 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){
return rc;
}
int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
BtShared *pBt = p->pBt;
int rc;
sqlite3BtreeEnter(p);
if( (pBt->openFlags&BTREE_SINGLE) ){
pBt->nPage = 0;
sqlite3PagerTruncateImage(pBt->pPager, 1);
rc = newDatabase(pBt);
}else{
rc = btreeDropTable(p, iTable, piMoved);
}
sqlite3BtreeLeave(p);
return rc;
}

View File

@ -61,7 +61,6 @@ int sqlite3BtreeOpen(
#define BTREE_MEMORY 4 /* This is an in-memory DB */
#define BTREE_SINGLE 8 /* The file contains at most 1 b-tree */
#define BTREE_UNORDERED 16 /* Use of a hash implementation is OK */
#define BTREE_SORTER 32 /* Used as accumulator in external merge sort */
int sqlite3BtreeClose(Btree*);
int sqlite3BtreeSetCacheSize(Btree*,int);

View File

@ -2326,6 +2326,7 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
int iIdx = pParse->nTab++; /* Btree cursor used for pIndex */
int iSorter = iTab; /* Cursor opened by OpenSorter (if in use) */
int addr1; /* Address of top of loop */
int addr2; /* Address to jump to for next iteration */
int tnum; /* Root page of index */
Vdbe *v; /* Generate code into this virtual machine */
KeyInfo *pKey; /* KeyInfo for index */
@ -2334,15 +2335,6 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
sqlite3 *db = pParse->db; /* The database connection */
int iDb = sqlite3SchemaToIndex(db, pIndex->pSchema);
/* Set bUseSorter to use OP_OpenSorter, or clear it to insert directly
** into the index. The sorter is used unless either OMIT_MERGE_SORT is
** defined or the system is configured to store temp files in-memory. */
#ifdef SQLITE_OMIT_MERGE_SORT
static const int bUseSorter = 0;
#else
const int bUseSorter = !sqlite3TempInMemory(pParse->db);
#endif
#ifndef SQLITE_OMIT_AUTHORIZATION
if( sqlite3AuthCheck(pParse, SQLITE_REINDEX, pIndex->zName, 0,
db->aDb[iDb].zName ) ){
@ -2368,28 +2360,40 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
sqlite3VdbeChangeP5(v, 1);
}
#ifndef SQLITE_OMIT_MERGE_SORT
/* Open the sorter cursor if we are to use one. */
if( bUseSorter ){
iSorter = pParse->nTab++;
sqlite3VdbeAddOp4(v, OP_OpenSorter, iSorter, 0, 0, (char*)pKey, P4_KEYINFO);
sqlite3VdbeChangeP5(v, BTREE_SORTER);
}
sqlite3VdbeAddOp4(v, OP_SorterOpen, iSorter, 0, 0, (char*)pKey, P4_KEYINFO);
#endif
/* Open the table. Loop through all rows of the table, inserting index
** records into the sorter. */
sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead);
addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iTab, 0);
addr2 = addr1 + 1;
regRecord = sqlite3GetTempReg(pParse);
regIdxKey = sqlite3GenerateIndexKey(pParse, pIndex, iTab, regRecord, 1);
if( bUseSorter ){
sqlite3VdbeAddOp2(v, OP_IdxInsert, iSorter, regRecord);
#ifndef SQLITE_OMIT_MERGE_SORT
sqlite3VdbeAddOp2(v, OP_SorterInsert, iSorter, regRecord);
sqlite3VdbeAddOp2(v, OP_Next, iTab, addr1+1);
sqlite3VdbeJumpHere(v, addr1);
addr1 = sqlite3VdbeAddOp2(v, OP_Sort, iSorter, 0);
sqlite3VdbeAddOp2(v, OP_RowKey, iSorter, regRecord);
addr1 = sqlite3VdbeAddOp2(v, OP_SorterSort, iSorter, 0);
if( pIndex->onError!=OE_None ){
int j2 = sqlite3VdbeCurrentAddr(v) + 3;
sqlite3VdbeAddOp2(v, OP_Goto, 0, j2);
addr2 = sqlite3VdbeCurrentAddr(v);
sqlite3VdbeAddOp3(v, OP_SorterCompare, iSorter, j2, regRecord);
sqlite3HaltConstraint(
pParse, OE_Abort, "indexed columns are not unique", P4_STATIC
);
}else{
addr2 = sqlite3VdbeCurrentAddr(v);
}
sqlite3VdbeAddOp2(v, OP_SorterData, iSorter, regRecord);
sqlite3VdbeAddOp3(v, OP_IdxInsert, iIdx, regRecord, 1);
sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
#else
if( pIndex->onError!=OE_None ){
const int regRowid = regIdxKey + pIndex->nColumn;
const int j2 = sqlite3VdbeCurrentAddr(v) + 2;
@ -2408,10 +2412,11 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
sqlite3HaltConstraint(
pParse, OE_Abort, "indexed columns are not unique", P4_STATIC);
}
sqlite3VdbeAddOp3(v, OP_IdxInsert, iIdx, regRecord, bUseSorter);
sqlite3VdbeAddOp3(v, OP_IdxInsert, iIdx, regRecord, 0);
sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
#endif
sqlite3ReleaseTempReg(pParse, regRecord);
sqlite3VdbeAddOp2(v, OP_Next, iSorter, addr1+1);
sqlite3VdbeAddOp2(v, OP_SorterNext, iSorter, addr2);
sqlite3VdbeJumpHere(v, addr1);
sqlite3VdbeAddOp1(v, OP_Close, iTab);

View File

@ -2287,7 +2287,7 @@ int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target){
inReg = pCol->iMem;
break;
}else if( pAggInfo->useSortingIdx ){
sqlite3VdbeAddOp3(v, OP_Column, pAggInfo->sortingIdx,
sqlite3VdbeAddOp3(v, OP_Column, pAggInfo->sortingIdxPTab,
pCol->iSorterColumn, target);
break;
}

View File

@ -621,7 +621,6 @@ struct Pager {
u8 readOnly; /* True for a read-only database */
u8 memDb; /* True to inhibit all file I/O */
u8 hasSeenStress; /* pagerStress() called one or more times */
u8 isSorter; /* True for a PAGER_SORTER */
/**************************************************************************
** The following block contains those class members that change during
@ -845,15 +844,6 @@ static int assert_pager_state(Pager *p){
assert( pagerUseWal(p)==0 );
}
/* A sorter is a temp file that never spills to disk and always has
** the doNotSpill flag set
*/
if( p->isSorter ){
assert( p->tempFile );
assert( p->doNotSpill );
assert( p->fd->pMethods==0 );
}
/* If changeCountDone is set, a RESERVED lock or greater must be held
** on the file.
*/
@ -4557,12 +4547,6 @@ int sqlite3PagerOpen(
/* pPager->pBusyHandlerArg = 0; */
pPager->xReiniter = xReinit;
/* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
#ifndef SQLITE_OMIT_MERGE_SORT
if( flags & PAGER_SORTER ){
pPager->doNotSpill = 1;
pPager->isSorter = 1;
}
#endif
*ppPager = pPager;
return SQLITE_OK;
@ -6107,17 +6091,6 @@ int sqlite3PagerIsMemdb(Pager *pPager){
return MEMDB;
}
#ifndef SQLITE_OMIT_MERGE_SORT
/*
** Return true if the pager has seen a pagerStress callback.
*/
int sqlite3PagerUnderStress(Pager *pPager){
assert( pPager->isSorter );
assert( pPager->doNotSpill );
return pPager->hasSeenStress;
}
#endif
/*
** Check that there are at least nSavepoint savepoints open. If there are
** currently less than nSavepoints open, then open one or more savepoints

View File

@ -156,9 +156,6 @@ const char *sqlite3PagerJournalname(Pager*);
int sqlite3PagerNosync(Pager*);
void *sqlite3PagerTempSpace(Pager*);
int sqlite3PagerIsMemdb(Pager*);
#ifndef SQLITE_OMIT_MERGE_SORT
int sqlite3PagerUnderStress(Pager*);
#endif
/* Functions used to truncate the database file. */
void sqlite3PagerTruncateImage(Pager*,Pgno);

View File

@ -419,12 +419,18 @@ static void pushOntoSorter(
int nExpr = pOrderBy->nExpr;
int regBase = sqlite3GetTempRange(pParse, nExpr+2);
int regRecord = sqlite3GetTempReg(pParse);
int op;
sqlite3ExprCacheClear(pParse);
sqlite3ExprCodeExprList(pParse, pOrderBy, regBase, 0);
sqlite3VdbeAddOp2(v, OP_Sequence, pOrderBy->iECursor, regBase+nExpr);
sqlite3ExprCodeMove(pParse, regData, regBase+nExpr+1, 1);
sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase, nExpr + 2, regRecord);
sqlite3VdbeAddOp2(v, OP_IdxInsert, pOrderBy->iECursor, regRecord);
if( pSelect->selFlags & SF_UseSorter ){
op = OP_SorterInsert;
}else{
op = OP_IdxInsert;
}
sqlite3VdbeAddOp2(v, op, pOrderBy->iECursor, regRecord);
sqlite3ReleaseTempReg(pParse, regRecord);
sqlite3ReleaseTempRange(pParse, regBase, nExpr+2);
if( pSelect->iLimit ){
@ -893,9 +899,20 @@ static void generateSortTail(
}else{
regRowid = sqlite3GetTempReg(pParse);
}
if( p->selFlags & SF_UseSorter ){
int regSortOut = sqlite3GetTempReg(pParse);
int ptab2 = pParse->nTab++;
sqlite3VdbeAddOp3(v, OP_OpenPseudo, ptab2, regSortOut, pOrderBy->nExpr+2);
addr = 1 + sqlite3VdbeAddOp2(v, OP_SorterSort, iTab, addrBreak);
codeOffset(v, p, addrContinue);
sqlite3VdbeAddOp2(v, OP_SorterData, iTab, regSortOut);
sqlite3VdbeAddOp3(v, OP_Column, ptab2, pOrderBy->nExpr+1, regRow);
sqlite3VdbeChangeP5(v, OPFLAG_CLEARCACHE);
}else{
addr = 1 + sqlite3VdbeAddOp2(v, OP_Sort, iTab, addrBreak);
codeOffset(v, p, addrContinue);
sqlite3VdbeAddOp3(v, OP_Column, iTab, pOrderBy->nExpr + 1, regRow);
sqlite3VdbeAddOp3(v, OP_Column, iTab, pOrderBy->nExpr+1, regRow);
}
switch( eDest ){
case SRT_Table:
case SRT_EphemTab: {
@ -948,7 +965,11 @@ static void generateSortTail(
/* The bottom of the loop
*/
sqlite3VdbeResolveLabel(v, addrContinue);
if( p->selFlags & SF_UseSorter ){
sqlite3VdbeAddOp2(v, OP_SorterNext, iTab, addr);
}else{
sqlite3VdbeAddOp2(v, OP_Next, iTab, addr);
}
sqlite3VdbeResolveLabel(v, addrBreak);
if( eDest==SRT_Output || eDest==SRT_Coroutine ){
sqlite3VdbeAddOp2(v, OP_Close, pseudoTab, 0);
@ -3914,6 +3935,10 @@ int sqlite3Select(
iEnd = sqlite3VdbeMakeLabel(v);
p->nSelectRow = (double)LARGEST_INT64;
computeLimitRegisters(pParse, p, iEnd);
if( p->iLimit==0 && addrSortIndex>=0 ){
sqlite3VdbeGetOp(v, addrSortIndex)->opcode = OP_SorterOpen;
p->selFlags |= SF_UseSorter;
}
/* Open a virtual index to use for the distinct set.
*/
@ -4008,6 +4033,8 @@ int sqlite3Select(
int iAbortFlag; /* Mem address which causes query abort if positive */
int groupBySort; /* Rows come from source in GROUP BY order */
int addrEnd; /* End of processing for this SELECT */
int sortPTab = 0; /* Pseudotable used to decode sorting results */
int sortOut = 0; /* Output register from the sorter */
/* Remove any and all aliases between the result set and the
** GROUP BY clause.
@ -4069,12 +4096,12 @@ int sqlite3Select(
/* If there is a GROUP BY clause we might need a sorting index to
** implement it. Allocate that sorting index now. If it turns out
** that we do not need it after all, the OpenEphemeral instruction
** that we do not need it after all, the OP_SorterOpen instruction
** will be converted into a Noop.
*/
sAggInfo.sortingIdx = pParse->nTab++;
pKeyInfo = keyInfoFromExprList(pParse, pGroupBy);
addrSortingIdx = sqlite3VdbeAddOp4(v, OP_OpenEphemeral,
addrSortingIdx = sqlite3VdbeAddOp4(v, OP_SorterOpen,
sAggInfo.sortingIdx, sAggInfo.nSortingColumn,
0, (char*)pKeyInfo, P4_KEYINFO_HANDOFF);
@ -4155,11 +4182,14 @@ int sqlite3Select(
}
regRecord = sqlite3GetTempReg(pParse);
sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase, nCol, regRecord);
sqlite3VdbeAddOp2(v, OP_IdxInsert, sAggInfo.sortingIdx, regRecord);
sqlite3VdbeAddOp2(v, OP_SorterInsert, sAggInfo.sortingIdx, regRecord);
sqlite3ReleaseTempReg(pParse, regRecord);
sqlite3ReleaseTempRange(pParse, regBase, nCol);
sqlite3WhereEnd(pWInfo);
sqlite3VdbeAddOp2(v, OP_Sort, sAggInfo.sortingIdx, addrEnd);
sAggInfo.sortingIdxPTab = sortPTab = pParse->nTab++;
sortOut = sqlite3GetTempReg(pParse);
sqlite3VdbeAddOp3(v, OP_OpenPseudo, sortPTab, sortOut, nCol);
sqlite3VdbeAddOp2(v, OP_SorterSort, sAggInfo.sortingIdx, addrEnd);
VdbeComment((v, "GROUP BY sort"));
sAggInfo.useSortingIdx = 1;
sqlite3ExprCacheClear(pParse);
@ -4172,9 +4202,13 @@ int sqlite3Select(
*/
addrTopOfLoop = sqlite3VdbeCurrentAddr(v);
sqlite3ExprCacheClear(pParse);
if( groupBySort ){
sqlite3VdbeAddOp2(v, OP_SorterData, sAggInfo.sortingIdx, sortOut);
}
for(j=0; j<pGroupBy->nExpr; j++){
if( groupBySort ){
sqlite3VdbeAddOp3(v, OP_Column, sAggInfo.sortingIdx, j, iBMem+j);
sqlite3VdbeAddOp3(v, OP_Column, sortPTab, j, iBMem+j);
if( j==0 ) sqlite3VdbeChangeP5(v, OPFLAG_CLEARCACHE);
}else{
sAggInfo.directMode = 1;
sqlite3ExprCode(pParse, pGroupBy->a[j].pExpr, iBMem+j);
@ -4213,7 +4247,7 @@ int sqlite3Select(
/* End of the loop
*/
if( groupBySort ){
sqlite3VdbeAddOp2(v, OP_Next, sAggInfo.sortingIdx, addrTopOfLoop);
sqlite3VdbeAddOp2(v, OP_SorterNext, sAggInfo.sortingIdx, addrTopOfLoop);
}else{
sqlite3WhereEnd(pWInfo);
sqlite3VdbeChangeToNoop(v, addrSortingIdx, 1);

View File

@ -372,14 +372,6 @@
# define SQLITE_TEMP_STORE 1
#endif
/*
** If all temporary storage is in-memory, then omit the external merge-sort
** logic since it is superfluous.
*/
#if SQLITE_TEMP_STORE==3 && !defined(SQLITE_OMIT_MERGE_SORT)
# define SQLITE_OMIT_MERGE_SORT
#endif
/*
** GCC does not define the offsetof() macro so we'll have to do it
** ourselves.
@ -1550,6 +1542,7 @@ struct AggInfo {
u8 useSortingIdx; /* In direct mode, reference the sorting index rather
** than the source table */
int sortingIdx; /* Cursor number of the sorting index */
int sortingIdxPTab; /* Cursor number of pseudo-table */
ExprList *pGroupBy; /* The group by clause */
int nSortingColumn; /* Number of columns in the sorting index */
struct AggInfo_col { /* For each column used in source tables */
@ -2082,6 +2075,7 @@ struct Select {
#define SF_UsesEphemeral 0x0008 /* Uses the OpenEphemeral opcode */
#define SF_Expanded 0x0010 /* sqlite3SelectExpand() called on this */
#define SF_HasTypeInfo 0x0020 /* FROM subqueries have Table metadata */
#define SF_UseSorter 0x0040 /* Sort using a sorter */
/*

View File

@ -3162,13 +3162,6 @@ case OP_OpenWrite: {
** by this opcode will be used for automatically created transient
** indices in joins.
*/
/* Opcode: OpenSorter P1 P2 * P4 *
**
** This opcode works like OP_OpenEphemeral except that it opens
** a transient index that is specifically designed to sort large
** tables using an external merge-sort algorithm.
*/
case OP_OpenSorter:
case OP_OpenAutoindex:
case OP_OpenEphemeral: {
VdbeCursor *pCx;
@ -3180,7 +3173,6 @@ case OP_OpenEphemeral: {
SQLITE_OPEN_TRANSIENT_DB;
assert( pOp->p1>=0 );
assert( (pOp->opcode==OP_OpenSorter)==((pOp->p5 & BTREE_SORTER)!=0) );
pCx = allocateCursor(p, pOp->p1, pOp->p2, -1, 1);
if( pCx==0 ) goto no_mem;
pCx->nullRow = 1;
@ -3214,10 +3206,27 @@ case OP_OpenEphemeral: {
}
pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED);
pCx->isIndex = !pCx->isTable;
break;
}
/* Opcode: SorterOpen P1 P2 * P4 *
**
** This opcode works like OP_OpenEphemeral except that it opens
** a transient index that is specifically designed to sort large
** tables using an external merge-sort algorithm.
*/
case OP_SorterOpen: {
VdbeCursor *pCx;
#ifndef SQLITE_OMIT_MERGE_SORT
if( rc==SQLITE_OK && pOp->opcode==OP_OpenSorter ){
pCx = allocateCursor(p, pOp->p1, pOp->p2, -1, 1);
if( pCx==0 ) goto no_mem;
pCx->pKeyInfo = pOp->p4.pKeyInfo;
pCx->pKeyInfo->enc = ENC(p->db);
pCx->isSorter = 1;
rc = sqlite3VdbeSorterInit(db, pCx);
}
#else
pOp->opcode = OP_OpenEphemeral;
pc--;
#endif
break;
}
@ -4070,6 +4079,45 @@ case OP_ResetCount: {
break;
}
/* Opcode: SorterCompare P1 P2 P3
**
** P1 is a sorter cursor. This instruction compares the record blob in
** register P3 with the entry that the sorter cursor currently points to.
** If, excluding the rowid fields at the end, the two records are a match,
** fall through to the next instruction. Otherwise, jump to instruction P2.
*/
case OP_SorterCompare: {
  VdbeCursor *pC;
  int res;

  pC = p->apCsr[pOp->p1];
  assert( isSorter(pC) );
  pIn3 = &aMem[pOp->p3];

  /* Default to "match" (no jump).  The comparison routine is not
  ** guaranteed to store into res: when SQLITE_OMIT_MERGE_SORT is defined
  ** sqlite3VdbeSorterCompare() is a macro that expands to a bare SQLITE_OK
  ** and never touches its output argument.  Without this initialization
  ** the test below would read an indeterminate value. */
  res = 0;
  rc = sqlite3VdbeSorterCompare(pC, pIn3, &res);
  if( res ){
    pc = pOp->p2-1;       /* Jump to P2: pc is set one short of the target */
  }
  break;
}
/* Opcode: SorterData P1 P2 * * *
**
** Write into register P2 the current sorter data for sorter cursor P1.
**
** When SQLITE_OMIT_MERGE_SORT is defined there is no sorter object, and
** this opcode instead rewrites itself into OP_RowKey.
*/
case OP_SorterData: {
VdbeCursor *pC;
#ifndef SQLITE_OMIT_MERGE_SORT
/* P2 names the output register, P1 the sorter cursor. */
pOut = &aMem[pOp->p2];
pC = p->apCsr[pOp->p1];
assert( pC->isSorter );
/* Delegate to the sorter module to fill in pOut; its return code becomes
** the opcode's result code. */
rc = sqlite3VdbeSorterRowkey(pC, pOut);
#else
/* Change this instruction into OP_RowKey in place and back pc up by one --
** presumably so that the interpreter's usual advance lands on this same
** instruction again and runs it as OP_RowKey (confirm against the main
** dispatch loop). */
pOp->opcode = OP_RowKey;
pc--;
#endif
break;
}
/* Opcode: RowData P1 P2 * * *
**
** Write into register P2 the complete row data for cursor P1.
@ -4103,18 +4151,13 @@ case OP_RowData: {
/* Note that RowKey and RowData are really exactly the same instruction */
assert( pOp->p1>=0 && pOp->p1<p->nCursor );
pC = p->apCsr[pOp->p1];
assert( pC->isTable || pOp->opcode==OP_RowKey );
assert( pC->isSorter==0 );
assert( pC->isTable || pOp->opcode!=OP_RowData );
assert( pC->isIndex || pOp->opcode==OP_RowData );
assert( pC!=0 );
assert( pC->nullRow==0 );
assert( pC->pseudoTableReg==0 );
if( isSorter(pC) ){
assert( pOp->opcode==OP_RowKey );
rc = sqlite3VdbeSorterRowkey(pC, pOut);
break;
}
assert( !pC->isSorter );
assert( pC->pCursor!=0 );
pCrsr = pC->pCursor;
assert( sqlite3BtreeCursorIsValid(pCrsr) );
@ -4271,6 +4314,10 @@ case OP_Last: { /* jump */
** regression tests can determine whether or not the optimizer is
** correctly optimizing out sorts.
*/
case OP_SorterSort: /* jump */
#ifdef SQLITE_OMIT_MERGE_SORT
pOp->opcode = OP_Sort;
#endif
case OP_Sort: { /* jump */
#ifdef SQLITE_TEST
sqlite3_sort_count++;
@ -4295,6 +4342,7 @@ case OP_Rewind: { /* jump */
assert( pOp->p1>=0 && pOp->p1<p->nCursor );
pC = p->apCsr[pOp->p1];
assert( pC!=0 );
assert( pC->isSorter==(pOp->opcode==OP_SorterSort) );
res = 1;
if( isSorter(pC) ){
rc = sqlite3VdbeSorterRewind(db, pC, &res);
@ -4347,6 +4395,10 @@ case OP_Rewind: { /* jump */
** If P5 is positive and the jump is taken, then event counter
** number P5-1 in the prepared statement is incremented.
*/
case OP_SorterNext: /* jump */
#ifdef SQLITE_OMIT_MERGE_SORT
pOp->opcode = OP_Next;
#endif
case OP_Prev: /* jump */
case OP_Next: { /* jump */
VdbeCursor *pC;
@ -4359,8 +4411,9 @@ case OP_Next: { /* jump */
if( pC==0 ){
break; /* See ticket #2273 */
}
assert( pC->isSorter==(pOp->opcode==OP_SorterNext) );
if( isSorter(pC) ){
assert( pOp->opcode==OP_Next );
assert( pOp->opcode==OP_SorterNext );
rc = sqlite3VdbeSorterNext(db, pC, &res);
}else{
res = 1;
@ -4395,6 +4448,10 @@ case OP_Next: { /* jump */
** This instruction only works for indices. The equivalent instruction
** for tables is OP_Insert.
*/
case OP_SorterInsert: /* in2 */
#ifdef SQLITE_OMIT_MERGE_SORT
pOp->opcode = OP_IdxInsert;
#endif
case OP_IdxInsert: { /* in2 */
VdbeCursor *pC;
BtCursor *pCrsr;
@ -4404,6 +4461,7 @@ case OP_IdxInsert: { /* in2 */
assert( pOp->p1>=0 && pOp->p1<p->nCursor );
pC = p->apCsr[pOp->p1];
assert( pC!=0 );
assert( pC->isSorter==(pOp->opcode==OP_SorterInsert) );
pIn2 = &aMem[pOp->p2];
assert( pIn2->flags & MEM_Blob );
pCrsr = pC->pCursor;
@ -4411,18 +4469,19 @@ case OP_IdxInsert: { /* in2 */
assert( pC->isTable==0 );
rc = ExpandBlob(pIn2);
if( rc==SQLITE_OK ){
if( isSorter(pC) ){
rc = sqlite3VdbeSorterWrite(db, pC, pIn2);
}else{
nKey = pIn2->n;
zKey = pIn2->z;
rc = sqlite3VdbeSorterWrite(db, pC, nKey);
if( rc==SQLITE_OK ){
rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3,
((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0)
);
assert( pC->deferredMoveto==0 );
}
pC->cacheStatus = CACHE_STALE;
}
}
}
break;
}

View File

@ -59,12 +59,13 @@ struct VdbeCursor {
Bool isTable; /* True if a table requiring integer keys */
Bool isIndex; /* True if an index containing keys only - no data */
Bool isOrdered; /* True if the underlying table is BTREE_UNORDERED */
Bool isSorter; /* True if a new-style sorter */
sqlite3_vtab_cursor *pVtabCursor; /* The cursor for a virtual table */
const sqlite3_module *pModule; /* Module for cursor pVtabCursor */
i64 seqCount; /* Sequence counter */
i64 movetoTarget; /* Argument to the deferred sqlite3BtreeMoveto() */
i64 lastRowid; /* Last rowid from a Next or NextIdx operation */
VdbeSorter *pSorter; /* Sorter object for OP_OpenSorter cursors */
VdbeSorter *pSorter; /* Sorter object for OP_SorterOpen cursors */
/* Result of last sqlite3BtreeMoveto() done by an OP_NotExists or
** OP_IsUnique opcode on this cursor. */
@ -402,13 +403,15 @@ void sqlite3VdbeMemStoreType(Mem *pMem);
# define sqlite3VdbeSorterRowkey(Y,Z) SQLITE_OK
# define sqlite3VdbeSorterRewind(X,Y,Z) SQLITE_OK
# define sqlite3VdbeSorterNext(X,Y,Z) SQLITE_OK
# define sqlite3VdbeSorterCompare(X,Y,Z) SQLITE_OK
#else
int sqlite3VdbeSorterInit(sqlite3 *, VdbeCursor *);
int sqlite3VdbeSorterWrite(sqlite3 *, VdbeCursor *, int);
void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *);
int sqlite3VdbeSorterRowkey(VdbeCursor *, Mem *);
int sqlite3VdbeSorterRewind(sqlite3 *, VdbeCursor *, int *);
int sqlite3VdbeSorterNext(sqlite3 *, VdbeCursor *, int *);
int sqlite3VdbeSorterRewind(sqlite3 *, VdbeCursor *, int *);
int sqlite3VdbeSorterWrite(sqlite3 *, VdbeCursor *, Mem *);
int sqlite3VdbeSorterCompare(VdbeCursor *, Mem *, int *);
#endif
#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0

View File

@ -433,7 +433,7 @@ static void resolveP2Values(Vdbe *p, int *pMaxFuncArgs){
n = pOp[-1].p1;
if( n>nMaxArgs ) nMaxArgs = n;
#endif
}else if( opcode==OP_Next ){
}else if( opcode==OP_Next || opcode==OP_SorterNext ){
pOp->p4.xAdvance = sqlite3BtreeNext;
pOp->p4type = P4_ADVANCE;
}else if( opcode==OP_Prev ){

View File

@ -21,6 +21,7 @@
#ifndef SQLITE_OMIT_MERGE_SORT
typedef struct VdbeSorterIter VdbeSorterIter;
typedef struct SorterRecord SorterRecord;
/*
** NOTES ON DATA STRUCTURE USED FOR N-WAY MERGES:
@ -92,8 +93,7 @@ typedef struct VdbeSorterIter VdbeSorterIter;
** being merged (rounded up to the next power of 2).
*/
struct VdbeSorter {
int nWorking; /* Start a new b-tree after this many pages */
int nBtree; /* Current size of b-tree contents as PMA */
int nInMemory; /* Current size of pRecord list as PMA */
int nTree; /* Used size of aTree/aIter (power of 2) */
VdbeSorterIter *aIter; /* Array of iterators to merge */
int *aTree; /* Current state of incremental merge */
@ -101,6 +101,11 @@ struct VdbeSorter {
i64 iReadOff; /* Current read offset within file pTemp1 */
sqlite3_file *pTemp1; /* PMA file 1 */
int nPMA; /* Number of PMAs stored in pTemp1 */
SorterRecord *pRecord; /* Head of in-memory record list */
int mnPmaSize; /* Minimum PMA size, in bytes */
int mxPmaSize; /* Maximum PMA size, in bytes. 0==no limit */
char *aSpace; /* Space for UnpackRecord() */
int nSpace; /* Size of aSpace in bytes */
};
/*
@ -117,6 +122,17 @@ struct VdbeSorterIter {
u8 *aKey; /* Pointer to current key */
};
/*
** A structure to store a single record. All in-memory records are connected
** together into a linked list headed at VdbeSorter.pRecord using the
** SorterRecord.pNext pointer.
*/
struct SorterRecord {
/* Record content and its size. Presumably pVal points at the serialized
** record and nVal is its length in bytes -- TODO confirm at the code that
** builds the list. */
void *pVal;
int nVal;
/* Next record on the in-memory list, or NULL at the end of the list */
SorterRecord *pNext;
};
/* Minimum PMA size, measured in database pages. Also the lower bound applied
** to the cache size when computing VdbeSorter.mxPmaSize. */
#define SORTER_MIN_WORKING 10
@ -275,6 +291,70 @@ static int vdbeSorterIterInit(
return rc;
}
/*
** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2,
** size nKey2 bytes).  The KeyInfo attached to cursor pCsr (pCsr->pKeyInfo)
** supplies the collation functions used by the comparison.  If an error
** occurs, return an SQLite error code.  Otherwise, return SQLITE_OK and set
** *pRes to a negative, zero or positive value, depending on whether key1 is
** smaller than, equal to or larger than key2.
**
** If the bOmitRowid argument is non-zero, assume both keys end in a rowid
** field. For the purposes of the comparison, ignore it. Also, if bOmitRowid
** is non-zero and any field of key2 other than that trailing rowid is NULL,
** *pRes is set to -1 (key1 is reported as the smaller key) without the
** keys being compared at all.
**
** If pKey2 is passed a NULL pointer, then it is assumed that the
** pCsr->pSorter->aSpace buffer has already been allocated and contains an
** unpacked record that is used as key2.  bOmitRowid must be zero in that
** case.
**
** The unpack buffer is allocated on first use and cached in the sorter
** object (pSorter->aSpace/nSpace) so that later calls can reuse it.  The
** only error this routine itself reports is SQLITE_NOMEM, when that
** allocation fails; *pRes is left unchanged in that case.
*/
static int vdbeSorterCompare(
  VdbeCursor *pCsr,               /* Cursor object (for pKeyInfo) */
  int bOmitRowid,                 /* Ignore rowid field at end of keys */
  void *pKey1, int nKey1,         /* Left side of comparison */
  void *pKey2, int nKey2,         /* Right side of comparison */
  int *pRes                       /* OUT: Result of comparison */
){
  KeyInfo *pKeyInfo = pCsr->pKeyInfo;
  VdbeSorter *pSorter = pCsr->pSorter;
  char *aSpace = pSorter->aSpace;
  int nSpace = pSorter->nSpace;
  UnpackedRecord *r2;
  int i;

  /* Lazily allocate space for an UnpackedRecord header plus one Mem cell
  ** per key field (and one spare). */
  if( aSpace==0 ){
    nSpace = ROUND8(sizeof(UnpackedRecord))+(pKeyInfo->nField+1)*sizeof(Mem);
    aSpace = (char *)sqlite3Malloc(nSpace);
    if( aSpace==0 ) return SQLITE_NOMEM;
    pSorter->aSpace = aSpace;
    pSorter->nSpace = nSpace;
  }

  if( pKey2 ){
    /* This call cannot fail, because the buffer it unpacks into has
    ** already been allocated. */
    r2 = sqlite3VdbeRecordUnpack(pKeyInfo, nKey2, pKey2, aSpace, nSpace);
    assert( r2 && (r2->flags & UNPACKED_NEED_FREE)==0 );
    assert( r2==(UnpackedRecord*)aSpace );
  }else{
    /* Reuse whatever unpacked record is already sitting in the buffer. */
    r2 = (UnpackedRecord *)aSpace;
    assert( !bOmitRowid );
  }

  if( bOmitRowid ){
    /* A NULL in any field of key2 ahead of the rowid short-circuits the
    ** comparison with a "key1 is smaller" result. */
    for(i=0; i<r2->nField-1; i++){
      if( r2->aMem[i].flags & MEM_Null ){
        *pRes = -1;
        return SQLITE_OK;
      }
    }
    /* Drop the trailing rowid field and compare on the remaining prefix. */
    r2->flags |= UNPACKED_PREFIX_MATCH;
    r2->nField--;
    assert( r2->nField>0 );
  }

  *pRes = sqlite3VdbeRecordCompare(nKey1, pKey1, r2);
  return SQLITE_OK;
}
/*
** This function is called to compare two iterator keys when merging
** multiple b-tree segments. Parameter iOut is the index of the aTree[]
@ -306,20 +386,16 @@ static int vdbeSorterDoCompare(VdbeCursor *pCsr, int iOut){
}else if( p2->pFile==0 ){
iRes = i1;
}else{
char aSpace[150];
UnpackedRecord *r1;
r1 = sqlite3VdbeRecordUnpack(
pCsr->pKeyInfo, p1->nKey, p1->aKey, aSpace, sizeof(aSpace)
int res;
int rc = vdbeSorterCompare(
pCsr, 0, p1->aKey, p1->nKey, p2->aKey, p2->nKey, &res
);
if( r1==0 ) return SQLITE_NOMEM;
if( sqlite3VdbeRecordCompare(p2->nKey, p2->aKey, r1)>=0 ){
if( rc!=SQLITE_OK ) return rc;
if( res<=0 ){
iRes = i1;
}else{
iRes = i2;
}
sqlite3VdbeDeleteUnpackedRecord(r1);
}
pSorter->aTree[iOut] = iRes;
@ -330,9 +406,37 @@ static int vdbeSorterDoCompare(VdbeCursor *pCsr, int iOut){
** Initialize the temporary index cursor just opened as a sorter cursor.
*/
int sqlite3VdbeSorterInit(sqlite3 *db, VdbeCursor *pCsr){
  int pgsz;                       /* Page size of main database */
  int mxCache;                    /* Cache size */
  VdbeSorter *pSorter;            /* The new sorter */

  /* A sorter cursor has key information but no b-tree of its own. */
  assert( pCsr->pKeyInfo && pCsr->pBt==0 );

  /* Allocate the sorter exactly once; all fields start out zeroed. */
  pCsr->pSorter = pSorter = sqlite3DbMallocZero(db, sizeof(VdbeSorter));
  if( pSorter==0 ){
    return SQLITE_NOMEM;
  }

  /* Unless temporary storage is held in memory, derive the PMA size
  ** thresholds from the page size and cache size of database 0:
  **
  **   mnPmaSize = SORTER_MIN_WORKING * page-size
  **   mxPmaSize = MAX(cache-size, SORTER_MIN_WORKING) * page-size
  **
  ** Otherwise both thresholds are left at the zero value supplied by the
  ** zeroing allocator above. */
  if( !sqlite3TempInMemory(db) ){
    pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
    pSorter->mnPmaSize = SORTER_MIN_WORKING * pgsz;
    mxCache = db->aDb[0].pSchema->cache_size;
    if( mxCache<SORTER_MIN_WORKING ) mxCache = SORTER_MIN_WORKING;
    pSorter->mxPmaSize = mxCache * pgsz;
  }

  return SQLITE_OK;
}
/*
** Release every element of the singly-linked list of sorter records whose
** head is pRecord. Passing a NULL head is a harmless no-op.
*/
static void vdbeSorterRecordFree(sqlite3 *db, SorterRecord *pRecord){
  SorterRecord *pCur = pRecord;
  while( pCur ){
    /* Fetch the successor before the current element is released. */
    SorterRecord *pFollow = pCur->pNext;
    sqlite3DbFree(db, pCur);
    pCur = pFollow;
  }
}
/*
@ -351,6 +455,8 @@ void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){
if( pSorter->pTemp1 ){
sqlite3OsCloseFree(pSorter->pTemp1);
}
vdbeSorterRecordFree(db, pSorter->pRecord);
sqlite3_free(pSorter->aSpace);
sqlite3DbFree(db, pSorter);
pCsr->pSorter = 0;
}
@ -370,10 +476,103 @@ static int vdbeSorterOpenTempFile(sqlite3 *db, sqlite3_file **ppFile){
);
}
/*
** Attempt to merge the two sorted lists p1 and p2 into a single sorted
** list. On success, *ppOut is set to the head of the merged list and
** SQLITE_OK is returned. When two keys compare equal, the element from
** p1 is placed first.
**
** If a key comparison reports an error, all records (the unmerged remains
** of both inputs as well as the part merged so far) are freed, *ppOut is
** set to NULL and the error code is returned.
*/
static int vdbeSorterMerge(
  sqlite3 *db,                    /* Database handle */
  VdbeCursor *pCsr,               /* For pKeyInfo */
  SorterRecord *p1,               /* First list to merge */
  SorterRecord *p2,               /* Second list to merge */
  SorterRecord **ppOut            /* OUT: Head of merged list */
){
  SorterRecord *pHead = 0;        /* Head of the merged list */
  SorterRecord **ppTail = &pHead; /* Where to link the next element */
  void *pRight = (p2!=0) ? p2->pVal : 0;

  while( p1!=0 && p2!=0 ){
    int cmp;
    int rc = vdbeSorterCompare(
        pCsr, 0, p1->pVal, p1->nVal, pRight, p2->nVal, &cmp
    );
    if( rc!=SQLITE_OK ){
      /* Terminate the merged portion, then discard all three pieces. */
      *ppTail = 0;
      vdbeSorterRecordFree(db, p1);
      vdbeSorterRecordFree(db, p2);
      vdbeSorterRecordFree(db, pHead);
      *ppOut = 0;
      return rc;
    }

    if( cmp>0 ){
      /* Head of p2 is strictly smaller: emit it and load the next key. */
      *ppTail = p2;
      ppTail = &p2->pNext;
      p2 = p2->pNext;
      pRight = p2 ? p2->pVal : 0;
    }else{
      /* Head of p1 is smaller or equal: emit it. The right-hand key is
      ** unchanged, so pass NULL next time to reuse the record that the
      ** comparison routine has already unpacked. */
      *ppTail = p1;
      ppTail = &p1->pNext;
      p1 = p1->pNext;
      pRight = 0;
    }
  }

  /* At most one input still has elements; append it unchanged. */
  *ppTail = (p1!=0) ? p1 : p2;
  *ppOut = pHead;
  return SQLITE_OK;
}
/*
** Sort the linked list of records headed at pCsr->pSorter->pRecord and
** store the head of the sorted list back in the same field. Return
** SQLITE_OK if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if
** an error occurs.
**
** Records are detached from the list one at a time and merged into an
** array of slots. A new single-element list is merged with the occupants
** of slots 0, 1, 2, ... until an empty slot is found, where the combined
** list is stored. Once every record has been consumed, the slots are
** merged together to produce the final list.
*/
static int vdbeSorterSort(sqlite3 *db, VdbeCursor *pCsr){
  enum { N_SLOT = 64 };           /* Number of entries in aBin[] */
  VdbeSorter *pSorter = pCsr->pSorter;
  SorterRecord **aBin;            /* Partially merged sorted lists */
  SorterRecord *pRun;             /* List currently being placed/merged */
  SorterRecord *pRest;            /* Not yet processed part of the input */
  int iBin;                       /* Index into aBin[] */
  int rc = SQLITE_OK;             /* Return code */

  aBin = (SorterRecord **)sqlite3MallocZero(N_SLOT * sizeof(SorterRecord *));
  if( aBin==0 ) return SQLITE_NOMEM;

  for(pRun=pSorter->pRecord; pRun; pRun=pRest){
    /* Detach the first record so that it forms a one-element list. */
    pRest = pRun->pNext;
    pRun->pNext = 0;

    /* Absorb occupied slots, emptying each, until a free one is found. */
    iBin = 0;
    while( rc==SQLITE_OK && aBin[iBin] ){
      rc = vdbeSorterMerge(db, pCsr, pRun, aBin[iBin], &pRun);
      aBin[iBin] = 0;
      iBin++;
    }
    if( rc!=SQLITE_OK ){
      /* The failed merge already released its inputs; drop the rest. */
      vdbeSorterRecordFree(db, pRest);
      break;
    }
    aBin[iBin] = pRun;
  }

  /* Fold all slots into one list, or free them if an error has occurred. */
  pRun = 0;
  for(iBin=0; iBin<N_SLOT; iBin++){
    if( rc!=SQLITE_OK ){
      vdbeSorterRecordFree(db, aBin[iBin]);
      continue;
    }
    rc = vdbeSorterMerge(db, pCsr, pRun, aBin[iBin], &pRun);
  }

  pSorter->pRecord = pRun;
  sqlite3_free(aBin);
  return rc;
}
/*
** Write the current contents of the in-memory linked-list to a PMA. Return
** SQLITE_OK if successful, or an SQLite error code otherwise.
**
** The format of a PMA is:
**
@ -384,19 +583,19 @@ static int vdbeSorterOpenTempFile(sqlite3 *db, sqlite3_file **ppFile){
** Each record consists of a varint followed by a blob of data (the
** key). The varint is the number of bytes in the blob of data.
*/
static int vdbeSorterBtreeToPMA(sqlite3 *db, VdbeCursor *pCsr){
static int vdbeSorterListToPMA(sqlite3 *db, VdbeCursor *pCsr){
int rc = SQLITE_OK; /* Return code */
VdbeSorter *pSorter = pCsr->pSorter;
int res = 0;
/* sqlite3BtreeFirst() cannot fail because sorter btrees are always held
** in memory and so an I/O error is not possible. */
rc = sqlite3BtreeFirst(pCsr->pCursor, &res);
if( NEVER(rc!=SQLITE_OK) || res ) return rc;
assert( pSorter->nBtree>0 );
if( pSorter->nInMemory==0 ){
assert( pSorter->pRecord==0 );
return rc;
}
rc = vdbeSorterSort(db, pCsr);
/* If the first temporary PMA file has not been opened, open it now. */
if( pSorter->pTemp1==0 ){
if( rc==SQLITE_OK && pSorter->pTemp1==0 ){
rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1);
assert( rc!=SQLITE_OK || pSorter->pTemp1 );
assert( pSorter->iWriteOff==0 );
@ -404,129 +603,81 @@ static int vdbeSorterBtreeToPMA(sqlite3 *db, VdbeCursor *pCsr){
}
if( rc==SQLITE_OK ){
i64 iWriteOff = pSorter->iWriteOff;
void *aMalloc = 0; /* Array used to hold a single record */
int nMalloc = 0; /* Allocated size of aMalloc[] in bytes */
i64 iOff = pSorter->iWriteOff;
SorterRecord *p;
SorterRecord *pNext = 0;
pSorter->nPMA++;
for(
rc = vdbeSorterWriteVarint(pSorter->pTemp1, pSorter->nBtree, &iWriteOff);
rc==SQLITE_OK && res==0;
rc = sqlite3BtreeNext(pCsr->pCursor, &res)
){
i64 nKey; /* Size of this key in bytes */
rc = vdbeSorterWriteVarint(pSorter->pTemp1, pSorter->nInMemory, &iOff);
for(p=pSorter->pRecord; rc==SQLITE_OK && p; p=pNext){
pNext = p->pNext;
rc = vdbeSorterWriteVarint(pSorter->pTemp1, p->nVal, &iOff);
/* Write the size of the record in bytes to the output file */
(void)sqlite3BtreeKeySize(pCsr->pCursor, &nKey);
rc = vdbeSorterWriteVarint(pSorter->pTemp1, nKey, &iWriteOff);
/* Make sure the aMalloc[] buffer is large enough for the record */
if( rc==SQLITE_OK && nKey>nMalloc ){
aMalloc = sqlite3DbReallocOrFree(db, aMalloc, nKey);
if( !aMalloc ){
rc = SQLITE_NOMEM;
}else{
nMalloc = nKey;
}
}
/* Write the record itself to the output file */
if( rc==SQLITE_OK ){
/* sqlite3BtreeKey() cannot fail because sorter btrees held in memory */
rc = sqlite3BtreeKey(pCsr->pCursor, 0, nKey, aMalloc);
if( ALWAYS(rc==SQLITE_OK) ){
rc = sqlite3OsWrite(pSorter->pTemp1, aMalloc, nKey, iWriteOff);
iWriteOff += nKey;
}
rc = sqlite3OsWrite(pSorter->pTemp1, p->pVal, p->nVal, iOff);
iOff += p->nVal;
}
if( rc!=SQLITE_OK ) break;
sqlite3DbFree(db, p);
}
/* This assert verifies that unless an error has occurred, the size of
** the PMA on disk is the same as the expected size stored in
** pSorter->nBtree. */
assert( rc!=SQLITE_OK || pSorter->nBtree==(
iWriteOff-pSorter->iWriteOff-sqlite3VarintLen(pSorter->nBtree)
** pSorter->nInMemory. */
assert( rc!=SQLITE_OK || pSorter->nInMemory==(
iOff-pSorter->iWriteOff-sqlite3VarintLen(pSorter->nInMemory)
));
pSorter->iWriteOff = iWriteOff;
sqlite3DbFree(db, aMalloc);
pSorter->iWriteOff = iOff;
pSorter->pRecord = p;
}
pSorter->nBtree = 0;
return rc;
}
/*
** This function is called on a sorter cursor by the VDBE before each row
** is inserted into VdbeCursor.pCsr. Argument nKey is the size of the key, in
** bytes, about to be inserted.
**
** If it is determined that the temporary b-tree accessed via VdbeCursor.pCsr
** is large enough, its contents are written to a sorted PMA on disk and the
** tree emptied. This prevents the b-tree (which must be small enough to
** fit entirely in the cache in order to support efficient inserts) from
** growing too large.
**
** An SQLite error code is returned if an error occurs. Otherwise, SQLITE_OK.
** Add a record to the sorter.
*/
int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr, int nKey){
int rc = SQLITE_OK; /* Return code */
int sqlite3VdbeSorterWrite(
sqlite3 *db, /* Database handle */
VdbeCursor *pCsr, /* Sorter cursor */
Mem *pVal /* Memory cell containing record */
){
VdbeSorter *pSorter = pCsr->pSorter;
if( pSorter ){
Pager *pPager = sqlite3BtreePager(pCsr->pBt);
int nPage; /* Current size of temporary file in pages */
int rc = SQLITE_OK; /* Return Code */
SorterRecord *pNew; /* New list element */
/* Sorters never spill to disk */
assert( sqlite3PagerFile(pPager)->pMethods==0 );
assert( pSorter );
pSorter->nInMemory += sqlite3VarintLen(pVal->n) + pVal->n;
/* Determine how many pages the temporary b-tree has grown to */
sqlite3PagerPagecount(pPager, &nPage);
/* If pSorter->nWorking is still zero, but the temporary file has been
** created in the file-system, then the most recent insert into the
** current b-tree segment probably caused the cache to overflow (it is
** also possible that sqlite3_release_memory() was called). So set the
** size of the working set to a little less than the current size of the
** file in pages. */
if( pSorter->nWorking==0 && sqlite3PagerUnderStress(pPager) ){
pSorter->nWorking = nPage-5;
if( pSorter->nWorking<SORTER_MIN_WORKING ){
pSorter->nWorking = SORTER_MIN_WORKING;
}
pNew = (SorterRecord *)sqlite3DbMallocRaw(db, pVal->n + sizeof(SorterRecord));
if( pNew==0 ){
rc = SQLITE_NOMEM;
}else{
pNew->pVal = (void *)&pNew[1];
memcpy(pNew->pVal, pVal->z, pVal->n);
pNew->nVal = pVal->n;
pNew->pNext = pSorter->pRecord;
pSorter->pRecord = pNew;
}
/* If the number of pages used by the current b-tree segment is greater
** than the size of the working set (VdbeSorter.nWorking), start a new
** segment b-tree. */
if( pSorter->nWorking && nPage>=pSorter->nWorking ){
BtCursor *p = pCsr->pCursor;/* Cursor structure to close and reopen */
int iRoot; /* Root page of new tree */
/* Copy the current contents of the b-tree into a PMA in sorted order.
** Close the currently open b-tree cursor. */
rc = vdbeSorterBtreeToPMA(db, pCsr);
sqlite3BtreeCloseCursor(p);
if( rc==SQLITE_OK ){
rc = sqlite3BtreeDropTable(pCsr->pBt, 2, 0);
#ifdef SQLITE_DEBUG
sqlite3PagerPagecount(pPager, &nPage);
assert( rc!=SQLITE_OK || nPage==1 );
#endif
}
if( rc==SQLITE_OK ){
rc = sqlite3BtreeCreateTable(pCsr->pBt, &iRoot, BTREE_BLOBKEY);
}
if( rc==SQLITE_OK ){
assert( iRoot==2 );
rc = sqlite3BtreeCursor(pCsr->pBt, iRoot, 1, pCsr->pKeyInfo, p);
}
/* See if the contents of the sorter should now be written out. They
** are written out when either of the following are true:
**
** * The total memory allocated for the in-memory list is greater
** than (page-size * cache-size), or
**
** * The total memory allocated for the in-memory list is greater
** than (page-size * 10) and sqlite3HeapNearlyFull() returns true.
*/
if( rc==SQLITE_OK && pSorter->mxPmaSize>0 && (
(pSorter->nInMemory>pSorter->mxPmaSize)
|| (pSorter->nInMemory>pSorter->mnPmaSize && sqlite3HeapNearlyFull())
)){
rc = vdbeSorterListToPMA(db, pCsr);
pSorter->nInMemory = 0;
}
pSorter->nBtree += sqlite3VarintLen(nKey) + nKey;
}
return rc;
}
@ -576,15 +727,19 @@ int sqlite3VdbeSorterRewind(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){
assert( pSorter );
/* Write the current b-tree to a PMA. Close the b-tree cursor. */
rc = vdbeSorterBtreeToPMA(db, pCsr);
sqlite3BtreeCloseCursor(pCsr->pCursor);
if( rc!=SQLITE_OK ) return rc;
/* If no data has been written to disk, then do not do so now. Instead,
** sort the VdbeSorter.pRecord list. The vdbe layer will read data directly
** from the in-memory list. */
if( pSorter->nPMA==0 ){
*pbEof = 1;
return SQLITE_OK;
*pbEof = !pSorter->pRecord;
assert( pSorter->aTree==0 );
return vdbeSorterSort(db, pCsr);
}
/* Write the current b-tree to a PMA. Close the b-tree cursor. */
rc = vdbeSorterListToPMA(db, pCsr);
if( rc!=SQLITE_OK ) return rc;
/* Allocate space for aIter[] and aTree[]. */
nIter = pSorter->nPMA;
if( nIter>SORTER_MAX_MERGE_COUNT ) nIter = SORTER_MAX_MERGE_COUNT;
@ -671,42 +826,91 @@ int sqlite3VdbeSorterRewind(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){
*/
int sqlite3VdbeSorterNext(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){
  VdbeSorter *pSorter = pCsr->pSorter;
  int rc;                         /* Return code */

  if( pSorter->aTree ){
    /* Merging PMAs: advance the iterator that supplied the current key,
    ** then recompute the aTree[] entries on the path from that iterator
    ** up to the root (aTree[1]).
    **
    ** aTree[] must only be read inside this branch: it is NULL when the
    ** sorter is serving records from the in-memory list. */
    int iPrev = pSorter->aTree[1];/* Index of iterator to advance */
    int i;                        /* Index of aTree[] to recalculate */

    rc = vdbeSorterIterNext(db, &pSorter->aIter[iPrev]);
    for(i=(pSorter->nTree+iPrev)/2; rc==SQLITE_OK && i>0; i=i/2){
      rc = vdbeSorterDoCompare(pCsr, i);
    }

    *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
  }else{
    /* In-memory list: unlink and free the head record. The caller must
    ** not invoke this once the list is empty (pRecord is dereferenced
    ** unconditionally). */
    SorterRecord *pFree = pSorter->pRecord;
    pSorter->pRecord = pFree->pNext;
    pFree->pNext = 0;
    vdbeSorterRecordFree(db, pFree);
    *pbEof = !pSorter->pRecord;
    rc = SQLITE_OK;
  }
  return rc;
}
/*
** Return a pointer to the sorter-owned buffer holding the current key and
** write the size of that key, in bytes, to *pnKey.
**
** When pSorter->aTree is set, the key comes from the iterator selected by
** aTree[1]. Otherwise it is the record at the head of the in-memory list.
*/
static void *vdbeSorterRowkey(
  VdbeSorter *pSorter,            /* Sorter object */
  int *pnKey                      /* OUT: Size of current key in bytes */
){
  if( pSorter->aTree==0 ){
    SorterRecord *pHead = pSorter->pRecord;
    *pnKey = pHead->nVal;
    return pHead->pVal;
  }else{
    VdbeSorterIter *pWinner = &pSorter->aIter[ pSorter->aTree[1] ];
    *pnKey = pWinner->nKey;
    return pWinner->aKey;
  }
}
/*
** Copy the current sorter key into the memory cell pOut.
**
** The key is obtained through vdbeSorterRowkey(), so this works both when
** the sorter is merging PMAs and when it is serving records from its
** in-memory list. pOut is grown as required and marked as a blob.
**
** Return SQLITE_OK on success, or SQLITE_NOMEM if pOut cannot be grown.
*/
int sqlite3VdbeSorterRowkey(VdbeCursor *pCsr, Mem *pOut){
  VdbeSorter *pSorter = pCsr->pSorter;
  void *pKey; int nKey;           /* Sorter key to copy into pOut */

  pKey = vdbeSorterRowkey(pSorter, &nKey);
  if( sqlite3VdbeMemGrow(pOut, nKey, 0) ){
    return SQLITE_NOMEM;
  }
  pOut->n = nKey;
  MemSetTypeFlag(pOut, MEM_Blob);
  memcpy(pOut->z, pKey, nKey);

  return SQLITE_OK;
}
/*
** Compare the record held in memory cell pVal against the key that the
** sorter cursor pCsr currently points to. The comparison is performed with
** the "omit rowid" option enabled, so the rowid field at the end of each
** record does not take part.
**
** On success SQLITE_OK is returned and *pRes is set to a negative, zero or
** positive value according to whether the key in pVal is smaller than,
** equal to or larger than the current sorter key. If an error occurs, an
** SQLite error code (i.e. SQLITE_NOMEM) is returned instead.
*/
int sqlite3VdbeSorterCompare(
  VdbeCursor *pCsr,               /* Sorter cursor */
  Mem *pVal,                      /* Value to compare to current sorter key */
  int *pRes                       /* OUT: Result of comparison */
){
  int nCur;                       /* Size of current sorter key in bytes */
  void *pCur = vdbeSorterRowkey(pCsr->pSorter, &nCur);
  int rc = vdbeSorterCompare(pCsr, 1, pVal->z, pVal->n, pCur, nCur, pRes);

  /* Unless an error or a malloc failure occurred, pVal is never expected
  ** to compare greater than the current sorter key. */
  assert( rc!=SQLITE_OK || pVal->db->mallocFailed || (*pRes)<=0 );
  return rc;
}
#endif /* #ifndef SQLITE_OMIT_MERGE_SORT */

View File

@ -45,7 +45,7 @@ proc do_temptables_test {tn sql temptables} {
uplevel [list do_test $tn [subst -novar {
set ret ""
db eval "EXPLAIN [set sql]" {
if {$opcode == "OpenEphemeral"} {
if {$opcode == "OpenEphemeral" || $opcode == "SorterOpen"} {
if {$p5 != "10" && $p5!="00"} { error "p5 = $p5" }
if {$p5 == "10"} {
lappend ret hash

View File

@ -108,5 +108,19 @@ do_execsql_test 1.8 {
PRAGMA integrity_check
} {ok}
do_execsql_test 2.1 {
BEGIN;
CREATE TABLE t2(x);
INSERT INTO t2 VALUES(14);
INSERT INTO t2 VALUES(35);
INSERT INTO t2 VALUES(15);
INSERT INTO t2 VALUES(35);
INSERT INTO t2 VALUES(16);
COMMIT;
}
do_catchsql_test 2.2 {
CREATE UNIQUE INDEX i3 ON t2(x);
} {1 {indexed columns are not unique}}
finish_test

View File

@ -270,7 +270,7 @@ ifcapable {explain} {
CREATE UNIQUE INDEX ex1i1 ON ex1(a);
EXPLAIN REINDEX;
}]
regexp { IsUnique \d+ \d+ \d+ \d+ } $x
regexp { SorterCompare \d+ \d+ \d+ } $x
} {1}
if {[regexp {16} [db one {PRAGMA encoding}]]} {
do_test misc3-6.11-utf16 {