From c99130fd82a9cfcd24c4031566c4d23c284f053d Mon Sep 17 00:00:00 2001 From: drh Date: Sun, 11 Sep 2005 11:56:27 +0000 Subject: [PATCH] COUNT(DISTINCT x) is now fully functional, though it could benefit from additional testing. (CVS 2688) FossilOrigin-Name: 2f397bd8142ec5e8f7238127012d14fdb558d918 --- manifest | 19 ++++++------ manifest.uuid | 2 +- src/parse.y | 4 +-- src/select.c | 68 ++++++++++++++++++++++++++++++++++--------- test/distinctagg.test | 44 ++++++++++++++++++++++++++++ www/lang.tcl | 13 +++++++-- www/omitted.tcl | 7 +---- 7 files changed, 122 insertions(+), 35 deletions(-) create mode 100644 test/distinctagg.test diff --git a/manifest b/manifest index 2b22caf5ed..e15c42e7aa 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C The\s".dump"\scommand\sfrom\sthe\sshell\scorrectly\ssaves\sthe\sstate\sof\sthe\nsqlite_sequence\sand\ssqlite_stat1\stables,\sif\sthey\sexist.\s\sTicket\s#1419.\s(CVS\s2687) -D 2005-09-11T02:03:04 +C COUNT(DISTINCT\sx)\sis\snow\sfully\sfunctional,\sthough\sit\scould\sbenefit\sfrom\nadditional\stesting.\s(CVS\s2688) +D 2005-09-11T11:56:28 F Makefile.in 12784cdce5ffc8dfb707300c34e4f1eb3b8a14f1 F Makefile.linux-gcc 06be33b2a9ad4f005a5f42b22c4a19dab3cbb5c7 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 @@ -58,12 +58,12 @@ F src/os_win.c ed03a35b2894f9b99840415f941a9f8594dea756 F src/os_win.h 41a946bea10f61c158ce8645e7646b29d44f122b F src/pager.c 2b48db1cc6073a6d2577100866db6ae039d20940 F src/pager.h 17b13225abd93c1e9f470060f40a21b9edb5a164 -F src/parse.y c97d885c344579c55257e0bbaeaa1daa1659ef2d +F src/parse.y 3bceaece9f70571f2770fc88ff175febf6827367 F src/pragma.c 69413fbdc0c6aaa493a776ea52c1b3e6cf35dfb2 F src/prepare.c fc098db25d2a121affb08686cf04833fd50452d4 F src/printf.c c01e9ad473d79463fb1f483b1eca5c3cbed2a4e5 F src/random.c 90adff4e73a3b249eb4f1fc2a6ff9cf78c7233a4 -F src/select.c a255ca7eddd14c68d966b6323234dd94fcc7a31f +F src/select.c 67653a6fe6879be49e90ea05957c9f517978e215 F src/shell.c 3596c1e559b82663057940d19ba533ad421c7dd3 F src/sqlite.h.in 461b2535550cf77aedfd44385da11ef7d63e57a2 F src/sqliteInt.h 0d38d50ebe15a16a1253c9a6f9ce4ad188e097fe @@ -140,6 +140,7 @@ F test/delete.test 33e1670049364fc3604217a6c2eda042a47115ab F test/delete2.test e382b6a97787197eb8b93dd4ccd37797c3725ea3 F test/delete3.test 555e84a00a99230b7d049d477a324a631126a6ab F test/diskfull.test d828d72adfc9e2d1a194d25996718c1989152cf9 +F test/distinctagg.test 08ba294140bde69f24f6b2e0e3f5b181953fa5b6 F test/enc.test 7a03417a1051fe8bc6c7641cf4c8c3f7e0066d52 F test/enc2.test 76c13b8c00beaf95b15c152e95dab51292eb1f0d F test/enc3.test f6a5f0b7b7f3a88f030d3143729b87cd5c86d837 @@ -286,12 +287,12 @@ F www/fullscanb.gif f7c94cb227f060511f8909e10f570157263e9a25 F www/index-ex1-x-b.gif f9b1d85c3fa2435cf38b15970c7e3aa1edae23a3 F www/index.tcl 853525c11fb519dac801bcbbe0488c447e526e7b F www/indirect1b1.gif adfca361d2df59e34f9c5cac52a670c2bfc303a1 -F www/lang.tcl 7fd48a2d1866a58bca4d43808aed991616bf198d +F www/lang.tcl b04a87ce05cdbd8d356d6b760a9a0b6f6fce927e F www/lockingv3.tcl f59b19d6c8920a931f096699d6faaf61c05db55f F www/mingw.tcl d96b451568c5d28545fefe0c80bee3431c73f69c F www/nulls.tcl ec35193f92485b87b90a994a01d0171b58823fcf F www/oldnews.tcl 1a808d86882621557774bf7741ed81c7f4ef9f19 -F www/omitted.tcl f1e57977299c3ed54fbae55e4b5ea6a64de39e19 +F www/omitted.tcl 658ebdc83781ac419dc8a08b3f6cf93929023470 F www/opcode.tcl 5bd68059416b223515a680d410a9f7cb6736485f F www/optimizer.tcl d6812a10269bd0d7c488987aac0ad5036cace9dc F www/optimizing.tcl f0b2538988d1bbad16cbfe63ec6e8f48c9eb04e5 @@ -306,7 +307,7 @@ F www/tclsqlite.tcl 3df553505b6efcad08f91e9b975deb2e6c9bb955 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/version3.tcl a99cf5f6d8bd4d5537584a2b342f0fb9fa601d8b F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513 -P 26565b8931419031f9a8dd3947e1e2bd23ccbff2 -R 9a963bfef0e17a1fd6c7beefb5ea8bc6 +P 3f191cf497e5798a8620ebc5a85e34187f58371c +R 8acc7ab8193e40ac6db21da4fc146b2f U drh -Z 37627707368565dca6f88fb92264a6dc +Z 09cbf8e6da3db4ebea040b8fc4deab31 diff --git a/manifest.uuid b/manifest.uuid index a8e6ce6326..162f265d08 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3f191cf497e5798a8620ebc5a85e34187f58371c \ No newline at end of file +2f397bd8142ec5e8f7238127012d14fdb558d918 \ No newline at end of file diff --git a/src/parse.y b/src/parse.y index e0e4199f44..0abcf10642 100644 --- a/src/parse.y +++ b/src/parse.y @@ -14,7 +14,7 @@ ** the parser. Lemon will also generate a header file containing ** numeric codes for all of the tokens. ** -** @(#) $Id: parse.y,v 1.178 2005/09/10 16:46:13 drh Exp $ +** @(#) $Id: parse.y,v 1.179 2005/09/11 11:56:28 drh Exp $ */ // All token codes are small integers with #defines that begin with "TK_" @@ -665,8 +665,6 @@ expr(A) ::= ID(X) LP distinct(D) exprlist(Y) RP(E). { A = sqlite3ExprFunction(Y, &X); sqlite3ExprSpan(A,&X,&E); if( D ){ - sqlite3ErrorMsg(pParse, "DISTINCT in an aggregate function " - "is not currently supported"); A->flags |= EP_Distinct; } } diff --git a/src/select.c b/src/select.c index da7c2ab4c0..f156935272 100644 --- a/src/select.c +++ b/src/select.c @@ -12,7 +12,7 @@ ** This file contains C code routines that are called by the parser ** to handle SELECT statements in SQLite. ** -** $Id: select.c,v 1.267 2005/09/10 15:28:09 drh Exp $ +** $Id: select.c,v 1.268 2005/09/11 11:56:28 drh Exp $ */ #include "sqliteInt.h" @@ -364,6 +364,34 @@ static void codeLimiter( } } +/* +** Add code that will check to make sure the top N elements of the +** stack are distinct. iTab is a sorting index that holds previously +** seen combinations of the N values. A new entry is made in iTab +** if the current N values are new. +** +** A jump to addrRepeat is made and the K values are popped from the +** stack if the top N elements are not distinct. +*/ +static void codeDistinct( + Vdbe *v, /* Generate code into this VM */ + int iTab, /* A sorting index used to test for distinctness */ + int addrRepeat, /* Jump to here if not distinct */ + int N, /* The top N elements of the stack must be distinct */ + int K /* Pop K elements from the stack if indistinct */ +){ +#if NULL_ALWAYS_DISTINCT + sqlite3VdbeAddOp(v, OP_IsNull, -N, sqlite3VdbeCurrentAddr(v)+6); +#endif + sqlite3VdbeAddOp(v, OP_MakeRecord, -N, 0); + sqlite3VdbeAddOp(v, OP_Distinct, iTab, sqlite3VdbeCurrentAddr(v)+3); + sqlite3VdbeAddOp(v, OP_Pop, K, 0); + sqlite3VdbeAddOp(v, OP_Goto, 0, addrRepeat); + VdbeComment((v, "# skip indistinct records")); + sqlite3VdbeAddOp(v, OP_IdxInsert, iTab, 0); +} + + /* ** This routine generates the code for the inside of the inner loop ** of a SELECT. @@ -419,17 +447,7 @@ static int selectInnerLoop( */ if( hasDistinct ){ int n = pEList->nExpr; -#if NULL_ALWAYS_DISTINCT - sqlite3VdbeAddOp(v, OP_IsNull, -pEList->nExpr, sqlite3VdbeCurrentAddr(v)+7); -#endif - /* Deliberately leave the affinity string off of the following - ** OP_MakeRecord */ - sqlite3VdbeAddOp(v, OP_MakeRecord, -n, 0); - sqlite3VdbeAddOp(v, OP_Distinct, distinct, sqlite3VdbeCurrentAddr(v)+3); - sqlite3VdbeAddOp(v, OP_Pop, n+1, 0); - sqlite3VdbeAddOp(v, OP_Goto, 0, iContinue); - VdbeComment((v, "# skip indistinct records")); - sqlite3VdbeAddOp(v, OP_IdxInsert, distinct, 0); + codeDistinct(v, distinct, iContinue, n, n+1); if( pOrderBy==0 ){ codeLimiter(v, p, iContinue, iBreak, nColumn); } @@ -2407,6 +2425,7 @@ static void resetAccumulator(Parse *pParse, AggInfo *pAggInfo){ Vdbe *v = pParse->pVdbe; int i; int addr; + struct AggInfo_func *pFunc; if( pAggInfo->nFunc+pAggInfo->nColumn==0 ){ return; } @@ -2414,8 +2433,20 @@ static void resetAccumulator(Parse *pParse, AggInfo *pAggInfo){ for(i=0; inColumn; i++){ addr = sqlite3VdbeAddOp(v, OP_MemStore, pAggInfo->aCol[i].iMem, 0); } - for(i=0; inFunc; i++){ - addr = sqlite3VdbeAddOp(v, OP_MemStore, pAggInfo->aFunc[i].iMem, 0); + for(pFunc=pAggInfo->aFunc, i=0; inFunc; i++, pFunc++){ + addr = sqlite3VdbeAddOp(v, OP_MemStore, pFunc->iMem, 0); + if( pFunc->iDistinct>=0 ){ + Expr *pE = pFunc->pExpr; + if( pE->pList==0 || pE->pList->nExpr!=1 ){ + sqlite3ErrorMsg(pParse, "DISTINCT in aggregate must be followed " + "by an expression"); + pFunc->iDistinct = -1; + }else{ + KeyInfo *pKeyInfo = keyInfoFromExprList(pParse, pE->pList); + sqlite3VdbeOp3(v, OP_OpenVirtual, pFunc->iDistinct, 0, + (char*)pKeyInfo, P3_KEYINFO_HANDOFF); + } + } } sqlite3VdbeChangeP2(v, addr, 1); } @@ -2448,6 +2479,7 @@ static void updateAccumulator(Parse *pParse, AggInfo *pAggInfo){ pAggInfo->directMode = 1; for(i=0, pF=pAggInfo->aFunc; inFunc; i++, pF++){ int nArg; + int addrNext = 0; ExprList *pList = pF->pExpr->pList; if( pList ){ nArg = pList->nExpr; @@ -2455,6 +2487,11 @@ static void updateAccumulator(Parse *pParse, AggInfo *pAggInfo){ }else{ nArg = 0; } + if( pF->iDistinct>=0 ){ + addrNext = sqlite3VdbeMakeLabel(v); + assert( nArg==1 ); + codeDistinct(v, pF->iDistinct, addrNext, 1, 1); + } if( pF->pFunc->needCollSeq ){ CollSeq *pColl = 0; struct ExprList_item *pItem; @@ -2468,6 +2505,9 @@ static void updateAccumulator(Parse *pParse, AggInfo *pAggInfo){ sqlite3VdbeOp3(v, OP_CollSeq, 0, 0, (char *)pColl, P3_COLLSEQ); } sqlite3VdbeOp3(v, OP_AggStep, pF->iMem, nArg, (void*)pF->pFunc, P3_FUNCDEF); + if( addrNext ){ + sqlite3VdbeResolveLabel(v, addrNext); + } } for(i=0, pC=pAggInfo->aCol; inAccumulator; i++, pC++){ sqlite3ExprCode(pParse, pC->pExpr); diff --git a/test/distinctagg.test b/test/distinctagg.test new file mode 100644 index 0000000000..ebd3edd1f5 --- /dev/null +++ b/test/distinctagg.test @@ -0,0 +1,44 @@ +# 2005 September 11 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this script is the DISTINCT modifier on aggregate functions. +# +# $Id: distinctagg.test,v 1.1 2005/09/11 11:56:28 drh Exp $ + + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +do_test distinctagg-1.1 { + execsql { + CREATE TABLE t1(a,b,c); + INSERT INTO t1 VALUES(1,2,3); + INSERT INTO t1 VALUES(1,3,4); + INSERT INTO t1 VALUES(1,3,5); + SELECT count(distinct a), + count(distinct b), + count(distinct c), + count(all a) FROM t1; + } +} {1 2 3 3} +do_test distinctagg-1.2 { + execsql { + SELECT b, count(distinct c) FROM t1 GROUP BY b ORDER BY b + } +} {2 1 3 2} + +do_test distinctagg-2.1 { + catchsql { + SELECT count(distinct) FROM t1; + } +} {1 {DISTINCT in aggregate must be followed by an expression}} + +finish_test diff --git a/www/lang.tcl b/www/lang.tcl index 36c187a067..26094b7a31 100644 --- a/www/lang.tcl +++ b/www/lang.tcl @@ -1,7 +1,7 @@ # # Run this Tcl script to generate the lang-*.html files. # -set rcsid {$Id: lang.tcl,v 1.99 2005/09/08 20:37:44 drh Exp $} +set rcsid {$Id: lang.tcl,v 1.100 2005/09/11 11:56:28 drh Exp $} source common.tcl if {[llength $argv]>0} { @@ -1352,11 +1352,20 @@ UTF-8 strings.

-The following aggregate functions are available by default. Additional +The aggregate functions shown below are available by default. Additional aggregate functions written in C may be added using the sqlite3_create_function() API.

+

+In any aggregate function that takes a single argument, that argument +can be preceeded by the keyword DISTINCT. In such cases, duplicate +elements are filtered before being passed into the aggregate function. +For example, the function "count(distinct X)" will return the number +of distinct values of column X instead of the total number of non-null +values in column X. +

+ diff --git a/www/omitted.tcl b/www/omitted.tcl index aa61274588..ef2c8c70d9 100644 --- a/www/omitted.tcl +++ b/www/omitted.tcl @@ -1,7 +1,7 @@ # # Run this script to generated a omitted.html output file # -set rcsid {$Id: omitted.tcl,v 1.8 2005/03/19 01:41:22 drh Exp $} +set rcsid {$Id: omitted.tcl,v 1.9 2005/09/11 11:56:28 drh Exp $} source common.tcl header {SQL Features That SQLite Does Not Implement} puts { @@ -56,11 +56,6 @@ feature {Nested transactions} { The current implementation only allows a single active transaction. } -feature {The COUNT(DISTINCT X) function} { - You can accomplish the same thing using a subquery, like this:
-   SELECT count(x) FROM (SELECT DISTINCT x FROM tbl); -} - feature {RIGHT and FULL OUTER JOIN} { LEFT OUTER JOIN is implemented, but not RIGHT OUTER JOIN or FULL OUTER JOIN.
avg(X)