Avoid unnecessary sort operations when running one of the percentile
aggregates as a window function. FossilOrigin-Name: 5d311536211eb1e3c887ceb7e6516d3900e6eebbccc8c445dd43cdd556182728
This commit is contained in:
parent
129767958a
commit
ad8ec9db63
@ -62,6 +62,29 @@
|
||||
**
|
||||
** (14) A separate percentile_cont(Y,X) function is the equivalent of
|
||||
** percentile(Y,X*100.0).
|
||||
**
|
||||
** (15) All three SQL functions implemented by this module can also be
|
||||
** used as window-functions.
|
||||
**
|
||||
** Implementation notes as of 2024-08-31:
|
||||
**
|
||||
** * The regular aggregate-function versions of the median(), percentile(),
|
||||
** and percentile_cont() routines work by accumulating all values in
|
||||
** an array of doubles, then sorting that array using a quicksort
|
||||
** before computing the answer. Thus the runtime is O(NlogN) where
|
||||
** N is the number of rows of input.
|
||||
**
|
||||
** * For the window-function versions of these routines, the array of
|
||||
** inputs is sorted as soon as the first value is computed. Thereafter,
|
||||
** the array is kept in sorted order using an insert-sort. This
|
||||
** results in O(N*K) performance where K is the size of the window.
|
||||
** One can devise alternative implementations that give O(N*logN*logK)
|
||||
** performance, but they require more complex logic and data structures.
|
||||
** The developers have elected to keep the asymptotically slower
|
||||
** algorithm for now, for simplicity, under the theory that window
|
||||
** functions are seldom used and when they are, the window size K is
|
||||
** often small. The developers might revisit that decision later,
|
||||
** should the need arise.
|
||||
*/
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT1
|
||||
@ -78,6 +101,7 @@ struct Percentile {
|
||||
unsigned nAlloc; /* Number of slots allocated for a[] */
|
||||
unsigned nUsed; /* Number of slots actually used in a[] */
|
||||
char bSorted; /* True if a[] is already in sorted order */
|
||||
char bKeepSorted; /* True if advantageous to keep a[] sorted */
|
||||
double rPct; /* 1.0 more than the value for P */
|
||||
double *a; /* Array of Y values */
|
||||
};
|
||||
@ -85,7 +109,7 @@ struct Percentile {
|
||||
/*
|
||||
** Return TRUE if the input floating-point number is an infinity.
|
||||
*/
|
||||
static int isInfinity(double r){
|
||||
static int percentIsInfinity(double r){
|
||||
sqlite3_uint64 u;
|
||||
assert( sizeof(u)==sizeof(r) );
|
||||
memcpy(&u, &r, sizeof(u));
|
||||
@ -93,13 +117,55 @@ static int isInfinity(double r){
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if two doubles differ by 0.001 or less
|
||||
** Return TRUE if two doubles differ by 0.001 or less.
|
||||
*/
|
||||
static int sameValue(double a, double b){
|
||||
static int percentSameValue(double a, double b){
  /* The two inputs are considered the same when their difference lies
  ** within the closed interval [-0.001, +0.001].  If the difference is
  ** NaN, both comparisons below are false and the result is 0. */
  const double rDiff = a - b;
  if( rDiff>0.001 ) return 0;
  return rDiff>=-0.001;
}
|
||||
|
||||
#if 0
/* Debugging aid:  assert() that every adjacent pair of elements in
** p->a[0..nUsed-1] is in non-decreasing order.  This routine is compiled
** out by default; change the "#if 0" above to enable it for testing.
*/
static void percentAssertSorted(Percentile *p){
  unsigned k;
  for(k=1; k<p->nUsed; k++){
    assert( p->a[k-1]<=p->a[k] );
  }
}
#else
/* In normal builds the check expands to nothing */
# define percentAssertSorted(X)
#endif
|
||||
|
||||
/*
|
||||
** Search p (which must have p->bSorted) looking for an entry with
|
||||
** value y. Return the index of that entry.
|
||||
**
|
||||
** If bExact is true, return -1 if the entry is not found.
|
||||
**
|
||||
** If bExact is false, return the index at which a new entry with
|
||||
** value y should be insert in order to keep the values in sorted
|
||||
** order. The smallest return value in this case will be 0, and
|
||||
** the largest return value will be p->nUsed.
|
||||
*/
|
||||
static int percentBinarySearch(Percentile *p, double y, int bExact){
|
||||
int iFirst = 0; /* First element of search range */
|
||||
int iLast = p->nUsed - 1; /* Last element of search range */
|
||||
while( iLast>=iFirst ){
|
||||
int iMid = (iFirst+iLast)/2;
|
||||
double x = p->a[iMid];
|
||||
if( x<y ){
|
||||
iFirst = iMid + 1;
|
||||
}else if( x>y ){
|
||||
iLast = iMid - 1;
|
||||
}else{
|
||||
return iMid;
|
||||
}
|
||||
}
|
||||
if( bExact ) return -1;
|
||||
return iFirst;
|
||||
}
|
||||
|
||||
/*
|
||||
** The "step" function for percentile(Y,P) is called once for each
|
||||
** input row.
|
||||
@ -145,7 +211,7 @@ static void percentStep(sqlite3_context *pCtx, int argc, sqlite3_value **argv){
|
||||
** from any prior row, per Requirement (2). */
|
||||
if( p->rPct==0.0 ){
|
||||
p->rPct = rPct+1.0;
|
||||
}else if( !sameValue(p->rPct,rPct+1.0) ){
|
||||
}else if( !percentSameValue(p->rPct,rPct+1.0) ){
|
||||
sqlite3_result_error(pCtx, "2nd argument to percentile() is not the "
|
||||
"same for all input rows", -1);
|
||||
return;
|
||||
@ -165,7 +231,7 @@ static void percentStep(sqlite3_context *pCtx, int argc, sqlite3_value **argv){
|
||||
|
||||
/* Throw an error if the Y value is infinity or NaN */
|
||||
y = sqlite3_value_double(argv[0]);
|
||||
if( isInfinity(y) ){
|
||||
if( percentIsInfinity(y) ){
|
||||
sqlite3_result_error(pCtx, "Inf input to percentile()", -1);
|
||||
return;
|
||||
}
|
||||
@ -183,55 +249,26 @@ static void percentStep(sqlite3_context *pCtx, int argc, sqlite3_value **argv){
|
||||
p->nAlloc = n;
|
||||
p->a = a;
|
||||
}
|
||||
p->a[p->nUsed++] = y;
|
||||
assert( p->nUsed>=1 );
|
||||
if( p->nUsed==1 ){
|
||||
if( p->nUsed==0 ){
|
||||
p->a[p->nUsed++] = y;
|
||||
p->bSorted = 1;
|
||||
}else if( p->bSorted && p->a[p->nUsed-2]>y ){
|
||||
}else if( !p->bSorted || y>=p->a[p->nUsed-1] ){
|
||||
p->a[p->nUsed++] = y;
|
||||
}else if( p->bKeepSorted ){
|
||||
int i;
|
||||
percentAssertSorted(p);
|
||||
i = percentBinarySearch(p, y, 0);
|
||||
if( i<p->nUsed ){
|
||||
memmove(&p->a[i+1], &p->a[i], (p->nUsed-i)*sizeof(p->a[0]));
|
||||
}
|
||||
p->a[i] = y;
|
||||
p->nUsed++;
|
||||
}else{
|
||||
p->a[p->nUsed++] = y;
|
||||
p->bSorted = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** The "inverse" function for percentile(Y,P) is called to remove a
|
||||
** row that was previously inserted by "step".
|
||||
*/
|
||||
static void percentInverse(sqlite3_context *pCtx,int argc,sqlite3_value **argv){
|
||||
Percentile *p;
|
||||
int eType;
|
||||
double y;
|
||||
int i;
|
||||
assert( argc==2 || argc==1 );
|
||||
|
||||
/* Allocate the session context. */
|
||||
p = (Percentile*)sqlite3_aggregate_context(pCtx, sizeof(*p));
|
||||
assert( p!=0 );
|
||||
|
||||
/* Ignore rows for which Y is NULL */
|
||||
eType = sqlite3_value_type(argv[0]);
|
||||
if( eType==SQLITE_NULL ) return;
|
||||
|
||||
/* If not NULL, then Y must be numeric. Otherwise throw an error.
|
||||
** Requirement 4 */
|
||||
if( eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT ){
|
||||
return;
|
||||
}
|
||||
|
||||
/* Ignore the Y value if it is infinity or NaN */
|
||||
y = sqlite3_value_double(argv[0]);
|
||||
if( isInfinity(y) ){
|
||||
return;
|
||||
}
|
||||
|
||||
/* Find and remove the row */
|
||||
for(i=0; i<p->nUsed && p->a[i]!=y; i++){}
|
||||
if( i<p->nUsed ){
|
||||
p->a[i] = p->a[p->nUsed-1];
|
||||
p->nUsed--;
|
||||
}
|
||||
p->bSorted = p->nUsed<=1;
|
||||
}
|
||||
|
||||
/*
|
||||
** Sort an array of doubles.
|
||||
**
|
||||
@ -290,9 +327,59 @@ static void sortDoubles(double *a, int n){
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Called to compute the final output of percentile() and to clean
|
||||
** up all allocated memory.
|
||||
** The "inverse" function for percentile(Y,P) is called to remove a
|
||||
** row that was previously inserted by "step".
|
||||
*/
|
||||
static void percentInverse(sqlite3_context *pCtx,int argc,sqlite3_value **argv){
  Percentile *pAgg;      /* Aggregate state shared with the step function */
  double rY;             /* Y value of the row being removed */
  int iHit;              /* Index of rY in pAgg->a[], or -1 if absent */
  assert( argc==2 || argc==1 );

  /* Fetch the aggregate context for this invocation */
  pAgg = (Percentile*)sqlite3_aggregate_context(pCtx, sizeof(*pAgg));
  assert( pAgg!=0 );

  /* Only INTEGER and FLOAT values of Y are looked up in the array.
  ** A NULL or any other datatype is silently ignored here; no error
  ** is raised by this routine. */
  switch( sqlite3_value_type(argv[0]) ){
    case SQLITE_INTEGER:
    case SQLITE_FLOAT:
      break;
    default:
      return;
  }

  /* Values rejected by percentIsInfinity() are likewise ignored */
  rY = sqlite3_value_double(argv[0]);
  if( percentIsInfinity(rY) ) return;

  /* The lookup below is a binary search, so the array must be sorted
  ** first.  Set bKeepSorted to record that keeping a[] sorted is now
  ** advantageous. */
  if( pAgg->bSorted ){
    percentAssertSorted(pAgg);
  }else{
    sortDoubles(pAgg->a, pAgg->nUsed);
    pAgg->bSorted = 1;
  }
  pAgg->bKeepSorted = 1;

  /* Locate one entry equal to rY and close the gap it leaves behind by
  ** shifting the tail of the array down one slot.  If there is no such
  ** entry, the array is left unchanged. */
  iHit = percentBinarySearch(pAgg, rY, 1);
  if( iHit>=0 ){
    unsigned nTail;
    pAgg->nUsed--;
    nTail = pAgg->nUsed - (unsigned)iHit;
    if( nTail>0 ){
      memmove(&pAgg->a[iHit], &pAgg->a[iHit+1], nTail*sizeof(pAgg->a[0]));
    }
  }
  percentAssertSorted(pAgg);
}
|
||||
|
||||
/*
|
||||
** Compute the final output of percentile(). Clean up all allocated
|
||||
** memory if and only if bIsFinal is true.
|
||||
*/
|
||||
static void percentCompute(sqlite3_context *pCtx, int bIsFinal){
|
||||
Percentile *p;
|
||||
@ -306,6 +393,8 @@ static void percentCompute(sqlite3_context *pCtx, int bIsFinal){
|
||||
if( p->bSorted==0 ){
|
||||
sortDoubles(p->a, p->nUsed);
|
||||
p->bSorted = 1;
|
||||
}else{
|
||||
percentAssertSorted(p);
|
||||
}
|
||||
ix = (p->rPct-1.0)*(p->nUsed-1)*0.01;
|
||||
i1 = (unsigned)ix;
|
||||
@ -318,6 +407,8 @@ static void percentCompute(sqlite3_context *pCtx, int bIsFinal){
|
||||
if( bIsFinal ){
|
||||
sqlite3_free(p->a);
|
||||
memset(p, 0, sizeof(*p));
|
||||
}else{
|
||||
p->bKeepSorted = 1;
|
||||
}
|
||||
}
|
||||
static void percentFinal(sqlite3_context *pCtx){
|
||||
@ -327,8 +418,6 @@ static void percentValue(sqlite3_context *pCtx){
|
||||
percentCompute(pCtx, 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
__declspec(dllexport)
|
||||
#endif
|
||||
@ -349,7 +438,6 @@ int sqlite3_percentile_init(
|
||||
SQLITE_UTF8|SQLITE_INNOCUOUS, 0,
|
||||
percentStep, percentFinal,
|
||||
percentValue, percentInverse, 0);
|
||||
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqlite3_create_window_function(db, "percentile_cont", 2,
|
||||
|
12
manifest
12
manifest
@ -1,5 +1,5 @@
|
||||
C Test\scases\sadded.
|
||||
D 2024-08-31T15:02:07.805
|
||||
C Avoid\sunnecessary\ssort\soperations\swhen\srunning\sone\sof\sthe\spercentile\naggregates\sas\sa\swindow\sfunction.
|
||||
D 2024-08-31T16:55:14.747
|
||||
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
|
||||
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
|
||||
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
|
||||
@ -410,7 +410,7 @@ F ext/misc/nextchar.c 7877914c2a80c2f181dd04c3dbef550dfb54c93495dc03da2403b5dd58
|
||||
F ext/misc/noop.c f1a21cc9b7a4e667e5c8458d80ba680b8bd4315a003f256006046879f679c5a0
|
||||
F ext/misc/normalize.c bd84355c118e297522aba74de34a4fd286fc775524e0499b14473918d09ea61f
|
||||
F ext/misc/pcachetrace.c f4227ce03fb16aa8d6f321b72dd051097419d7a028a9853af048bee7645cb405
|
||||
F ext/misc/percentile.c 89416b108569171be1d8dda4fa2687ad116ea969b4d129d02cf3dc1fd67fc87e
|
||||
F ext/misc/percentile.c 46627b7495c69344d384f667bb6c80ba2c4aeb779997a4e22fea1a39cd20beb9
|
||||
F ext/misc/prefixes.c 82645f79229877afab08c8b08ca1e7fa31921280906b90a61c294e4f540cd2a6
|
||||
F ext/misc/qpvtab.c fc189e127f68f791af90a487f4460ec91539a716daf45a0c357e963fd47cc06c
|
||||
F ext/misc/randomjson.c ef835fc64289e76ac4873b85fe12f9463a036168d7683cf2b773e36e6262c4ed
|
||||
@ -2211,8 +2211,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080
|
||||
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
|
||||
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
|
||||
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
|
||||
P 4d0e3df4b9c609755977b8a462126242d2be1310c0122a8d4ba76d98d32a7230
|
||||
R 06b03a21c6b126e146ea802b7b43139b
|
||||
P 25e68229843cc84978955817285550085d1306ba4ce3b0517dd00e5d05b9ae0a
|
||||
R 1b58d4d1990506055e066a3b56b6ff9a
|
||||
U drh
|
||||
Z a8a75faed57aa0d8766eb4f3377323de
|
||||
Z 10ad6a710202821993e7562cc60a54b3
|
||||
# Remove this line to create a well-formed Fossil manifest.
|
||||
|
@ -1 +1 @@
|
||||
25e68229843cc84978955817285550085d1306ba4ce3b0517dd00e5d05b9ae0a
|
||||
5d311536211eb1e3c887ceb7e6516d3900e6eebbccc8c445dd43cdd556182728
|
||||
|
Loading…
Reference in New Issue
Block a user