diff --git a/contrib/statmath/Makefile b/contrib/statmath/Makefile
new file mode 100644
index 0000000000..cafd36e2b4
--- /dev/null
+++ b/contrib/statmath/Makefile
@@ -0,0 +1,58 @@
+# Build and install rules for the statmath contrib module.
+# $Header: /cvsroot/pgsql/contrib/statmath/Attic/Makefile,v 1.1 2000/07/04 14:36:03 wieck Exp $
+#
+
+TOPDIR=../..
+
+include ../Makefile.global
+
+NAME = statmath
+# DOCS, SQLS, MODS and BINS are the lists walked by the install_* targets.
+PROGRAM = 
+OBJS = $(NAME).o
+DOCS = README.$(NAME)
+SQLS = $(NAME).sql
+BINS = 
+EXAMPLES= 
+MODS = $(NAME)$(DLSUFFIX)
+
+CFLAGS += -I. $(CFLAGS_SL)
+# Generated files that "clean" removes in addition to objects and modules.
+OTHER_CLEAN = $(SQLS)
+
+all: $(MODS) $(SQLS)
+# Generate the SQL script, replacing MODULE_PATHNAME by the installed module path.
+%.sql: %.sql.in
+	$(SED) "s|MODULE_PATHNAME|$(CONTRIB_MODDIR)/$(NAME)$(DLSUFFIX)|" < $< > $@
+
+install: install_doc install_sql install_mod install_bin
+# Each install_* target copies its file list into the matching CONTRIB_* directory.
+install_doc:
+	for inst_file in $(DOCS); do \
+		$(INSTALL) $(INSTL_LIB_OPTS) $$inst_file $(CONTRIB_DOCDIR); \
+	done
+
+install_sql:
+	for inst_file in $(SQLS); do \
+		$(INSTALL) $(INSTL_LIB_OPTS) $$inst_file $(CONTRIB_SQLDIR); \
+	done
+
+install_mod:
+	for inst_file in $(MODS); do \
+		$(INSTALL) $(INSTL_SHLIB_OPTS) $$inst_file $(CONTRIB_MODDIR); \
+	done
+
+install_bin:
+	for inst_file in $(BINS); do \
+		$(INSTALL) $(INSTL_EXE_OPTS) $$inst_file $(CONTRIB_BINDIR); \
+	done
+# Write the compiler-generated dependencies of all C files to "depend".
+depend dep:
+	$(CC) -MM -MG $(CFLAGS) *.c > depend
+
+clean:
+	$(RM) *~ $(OBJS) $(MODS) $(PROGRAM) depend $(OTHER_CLEAN) core log
+# Include the dependency file once it has been generated.
+ifeq (depend,$(wildcard depend))
+include depend
+endif
diff --git a/contrib/statmath/README.statmath b/contrib/statmath/README.statmath
new file mode 100644
index 0000000000..7db5f7c673
--- /dev/null
+++ b/contrib/statmath/README.statmath
@@ -0,0 +1,39 @@
+Statistical aggregate functions for PostgreSQL.
+
+This module provides some aggregate functions for statistical
+mathematics. A new datatype holding two double precision
+floating point values is required by them.
+
+The aggregates are:
+
+average(float8)     average value computed by
+
+                  n
+                 ___
+        _     1  \
+        x  = ---  >   x
+              n  /__   i
+                 i=1
+
+
+variance(float8)    variance computed by
+
+                  n
+                 ___
+         2    1  \          _ 2
+        s  = ---  >   (x  - x)
+             n-1 /__    i
+                 i=1
+
+
+
+stddev(float8)      standard deviation computed by
+
+                 _____
+                /  2
+        s  =   /  s
+             \/
+
+
+--
+Jan Wieck
diff --git a/contrib/statmath/statmath.c b/contrib/statmath/statmath.c
new file mode 100644
index 0000000000..f248a2890a
--- /dev/null
+++ b/contrib/statmath/statmath.c
@@ -0,0 +1,319 @@
+/* ----------
+ * Module statmath
+ *
+ * statistical aggregates for average, variance and standard
+ * deviation.
+ *
+ * Jan Wieck
+ * ----------
+ */
+
+/*
+ * NOTE(review): the header names of the system includes were missing
+ * from this copy of the patch.  They are restored from the library
+ * symbols the file uses (sprintf, strtod, fabs/sqrt, DBL_*, errno);
+ * confirm against the upstream commit.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <float.h>
+#include <errno.h>
+
+#include "postgres.h"
+#include "utils/palloc.h"
+
+
+/* ----------
+ * Declarations
+ *
+ * statmath_stateval_in()    Input function for state transition variable
+ * statmath_stateval_out()   Output function for state transition variable
+ * statmath_collect()        State transition function to collect items
+ * statmath_average_fin()    Final function for average aggregate
+ * statmath_variance_fin()   Final function for variance aggregate
+ * statmath_stddev_fin()     Final function for deviation aggregate
+ * ----------
+ */
+
+float64 statmath_stateval_in(char *str);
+char *statmath_stateval_out(float64 sval);
+
+float64 statmath_collect(float64 sval, float64 item);
+float64 statmath_average_fin(float64 sval, int4 n);
+float64 statmath_variance_fin(float64 sval, int4 n);
+float64 statmath_stddev_fin(float64 sval, int4 n);
+
+
+/* ----------
+ * statmath_checkval -
+ *
+ * Bounds checking for float8 values in Postgres.
+ * Raises elog(ERROR) when the magnitude of val exceeds DBL_MAX
+ * (overflow) or when val is nonzero but its magnitude is below
+ * DBL_MIN (underflow).
+ * ----------
+ */
+static void
+statmath_checkval(double val)
+{
+	double		magnitude = fabs(val);
+
+	if (magnitude > DBL_MAX)
+		elog(ERROR, "statmath: overflow");
+	if (val != 0.0 && magnitude < DBL_MIN)
+		elog(ERROR, "statmath: underflow");
+}
+
+
+/* ----------
+ * statmath_stateval_in -
+ *
+ * Input function for the state transition value data type.
+ * The input string consists of two float8 values separated by a
+ * colon ':' (for example "0:0").  A NULL input yields NULL.  An
+ * empty or malformed field, trailing garbage or an out-of-range
+ * number is rejected with elog(ERROR).
+ * ----------
+ */
+float64
+statmath_stateval_in(char *str)
+{
+	float64		retval;
+	double		tmp;
+	char	   *cp1, *cp2;
+
+	if (!str)
+		return (float64) NULL;
+
+	retval = (float64) palloc(sizeof(float64data) * 2);
+
+	/*
+	 * First number: strtod() must consume at least one character
+	 * (otherwise it leaves cp1 == str) and has to stop at the colon.
+	 */
+	errno = 0;
+	tmp = strtod(str, &cp1);
+	if (cp1 == str || *cp1 != ':' || errno == ERANGE)
+		elog(ERROR, "statmath: illegal input format '%s'", str);
+	statmath_checkval(tmp);
+	retval[0] = tmp;
+
+	/*
+	 * Second number: starts right behind the colon, must not be empty
+	 * and has to extend to the end of the string.
+	 */
+	cp1++;
+	errno = 0;
+	tmp = strtod(cp1, &cp2);
+	if (cp2 == cp1 || *cp2 != '\0' || errno == ERANGE)
+		elog(ERROR, "statmath: illegal input format '%s'", str);
+	statmath_checkval(tmp);
+	retval[1] = tmp;
+
+	return retval;
+}
+
+
+/* ----------
+ * statmath_stateval_out -
+ *
+ * Output function for the state transition value data type.
+ * Yields "v1:v2" with DBL_DIG significant digits each in
+ * pstrdup()'ed memory; a NULL input is printed as "(null)".
+ * ----------
+ */
+char *
+statmath_stateval_out(float64 sval)
+{
+	char		buf[1024];
+	double		first, second;
+
+	if (!sval)
+		return pstrdup("(null)");
+
+	/* buf is ample: two %g fields of DBL_DIG digits need under 50 bytes */
+	first = sval[0];
+	second = sval[1];
+	sprintf(buf, "%.*g:%.*g", DBL_DIG, first, DBL_DIG, second);
+	return pstrdup(buf);
+}
+
+
+/* ----------
+ * statmath_collect -
+ *
+ * State transition function to collect data for the variance
+ * and standard deviation aggregates.
+ * The state transition variable holds 2 float8 values. The
+ * first is the sum of the items, the second the sum of the
+ * item quadratic products.
+ * Returns a newly palloc'ed state value, or NULL if either
+ * argument is NULL.  A sum that leaves the float8 range is
+ * rejected by statmath_checkval().
+ * The input state itself is left unmodified.
+ * ----------
+ */
+float64
+statmath_collect(float64 sval, float64 item)
+{
+	float64		retval;
+	double		sum;
+	double		sumsq;
+
+	if (!sval || !item)
+		return (float64) NULL;
+
+	retval = (float64) palloc(sizeof(float64data) * 2);
+
+	/*
+	 * Advance the running sum and the running sum of squares.
+	 */
+	sum = sval[0] + *item;
+	statmath_checkval(sum);
+	retval[0] = sum;
+
+	sumsq = sval[1] + *item * *item;
+	statmath_checkval(sumsq);
+	retval[1] = sumsq;
+
+	return retval;
+}
+
+
+/* ----------
+ * statmath_average_fin -
+ *
+ * Final computation function for the average aggregate.
+ * Divides the collected sum by the item count n.  Returns NULL
+ * for a NULL sum and 0.0 if no items were collected.
+ * ----------
+ */
+float64
+statmath_average_fin(float64 sum, int4 n)
+{
+	float64		retval;
+	double		avg;
+
+	if (!sum)
+		return (float64) NULL;
+
+	retval = (float64) palloc(sizeof(float64data));
+
+	/*
+	 * Avoid division by zero if no items collected
+	 */
+	if (n == 0)
+	{
+		*retval = 0.0;
+		return retval;
+	}
+
+	avg = *sum / (double) n;
+	statmath_checkval(avg);
+	*retval = avg;
+
+	return retval;
+}
+
+
+/* ----------
+ * statmath_variance_fin -
+ *
+ * Final computation function for the variance aggregate.
+ * sval[0] is the sum of the items, sval[1] the sum of their
+ * squares and n the number of items.  Returns NULL for a NULL
+ * state and 0.0 for fewer than 2 items; the result is never
+ * negative.
+ * ----------
+ */
+float64
+statmath_variance_fin(float64 sval, int4 n)
+{
+	float64		retval;
+	double		avg;
+	double		variance;
+
+	if (!sval)
+		return (float64) NULL;
+
+	retval = (float64) palloc(sizeof(float64data));
+
+	/*
+	 * Avoid division by zero if less than 2 items collected
+	 */
+	if (n < 2)
+	{
+		*retval = 0.0;
+		return retval;
+	}
+
+	/*
+	 * Sample variance from the two sums, without a second pass over
+	 * the items: (sumsq - sum * avg) / (n - 1).
+	 */
+	avg = sval[0] / (double) n;
+	variance = (sval[1] - sval[0] * avg) / ((double) n - 1.0);
+
+	/*
+	 * Both terms of the difference are nearly equal for closely
+	 * spaced items, so roundoff can push the result slightly below
+	 * zero.  A variance is never negative: clamp it.
+	 */
+	if (variance < 0.0)
+		variance = 0.0;
+
+	statmath_checkval(variance);
+	*retval = variance;
+
+	return retval;
+}
+
+
+/* ----------
+ * statmath_stddev_fin -
+ *
+ * Final computation function for the standard deviation aggregate
+ * (the square root of the sample variance).
+ * ----------
+ */
+float64
+statmath_stddev_fin(float64 sval, int4 n)
+{
+	/*
+	 * Final function of the stddev aggregate: square root of the sample
+	 * variance computed from sum sval[0], sum of squares sval[1] and
+	 * count n.  NULL state gives NULL, fewer than two items give 0.0.
+	 */
+	float64		retval;
+	double		avg, variance, stddev;
+
+	if (!sval)
+		return (float64) NULL;
+
+	retval = (float64) palloc(sizeof(float64data));
+
+	if (n < 2)
+	{
+		*retval = 0.0;
+		return retval;
+	}
+
+	avg = sval[0] / (double) n;
+	variance = (sval[1] - sval[0] * avg) / ((double) n - 1.0);
+
+	/*
+	 * Roundoff in the difference above can yield a slightly negative
+	 * variance for (nearly) identical items.  sqrt() of that is NaN,
+	 * which statmath_checkval() does not catch, so clamp to zero.
+	 */
+	if (variance < 0.0)
+		variance = 0.0;
+
+	stddev = sqrt(variance);
+	statmath_checkval(stddev);
+	*retval = stddev;
+	return retval;
+}
+
+
diff --git a/contrib/statmath/statmath.sql.in b/contrib/statmath/statmath.sql.in
new file mode 100644
index 0000000000..ce01b460ad
--- /dev/null
+++ b/contrib/statmath/statmath.sql.in
@@ -0,0 +1,91 @@
+-- statmath.sql
+--
+-- Install the statistical aggregates
+--
+
+--
+-- Create the new data type for the state transition variable
+--
+CREATE FUNCTION statmath_stateval_in(opaque)
+    RETURNS statmath_stateval
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+CREATE FUNCTION statmath_stateval_out(opaque)
+    RETURNS opaque
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+CREATE TYPE statmath_stateval (
+    internallength = 16,
+    input = statmath_stateval_in,
+    output = statmath_stateval_out,
+    alignment = double
+);
+
+--
+-- Create the statistic data collector used in the aggregates
+--
+CREATE FUNCTION statmath_collect(statmath_stateval, float8)
+    RETURNS statmath_stateval
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+--
+-- Create the final functions for the three aggregates
+--
+CREATE FUNCTION statmath_average_fin(float8, int4)
+    RETURNS float8
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+CREATE FUNCTION statmath_variance_fin(statmath_stateval, int4)
+    RETURNS float8
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+CREATE FUNCTION statmath_stddev_fin(statmath_stateval, int4)
+    RETURNS float8
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+--
+-- Create the aggregates themselves.
+-- In each of them sfunc1 accumulates the input values in state 1,
+-- state 2 is advanced by int4inc starting from '0', and the final
+-- function receives both states (state 2 as its item count n).
+--
+CREATE AGGREGATE average (
+    basetype = float8,
+    stype1 = float8,
+    stype2 = int4,
+    sfunc1 = float8pl,
+    sfunc2 = int4inc,
+    finalfunc = statmath_average_fin,
+    initcond1 = '0',
+    initcond2 = '0'
+);
+
+CREATE AGGREGATE variance (
+    basetype = float8,
+    stype1 = statmath_stateval,
+    stype2 = int4,
+    sfunc1 = statmath_collect,
+    sfunc2 = int4inc,
+    finalfunc = statmath_variance_fin,
+    initcond1 = '0:0',
+    initcond2 = '0'
+);
+
+CREATE AGGREGATE stddev (
+    basetype = float8,
+    stype1 = statmath_stateval,
+    stype2 = int4,
+    sfunc1 = statmath_collect,
+    sfunc2 = int4inc,
+    finalfunc = statmath_stddev_fin,
+    initcond1 = '0:0',
+    initcond2 = '0'
+);
+
+