Add integer aggregator to /contrib.

mlw
2002-02-25 03:45:27 +00:00 · 2002-02-25 03:45:27 +00:00 · 2146d8c6a0
parent e105f9a119
commit 2146d8c6a0
5 changed files with 403 additions and 2 deletions
--- a/contrib/README
+++ b/contrib/README
@ -71,10 +71,14 @@ fuzzystrmatch -
 	Levenshtein, metaphone, and soundex fuzzy string matching
 	by Joe Conway <joseph.conway@home.com>, Joel Burton <jburton@scw.org>

+intagg -
+	Integer aggregator
+	by  mlw <markw@mohawksoft.com>
+
+
 intarray -
 	Index support for arrays of int4, using GiST
-	by Teodor Sigaev <teodor@stack.net> and Oleg Bartunov
-	<oleg@sai.msu.su>.
+	by Teodor Sigaev <teodor@stack.net> and Oleg Bartunov <oleg@sai.msu.su>

 ipc_check -
 	Simple test script to help in configuring IPC.
--- a/contrib/intagg/Makefile
+++ b/contrib/intagg/Makefile
@ -0,0 +1,31 @@
+#############################################
+# Makefile for integer aggregator
+# Copyright (C) 2001 Digital Music Network.
+# by Mark L. Woodward
+#
+# Builds the int_aggregate shared library and generates int_aggregate.sql
+# from int_aggregate.sql.in.
+#
+subdir = contrib/intagg
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+# NAME is the base name shared by the library and the SQL script.
+NAME=int_aggregate
+SONAME	= $(NAME)$(DLSUFFIX)
+MODULES = int_aggregate
+# NOTE(review): DATA_built names the shared library rather than the generated
+# SQL script -- confirm this is what contrib-global.mk expects.
+DATA_built = int_aggregate.so
+# The spelling "aggrigate" matches the README file name added in this directory.
+DOCS = README.int_aggrigate
+SQLS=int_aggregate.sql
+
+include $(top_srcdir)/contrib/contrib-global.mk
+
+# Generate the SQL script: replace the MODULE_FILENAME placeholder with
+# $libdir/$(NAME) (the doubled '$' keeps make from expanding $libdir).
+%.sql: %.sql.in
+	sed 's,MODULE_FILENAME,$$libdir/$(NAME),g' $< >$@
+
+# Default target: the shared library plus the generated SQL script.
+all : $(SONAME) $(SQLS)
+
+
+# Install the shared library into $(DESTDIR)$(pkglibdir).
+install : all
+	$(INSTALL_SHLIB) $(SONAME) $(DESTDIR)$(pkglibdir)
+		
+
+# Remove the shared library and the generated SQL script.
+clean :
+	rm -f $(SONAME)
+	rm -f $(SQLS)
--- a/contrib/intagg/README.int_aggrigate
+++ b/contrib/intagg/README.int_aggrigate
@ -0,0 +1,55 @@
+Integer aggregator/enumerator.
+
+Many database systems have the notion of a one to many table.
+
+A one to many table usually sits between two indexed tables, 
+as: 
+
+create table one_to_many(left int, right int) ;
+
+And it is used like this:
+
+SELECT right.* from right JOIN one_to_many ON (right.id = one_to_many.right) 
+	WHERE  one_to_many.left = item;
+
+This will return all the items in the right hand table for an entry 
+in the left hand table. This is a very common construct in SQL.
+
+Now, this methodology can be cumbersome with a very large number of
+entries in the one_to_many table. Depending on the order in which
+data was entered, a join like this could result in an index scan
+and a fetch for each right hand entry in the table for a particular
+left hand entry.
+
+If you have a very dynamic system, there is not much you can do. 
+However, if you have some data which is fairly static, you can
+create a summary table with the aggregator.
+
+CREATE TABLE summary as SELECT left, int_array_aggregate(right) 
+	AS right FROM one_to_many GROUP BY left;
+
+This will create a table with one row per left item, and an array
+of right items. Now this is pretty useless without some way of using
+the array; that's why there is an array enumerator.
+
+SELECT left, int_array_enum(right) FROM summary WHERE left = item;
+
+The above query using int_array_enum, produces the same results as:
+
+SELECT left, right FROM one_to_many WHERE left = item;
+
+The difference is that the query against the summary table has to get
+only one row from the table, whereas the query against "one_to_many"
+must index scan and fetch a row for each entry.
+
+On our system, an EXPLAIN shows a query with a cost of 8488 gets reduced
+to a cost of 329. The original query is a join against the one_to_many
+table; rewritten to use the summary table, it looks like this:
+
+select right, count(right) from 
+(
+	select left, int_array_enum(right) as right from summary join
+                (select left from left_table where left = item) as lefts
+                 ON (summary.left = lefts.left ) 
+) as list group by right order by count desc ;
+
+
--- a/contrib/intagg/int_aggregate.c
+++ b/contrib/intagg/int_aggregate.c
@ -0,0 +1,271 @@
+/*
+ * Integer array aggregator / enumerator
+ *
+ * Mark L. Woodward 
+ * DMN Digital Music Network.
+ * www.dmn.com
+ *
+ * Copyright (C) Digital Music Network
+ * December 20, 2001
+ *
+ * This file is the property of the Digital Music Network (DMN).
+ * It is being made available to users of the PostgreSQL system
+ * under the BSD license.
+ *
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <string.h>
+#include "postgres.h"
+#include "access/heapam.h"
+#include "catalog/catname.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_proc.h"
+#include "executor/executor.h"
+#include "utils/fcache.h"
+#include "utils/sets.h"
+#include "utils/syscache.h"
+#include "access/tupmacs.h"
+#include "access/xact.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/lsyscache.h"
+
+
+/*
+ * This is actually a postgres version of a one-dimensional array: an
+ * ArrayType header followed by the fields this module fills in by hand.
+ * NOTE(review): presumably "items" and "lower" sit where PostgreSQL expects
+ * the dimension and lower-bound words of a 1-D array -- confirm against
+ * utils/array.h.
+ */
+
+typedef struct agg
+{
+	ArrayType a;		/* array header; this file sets its size, ndim and flags */
+	int 	items;		/* number of values currently stored in array[] */
+	int 	lower;		/* while aggregating: number of allocated slots (compared
+						 * against items); set to 0 by ShrinkPGArray */
+	int4	array[1];	/* first of the stored values; PGARRAY_SIZE(n) sizes the
+						 * allocation so that n values fit */
+}PGARRAY;
+
+/*
+ * This is used to keep track of our position during enumeration.
+ * int_enum stores one of these in fcinfo->context between calls.
+ */
+typedef struct callContext
+{
+	PGARRAY *p;		/* array being enumerated (the argument, or a detoasted copy) */
+	int num;		/* index of the next element of p->array to return */
+	int flags;		/* TOASTED when p is a detoasted copy that must be pfree'd */
+}CTX;
+
+/* CTX.flags bit: the enumerated array is a detoasted copy owned by int_enum */
+#define TOASTED		1
+/* Number of element slots given to a newly created aggregation array */
+#define START_NUM 	8
+/*
+ * Bytes needed for a PGARRAY holding n values. PGARRAY already contains one
+ * array slot, hence the "- 1". The argument is parenthesized so that any
+ * expression can be passed safely.
+ */
+#define PGARRAY_SIZE(n) (sizeof(PGARRAY) + (((n)-1)*sizeof(int4)))
+
+/* File-local helpers that manage the aggregation working array */
+static PGARRAY * GetPGArray(int4 state, int fAdd);
+static PGARRAY *ShrinkPGArray(PGARRAY *p);
+
+/*
+ * SQL-callable entry points.
+ * NOTE(review): int_agg_final_count is declared and registered below but no
+ * definition appears in this file -- confirm whether it is still needed.
+ */
+Datum int_agg_state(PG_FUNCTION_ARGS);
+Datum int_agg_final_count(PG_FUNCTION_ARGS);
+Datum int_agg_final_array(PG_FUNCTION_ARGS);
+Datum int_enum(PG_FUNCTION_ARGS);
+
+/* Register each entry point for the version-1 function call convention */
+PG_FUNCTION_INFO_V1(int_agg_state);
+PG_FUNCTION_INFO_V1(int_agg_final_count);
+PG_FUNCTION_INFO_V1(int_agg_final_array);
+PG_FUNCTION_INFO_V1(int_enum);
+
+/* 
+ * Manage the aggregation state of the array 
+ * You need to specify the correct memory context, or it will vanish! 
+ *
+ * state: the aggregate's transition value, which is really the address of
+ *        the working PGARRAY squeezed into an int4 (0 means "no array yet").
+ *        NOTE(review): this round-trip only works where a pointer fits in
+ *        an int4 -- confirm for 64-bit platforms.
+ * fAdd:  non-zero when the caller is about to append a value, in which case
+ *        a full array is doubled in size first.
+ *
+ * Returns the (possibly newly created or relocated) working array.
+ *
+ * MemoryContextAlloc and repalloc report failure through elog(ERROR) and do
+ * not return NULL, so their results are used without NULL checks.
+ */
+static PGARRAY * GetPGArray(int4 state, int fAdd)
+{
+	PGARRAY *p = (PGARRAY *) state;
+
+	if(!state)
+	{
+		/* New array, allocated so it survives across transition calls */
+		int cb = PGARRAY_SIZE(START_NUM);
+
+		p = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cb);
+
+		p->a.size = cb;
+		p->a.ndim= 0;
+		p->a.flags = 0;
+		p->items = 0;
+		p->lower= START_NUM;	/* "lower" doubles as the capacity for now */
+		return p;
+	}
+
+	if(fAdd && p->items >= p->lower)
+	{
+		/* Ensure array has space: double the number of slots */
+		int n = p->lower + p->lower;
+		int cbNew = PGARRAY_SIZE(n);
+
+		p = (PGARRAY *) repalloc(p, cbNew);
+		p->a.size = cbNew;
+		p->lower = n;
+	}
+	return p;
+}
+
+/*
+ * Shrinks the array to its actual size and moves it into the standard
+ * memory allocation context, frees working memory.
+ *
+ * p: working array built by GetPGArray, or NULL.
+ *
+ * Returns a palloc'd copy holding exactly p->items values with its header
+ * fields filled in, or NULL when p is NULL. The working array p is freed.
+ *
+ * palloc reports failure through elog(ERROR) and does not return NULL, so
+ * its result is used without a NULL check.
+ */
+static PGARRAY *ShrinkPGArray(PGARRAY *p)
+{
+	PGARRAY *pnew;
+	int cb;
+
+	if(!p)
+		return NULL;
+
+	/* get target size */
+	cb = PGARRAY_SIZE(p->items);
+
+	/* palloc allocates in the current memory context */
+	pnew = palloc(cb);
+
+	/* Fix up the fields in the new structure, so Postgres understands */
+	memcpy(pnew, p, cb);
+	pnew->a.size = cb;
+	pnew->a.ndim=1;
+	pnew->a.flags = 0;
+	/*
+	 * NOTE(review): "lower" held the capacity while aggregating and is reset
+	 * to 0 here; if it is the array's lower bound, PostgreSQL arrays are
+	 * conventionally 1-based -- confirm 0 is intended.
+	 */
+	pnew->lower = 0;
+
+	pfree(p);
+	return pnew;
+}
+
+/*
+ * Transition function of the aggregate: called once per input row.
+ * Argument 0 is the current state (the working array's address carried as
+ * an int4), argument 1 is the value to append. Returns the new state.
+ */
+Datum int_agg_state(PG_FUNCTION_ARGS)
+{
+	int4 state = PG_GETARG_INT32(0);
+	int4 value = PG_GETARG_INT32(1);
+	PGARRAY *arr;
+
+	/* Fetch the working array, creating or growing it as required */
+	arr = GetPGArray(state, 1);
+
+	if(arr == NULL)
+	{
+		elog(ERROR,"No aggregate storage\n");
+	}
+	else if(arr->lower <= arr->items)
+	{
+		elog(ERROR,"aggregate storage too small\n");
+	}
+	else
+	{
+		/* Append the value in the next free slot */
+		arr->array[arr->items] = value;
+		arr->items++;
+	}
+	PG_RETURN_INT32(arr);
+}
+
+/*
+ * Final function of the integer aggregator. Trims the working array to the
+ * values actually collected and returns it as a one-dimensional integer
+ * array, or NULL when no array could be produced.
+ */
+Datum int_agg_final_array(PG_FUNCTION_ARGS)
+{
+	int4 state = PG_GETARG_INT32(0);
+	PGARRAY *working = GetPGArray(state, 0);
+	PGARRAY *result = ShrinkPGArray(working);
+
+	if(!result)
+	{
+		PG_RETURN_NULL();
+	}
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * Set-returning enumerator: accepts an integer array and hands back one
+ * element per call. The position is remembered in a CTX hung off
+ * fcinfo->context; once every element has been returned the context is
+ * released and the end of the set is signalled through ReturnSetInfo.
+ */
+Datum int_enum(PG_FUNCTION_ARGS)
+{
+	PGARRAY *arg = (PGARRAY *) PG_GETARG_POINTER(0);
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *)fcinfo->resultinfo;
+	CTX *ctx;
+
+	if(arg == NULL)
+	{
+		elog(NOTICE, "No data sent\n");
+		return 0;
+	}
+	if(rsinfo == NULL)
+	{
+		elog(ERROR, "No ReturnSetInfo sent! function must be declared returning a 'setof' integer");
+		PG_RETURN_NULL();
+	}
+
+	if(fcinfo->context)
+	{
+		/* Continuing an enumeration that is already under way */
+		ctx = (CTX *) fcinfo->context;
+	}
+	else
+	{
+		/* First call: set up the working context */
+		ctx = (CTX *) palloc(sizeof(CTX));
+		if(ctx == NULL)
+		{
+			elog(ERROR, "CTX Alocation failed\n");
+			PG_RETURN_NULL();
+		}
+
+		if(VARATT_IS_EXTENDED(arg))
+		{
+			/* Toasted input: work on a detoasted copy that we own */
+			ctx->p = (PGARRAY *) PG_DETOAST_DATUM_COPY(arg);
+			ctx->flags = TOASTED;
+			if(ctx->p == NULL)
+			{
+				elog(ERROR, "Error in toaster!!! no detoasting\n");
+				PG_RETURN_NULL();
+			}
+		}
+		else
+		{
+			/* Plain input: use it in place, no copy needed */
+			ctx->p = arg;
+			ctx->flags = 0;
+		}
+		ctx->num = 0;
+		fcinfo->context = (Node *) ctx;
+	}
+
+	if(ctx->num < ctx->p->items)
+	{
+		/* Not finished: emit the next element and advance */
+		int val = ctx->p->array[ctx->num++];
+		rsinfo->isDone = ExprMultipleResult;
+		PG_RETURN_INT32(val);
+	}
+
+	/* Finished: release the copy (if we made one) and the context */
+	if(ctx->flags & TOASTED)
+		pfree(ctx->p);
+	pfree(fcinfo->context);
+	fcinfo->context = NULL;
+	rsinfo->isDone = ExprEndResult ;
+	PG_RETURN_NULL();
+}
--- a/contrib/intagg/int_aggregate.sql.in
+++ b/contrib/intagg/int_aggregate.sql.in
@ -0,0 +1,40 @@
+-- Install script for the integer aggregator / enumerator.
+-- MODULE_FILENAME is a placeholder replaced by the Makefile's sed rule.
+
+-- Drop functions
+-- (removes any previous definitions of the objects created below)
+drop function int_agg_state (int4, int4);
+drop function int_agg_final_array (int4);
+drop aggregate int_array_aggregate(int4);
+drop function int_array_enum (int4[]);
+
+
+-- Internal function for the aggregate
+-- Is called for each item in an aggregation
+-- Takes (state, value) and returns the new state.
+create function int_agg_state (int4, int4)
+	returns int4
+	as 'MODULE_FILENAME','int_agg_state'
+	language 'c';
+
+-- Internal function for the aggregate
+-- Is called at the end of the aggregation, and returns an array.
+create function int_agg_final_array (int4)
+	returns int4[]
+	as 'MODULE_FILENAME','int_agg_final_array'
+	language 'c';
+
+-- The aggregation function.
+-- Uses the above functions to create an array of integers from an aggregation.
+-- The state is an int4 that starts at 0.
+create aggregate int_array_aggregate
+(
+	BASETYPE = int4,
+	SFUNC = int_agg_state,
+	STYPE = int4,
+	FINALFUNC = int_agg_final_array,
+	INITCOND = 0
+);
+
+-- The enumeration function
+-- returns each element in a one-dimensional integer array
+-- as a row.
+create function int_array_enum(int4[])
+	returns setof integer
+	as 'MODULE_FILENAME','int_enum'
+	language 'c';
+
+