Add integer aggregator to /contrib.

mlw
This commit is contained in:
Bruce Momjian 2002-02-25 03:45:27 +00:00
parent e105f9a119
commit 2146d8c6a0
5 changed files with 403 additions and 2 deletions

View File

@ -71,10 +71,14 @@ fuzzystrmatch -
Levenshtein, metaphone, and soundex fuzzy string matching
by Joe Conway <joseph.conway@home.com>, Joel Burton <jburton@scw.org>
intagg -
Integer aggregator
by mlw <markw@mohawksoft.com>
intarray -
Index support for arrays of int4, using GiST
by Teodor Sigaev <teodor@stack.net> and Oleg Bartunov
<oleg@sai.msu.su>.
by Teodor Sigaev <teodor@stack.net> and Oleg Bartunov <oleg@sai.msu.su>
ipc_check -
Simple test script to help in configuring IPC.

31
contrib/intagg/Makefile Normal file
View File

@ -0,0 +1,31 @@
#############################################
# Makefile for integer aggregator
# Copyright (C) 2001 Digital Music Network.
# by Mark L. Woodward
#
# Location of this directory within the tree, and the relative path from
# here back to the top of the build tree.
subdir = contrib/intagg
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
# Base name of the module.  The shared library file name is that base name
# followed by DLSUFFIX, which is not defined in this file.
NAME=int_aggregate
SONAME = $(NAME)$(DLSUFFIX)
# NOTE(review): MODULES, DATA_built and DOCS are not referenced by any rule
# in this file; presumably they are consumed by contrib-global.mk, which is
# included below -- confirm against that file.
MODULES = int_aggregate
DATA_built = int_aggregate.so
DOCS = README.int_aggrigate
# SQL script generated from int_aggregate.sql.in by the pattern rule below.
SQLS=int_aggregate.sql
include $(top_srcdir)/contrib/contrib-global.mk
# Generate X.sql from X.sql.in, replacing every MODULE_FILENAME placeholder
# with the literal text "$libdir/" followed by the module base name.
%.sql: %.sql.in
sed 's,MODULE_FILENAME,$$libdir/$(NAME),g' $< >$@
# Default target: the shared library and the generated SQL script.
all : $(SONAME) $(SQLS)
# Install the shared library into $(DESTDIR)$(pkglibdir).
install : all
$(INSTALL_SHLIB) $(SONAME) $(DESTDIR)$(pkglibdir)
# Remove the shared library and the generated SQL script.
clean :
rm -f $(SONAME)
rm -f $(SQLS)

View File

@ -0,0 +1,55 @@
Integer aggregator/enumerator.
Many database systems have the notion of a one to many table.
A one to many table usually sits between two indexed tables,
as:
create table one_to_many(left int, right int) ;
And it is used like this:
SELECT right.* from right JOIN one_to_many ON (right.id = one_to_many.right)
WHERE one_to_many.left = item;
This will return all the items in the right hand table for an entry
in the left hand table. This is a very common construct in SQL.
Now, this methodology can be cumbersome with a very large number of
entries in the one_to_many table. Depending on the order in which
data was entered, a join like this could result in an index scan
and a fetch for each right hand entry in the table for a particular
left hand entry.
If you have a very dynamic system, there is not much you can do.
However, if you have some data which is fairly static, you can
create a summary table with the aggregator.
CREATE TABLE summary as SELECT left, int_array_aggregate(right)
AS right FROM one_to_many GROUP BY left;
This will create a table with one row per left item, and an array
of right items. Now this is pretty useless without some way of using
the array, which is why there is an array enumerator.
SELECT left, int_array_enum(right) FROM summary WHERE left = item;
The above query, using int_array_enum, produces the same results as:
SELECT left, right FROM one_to_many WHERE left = item;
The difference is that the query against the summary table has to get
only one row from the table, whereas the query against "one_to_many"
must index scan and fetch a row for each entry.
On our system, an EXPLAIN shows that a query with a cost of 8488 is reduced
to a cost of 329. The query, which replaces a join against the one_to_many
table with the summary table, is:
select right, count(right) from
(
select left, int_array_enum(right) as right from summary join
(select left from left_table where left = item) as lefts
ON (summary.left = lefts.left )
) as list group by right order by count desc ;

View File

@ -0,0 +1,271 @@
/*
* Integer array aggregator / enumerator
*
* Mark L. Woodward
* DMN Digital Music Network.
* www.dmn.com
*
* Copyright (C) Digital Music Network
* December 20, 2001
*
* This file is the property of the Digital Music Network (DMN).
* It is being made available to users of the PostgreSQL system
* under the BSD license.
*
*/
#include "postgres.h"
#include <ctype.h>
#include <stdio.h>
#include <sys/types.h>
#include <string.h>
#include "postgres.h"
#include "access/heapam.h"
#include "catalog/catname.h"
#include "catalog/indexing.h"
#include "catalog/pg_proc.h"
#include "executor/executor.h"
#include "utils/fcache.h"
#include "utils/sets.h"
#include "utils/syscache.h"
#include "access/tupmacs.h"
#include "access/xact.h"
#include "fmgr.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/lsyscache.h"
/*
 * This is actually a postgres version of a one dimensional array.
 *
 * The ArrayType header comes first, followed by two integers and the
 * element storage.  "items" counts the elements stored so far.  While
 * values are being aggregated, "lower" holds the number of elements the
 * current allocation has room for (see GetPGArray); ShrinkPGArray resets
 * it to 0 before the array is handed back.
 *
 * NOTE(review): items and lower presumably occupy the positions of the
 * dimension and lower-bound words of a 1-D array -- confirm against
 * utils/array.h.
 */
typedef struct agg
{
ArrayType a;	/* array header; size, ndim and flags are set by this file */
int items;	/* number of elements currently stored in array[] */
int lower;	/* capacity while aggregating; set to 0 by ShrinkPGArray */
int4 array[1];	/* first element; real length is fixed by the allocation size */
}PGARRAY;
/*
 * This is used to keep track of our position during enumeration.
 * int_enum allocates one of these on its first call and stores it in
 * fcinfo->context so that later calls can find it again.
 */
typedef struct callContext
{
PGARRAY *p;	/* array being enumerated (a detoasted copy when TOASTED is set) */
int num;	/* index of the next element to return */
int flags;	/* 0, or TOASTED when p must be pfree'd at the end */
}CTX;
/* CTX.flags bit: CTX.p is a private detoasted copy that must be freed */
#define TOASTED 1
/* Number of elements a freshly created aggregation array has room for */
#define START_NUM 8
/*
 * Number of bytes needed for a PGARRAY holding n elements.  The struct
 * already contains one element, hence the "- 1".  The argument is
 * parenthesized so that expressions such as "a << b" or "c ? x : y"
 * expand correctly.
 */
#define PGARRAY_SIZE(n) (sizeof(PGARRAY) + (((n)-1)*sizeof(int4)))
/* Internal helpers that manage the aggregation array */
static PGARRAY * GetPGArray(int4 state, int fAdd);
static PGARRAY *ShrinkPGArray(PGARRAY *p);
/* Functions taking PG_FUNCTION_ARGS; the SQL script binds them by name */
Datum int_agg_state(PG_FUNCTION_ARGS);
Datum int_agg_final_count(PG_FUNCTION_ARGS);
Datum int_agg_final_array(PG_FUNCTION_ARGS);
Datum int_enum(PG_FUNCTION_ARGS);
/*
 * Register each of the above with PG_FUNCTION_INFO_V1.
 * NOTE(review): int_agg_final_count is declared and registered here, but
 * no definition of it is visible in this file -- confirm whether it is
 * still needed.
 */
PG_FUNCTION_INFO_V1(int_agg_state);
PG_FUNCTION_INFO_V1(int_agg_final_count);
PG_FUNCTION_INFO_V1(int_agg_final_array);
PG_FUNCTION_INFO_V1(int_enum);
/*
 * Manage the aggregation state of the array.
 *
 * state: the aggregate's running state, which is a PGARRAY pointer carried
 *        in an int4 (0 means "no array yet").
 *        NOTE(review): this round trip only works where a pointer fits in
 *        an int4 -- confirm the supported platforms.
 * fAdd:  non-zero when the caller is about to append an element, in which
 *        case the array is grown (capacity doubled) if it is full.
 *
 * Returns the array to use, which may have moved if it had to grow.
 *
 * The array is allocated in TopTransactionContext, not the current context,
 * because it must survive between calls of the transition function.
 * MemoryContextAlloc() and repalloc() report allocation failure through
 * elog(ERROR) and do not return NULL, so no NULL checks are needed here.
 */
static PGARRAY * GetPGArray(int4 state, int fAdd)
{
	PGARRAY    *p = (PGARRAY *) state;

	if (!state)
	{
		/* New array with room for START_NUM elements */
		int			cb = PGARRAY_SIZE(START_NUM);

		p = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cb);
		p->a.size = cb;
		p->a.ndim = 0;
		p->a.flags = 0;
		p->items = 0;
		/* while aggregating, "lower" tracks the allocated capacity */
		p->lower = START_NUM;
	}
	else if (fAdd && p->items >= p->lower)
	{
		/* Array is full: double its capacity */
		int			n = p->lower + p->lower;
		int			cbNew = PGARRAY_SIZE(n);

		p = (PGARRAY *) repalloc(p, cbNew);
		p->a.size = cbNew;
		p->lower = n;
	}

	return p;
}
/*
 * Shrinks the array to its actual size and frees the working memory.
 *
 * p: aggregation array built by GetPGArray, or NULL.
 *
 * Returns a copy allocated by palloc() that holds exactly p->items
 * elements, with the header fields rewritten (ndim = 1, flags = 0,
 * lower = 0), or NULL when p is NULL.  The original array is pfree'd.
 *
 * palloc() reports allocation failure through elog(ERROR) and does not
 * return NULL, so its result is not checked.
 */
static PGARRAY *ShrinkPGArray(PGARRAY *p)
{
	PGARRAY    *pnew;
	int			cb;

	if (p == NULL)
		return NULL;

	/* get target size */
	cb = PGARRAY_SIZE(p->items);
	pnew = (PGARRAY *) palloc(cb);

	/* Copy header and elements, then fix up the fields so Postgres understands */
	memcpy(pnew, p, cb);
	pnew->a.size = cb;
	pnew->a.ndim = 1;
	pnew->a.flags = 0;
	pnew->lower = 0;

	pfree(p);
	return pnew;
}
/* Called for each iteration during an aggregate function */
Datum int_agg_state(PG_FUNCTION_ARGS)
{
int4 state = PG_GETARG_INT32(0);
int4 value = PG_GETARG_INT32(1);
PGARRAY *p = GetPGArray(state, 1);
if(!p)
{
elog(ERROR,"No aggregate storage\n");
}
else if(p->items >= p->lower)
{
elog(ERROR,"aggregate storage too small\n");
}
else
{
p->array[p->items++]= value;
}
PG_RETURN_INT32(p);
}
/* This is the final function used for the integer aggregator. It returns all the integers
* collected as a one dimentional integer array */
Datum int_agg_final_array(PG_FUNCTION_ARGS)
{
PGARRAY *pnew = ShrinkPGArray(GetPGArray(PG_GETARG_INT32(0),0));
if(pnew)
{
PG_RETURN_POINTER(pnew);
}
else
{
PG_RETURN_NULL();
}
}
/*
 * This function accepts an array, and returns one item for each entry in
 * the array.
 *
 * It is called repeatedly.  On the first call (fcinfo->context is NULL) a
 * CTX is allocated to remember the array and the current position, and is
 * stored in fcinfo->context.  Each call returns the next element with
 * rsi->isDone = ExprMultipleResult.  Once all elements have been returned
 * the CTX is freed and rsi->isDone is set to ExprEndResult.
 *
 * A missing argument produces a NOTICE and a NULL result.  A missing
 * ReturnSetInfo is an ERROR; elog(ERROR) does not return.
 *
 * palloc() and PG_DETOAST_DATUM_COPY() report failure through elog(ERROR)
 * and do not return NULL, so their results are not checked.
 */
Datum int_enum(PG_FUNCTION_ARGS)
{
	PGARRAY    *p = (PGARRAY *) PG_GETARG_POINTER(0);
	ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo;
	CTX		   *pc;

	if (!p)
	{
		elog(NOTICE, "No data sent\n");
		/* return SQL NULL, not a bare 0 that would read as the integer 0 */
		PG_RETURN_NULL();
	}
	if (!rsi)
	{
		elog(ERROR, "No ReturnSetInfo sent! function must be declared returning a 'setof' integer");
	}

	if (!fcinfo->context)
	{
		/* First call: allocate a working context */
		pc = (CTX *) palloc(sizeof(CTX));

		/* Don't copy the attribute if you don't need to */
		if (VARATT_IS_EXTENDED(p))
		{
			/* Toasted: work on a private detoasted copy, freed when done */
			pc->p = (PGARRAY *) PG_DETOAST_DATUM_COPY(p);
			pc->flags = TOASTED;
		}
		else
		{
			/* Untoasted: use the argument in place */
			pc->p = p;
			pc->flags = 0;
		}
		pc->num = 0;
		fcinfo->context = (Node *) pc;
	}
	else
	{
		/* Later call: pick up where we left off */
		pc = (CTX *) fcinfo->context;
	}

	if (pc->num < pc->p->items)
	{
		/* Not done yet: return the next value */
		int			val = pc->p->array[pc->num++];

		rsi->isDone = ExprMultipleResult;
		PG_RETURN_INT32(val);
	}

	/* All elements returned: release the working state and end the set */
	if (pc->flags & TOASTED)
		pfree(pc->p);
	pfree(pc);
	fcinfo->context = NULL;
	rsi->isDone = ExprEndResult;
	PG_RETURN_NULL();
}

View File

@ -0,0 +1,40 @@
-- Drop any existing definitions of the objects created below.
drop function int_agg_state (int4, int4);
drop function int_agg_final_array (int4);
drop aggregate int_array_aggregate(int4);
drop function int_array_enum (int4[]);
-- Internal function for the aggregate (its SFUNC).
-- Is called for each item in an aggregation.
-- MODULE_FILENAME is a placeholder that is replaced when this script is
-- generated from its .sql.in template.
create function int_agg_state (int4, int4)
returns int4
as 'MODULE_FILENAME','int_agg_state'
language 'c';
-- Internal function for the aggregate (its FINALFUNC).
-- Is called at the end of the aggregation, and returns an array.
create function int_agg_final_array (int4)
returns int4[]
as 'MODULE_FILENAME','int_agg_final_array'
language 'c';
-- The aggregate function.
-- Uses the above functions to create an array of integers from an aggregation.
-- The running state is an int4 that starts at 0.
create aggregate int_array_aggregate
(
BASETYPE = int4,
SFUNC = int_agg_state,
STYPE = int4,
FINALFUNC = int_agg_final_array,
INITCOND = 0
);
-- The enumeration function.
-- Returns each element in a one dimensional integer array
-- as a row.  Implemented by the C function int_enum.
create function int_array_enum(int4[])
returns setof integer
as 'MODULE_FILENAME','int_enum'
language 'c';