commit Oleg and Teodor's RD-tree implementation ... this provides the
regression tests for the GiST changes ... this should be integrated into the regular regression tests similar to Vadim's SPI contrib stuff ...
This commit is contained in:
parent
0ad7db4be4
commit
1db943b3ca
69
contrib/intarray/Makefile
Normal file
69
contrib/intarray/Makefile
Normal file
@ -0,0 +1,69 @@
|
||||
# Makefile for the intarray contrib module: builds the "_int" shared
# library and generates _int.sql from _int.sql.in.

subdir = contrib/intarray
top_builddir = ../..
include $(top_builddir)/src/Makefile.global

# override libdir to install shlib in contrib not main directory
libdir := $(libdir)/contrib

# shared library parameters
NAME= _int
SO_MAJOR_VERSION= 1
SO_MINOR_VERSION= 0

# PGSQL71 selects the code paths guarded by "#ifdef PGSQL71" in _int.c
override CPPFLAGS += -I$(srcdir) -DPGSQL71

OBJS= _int.o

all: all-lib $(NAME).sql

# Shared library stuff
include $(top_srcdir)/src/Makefile.shlib


# substitute the installed library path into the SQL script
$(NAME).sql: $(NAME).sql.in
	sed -e 's:MODULE_PATHNAME:$(libdir)/$(shlib):g' < $< > $@

.PHONY: submake
submake:
	$(MAKE) -C $(top_builddir)/src/test/regress pg_regress

# against installed postmaster
# (there used to be a second installcheck rule that only printed
# "not supported"; two recipes for one target make "make" warn and
# ignore the first one, so only the working rule is kept)
installcheck: submake
	$(top_builddir)/src/test/regress/pg_regress _int

# in-tree test doesn't work yet (no way to install my shared library)
#check: all submake
#	$(top_builddir)/src/test/regress/pg_regress --temp-install \
#	  --top-builddir=$(top_builddir) _int
check:
	@echo "'make check' is not supported."
	@echo "Do 'make install', then 'make installcheck' instead."

install: all installdirs install-lib
	#$(INSTALL_DATA) $(srcdir)/README.$(NAME) $(docdir)/contrib
	$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib

installdirs:
	$(mkinstalldirs) $(docdir)/contrib $(datadir)/contrib $(libdir)

uninstall: uninstall-lib
	rm -f $(docdir)/contrib/README.$(NAME) $(datadir)/contrib/$(NAME).sql

clean distclean maintainer-clean: clean-lib
	rm -f *.so y.tab.c y.tab.h $(OBJS) $(NAME).sql
# things created by various check targets
	rm -rf results tmp_check log
	rm -f regression.diffs regression.out regress.out run_check.out
ifeq ($(PORTNAME), win)
	rm -f regress.def
endif

depend dep:
	$(CC) -MM $(CFLAGS) *.c >depend

# pull in generated dependencies when "make depend" has been run
ifeq (depend,$(wildcard depend))
include depend
endif
|
64
contrib/intarray/Makefile.703
Normal file
64
contrib/intarray/Makefile.703
Normal file
@ -0,0 +1,64 @@
|
||||
#-------------------------------------------------------------------------
#
# Makefile --
#
#    Stand-alone Makefile for the intarray module -- _int
#    (does not define PGSQL71, so _int.c is built with its
#    non-PGSQL71 code paths)
#
#-------------------------------------------------------------------------

PGDIR = ../..
SRCDIR = $(PGDIR)/src

include $(SRCDIR)/Makefile.global

INCLUDE_OPT = -I ./ \
	-I $(SRCDIR)/ \
	-I $(SRCDIR)/include \
	-I $(SRCDIR)/port/$(PORTNAME)

CFLAGS += $(INCLUDE_OPT) $(CFLAGS_SL)

MODNAME = _int
OBJFILES = $(MODNAME).o

SQLDEFS = $(MODNAME).sql

MODULE = $(MODNAME)$(DLSUFFIX)

MODDIR = $(LIBDIR)/modules

SQLDIR = $(LIBDIR)/sql

# These targets are commands, not files.  "depend" is intentionally not
# listed: it is a real file that is included at the bottom of this Makefile.
.PHONY: all module sql install clean

all: module sql

module: $(MODULE)

sql: $(SQLDEFS)

$(MODULE): $(OBJFILES)
	$(CC) $(CFLAGS) -shared -o $@ $(OBJFILES)

install: $(MODULE) $(SQLDEFS) $(MODDIR) $(SQLDIR)
	cp -p $(MODULE) $(MODDIR)/
	strip $(MODDIR)/$(MODULE)
	cp -p $(SQLDEFS) $(SQLDIR)/

$(MODDIR):
	mkdir -p $@

$(SQLDIR):
	mkdir -p $@

# substitute the installed module path into the SQL script
%.sql: %.sql.in
	sed "s|MODULE_PATHNAME|$(MODDIR)/$(MODULE)|" < $< > $@

depend dep:
	$(CC) -MM $(INCLUDE_OPT) *.c >depend

clean:
	rm -f $(MODULE) $(SQLDEFS) *$(DLSUFFIX)
	rm -f *~ *# *.b *.o *.output *.tab.h $(MODNAME)parse.h $(MODNAME)parse.c $(MODNAME)scan.c

# pull in generated dependencies when "make depend" has been run
ifeq (depend,$(wildcard depend))
include depend
endif
|
81
contrib/intarray/README.intarray
Normal file
81
contrib/intarray/README.intarray
Normal file
@ -0,0 +1,81 @@
|
||||
This is an implementation of the RD-tree data structure using the GiST
interface of PostgreSQL. It has built-in lossy compression, which must be
declared at index creation time using "with ( islossy )". The current
implementation provides index support for one-dimensional arrays of int4's.
|
||||
All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov
|
||||
(oleg@sai.msu.su). See http://www.sai.msu.su/~megera/postgres/gist
|
||||
for additional information.
|
||||
|
||||
INSTALLATION:
|
||||
|
||||
gmake
|
||||
gmake install
|
||||
-- load functions
|
||||
psql <database> < _int.sql
|
||||
|
||||
REGRESSION TEST:
|
||||
|
||||
gmake installcheck
|
||||
|
||||
EXAMPLE USAGE:
|
||||
|
||||
create table message (mid int not null,sections int[]);
|
||||
create table message_section_map (mid int not null,sid int not null);
|
||||
|
||||
-- create indices
|
||||
CREATE unique index message_key on message ( mid );
|
||||
CREATE unique index message_section_map_key2 on message_section_map (sid, mid );
|
||||
CREATE INDEX message_rdtree_idx on message using gist ( sections ) with ( islossy );
|
||||
|
||||
-- select some messages with section in 1 OR 2 - OVERLAP operator
|
||||
select message.mid from message where message.sections && '{1,2}';
|
||||
|
||||
-- select messages that are in both sections 1 AND 2 - CONTAINS operator
|
||||
select message.mid from message where message.sections @ '{1,2}';
|
||||
-- the same, CONTAINED operator
|
||||
select message.mid from message where '{1,2}' ~ message.sections;
|
||||
|
||||
BENCHMARK:
|
||||
|
||||
subdirectory bench contains benchmark suite.
|
||||
cd ./bench
|
||||
1. createdb TEST
|
||||
2. psql TEST < ../_int.sql
|
||||
3. ./create_test.pl | psql TEST
|
||||
4. ./bench.pl - perl script to benchmark queries, supports OR, AND queries
|
||||
with/without RD-Tree. Run script without arguments to
|
||||
see available options.
|
||||
|
||||
a)test without RD-Tree (OR)
|
||||
./bench.pl -d TEST -s 1,2 -v
|
||||
b)test with RD-Tree
|
||||
./bench.pl -d TEST -s 1,2 -v -r
|
||||
|
||||
BENCHMARKS:
|
||||
|
||||
Size of table <message>: 200000
|
||||
Size of table <message_section_map>: 268538
|
||||
|
||||
Distribution of messages by sections:
|
||||
|
||||
section 0: 73899 messages
|
||||
section 1: 16298 messages
|
||||
section 50: 1241 messages
|
||||
section 99: 705 messages
|
||||
|
||||
old - without RD-Tree support,
|
||||
new - with RD-Tree
|
||||
|
||||
+----------+---------------+----------------+
|
||||
|Search set|OR, time in sec|AND, time in sec|
|
||||
| +-------+-------+--------+-------+
|
||||
| | old | new | old | new |
|
||||
+----------+-------+-------+--------+-------+
|
||||
| 1| 1.427| 0.215| -| -|
|
||||
+----------+-------+-------+--------+-------+
|
||||
| 99| 1.029| 0.018| -| -|
|
||||
+----------+-------+-------+--------+-------+
|
||||
| 1,2| 1.829| 0.334| 5.654| 0.042|
|
||||
+----------+-------+-------+--------+-------+
|
||||
| 1,2,50,60| 2.057| 0.359| 5.044| 0.007|
|
||||
+----------+-------+-------+--------+-------+
|
842
contrib/intarray/_int.c
Normal file
842
contrib/intarray/_int.c
Normal file
@ -0,0 +1,842 @@
|
||||
/******************************************************************************
|
||||
This file contains routines that can be bound to a Postgres backend and
|
||||
called by the backend in the process of processing queries. The calling
|
||||
format for these routines is dictated by Postgres architecture.
|
||||
******************************************************************************/
|
||||
|
||||
#include <stdio.h>
#include <stdlib.h>
#include <float.h>
#include <string.h>

#include "postgres.h"
#include "access/gist.h"
#include "access/itup.h"
#include "access/rtree.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"
|
||||
|
||||
/* g_int_compress() switches to the range representation once a key has
   2*MAXNUMRANGE or more elements, and keeps at most MAXNUMRANGE ranges */
#define MAXNUMRANGE 100

/* generic helpers; every use of an argument is parenthesized so that
   compound expressions such as abs(x - y) expand correctly */
#define max(a,b)        ((a) >  (b) ? (a) : (b))
#define min(a,b)        ((a) <= (b) ? (a) : (b))
#define abs(a)          ((a) <  (0) ? (-(a)) : (a))

/* pointer to the first int4 element of array x */
#define ARRPTR(x)  ( (int4 *) ARR_DATA_PTR(x) )
/* total number of elements of array x */
#ifdef PGSQL71
#define ARRSIZE(x) ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x))
#else
#define ARRSIZE(x) getNitems( ARR_NDIM(x), ARR_DIMS(x))
#endif

#define NDIM 1
/* 1 when x is a NULL pointer, is not NDIM-dimensional, or has no elements */
#define ARRISNULL(x) ( (x) ? ( ( ARR_NDIM(x) == NDIM ) ? ( ( ARRSIZE( x ) ) ? 0 : 1 ) : 1 ) : 1 )

/*
** Statement macros.  They are wrapped in do { } while (0) so that each
** behaves as a single statement (terminated by the caller's ';') even
** inside an unbraced if/else.
*/

/* sort the elements of x in place */
#define SORT(x) \
	do { \
		if ( ARRSIZE( x ) > 1 ) \
			isort( (void*)ARRPTR( x ), ARRSIZE( x ) ); \
	} while (0)

/* sort x in place and, when isort() reports duplicates, remove them;
   x may be reassigned because _int_unique() resizes the array */
#define PREPAREARR(x) \
	do { \
		if ( ARRSIZE( x ) > 1 ) { \
			if ( isort( (void*)ARRPTR( x ), ARRSIZE( x ) ) ) \
				x = _int_unique( x ); \
		} \
	} while (0)
|
||||
/*
|
||||
#define GIST_DEBUG
|
||||
#define GIST_QUERY_DEBUG
|
||||
*/
|
||||
#ifdef GIST_DEBUG
|
||||
/*
** Debug helper: report up to num leading elements of a in one NOTICE line.
** The output is truncated when it would not fit into the local buffer.
*/
static void printarr ( ArrayType * a, int num ) {
	char	bbb[16384];
	char   *cur = bbb;
	int	   *d = ARRPTR( a );
	int		limit = min( num, ARRSIZE( a ) );
	int		l;

	*bbb = '\0';
	for ( l = 0; l < limit; l++ ) {
		/* stop while there is still room for one more "%d " item:
		   32 bytes is more than a formatted int, the blank and the
		   terminating NUL can take */
		if ( sizeof(bbb) - (cur - bbb) < 32 )
			break;
		sprintf(cur,"%d ", d[l] );
		cur = strchr( cur, '\0' ) ;
	}
	elog(NOTICE, "\t\t%s", bbb);
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
** useful functions
|
||||
*/
|
||||
bool isort( int *a, const int len );
|
||||
ArrayType * new_intArrayType( int num );
|
||||
ArrayType * copy_intArrayType( ArrayType * a );
|
||||
ArrayType * resize_intArrayType( ArrayType * a, int num );
|
||||
int internal_size( int *a, int len );
|
||||
ArrayType * _int_unique( ArrayType * a );
|
||||
|
||||
/*
|
||||
** GiST support methods
|
||||
*/
|
||||
bool g_int_consistent(GISTENTRY *entry, ArrayType *query, StrategyNumber strategy);
|
||||
GISTENTRY * g_int_compress(GISTENTRY *entry);
|
||||
GISTENTRY * g_int_decompress(GISTENTRY *entry);
|
||||
float * g_int_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result);
|
||||
GIST_SPLITVEC * g_int_picksplit(bytea *entryvec, GIST_SPLITVEC *v);
|
||||
bool g_int_internal_consistent(ArrayType *key, ArrayType *query, StrategyNumber strategy);
|
||||
ArrayType * g_int_union(bytea *entryvec, int *sizep);
|
||||
bool * g_int_same(ArrayType *b1, ArrayType *b2, bool *result);
|
||||
|
||||
|
||||
/*
|
||||
** R-tree support functions
|
||||
*/
|
||||
bool inner_int_contains(ArrayType *a, ArrayType *b);
|
||||
bool inner_int_overlap(ArrayType *a, ArrayType *b);
|
||||
ArrayType * inner_int_union(ArrayType *a, ArrayType *b);
|
||||
ArrayType * inner_int_inter(ArrayType *a, ArrayType *b);
|
||||
|
||||
bool _int_different(ArrayType *a, ArrayType *b);
|
||||
bool _int_same(ArrayType *a, ArrayType *b);
|
||||
bool _int_contains(ArrayType *a, ArrayType *b);
|
||||
bool _int_contained(ArrayType *a, ArrayType *b);
|
||||
bool _int_overlap(ArrayType *a, ArrayType *b);
|
||||
ArrayType * _int_union(ArrayType *a, ArrayType *b);
|
||||
ArrayType * _int_inter(ArrayType *a, ArrayType *b);
|
||||
void rt__int_size(ArrayType *a, float* sz);
|
||||
|
||||
|
||||
/*****************************************************************************
|
||||
* GiST functions
|
||||
*****************************************************************************/
|
||||
|
||||
/*
|
||||
** The GiST Consistent method for _intments
|
||||
** Should return false if for all data items x below entry,
|
||||
** the predicate x op query == FALSE, where op is the oper
|
||||
** corresponding to strategy in the pg_amop table.
|
||||
*/
|
||||
bool
|
||||
g_int_consistent(GISTENTRY *entry,
|
||||
ArrayType *query,
|
||||
StrategyNumber strategy)
|
||||
{
|
||||
|
||||
/* sort query for fast search, key is already sorted */
|
||||
if ( ARRISNULL( query ) ) return FALSE;
|
||||
PREPAREARR( query );
|
||||
/*
|
||||
** if entry is not leaf, use g_int_internal_consistent,
|
||||
** else use g_int_leaf_consistent
|
||||
*/
|
||||
return(g_int_internal_consistent((ArrayType *)(entry->pred), query, strategy));
|
||||
}
|
||||
|
||||
/*
|
||||
** The GiST Union method for _intments
|
||||
** returns the minimal set that encloses all the entries in entryvec
|
||||
*/
|
||||
ArrayType *
|
||||
g_int_union(bytea *entryvec, int *sizep)
|
||||
{
|
||||
int numranges, i;
|
||||
ArrayType *out = (ArrayType *)NULL;
|
||||
ArrayType *tmp;
|
||||
|
||||
numranges = (VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY);
|
||||
tmp = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[0]).pred;
|
||||
|
||||
#ifdef GIST_DEBUG
|
||||
elog(NOTICE, "union %d", numranges);
|
||||
#endif
|
||||
|
||||
for (i = 1; i < numranges; i++) {
|
||||
out = inner_int_union(tmp, (ArrayType *)
|
||||
(((GISTENTRY *)(VARDATA(entryvec)))[i]).pred);
|
||||
if (i > 1 && tmp) pfree(tmp);
|
||||
tmp = out;
|
||||
}
|
||||
|
||||
*sizep = VARSIZE( out );
|
||||
#ifdef GIST_DEBUG
|
||||
elog(NOTICE, "\t ENDunion %d %d", *sizep, ARRSIZE( out ) );
|
||||
#endif
|
||||
if ( *sizep == 0 ) {
|
||||
pfree( out );
|
||||
return NULL;
|
||||
}
|
||||
return(out);
|
||||
}
|
||||
|
||||
/*
|
||||
** GiST Compress and Decompress methods
|
||||
*/
|
||||
GISTENTRY *
|
||||
g_int_compress(GISTENTRY *entry)
|
||||
{
|
||||
GISTENTRY *retval;
|
||||
ArrayType * r;
|
||||
int len;
|
||||
int *dr;
|
||||
int i,min,cand;
|
||||
|
||||
retval = palloc(sizeof(GISTENTRY));
|
||||
if ( ! retval )
|
||||
elog(ERROR,"Can't allocate memory for compression");
|
||||
|
||||
if ( ARRISNULL( (ArrayType *) entry->pred ) ) {
|
||||
#ifdef GIST_DEBUG
|
||||
elog(NOTICE,"COMP IN: NULL");
|
||||
#endif
|
||||
gistentryinit(*retval, (char *)NULL, entry->rel, entry->page, entry->offset,
|
||||
0, FALSE);
|
||||
return( retval );
|
||||
}
|
||||
|
||||
r = copy_intArrayType( (ArrayType *) entry->pred );
|
||||
if ( entry->leafkey ) PREPAREARR( r );
|
||||
len = ARRSIZE( r );
|
||||
|
||||
#ifdef GIST_DEBUG
|
||||
elog(NOTICE, "COMP IN: %d leaf; %d rel; %d page; %d offset; %d bytes; %d elems", entry->leafkey, (int)entry->rel, (int)entry->page, (int)entry->offset, (int)entry->bytes, len);
|
||||
//printarr( r, len );
|
||||
#endif
|
||||
|
||||
if ( len >= 2*MAXNUMRANGE ) { /*compress*/
|
||||
r = resize_intArrayType( r, 2*( len ) );
|
||||
|
||||
dr = ARRPTR( r );
|
||||
|
||||
for(i=len-1; i>=0;i--)
|
||||
dr[2*i] = dr[2*i+1] = dr[i];
|
||||
|
||||
len *= 2;
|
||||
cand = 1;
|
||||
while( len > MAXNUMRANGE * 2 ) {
|
||||
min = 0x7fffffff;
|
||||
for( i=2; i<len;i+=2 )
|
||||
if ( min > (dr[i] - dr[i-1]) ) {
|
||||
min = (dr[i] - dr[i-1]);
|
||||
cand = i;
|
||||
}
|
||||
memmove( (void*)&dr[cand-1], (void*)&dr[cand+1], (len - cand - 1)*sizeof(int) );
|
||||
len -= 2;
|
||||
}
|
||||
r = resize_intArrayType(r, len );
|
||||
}
|
||||
|
||||
gistentryinit(*retval, (char *)r, entry->rel, entry->page, entry->offset, VARSIZE( r ), FALSE);
|
||||
|
||||
return(retval);
|
||||
}
|
||||
|
||||
/*
** GiST Decompress method.
**
**  - An entry that is too small to hold an array header, or whose key is
**    NULL/empty, yields a new entry with a NULL pred.
**  - A key with fewer than 2*MAXNUMRANGE elements is not in range form:
**    the passed entry is re-initialized in place (with VARSIZE of the key
**    as its byte size) and returned.
**  - Otherwise the key is read as [low, high] pairs and expanded into a
**    new array holding every value of every pair.
*/
GISTENTRY *
g_int_decompress(GISTENTRY *entry)
{
	GISTENTRY  *retval;
	ArrayType  *packed, *expanded;
	int		   *src, *dst;
	int			nsrc, ndst;
	int			pair, val;

	if ( entry->bytes < ARR_OVERHEAD( NDIM ) || ARRISNULL( (ArrayType *) entry->pred ) ) {
		retval = palloc(sizeof(GISTENTRY));
		if ( retval == NULL )
			elog(ERROR,"Can't allocate memory for decompression");
		gistentryinit(*retval, (char *)NULL, entry->rel, entry->page, entry->offset, 0, FALSE);
#ifdef GIST_DEBUG
		elog(NOTICE,"DECOMP IN: NULL");
#endif
		return( retval );
	}

	packed = (ArrayType *) entry->pred;
	nsrc = ARRSIZE(packed);
	src = ARRPTR(packed);

	if ( nsrc < 2*MAXNUMRANGE ) {
		/* not a compressed value; only the byte size is refreshed */
		gistentryinit(*entry, (char *)packed, entry->rel, entry->page, entry->offset, VARSIZE( packed ), FALSE);
		return (entry);
	}

#ifdef GIST_DEBUG
	elog(NOTICE, "DECOMP IN: %d leaf; %d rel; %d page; %d offset; %d bytes; %d elems", entry->leafkey, (int)entry->rel, (int)entry->page, (int)entry->offset, (int)entry->bytes, nsrc);
#endif

	ndst = internal_size(src, nsrc);

	expanded = new_intArrayType( ndst );
	dst = ARRPTR( expanded );

	for ( pair = 0; pair < nsrc; pair += 2 ) {
		for ( val = src[pair]; val <= src[pair+1]; val++ ) {
			/* from the second pair on, do not repeat the value that was
			   written last */
			if ( pair != 0 && *(dst-1) == val )
				continue;
			*dst++ = val;
		}
	}

	retval = palloc(sizeof(GISTENTRY));
	if ( retval == NULL )
		elog(ERROR,"Can't allocate memory for decompression");
	gistentryinit(*retval, (char *)expanded, entry->rel, entry->page, entry->offset, VARSIZE( expanded ), FALSE);

	return(retval);
}
|
||||
|
||||
/*
|
||||
** The GiST Penalty method for _intments
|
||||
*/
|
||||
float *
|
||||
g_int_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result)
|
||||
{
|
||||
Datum ud;
|
||||
float tmp1, tmp2;
|
||||
|
||||
#ifdef GIST_DEBUG
|
||||
elog(NOTICE, "penalty");
|
||||
#endif
|
||||
ud = (Datum)inner_int_union((ArrayType *)(origentry->pred), (ArrayType *)(newentry->pred));
|
||||
rt__int_size((ArrayType *)ud, &tmp1);
|
||||
rt__int_size((ArrayType *)(origentry->pred), &tmp2);
|
||||
*result = tmp1 - tmp2;
|
||||
pfree((char *)ud);
|
||||
|
||||
#ifdef GIST_DEBUG
|
||||
elog(NOTICE, "--penalty\t%g", *result);
|
||||
#endif
|
||||
|
||||
return(result);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
** The GiST PickSplit method for _intments
|
||||
** We use Guttman's poly time split algorithm
|
||||
*/
|
||||
GIST_SPLITVEC *
|
||||
g_int_picksplit(bytea *entryvec,
|
||||
GIST_SPLITVEC *v)
|
||||
{
|
||||
OffsetNumber i, j;
|
||||
ArrayType *datum_alpha, *datum_beta;
|
||||
ArrayType *datum_l, *datum_r;
|
||||
ArrayType *union_d, *union_dl, *union_dr;
|
||||
ArrayType *inter_d;
|
||||
bool firsttime;
|
||||
float size_alpha, size_beta, size_union, size_inter;
|
||||
float size_waste, waste;
|
||||
float size_l, size_r;
|
||||
int nbytes;
|
||||
OffsetNumber seed_1 = 0, seed_2 = 0;
|
||||
OffsetNumber *left, *right;
|
||||
OffsetNumber maxoff;
|
||||
|
||||
#ifdef GIST_DEBUG
|
||||
elog(NOTICE, "--------picksplit %d",(VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY));
|
||||
#endif
|
||||
|
||||
maxoff = ((VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY)) - 2;
|
||||
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
|
||||
v->spl_left = (OffsetNumber *) palloc(nbytes);
|
||||
v->spl_right = (OffsetNumber *) palloc(nbytes);
|
||||
|
||||
firsttime = true;
|
||||
waste = 0.0;
|
||||
|
||||
for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) {
|
||||
datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred);
|
||||
for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) {
|
||||
datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[j].pred);
|
||||
|
||||
/* compute the wasted space by unioning these guys */
|
||||
/* size_waste = size_union - size_inter; */
|
||||
union_d = (ArrayType *)inner_int_union(datum_alpha, datum_beta);
|
||||
rt__int_size(union_d, &size_union);
|
||||
inter_d = (ArrayType *)inner_int_inter(datum_alpha, datum_beta);
|
||||
rt__int_size(inter_d, &size_inter);
|
||||
size_waste = size_union - size_inter;
|
||||
|
||||
pfree(union_d);
|
||||
|
||||
if (inter_d != (ArrayType *) NULL)
|
||||
pfree(inter_d);
|
||||
|
||||
/*
|
||||
* are these a more promising split that what we've
|
||||
* already seen?
|
||||
*/
|
||||
|
||||
if (size_waste > waste || firsttime) {
|
||||
waste = size_waste;
|
||||
seed_1 = i;
|
||||
seed_2 = j;
|
||||
firsttime = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
left = v->spl_left;
|
||||
v->spl_nleft = 0;
|
||||
right = v->spl_right;
|
||||
v->spl_nright = 0;
|
||||
|
||||
datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_1].pred);
|
||||
datum_l = copy_intArrayType( datum_alpha );
|
||||
rt__int_size((ArrayType *)datum_l, &size_l);
|
||||
datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_2].pred);
|
||||
datum_r = copy_intArrayType( datum_beta );
|
||||
rt__int_size((ArrayType *)datum_r, &size_r);
|
||||
|
||||
/*
|
||||
* Now split up the regions between the two seeds. An important
|
||||
* property of this split algorithm is that the split vector v
|
||||
* has the indices of items to be split in order in its left and
|
||||
* right vectors. We exploit this property by doing a merge in
|
||||
* the code that actually splits the page.
|
||||
*
|
||||
* For efficiency, we also place the new index tuple in this loop.
|
||||
* This is handled at the very end, when we have placed all the
|
||||
* existing tuples and i == maxoff + 1.
|
||||
*/
|
||||
|
||||
maxoff = OffsetNumberNext(maxoff);
|
||||
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
|
||||
|
||||
|
||||
/*
|
||||
* If we've already decided where to place this item, just
|
||||
* put it on the right list. Otherwise, we need to figure
|
||||
* out which page needs the least enlargement in order to
|
||||
* store the item.
|
||||
*/
|
||||
|
||||
if (i == seed_1) {
|
||||
*left++ = i;
|
||||
v->spl_nleft++;
|
||||
continue;
|
||||
} else if (i == seed_2) {
|
||||
*right++ = i;
|
||||
v->spl_nright++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* okay, which page needs least enlargement? */
|
||||
datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred);
|
||||
union_dl = (ArrayType *)inner_int_union(datum_l, datum_alpha);
|
||||
union_dr = (ArrayType *)inner_int_union(datum_r, datum_alpha);
|
||||
rt__int_size((ArrayType *)union_dl, &size_alpha);
|
||||
rt__int_size((ArrayType *)union_dr, &size_beta);
|
||||
|
||||
/* pick which page to add it to */
|
||||
if (size_alpha - size_l < size_beta - size_r) {
|
||||
if ( datum_l ) pfree(datum_l);
|
||||
if ( union_dr ) pfree(union_dr);
|
||||
datum_l = union_dl;
|
||||
size_l = size_alpha;
|
||||
*left++ = i;
|
||||
v->spl_nleft++;
|
||||
} else {
|
||||
if ( datum_r ) pfree(datum_r);
|
||||
if ( union_dl ) pfree(union_dl);
|
||||
datum_r = union_dr;
|
||||
size_r = size_beta;
|
||||
*right++ = i;
|
||||
v->spl_nright++;
|
||||
}
|
||||
}
|
||||
/**left = *right = FirstOffsetNumber;*/ /* sentinel value, see dosplit() */
|
||||
|
||||
if ( *(left-1) > *(right-1) ) {
|
||||
*right = FirstOffsetNumber;
|
||||
*(left-1) = InvalidOffsetNumber;
|
||||
} else {
|
||||
*left = FirstOffsetNumber;
|
||||
*(right-1) = InvalidOffsetNumber;
|
||||
}
|
||||
|
||||
|
||||
v->spl_ldatum = (char *)datum_l;
|
||||
v->spl_rdatum = (char *)datum_r;
|
||||
|
||||
#ifdef GIST_DEBUG
|
||||
elog(NOTICE, "--------ENDpicksplit %d %d",v->spl_nleft, v->spl_nright);
|
||||
#endif
|
||||
return v;
|
||||
}
|
||||
|
||||
/*
|
||||
** Equality methods
|
||||
*/
|
||||
|
||||
|
||||
bool *
|
||||
g_int_same(ArrayType *b1, ArrayType *b2, bool *result)
|
||||
{
|
||||
if (_int_same(b1, b2))
|
||||
*result = TRUE;
|
||||
else *result = FALSE;
|
||||
|
||||
return(result);
|
||||
}
|
||||
|
||||
bool
|
||||
g_int_internal_consistent(ArrayType *key,
|
||||
ArrayType *query,
|
||||
StrategyNumber strategy)
|
||||
{
|
||||
bool retval;
|
||||
|
||||
#ifdef GIST_QUERY_DEBUG
|
||||
elog(NOTICE, "internal_consistent, %d", strategy);
|
||||
#endif
|
||||
|
||||
switch(strategy) {
|
||||
case RTOverlapStrategyNumber:
|
||||
retval = (bool)inner_int_overlap(key, query);
|
||||
break;
|
||||
case RTSameStrategyNumber:
|
||||
case RTContainsStrategyNumber:
|
||||
retval = (bool)inner_int_contains(key, query);
|
||||
break;
|
||||
case RTContainedByStrategyNumber:
|
||||
retval = (bool)inner_int_overlap(key, query);
|
||||
break;
|
||||
default:
|
||||
retval = FALSE;
|
||||
}
|
||||
return(retval);
|
||||
}
|
||||
|
||||
bool
|
||||
_int_contained(ArrayType *a, ArrayType *b)
|
||||
{
|
||||
return ( _int_contains(b, a) );
|
||||
}
|
||||
|
||||
bool
|
||||
_int_contains ( ArrayType *a, ArrayType *b ) {
|
||||
bool res;
|
||||
ArrayType *an, *bn;
|
||||
if ( ARRISNULL( a ) || ARRISNULL( b ) ) return FALSE;
|
||||
|
||||
an = copy_intArrayType( a );
|
||||
bn = copy_intArrayType( b );
|
||||
|
||||
PREPAREARR(an);
|
||||
PREPAREARR(bn);
|
||||
|
||||
res = inner_int_contains( an, bn );
|
||||
pfree( an ); pfree( bn );
|
||||
return res;
|
||||
}
|
||||
|
||||
bool
|
||||
inner_int_contains ( ArrayType *a, ArrayType *b ) {
|
||||
int na, nb;
|
||||
int i,j, n;
|
||||
int *da, *db;
|
||||
|
||||
if ( ARRISNULL( a ) || ARRISNULL( b ) ) return FALSE;
|
||||
|
||||
na = ARRSIZE( a );
|
||||
nb = ARRSIZE( b );
|
||||
da = ARRPTR( a );
|
||||
db = ARRPTR( b );
|
||||
|
||||
#ifdef GIST_DEBUG
|
||||
elog(NOTICE, "contains %d %d", na, nb);
|
||||
#endif
|
||||
|
||||
i = j = n = 0;
|
||||
while( i<na && j<nb )
|
||||
if ( da[i] < db[j] )
|
||||
i++;
|
||||
else if ( da[i] == db[j] ) {
|
||||
n++; i++; j++;
|
||||
} else
|
||||
j++;
|
||||
|
||||
return ( n == nb ) ? TRUE : FALSE;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
* Operator class for R-tree indexing
|
||||
*****************************************************************************/
|
||||
|
||||
bool
|
||||
_int_different(ArrayType *a, ArrayType *b)
|
||||
{
|
||||
return ( !_int_same( a, b ) );
|
||||
}
|
||||
|
||||
bool
|
||||
_int_same ( ArrayType *a, ArrayType *b ) {
|
||||
int na , nb ;
|
||||
int n;
|
||||
int *da, *db;
|
||||
bool anull = ARRISNULL( a );
|
||||
bool bnull = ARRISNULL( b );
|
||||
|
||||
if ( anull || bnull )
|
||||
return ( anull && bnull ) ? TRUE : FALSE;
|
||||
|
||||
SORT( a );
|
||||
SORT( b );
|
||||
na = ARRSIZE( a );
|
||||
nb = ARRSIZE( b );
|
||||
da = ARRPTR( a );
|
||||
db = ARRPTR( b );
|
||||
|
||||
if ( na != nb ) return FALSE;
|
||||
|
||||
n = 0;
|
||||
for(n=0; n<na; n++)
|
||||
if ( da[n] != db[n] )
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* _int_overlap -- does a overlap b?
|
||||
*/
|
||||
bool
|
||||
_int_overlap ( ArrayType *a, ArrayType *b ) {
|
||||
if ( ARRISNULL( a ) || ARRISNULL( b ) ) return FALSE;
|
||||
|
||||
SORT(a);
|
||||
SORT(b);
|
||||
|
||||
return inner_int_overlap( a, b );
|
||||
}
|
||||
|
||||
bool
|
||||
inner_int_overlap ( ArrayType *a, ArrayType *b ) {
|
||||
int na , nb ;
|
||||
int i,j;
|
||||
int *da, *db;
|
||||
|
||||
if ( ARRISNULL( a ) || ARRISNULL( b ) ) return FALSE;
|
||||
|
||||
na = ARRSIZE( a );
|
||||
nb = ARRSIZE( b );
|
||||
da = ARRPTR( a );
|
||||
db = ARRPTR( b );
|
||||
|
||||
#ifdef GIST_DEBUG
|
||||
elog(NOTICE, "g_int_overlap");
|
||||
#endif
|
||||
|
||||
i = j = 0;
|
||||
while( i<na && j<nb )
|
||||
if ( da[i] < db[j] )
|
||||
i++;
|
||||
else if ( da[i] == db[j] )
|
||||
return TRUE;
|
||||
else
|
||||
j++;
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
** _int_union -- union of a and b as a new array.
**
** Either argument may be NULL/empty; inner_int_union() then works from
** the other one, and returns an empty array when both are.  Usable
** arguments are sorted on private copies, as _int_contains() does, so the
** caller's arrays are never reordered.
*/
ArrayType *
_int_union ( ArrayType *a, ArrayType *b ) {
	ArrayType  *an = (ArrayType *) NULL;
	ArrayType  *bn = (ArrayType *) NULL;
	ArrayType  *res;

	/* only usable arrays are copied; a NULL pointer is treated by
	   inner_int_union() exactly like any other NULL/empty input */
	if ( ! ARRISNULL( a ) ) {
		an = copy_intArrayType( a );
		SORT(an);
	}
	if ( ! ARRISNULL( b ) ) {
		bn = copy_intArrayType( b );
		SORT(bn);
	}

	/* the result is always separately allocated, never an or bn */
	res = inner_int_union( an, bn );

	if ( an ) pfree( an );
	if ( bn ) pfree( bn );
	return res;
}
|
||||
|
||||
/*
** inner_int_union -- union of two arrays as a newly allocated array.
**  - both NULL/empty: an empty array;
**  - one NULL/empty: a copy of the other;
**  - otherwise: the two arrays merged in ascending order (the merge
**    relies on both inputs being sorted).
** A result with more than one element is passed through _int_unique().
*/
ArrayType *
inner_int_union ( ArrayType *a, ArrayType *b ) {
	ArrayType  *r;
	int		   *da, *db, *dr;
	int			na, nb;
	int			i, j, k;

	if ( ARRISNULL( a ) && ARRISNULL( b ) ) return new_intArrayType(0);

	if ( ARRISNULL( a ) ) {
		r = copy_intArrayType( b );
	} else if ( ARRISNULL( b ) ) {
		r = copy_intArrayType( a );
	} else {
		na = ARRSIZE( a );
		nb = ARRSIZE( b );
		da = ARRPTR( a );
		db = ARRPTR( b );

		r = new_intArrayType( na + nb );
		dr = ARRPTR( r );

		/* merge; on equal heads the element of b is taken first */
		i = j = k = 0;
		while ( i < na && j < nb ) {
			if ( da[i] < db[j] )
				dr[k++] = da[i++];
			else
				dr[k++] = db[j++];
		}

		/* whatever is left of either input */
		for ( ; i < na; i++ )
			dr[k++] = da[i];
		for ( ; j < nb; j++ )
			dr[k++] = db[j];
	}

	if ( ARRSIZE(r) > 1 )
		r = _int_unique( r );

	return r;
}
|
||||
|
||||
|
||||
/*
** _int_inter -- intersection of a and b as a new array.
**
** Returns NULL when either array is NULL/empty or when inner_int_inter()
** finds no common element.  The arrays are sorted on private copies, as
** _int_contains() does, so the caller's arrays are never reordered.
*/
ArrayType *
_int_inter ( ArrayType *a, ArrayType *b ) {
	ArrayType  *an, *bn;
	ArrayType  *res;

	/* this function returns a pointer, so "nothing" is NULL */
	if ( ARRISNULL( a ) || ARRISNULL( b ) ) return (ArrayType *) NULL;

	an = copy_intArrayType( a );
	bn = copy_intArrayType( b );

	SORT(an);
	SORT(bn);

	/* the result is separately allocated (or NULL), never an or bn */
	res = inner_int_inter( an, bn );

	pfree( an );
	pfree( bn );
	return res;
}
|
||||
|
||||
/*
** inner_int_inter -- intersection of two arrays as a newly allocated array.
** Walks a and b in parallel (the comparison logic relies on ascending
** order) and collects each common value once.
** Returns NULL when either input is NULL/empty or nothing is shared.
*/
ArrayType *
inner_int_inter ( ArrayType *a, ArrayType *b ) {
	ArrayType * r;
	int  na , nb ;
	int *da, *db, *dr, *start;
	int i,j;

	if ( ARRISNULL( a ) || ARRISNULL( b ) ) return NULL;

	na = ARRSIZE( a );
	nb = ARRSIZE( b );
	da = ARRPTR( a );
	db = ARRPTR( b );
	/* the intersection cannot have more elements than the smaller input */
	r = new_intArrayType( min(na, nb) );
	start = dr = ARRPTR( r );

	i = j = 0;
	while( i<na && j<nb )
		if ( da[i] < db[j] )
			i++;
		else if ( da[i] == db[j] ) {
			/*
			** Skip a value equal to the one stored last.  "Nothing stored
			** yet" has to be tested on the output position: testing
			** i+j == 0 instead would read the int in front of the data
			** area whenever the first match is not at the very start of
			** both inputs, and could wrongly drop that first match.
			*/
			if ( dr == start || *(dr-1) != db[j] )
				*dr++ = db[j];
			i++; j++;
		} else
			j++;

	if ( dr == start ) {
		pfree( r );
		return NULL;
	}
	return resize_intArrayType(r, dr - start );
}
|
||||
|
||||
void
|
||||
rt__int_size(ArrayType *a, float *size)
|
||||
{
|
||||
if ( ARRISNULL( a ) )
|
||||
*size = 0.0;
|
||||
else
|
||||
*size = (float)ARRSIZE( a );
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*****************************************************************************
|
||||
* Miscellaneous operators and functions
|
||||
*****************************************************************************/
|
||||
|
||||
/*
** qsort() comparator that orders two ints ascending.  It uses comparisons
** rather than a subtraction so that it cannot overflow.
*/
static int
isort_cmp ( const void *x, const void *y ) {
	int		l = *(const int *) x;
	int		r = *(const int *) y;

	return ( l > r ) - ( l < r );
}

/*
** isort -- sort the len ints starting at a into ascending order, in place.
** Returns true when the array holds at least one repeated value (callers
** use this to decide whether _int_unique() is needed), false otherwise.
** Callers pass len >= 2; shorter input is left alone and reports false.
*/
bool isort ( int *a, int len ) {
	int		i;

	if ( len < 2 )
		return false;

	qsort( a, len, sizeof(int), isort_cmp );

	/* after sorting, repeated values are neighbours */
	for ( i = 1; i < len; i++ )
		if ( a[i-1] == a[i] )
			return true;

	return false;
}
|
||||
|
||||
ArrayType * new_intArrayType( int num ) {
|
||||
ArrayType * r;
|
||||
int nbytes = ARR_OVERHEAD( NDIM ) + sizeof(int)*num;
|
||||
|
||||
r = (ArrayType *) palloc( nbytes );
|
||||
if ( ! r )
|
||||
elog(ERROR, "Can't allocate memory for new array");
|
||||
MemSet(r, 0, nbytes);
|
||||
r->size = nbytes;
|
||||
r->ndim = NDIM;
|
||||
#ifndef PGSQL71
|
||||
SET_LO_FLAG(false, r);
|
||||
#endif
|
||||
*( (int*)ARR_DIMS(r) ) = num;
|
||||
*( (int*)ARR_LBOUND(r) ) = 1;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
** resize_intArrayType -- change a to hold num elements.
** Returns a itself when it already has num elements; otherwise the array
** is repalloc'ed, its size and dimension are updated, and the (possibly
** different) pointer is returned.
*/
ArrayType * resize_intArrayType( ArrayType * a, int num ) {
	int		total;

	if ( ARRSIZE(a) != num ) {
		total = ARR_OVERHEAD( NDIM ) + sizeof(int)*num;

		a = (ArrayType *) repalloc( a, total );
		if ( a == NULL )
			elog(ERROR, "Can't reallocate memory for new array");

		a->size = total;
		((int*)ARR_DIMS(a))[0] = num;
	}
	return a;
}
|
||||
|
||||
/*
** copy_intArrayType -- return a newly allocated copy of a, or NULL when a
** is a NULL pointer.
** NOTE(review): the buffer is sized by new_intArrayType() for an
** NDIM-dimensional array, while VARSIZE(a) bytes are copied into it; this
** presumably relies on callers passing only NDIM-dimensional arrays --
** confirm before using it on arbitrary input.
*/
ArrayType * copy_intArrayType( ArrayType * a ) {
	ArrayType  *dup = NULL;

	if ( a != NULL ) {
		dup = new_intArrayType( ARRSIZE(a) );
		memmove(dup, a, VARSIZE(a));
	}
	return dup;
}
|
||||
|
||||
/*
** internal_size -- number of values described by a compressed key.
** a holds len ints read as [low, high] pairs; every pair contributes
** high - low + 1, except that a pair whose low bound equals the preceding
** pair's high bound is not counted.
*/
int internal_size (int *a, int len ) {
	int		total = 0;
	int		lo;

	for ( lo = 0; lo < len; lo += 2 ) {
		/* do not count repeated range */
		if ( lo > 0 && a[lo] == a[lo-1] )
			continue;
		total += a[lo+1] - a[lo] + 1;
	}

	return total;
}
|
||||
|
||||
/* r is sorted and size of r > 1 */
|
||||
ArrayType * _int_unique( ArrayType * r ) {
|
||||
int *tmp, *dr, *data;
|
||||
int num = ARRSIZE(r);
|
||||
data = tmp = dr = ARRPTR( r );
|
||||
while( tmp - data < num )
|
||||
if ( *tmp != *dr )
|
||||
*(++dr) = *tmp++;
|
||||
else
|
||||
tmp++;
|
||||
return resize_intArrayType(r, dr + 1 - ARRPTR(r) );
|
||||
}
|
211
contrib/intarray/_int.sql.in
Normal file
211
contrib/intarray/_int.sql.in
Normal file
@ -0,0 +1,211 @@
|
||||
-- Create the functions, operators and GiST operator class for integer arrays (_int4)
|
||||
--
|
||||
BEGIN TRANSACTION;

--
-- Comparison predicates on integer arrays, implemented in C.
--
-- Every CREATE FUNCTION below is followed by an INSERT that attaches a
-- short description to each pg_proc row carrying that function name.
--

-- contains
CREATE FUNCTION _int_contains(_int4, _int4)
    RETURNS bool
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';

INSERT INTO pg_description (objoid, description)
    SELECT oid, 'contains'::text
      FROM pg_proc
     WHERE proname = '_int_contains'::name;

-- contained in
CREATE FUNCTION _int_contained(_int4, _int4)
    RETURNS bool
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';

INSERT INTO pg_description (objoid, description)
    SELECT oid, 'contained in'::text
      FROM pg_proc
     WHERE proname = '_int_contained'::name;

-- overlaps
CREATE FUNCTION _int_overlap(_int4, _int4)
    RETURNS bool
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';

INSERT INTO pg_description (objoid, description)
    SELECT oid, 'overlaps'::text
      FROM pg_proc
     WHERE proname = '_int_overlap'::name;

-- same as
CREATE FUNCTION _int_same(_int4, _int4)
    RETURNS bool
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';

INSERT INTO pg_description (objoid, description)
    SELECT oid, 'same as'::text
      FROM pg_proc
     WHERE proname = '_int_same'::name;

-- different
CREATE FUNCTION _int_different(_int4, _int4)
    RETURNS bool
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';

INSERT INTO pg_description (objoid, description)
    SELECT oid, 'different'::text
      FROM pg_proc
     WHERE proname = '_int_different'::name;
|
||||
|
||||
--
-- Support routines for indexing: both take two integer arrays and
-- return an integer array.
--

CREATE FUNCTION _int_union(_int4, _int4)
    RETURNS _int4
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';

CREATE FUNCTION _int_inter(_int4, _int4)
    RETURNS _int4
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';
|
||||
|
||||
--
-- OPERATORS
--
-- All operators take _int4 on both sides.
--

-- overlap; declared as its own commutator
CREATE OPERATOR && (
    LEFTARG    = _int4,
    RIGHTARG   = _int4,
    PROCEDURE  = _int_overlap,
    COMMUTATOR = '&&',
    RESTRICT   = contsel,
    JOIN       = contjoinsel
);

-- The '=' operator backed by _int_same is deliberately left disabled:
--CREATE OPERATOR = (
--	LEFTARG = _int4, RIGHTARG = _int4, PROCEDURE = _int_same,
--	COMMUTATOR = '=', NEGATOR = '<>',
--	RESTRICT = eqsel, JOIN = eqjoinsel,
--	SORT1 = '<', SORT2 = '<'
--);

-- inequality; names '=' as its negator
CREATE OPERATOR <> (
    LEFTARG    = _int4,
    RIGHTARG   = _int4,
    PROCEDURE  = _int_different,
    COMMUTATOR = '<>',
    NEGATOR    = '=',
    RESTRICT   = neqsel,
    JOIN       = neqjoinsel
);

-- '@' (contains) and '~' (contained) are declared as each other's commutator
CREATE OPERATOR @ (
    LEFTARG    = _int4,
    RIGHTARG   = _int4,
    PROCEDURE  = _int_contains,
    COMMUTATOR = '~',
    RESTRICT   = contsel,
    JOIN       = contjoinsel
);

CREATE OPERATOR ~ (
    LEFTARG    = _int4,
    RIGHTARG   = _int4,
    PROCEDURE  = _int_contained,
    COMMUTATOR = '@',
    RESTRICT   = contsel,
    JOIN       = contjoinsel
);
|
||||
|
||||
|
||||
--
-- GiST support methods (C functions).  They are wired to the operator
-- class through pg_amproc at the end of this script.
--

CREATE FUNCTION g_int_consistent(opaque, _int4, int4)
    RETURNS bool
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';

CREATE FUNCTION g_int_compress(opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';

CREATE FUNCTION g_int_decompress(opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';

CREATE FUNCTION g_int_penalty(opaque, opaque, opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';

CREATE FUNCTION g_int_picksplit(opaque, opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';

CREATE FUNCTION g_int_union(bytea, opaque)
    RETURNS _int4
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';

CREATE FUNCTION g_int_same(_int4, _int4, opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'c';
|
||||
|
||||
|
||||
--
-- Register gist__int_ops as the default operator class for _int4.
--
INSERT INTO pg_opclass (opcname, opcdeftype)
    SELECT 'gist__int_ops', oid
      FROM pg_type
     WHERE typname = '_int4';

--
-- Collect every operator whose left and right operands are both _int4
-- into a scratch table; it is dropped again once pg_amop is filled.
--
SELECT o.oid AS opoid, o.oprname
  INTO TABLE _int_ops_tmp
  FROM pg_operator o, pg_type t
 WHERE o.oprleft = t.oid
   AND o.oprright = t.oid
   AND t.typname = '_int4';

-- To check that the right operators were picked up:
-- SELECT * from _int_ops_tmp;

--
-- Generate the pg_amop entries from the scratch table, one per
-- operator / strategy number pair.
--

-- strategy 3: '&&' (_int_overlap)
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 3
      FROM pg_am am, pg_opclass opcl, _int_ops_tmp c
     WHERE am.amname = 'gist'
       AND opcl.opcname = 'gist__int_ops'
       AND c.oprname = '&&';

-- strategy 6: '=' (_int_same)
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 6
      FROM pg_am am, pg_opclass opcl, _int_ops_tmp c
     WHERE am.amname = 'gist'
       AND opcl.opcname = 'gist__int_ops'
       AND c.oprname = '=';

-- strategy 7: '@' (_int_contains)
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 7
      FROM pg_am am, pg_opclass opcl, _int_ops_tmp c
     WHERE am.amname = 'gist'
       AND opcl.opcname = 'gist__int_ops'
       AND c.oprname = '@';

-- strategy 8: '~' (_int_contained)
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 8
      FROM pg_am am, pg_opclass opcl, _int_ops_tmp c
     WHERE am.amname = 'gist'
       AND opcl.opcname = 'gist__int_ops'
       AND c.oprname = '~';

DROP TABLE _int_ops_tmp;
|
||||
|
||||
|
||||
--
-- Add the pg_amproc entries that bind the support functions to
-- gist__int_ops.  The amprocnum assigned to each function is specific
-- and must not be changed:
--   1 consistent, 2 union, 3 compress, 4 decompress,
--   5 penalty, 6 picksplit, 7 same
--

INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 1
      FROM pg_am am, pg_opclass opcl, pg_proc pro
     WHERE am.amname = 'gist'
       AND opcl.opcname = 'gist__int_ops'
       AND pro.proname = 'g_int_consistent';

INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 2
      FROM pg_am am, pg_opclass opcl, pg_proc pro
     WHERE am.amname = 'gist'
       AND opcl.opcname = 'gist__int_ops'
       AND pro.proname = 'g_int_union';

INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 3
      FROM pg_am am, pg_opclass opcl, pg_proc pro
     WHERE am.amname = 'gist'
       AND opcl.opcname = 'gist__int_ops'
       AND pro.proname = 'g_int_compress';

INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 4
      FROM pg_am am, pg_opclass opcl, pg_proc pro
     WHERE am.amname = 'gist'
       AND opcl.opcname = 'gist__int_ops'
       AND pro.proname = 'g_int_decompress';

INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 5
      FROM pg_am am, pg_opclass opcl, pg_proc pro
     WHERE am.amname = 'gist'
       AND opcl.opcname = 'gist__int_ops'
       AND pro.proname = 'g_int_penalty';

INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 6
      FROM pg_am am, pg_opclass opcl, pg_proc pro
     WHERE am.amname = 'gist'
       AND opcl.opcname = 'gist__int_ops'
       AND pro.proname = 'g_int_picksplit';

INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 7
      FROM pg_am am, pg_opclass opcl, pg_proc pro
     WHERE am.amname = 'gist'
       AND opcl.opcname = 'gist__int_ops'
       AND pro.proname = 'g_int_same';

END TRANSACTION;
|
104
contrib/intarray/bench/bench.pl
Executable file
104
contrib/intarray/bench/bench.pl
Executable file
@ -0,0 +1,104 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
use strict;
|
||||
# make sure we are in a sane environment.
|
||||
use DBI();
|
||||
use DBD::Pg();
|
||||
use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
|
||||
use Getopt::Std;
|
||||
|
||||
# Benchmark driver: builds one SELECT against the message table (and, for
# the non-RD-tree OR case, message_section_map), runs it -b times and
# reports the elapsed wall-clock time.

# -d, -b and -s take a value; all other options are plain flags.
my %opt;
getopts('d:b:s:veorauc', \%opt);

# -s is the only mandatory option; without it show the usage text and stop.
if ( !defined $opt{s} ) {
    print <<EOT;
Usage:
$0 -d DATABASE -s SECTIONS [-b NUMBER] [-v] [-e] [-o] [-r] [-a] [-u] [-c]
-d DATABASE -DATABASE
-b NUMBER -number of repeats
-s SECTIONS -sections, format sid1[,sid2[,sid3[...]]]]
-v -verbose (show SQL)
-e -show explain
-r -use RD-tree index
-a -AND section
-o -show output
-u -unique
-c -count

EOT
    exit;
}

# The database name defaults to '_int4' when -d is not given.
$opt{d} ||= '_int4';
my $dbi = DBI->connect( 'DBI:Pg:dbname=' . $opt{d} )
    or die "Can't connect to database '$opt{d}': $DBI::errstr\n";

my %table = ( message => 1 );    # tables for the FROM clause
my @where;                       # conditions, joined with AND below

# NOTE: the -s value is pasted into the SQL text verbatim, so it must be a
# trusted, comma-separated list of integers.
if ( $opt{a} ) {
    # AND semantics: the message must belong to every listed section.
    if ( $opt{r} ) {
        push @where, "message.sections @ '{$opt{s}}'";
    }
    else {
        foreach my $sid ( split( /[,\s]+/, $opt{s} ) ) {
            push @where, "EXISTS ( select message_section_map.mid from message_section_map where message.mid=message_section_map.mid and message_section_map.sid = $sid )";
        }
    }
}
else {
    # OR semantics: the message must belong to at least one listed section.
    if ( $opt{r} ) {
        push @where, "message.sections && '{$opt{s}}'";
    }
    else {
        $table{message_section_map} = 1;
        push @where, "message.mid = message_section_map.mid";
        push @where, "message_section_map.sid in ($opt{s})";
    }
}

# Select list: a count or the message ids, optionally de-duplicated.
my $outf;
if ( $opt{c} ) {
    $outf = ( $opt{u} ) ? 'count( distinct message.mid )' : 'count( message.mid )';
}
else {
    $outf = ( $opt{u} ) ? 'distinct( message.mid )' : 'message.mid';
}
my $sql = "select $outf from " . join( ', ', keys %table ) . " where " . join( ' AND ', @where ) . ';';

if ( $opt{v} ) {
    print "$sql\n";
}

if ( $opt{e} ) {
    $dbi->do("explain $sql");
}

# Number of runs: anything that is not a positive integer falls back to 1,
# which also keeps the per-run division below well defined.
my $repeats =
    ( defined $opt{b} && $opt{b} =~ /\A\d+\z/ && $opt{b} > 0 ) ? $opt{b} : 1;

my $t0 = [gettimeofday];
my @a;    # rows of the most recent run
foreach ( 1 .. $repeats ) {
    @a = exec_sql( $dbi, $sql );
}
my $elapsed = tv_interval( $t0, [gettimeofday] );

if ( $opt{o} ) {
    # The select list built above never contains "sections" (and has no
    # "mid" column in -c mode), so those fields print as empty strings.
    foreach my $row (@a) {
        print "$row->{mid}\t$row->{sections}\n";
    }
}

# "found" is the number of rows returned by the last run; with -c that is
# the single count row, not the counted value.
printf "total: %.02f sec; number: %d; for one: %.03f sec; found %d docs\n",
    $elapsed, $repeats, $elapsed / $repeats, scalar @a;
$dbi->disconnect;
|
||||
|
||||
# Run $sql on the database handle $dbi, passing @keys to execute() as bind
# values, and return all result rows as a list of hash references as
# produced by fetchrow_hashref.
#
# Dies with the driver's error text and the offending SQL when prepare or
# execute fails.
sub exec_sql {
    my ( $dbi, $sql, @keys ) = @_;

    my $sth = $dbi->prepare($sql)
        or die "prepare failed: " . $dbi->errstr . "\nSQL: $sql\n";
    $sth->execute(@keys)
        or die "execute failed: " . $sth->errstr . "\nSQL: $sql\n";

    my @rows;
    while ( defined( my $row = $sth->fetchrow_hashref ) ) {
        push @rows, $row;
    }
    $sth->finish;

    return @rows;
}
|
||||
|
73
contrib/intarray/bench/create_test.pl
Executable file
73
contrib/intarray/bench/create_test.pl
Executable file
@ -0,0 +1,73 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
use strict;
|
||||
# Writes a complete SQL script to STDOUT: table DDL, COPY blocks with
# generated data, index creation, VACUUM and two sanity counts.

# Table definitions.
print <<EOT;
create table message (
mid int not null,
sections int[]
);
create table message_section_map (
mid int not null,
sid int not null
);

EOT

# The rows are staged in two scratch files in the current directory; they
# are streamed back out by copytable() and removed at the very end.
open( my $msg_fh, '>', 'message.tmp' )
    or die "Can't write message.tmp: $!";
open( my $map_fh, '>', 'message_section_map.tmp' )
    or die "Can't write message_section_map.tmp: $!";

# Fixed seed: the order of the rand() calls below is kept as is.
srand( 1 );
#foreach my $i ( 1..1778 ) {
#foreach my $i ( 1..3443 ) {
#foreach my $i ( 1..5000 ) {
#foreach my $i ( 1..29362 ) {
#foreach my $i ( 1..33331 ) {
#foreach my $i ( 1..83268 ) {
foreach my $i ( 1..200000 ) {
    my @sect;
    if ( rand() < 0.7 ) {
        # 70%: exactly one section id in 0..99, skewed towards small values
        $sect[0] = int( (rand()**4)*100 );
    }
    else {
        # otherwise one to five draws, with repeated ids removed
        my %hash;
        @sect = grep { !$hash{$_}++ } map { int( (rand()**4)*100) } 0..( int(rand()*5) );
    }
    if ( !@sect || rand() < 0.1 ) {
        # roughly one message in ten gets a NULL sections column and no
        # rows in the map table
        print {$msg_fh} "$i\t\\N\n";
    }
    else {
        print {$msg_fh} "$i\t{".join(',',@sect)."}\n";
        print {$map_fh} "$i\t$_\n" foreach @sect;
    }
}
# Buffered write errors only surface at close, so check it.
close $map_fh or die "Can't close message_section_map.tmp: $!";
close $msg_fh or die "Can't close message.tmp: $!";

copytable('message');
copytable('message_section_map');

print <<EOT;

CREATE unique index message_key on message ( mid );
--CREATE unique index message_section_map_key1 on message_section_map ( mid, sid );
CREATE unique index message_section_map_key2 on message_section_map ( sid, mid );
CREATE INDEX message_rdtree_idx on message using gist ( sections ) with ( islossy );
VACUUM ANALYZE;

select count(*) from message;
select count(*) from message_section_map;



EOT


unlink 'message.tmp', 'message_section_map.tmp';
|
||||
|
||||
# Print a COPY block for table $t to the currently selected output handle:
# a "COPY $t from stdin;" header, the verbatim contents of the file
# "$t.tmp", and the terminating "\." line.
#
# The file is opened before anything is printed, so a missing or unreadable
# file dies with the file name and OS error without emitting a partial
# block.
sub copytable {
    my $t = shift;
    my $file = "$t.tmp";

    # Three-argument open: the name is always treated as a plain path to read.
    open( my $in, '<', $file ) or die "Can't read $file: $!";

    print "COPY $t from stdin;\n";
    while ( defined( my $line = <$in> ) ) {
        print $line;
    }
    close $in;
    print "\\.\n";
}
|
5000
contrib/intarray/data/test__int.data
Normal file
5000
contrib/intarray/data/test__int.data
Normal file
File diff suppressed because it is too large
Load Diff
19
contrib/intarray/expected/_int.out
Normal file
19
contrib/intarray/expected/_int.out
Normal file
@ -0,0 +1,19 @@
|
||||
--
|
||||
-- first, define the datatype. Turn off echoing so that expected file
|
||||
-- does not depend on contents of seg.sql.
|
||||
--
|
||||
\set ECHO none
|
||||
CREATE TABLE test__int( a int[] );
|
||||
\copy test__int from 'data/test__int.data'
|
||||
SELECT count(*) from test__int WHERE a && '{23,50}';
|
||||
count
|
||||
-------
|
||||
345
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) from test__int WHERE a @ '{23,50}';
|
||||
count
|
||||
-------
|
||||
12
|
||||
(1 row)
|
||||
|
15
contrib/intarray/sql/_int.sql
Normal file
15
contrib/intarray/sql/_int.sql
Normal file
@ -0,0 +1,15 @@
|
||||
--
|
||||
-- first, define the datatype. Turn off echoing so that expected file
|
||||
-- does not depend on contents of seg.sql.
|
||||
--
|
||||
\set ECHO none
|
||||
\i _int.sql
|
||||
\set ECHO all
|
||||
|
||||
CREATE TABLE test__int( a int[] );
|
||||
|
||||
\copy test__int from 'data/test__int.data'
|
||||
|
||||
SELECT count(*) from test__int WHERE a && '{23,50}';
|
||||
SELECT count(*) from test__int WHERE a @ '{23,50}';
|
||||
|
Loading…
x
Reference in New Issue
Block a user