diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h
index c21eebfc94..c6091a8dd6 100644
--- a/contrib/bloom/bloom.h
+++ b/contrib/bloom/bloom.h
@@ -79,18 +79,31 @@ typedef BloomPageOpaqueData *BloomPageOpaque;
#define BLOOM_HEAD_BLKNO (1) /* first data page */
/*
- * Maximum of bloom signature length in uint16. Actual value
- * is 512 bytes
+ * We store Bloom signatures as arrays of uint16 words.
*/
-#define MAX_BLOOM_LENGTH (256)
+typedef uint16 BloomSignatureWord;
+
+#define SIGNWORDBITS ((int) (BITS_PER_BYTE * sizeof(BloomSignatureWord)))
+
+/*
+ * Default and maximum Bloom signature length in bits.
+ */
+#define DEFAULT_BLOOM_LENGTH (5 * SIGNWORDBITS)
+#define MAX_BLOOM_LENGTH (256 * SIGNWORDBITS)
+
+/*
+ * Default and maximum signature bits generated per index key.
+ */
+#define DEFAULT_BLOOM_BITS 2
+#define MAX_BLOOM_BITS (MAX_BLOOM_LENGTH - 1)
/* Bloom index options */
typedef struct BloomOptions
{
int32 vl_len_; /* varlena header (do not touch directly!) */
- int bloomLength; /* length of signature in uint16 */
- int bitSize[INDEX_MAX_KEYS]; /* signature bits per index
- * key */
+ int bloomLength; /* length of signature in words (not bits!) */
+ int bitSize[INDEX_MAX_KEYS]; /* # of bits generated for each
+ * index key */
} BloomOptions;
/*
@@ -143,12 +156,10 @@ typedef struct BloomState
/*
* Tuples are very different from all other relations
*/
-typedef uint16 SignType;
-
typedef struct BloomTuple
{
ItemPointerData heapPtr;
- SignType sign[FLEXIBLE_ARRAY_MEMBER];
+ BloomSignatureWord sign[FLEXIBLE_ARRAY_MEMBER];
} BloomTuple;
#define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
@@ -156,7 +167,7 @@ typedef struct BloomTuple
/* Opaque data structure for bloom index scan */
typedef struct BloomScanOpaqueData
{
- SignType *sign; /* Scan signature */
+ BloomSignatureWord *sign; /* Scan signature */
BloomState state;
} BloomScanOpaqueData;
@@ -170,7 +181,7 @@ extern void BloomFillMetapage(Relation index, Page metaPage);
extern void BloomInitMetapage(Relation index);
extern void BloomInitPage(Page page, uint16 flags);
extern Buffer BloomNewBuffer(Relation index);
-extern void signValue(BloomState * state, SignType * sign, Datum value, int attno);
+extern void signValue(BloomState * state, BloomSignatureWord * sign, Datum value, int attno);
extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);
diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c
index aebf32a8d2..0c954dc8d5 100644
--- a/contrib/bloom/blscan.c
+++ b/contrib/bloom/blscan.c
@@ -93,7 +93,7 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
/* New search: have to calculate search signature */
ScanKey skey = scan->keyData;
- so->sign = palloc0(sizeof(SignType) * so->state.opts.bloomLength);
+ so->sign = palloc0(sizeof(BloomSignatureWord) * so->state.opts.bloomLength);
for (i = 0; i < scan->numberOfKeys; i++)
{
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
index 4a5b343dd0..876952f2d5 100644
--- a/contrib/bloom/blutils.c
+++ b/contrib/bloom/blutils.c
@@ -27,23 +27,26 @@
#include "bloom.h"
-/* Signature dealing macros */
-#define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType))
-#define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) )
-#define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) )
-#define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % BITSIGNTYPE ) )
-#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 )
+/* Signature dealing macros - note i is assumed to be of type int */
+#define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
+#define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) )
+#define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % SIGNWORDBITS ) )
+#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )
PG_FUNCTION_INFO_V1(blhandler);
-/* Kind of relation optioms for bloom index */
+/* Kind of relation options for bloom index */
static relopt_kind bl_relopt_kind;
+/* parse table for fillRelOptions */
+static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1];
static int32 myRand(void);
static void mySrand(uint32 seed);
/*
- * Module initialize function: initilized relation options.
+ * Module initialize function: initialize info about Bloom relation options.
+ *
+ * Note: keep this in sync with makeDefaultBloomOptions().
*/
void
_PG_init(void)
@@ -53,17 +56,46 @@ _PG_init(void)
bl_relopt_kind = add_reloption_kind();
+ /* Option for length of signature */
add_int_reloption(bl_relopt_kind, "length",
- "Length of signature in uint16 type", 5, 1, 256);
+ "Length of signature in bits",
+ DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH);
+ bl_relopt_tab[0].optname = "length";
+ bl_relopt_tab[0].opttype = RELOPT_TYPE_INT;
+ bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength);
+ /* Number of bits for each possible index column: col1, col2, ... */
for (i = 0; i < INDEX_MAX_KEYS; i++)
{
- snprintf(buf, 16, "col%d", i + 1);
+ snprintf(buf, sizeof(buf), "col%d", i + 1);
add_int_reloption(bl_relopt_kind, buf,
- "Number of bits for corresponding column", 2, 1, 2048);
+ "Number of bits generated for each index column",
+ DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS);
+ bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext,
+ buf);
+ bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT;
+ bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
}
}
+/*
+ * Construct a default set of Bloom options.
+ */
+static BloomOptions *
+makeDefaultBloomOptions(void)
+{
+ BloomOptions *opts;
+ int i;
+
+ opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
+ /* Convert DEFAULT_BLOOM_LENGTH from # of bits to # of words */
+ opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS;
+ for (i = 0; i < INDEX_MAX_KEYS; i++)
+ opts->bitSize[i] = DEFAULT_BLOOM_BITS;
+ SET_VARSIZE(opts, sizeof(BloomOptions));
+ return opts;
+}
+
/*
* Bloom handler function: return IndexAmRoutine with access method parameters
* and callbacks.
@@ -157,7 +189,7 @@ initBloomState(BloomState *state, Relation index)
memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
- sizeof(SignType) * state->opts.bloomLength;
+ sizeof(BloomSignatureWord) * state->opts.bloomLength;
}
/*
@@ -208,7 +240,7 @@ mySrand(uint32 seed)
* Add bits of given value to the signature.
*/
void
-signValue(BloomState *state, SignType *sign, Datum value, int attno)
+signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
{
uint32 hashVal;
int nBit,
@@ -231,8 +263,8 @@ signValue(BloomState *state, SignType *sign, Datum value, int attno)
for (j = 0; j < state->opts.bitSize[attno]; j++)
{
- /* prevent mutiple evaluation */
- nBit = myRand() % (state->opts.bloomLength * BITSIGNTYPE);
+ /* prevent multiple evaluation in SETBIT macro */
+ nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS);
SETBIT(sign, nBit);
}
}
@@ -361,39 +393,6 @@ BloomInitPage(Page page, uint16 flags)
opaque->bloom_page_id = BLOOM_PAGE_ID;
}
-/*
- * Adjust options of bloom index.
- *
- * This must produce default options when *opts is initially all-zero.
- */
-static void
-adjustBloomOptions(BloomOptions *opts)
-{
- int i;
-
- /* Default length of bloom filter is 5 of 16-bit integers */
- if (opts->bloomLength <= 0)
- opts->bloomLength = 5;
- else if (opts->bloomLength > MAX_BLOOM_LENGTH)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("length of bloom signature (%d) is greater than maximum %d",
- opts->bloomLength, MAX_BLOOM_LENGTH)));
-
- /* Check signature length */
- for (i = 0; i < INDEX_MAX_KEYS; i++)
- {
- /*
- * Zero and negative number of bits is meaningless. Also setting
- * more bits than signature have seems useless. Replace both cases
- * with 2 bits default.
- */
- if (opts->bitSize[i] <= 0
- || opts->bitSize[i] >= opts->bloomLength * sizeof(SignType) * BITS_PER_BYTE)
- opts->bitSize[i] = 2;
- }
-}
-
/*
* Fill in metapage for bloom index.
*/
@@ -405,14 +404,11 @@ BloomFillMetapage(Relation index, Page metaPage)
/*
* Choose the index's options. If reloptions have been assigned, use
- * those, otherwise create default options by applying adjustBloomOptions
- * to a zeroed chunk of memory. We apply adjustBloomOptions to existing
- * reloptions too, just out of paranoia; they should be valid already.
+ * those, otherwise create default options.
*/
opts = (BloomOptions *) index->rd_options;
if (!opts)
- opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
- adjustBloomOptions(opts);
+ opts = makeDefaultBloomOptions();
/*
* Initialize contents of meta page, including a copy of the options,
@@ -462,30 +458,15 @@ bloptions(Datum reloptions, bool validate)
relopt_value *options;
int numoptions;
BloomOptions *rdopts;
- relopt_parse_elt tab[INDEX_MAX_KEYS + 1];
- int i;
- char buf[16];
-
- /* Option for length of signature */
- tab[0].optname = "length";
- tab[0].opttype = RELOPT_TYPE_INT;
- tab[0].offset = offsetof(BloomOptions, bloomLength);
-
- /* Number of bits for each of possible columns: col1, col2, ... */
- for (i = 0; i < INDEX_MAX_KEYS; i++)
- {
- snprintf(buf, sizeof(buf), "col%d", i + 1);
- tab[i + 1].optname = pstrdup(buf);
- tab[i + 1].opttype = RELOPT_TYPE_INT;
- tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
- }
+ /* Parse the user-given reloptions */
options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
- validate, tab, INDEX_MAX_KEYS + 1);
+ validate, bl_relopt_tab, lengthof(bl_relopt_tab));
- adjustBloomOptions(rdopts);
+ /* Convert signature length from # of bits to # to words, rounding up */
+ rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS;
return (bytea *) rdopts;
}
diff --git a/doc/src/sgml/bloom.sgml b/doc/src/sgml/bloom.sgml
index 49cb066144..8667763c43 100644
--- a/doc/src/sgml/bloom.sgml
+++ b/doc/src/sgml/bloom.sgml
@@ -8,8 +8,8 @@
- bloom> is a module which implements an index access method. It comes
- as an example of custom access methods and generic WAL records usage. But it
+ bloom> is a module that implements an index access method. It comes
+ as an example of custom access methods and generic WAL record usage. But it
is also useful in itself.
@@ -22,8 +22,9 @@
allows fast exclusion of non-candidate tuples via signatures.
Since a signature is a lossy representation of all indexed attributes,
search results must be rechecked using heap information.
- The user can specify signature length (in uint16, default is 5) and the
- number of bits, which can be set per attribute (1 < colN < 2048).
+ The user can specify signature length in bits (default 80, maximum 4096)
+ and the number of bits generated for each index column (default 2,
+ maximum 4095).
@@ -51,17 +52,17 @@
length>
- Length of signature in uint16 type values
+ Length of signature in bits
- col1 — col16>
+ col1 — col32>
- Number of bits for corresponding column
+ Number of bits generated for each index column
@@ -77,12 +78,12 @@
CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
- WITH (length=5, col1=2, col2=2, col3=4);
+ WITH (length=80, col1=2, col2=2, col3=4);
Here, we created a bloom index with a signature length of 80 bits,
- and attributes i1 and i2 mapped to 2 bits, and attribute i3 to 4 bits.
+ and attributes i1 and i2 mapped to 2 bits, and attribute i3 mapped to 4 bits.