diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h index c21eebfc94..c6091a8dd6 100644 --- a/contrib/bloom/bloom.h +++ b/contrib/bloom/bloom.h @@ -79,18 +79,31 @@ typedef BloomPageOpaqueData *BloomPageOpaque; #define BLOOM_HEAD_BLKNO (1) /* first data page */ /* - * Maximum of bloom signature length in uint16. Actual value - * is 512 bytes + * We store Bloom signatures as arrays of uint16 words. */ -#define MAX_BLOOM_LENGTH (256) +typedef uint16 BloomSignatureWord; + +#define SIGNWORDBITS ((int) (BITS_PER_BYTE * sizeof(BloomSignatureWord))) + +/* + * Default and maximum Bloom signature length in bits. + */ +#define DEFAULT_BLOOM_LENGTH (5 * SIGNWORDBITS) +#define MAX_BLOOM_LENGTH (256 * SIGNWORDBITS) + +/* + * Default and maximum signature bits generated per index key. + */ +#define DEFAULT_BLOOM_BITS 2 +#define MAX_BLOOM_BITS (MAX_BLOOM_LENGTH - 1) /* Bloom index options */ typedef struct BloomOptions { int32 vl_len_; /* varlena header (do not touch directly!) */ - int bloomLength; /* length of signature in uint16 */ - int bitSize[INDEX_MAX_KEYS]; /* signature bits per index - * key */ + int bloomLength; /* length of signature in words (not bits!) 
*/ + int bitSize[INDEX_MAX_KEYS]; /* # of bits generated for each + * index key */ } BloomOptions; /* @@ -143,12 +156,10 @@ typedef struct BloomState /* * Tuples are very different from all other relations */ -typedef uint16 SignType; - typedef struct BloomTuple { ItemPointerData heapPtr; - SignType sign[FLEXIBLE_ARRAY_MEMBER]; + BloomSignatureWord sign[FLEXIBLE_ARRAY_MEMBER]; } BloomTuple; #define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign) @@ -156,7 +167,7 @@ typedef struct BloomTuple /* Opaque data structure for bloom index scan */ typedef struct BloomScanOpaqueData { - SignType *sign; /* Scan signature */ + BloomSignatureWord *sign; /* Scan signature */ BloomState state; } BloomScanOpaqueData; @@ -170,7 +181,7 @@ extern void BloomFillMetapage(Relation index, Page metaPage); extern void BloomInitMetapage(Relation index); extern void BloomInitPage(Page page, uint16 flags); extern Buffer BloomNewBuffer(Relation index); -extern void signValue(BloomState * state, SignType * sign, Datum value, int attno); +extern void signValue(BloomState * state, BloomSignatureWord * sign, Datum value, int attno); extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull); extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple); diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c index aebf32a8d2..0c954dc8d5 100644 --- a/contrib/bloom/blscan.c +++ b/contrib/bloom/blscan.c @@ -93,7 +93,7 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) /* New search: have to calculate search signature */ ScanKey skey = scan->keyData; - so->sign = palloc0(sizeof(SignType) * so->state.opts.bloomLength); + so->sign = palloc0(sizeof(BloomSignatureWord) * so->state.opts.bloomLength); for (i = 0; i < scan->numberOfKeys; i++) { diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index 4a5b343dd0..876952f2d5 100644 --- a/contrib/bloom/blutils.c +++ b/contrib/bloom/blutils.c @@ -27,23 +27,26 @@ #include "bloom.h" -/* 
Signature dealing macros */ -#define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType)) -#define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) ) -#define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) ) -#define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % BITSIGNTYPE ) ) -#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 ) +/* Signature dealing macros - note i is assumed to be of type int */ +#define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) ) +#define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) ) +#define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % SIGNWORDBITS ) ) +#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 ) PG_FUNCTION_INFO_V1(blhandler); -/* Kind of relation optioms for bloom index */ +/* Kind of relation options for bloom index */ static relopt_kind bl_relopt_kind; +/* parse table for fillRelOptions */ +static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1]; static int32 myRand(void); static void mySrand(uint32 seed); /* - * Module initialize function: initilized relation options. + * Module initialize function: initialize info about Bloom relation options. + * + * Note: keep this in sync with makeDefaultBloomOptions(). */ void _PG_init(void) @@ -53,17 +56,46 @@ _PG_init(void) bl_relopt_kind = add_reloption_kind(); + /* Option for length of signature */ add_int_reloption(bl_relopt_kind, "length", - "Length of signature in uint16 type", 5, 1, 256); + "Length of signature in bits", + DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH); + bl_relopt_tab[0].optname = "length"; + bl_relopt_tab[0].opttype = RELOPT_TYPE_INT; + bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength); + /* Number of bits for each possible index column: col1, col2, ... 
*/ for (i = 0; i < INDEX_MAX_KEYS; i++) { - snprintf(buf, 16, "col%d", i + 1); + snprintf(buf, sizeof(buf), "col%d", i + 1); add_int_reloption(bl_relopt_kind, buf, - "Number of bits for corresponding column", 2, 1, 2048); + "Number of bits generated for each index column", + DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS); + bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext, + buf); + bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT; + bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]); } } +/* + * Construct a default set of Bloom options. + */ +static BloomOptions * +makeDefaultBloomOptions(void) +{ + BloomOptions *opts; + int i; + + opts = (BloomOptions *) palloc0(sizeof(BloomOptions)); + /* Convert DEFAULT_BLOOM_LENGTH from # of bits to # of words */ + opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS; + for (i = 0; i < INDEX_MAX_KEYS; i++) + opts->bitSize[i] = DEFAULT_BLOOM_BITS; + SET_VARSIZE(opts, sizeof(BloomOptions)); + return opts; +} + /* * Bloom handler function: return IndexAmRoutine with access method parameters * and callbacks. @@ -157,7 +189,7 @@ initBloomState(BloomState *state, Relation index) memcpy(&state->opts, index->rd_amcache, sizeof(state->opts)); state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ + - sizeof(SignType) * state->opts.bloomLength; + sizeof(BloomSignatureWord) * state->opts.bloomLength; } /* @@ -208,7 +240,7 @@ mySrand(uint32 seed) * Add bits of given value to the signature. 
*/ void -signValue(BloomState *state, SignType *sign, Datum value, int attno) +signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno) { uint32 hashVal; int nBit, @@ -231,8 +263,8 @@ signValue(BloomState *state, SignType *sign, Datum value, int attno) for (j = 0; j < state->opts.bitSize[attno]; j++) { - /* prevent mutiple evaluation */ - nBit = myRand() % (state->opts.bloomLength * BITSIGNTYPE); + /* prevent multiple evaluation in SETBIT macro */ + nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS); SETBIT(sign, nBit); } } @@ -361,39 +393,6 @@ BloomInitPage(Page page, uint16 flags) opaque->bloom_page_id = BLOOM_PAGE_ID; } -/* - * Adjust options of bloom index. - * - * This must produce default options when *opts is initially all-zero. - */ -static void -adjustBloomOptions(BloomOptions *opts) -{ - int i; - - /* Default length of bloom filter is 5 of 16-bit integers */ - if (opts->bloomLength <= 0) - opts->bloomLength = 5; - else if (opts->bloomLength > MAX_BLOOM_LENGTH) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("length of bloom signature (%d) is greater than maximum %d", - opts->bloomLength, MAX_BLOOM_LENGTH))); - - /* Check signature length */ - for (i = 0; i < INDEX_MAX_KEYS; i++) - { - /* - * Zero and negative number of bits is meaningless. Also setting - * more bits than signature have seems useless. Replace both cases - * with 2 bits default. - */ - if (opts->bitSize[i] <= 0 - || opts->bitSize[i] >= opts->bloomLength * sizeof(SignType) * BITS_PER_BYTE) - opts->bitSize[i] = 2; - } -} - /* * Fill in metapage for bloom index. */ @@ -405,14 +404,11 @@ BloomFillMetapage(Relation index, Page metaPage) /* * Choose the index's options. If reloptions have been assigned, use - * those, otherwise create default options by applying adjustBloomOptions - * to a zeroed chunk of memory. We apply adjustBloomOptions to existing - * reloptions too, just out of paranoia; they should be valid already. 
+ * those, otherwise create default options. */ opts = (BloomOptions *) index->rd_options; if (!opts) - opts = (BloomOptions *) palloc0(sizeof(BloomOptions)); - adjustBloomOptions(opts); + opts = makeDefaultBloomOptions(); /* * Initialize contents of meta page, including a copy of the options, @@ -462,30 +458,15 @@ bloptions(Datum reloptions, bool validate) relopt_value *options; int numoptions; BloomOptions *rdopts; - relopt_parse_elt tab[INDEX_MAX_KEYS + 1]; - int i; - char buf[16]; - - /* Option for length of signature */ - tab[0].optname = "length"; - tab[0].opttype = RELOPT_TYPE_INT; - tab[0].offset = offsetof(BloomOptions, bloomLength); - - /* Number of bits for each of possible columns: col1, col2, ... */ - for (i = 0; i < INDEX_MAX_KEYS; i++) - { - snprintf(buf, sizeof(buf), "col%d", i + 1); - tab[i + 1].optname = pstrdup(buf); - tab[i + 1].opttype = RELOPT_TYPE_INT; - tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]); - } + /* Parse the user-given reloptions */ options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions); rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions); fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions, - validate, tab, INDEX_MAX_KEYS + 1); + validate, bl_relopt_tab, lengthof(bl_relopt_tab)); - adjustBloomOptions(rdopts); + /* Convert signature length from # of bits to # of words, rounding up */ + rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS; return (bytea *) rdopts; } diff --git a/doc/src/sgml/bloom.sgml b/doc/src/sgml/bloom.sgml index 49cb066144..8667763c43 100644 --- a/doc/src/sgml/bloom.sgml +++ b/doc/src/sgml/bloom.sgml @@ -8,8 +8,8 @@ - bloom is a module which implements an index access method. It comes - as an example of custom access methods and generic WAL records usage. But it + bloom is a module that implements an index access method. It comes + as an example of custom access methods and generic WAL record usage. 
But it is also useful in itself. @@ -22,8 +22,9 @@ allows fast exclusion of non-candidate tuples via signatures. Since a signature is a lossy representation of all indexed attributes, search results must be rechecked using heap information. - The user can specify signature length (in uint16, default is 5) and the - number of bits, which can be set per attribute (1 < colN < 2048). + The user can specify signature length in bits (default 80, maximum 4096) + and the number of bits generated for each index column (default 2, + maximum 4095). @@ -51,17 +52,17 @@ length - Length of signature in uint16 type values + Length of signature in bits - col1 — col16 + col1 — col32 - Number of bits for corresponding column + Number of bits generated for each index column @@ -77,12 +78,12 @@ CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3) - WITH (length=5, col1=2, col2=2, col3=4); + WITH (length=80, col1=2, col2=2, col3=4); Here, we created a bloom index with a signature length of 80 bits, - and attributes i1 and i2 mapped to 2 bits, and attribute i3 to 4 bits. + and attributes i1 and i2 mapped to 2 bits, and attribute i3 mapped to 4 bits.