Require empty Bitmapsets to be represented as NULL.
When I designed the Bitmapset module, I set things up so that an empty Bitmapset could be represented either by a NULL pointer, or by an allocated object all of whose bits are zero. I've recently come to the conclusion that that was a bad idea and we should instead have a convention like the longstanding invariant for Lists, whereby an empty list is represented by NIL and nothing else. To do this, we need to fix bms_intersect, bms_difference, and a couple of other functions to check for having produced an empty result; but then we can replace bms_is_empty(a) by a simple "a == NULL" test. This is very likely a (marginal) win performance-wise, because we call bms_is_empty many more times than those other functions put together. However, the real reason to do it is that we have various places that have hand-implemented a rule about "this Bitmapset variable must be exactly NULL if empty", so that they can use checks-for-null in place of bms_is_empty calls in particularly hot code paths. That is a really fragile, mistake-prone way to do things, and I'm surprised that we've seldom been bitten by it. It's not well documented at all which variables have this property, so you can't readily tell which code might be violating those conventions. By making the convention universal, we can eliminate a subtle source of bugs. Patch by me; thanks to Nathan Bossart and Richard Guo for review. Discussion: https://postgr.es/m/1159933.1677621588@sss.pgh.pa.us
This commit is contained in:
parent
141225b251
commit
00b41463c2
@ -5,10 +5,8 @@
|
||||
*
|
||||
* A bitmap set can represent any set of nonnegative integers, although
|
||||
* it is mainly intended for sets where the maximum value is not large,
|
||||
* say at most a few hundred. By convention, a NULL pointer is always
|
||||
* accepted by all operations to represent the empty set. (But beware
|
||||
* that this is not the only representation of the empty set. Use
|
||||
* bms_is_empty() in preference to testing for NULL.)
|
||||
* say at most a few hundred. By convention, we always represent the
|
||||
* empty set by a NULL pointer.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2003-2023, PostgreSQL Global Development Group
|
||||
@ -66,6 +64,8 @@
|
||||
#error "invalid BITS_PER_BITMAPWORD"
|
||||
#endif
|
||||
|
||||
static bool bms_is_empty_internal(const Bitmapset *a);
|
||||
|
||||
|
||||
/*
|
||||
* bms_copy - make a palloc'd copy of a bitmapset
|
||||
@ -104,10 +104,10 @@ bms_equal(const Bitmapset *a, const Bitmapset *b)
|
||||
{
|
||||
if (b == NULL)
|
||||
return true;
|
||||
return bms_is_empty(b);
|
||||
return false;
|
||||
}
|
||||
else if (b == NULL)
|
||||
return bms_is_empty(a);
|
||||
return false;
|
||||
/* Identify shorter and longer input */
|
||||
if (a->nwords <= b->nwords)
|
||||
{
|
||||
@ -151,9 +151,9 @@ bms_compare(const Bitmapset *a, const Bitmapset *b)
|
||||
|
||||
/* Handle cases where either input is NULL */
|
||||
if (a == NULL)
|
||||
return bms_is_empty(b) ? 0 : -1;
|
||||
return (b == NULL) ? 0 : -1;
|
||||
else if (b == NULL)
|
||||
return bms_is_empty(a) ? 0 : +1;
|
||||
return +1;
|
||||
/* Handle cases where one input is longer than the other */
|
||||
shortlen = Min(a->nwords, b->nwords);
|
||||
for (i = shortlen; i < a->nwords; i++)
|
||||
@ -282,6 +282,12 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b)
|
||||
resultlen = result->nwords;
|
||||
for (i = 0; i < resultlen; i++)
|
||||
result->words[i] &= other->words[i];
|
||||
/* If we computed an empty result, we must return NULL */
|
||||
if (bms_is_empty_internal(result))
|
||||
{
|
||||
pfree(result);
|
||||
return NULL;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -300,12 +306,22 @@ bms_difference(const Bitmapset *a, const Bitmapset *b)
|
||||
return NULL;
|
||||
if (b == NULL)
|
||||
return bms_copy(a);
|
||||
|
||||
/*
|
||||
* In Postgres' usage, an empty result is a very common case, so it's
|
||||
* worth optimizing for that by testing bms_nonempty_difference(). This
|
||||
* saves us a palloc/pfree cycle compared to checking after-the-fact.
|
||||
*/
|
||||
if (!bms_nonempty_difference(a, b))
|
||||
return NULL;
|
||||
|
||||
/* Copy the left input */
|
||||
result = bms_copy(a);
|
||||
/* And remove b's bits from result */
|
||||
shortlen = Min(a->nwords, b->nwords);
|
||||
for (i = 0; i < shortlen; i++)
|
||||
result->words[i] &= ~b->words[i];
|
||||
/* Need not check for empty result, since we handled that case above */
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -323,7 +339,7 @@ bms_is_subset(const Bitmapset *a, const Bitmapset *b)
|
||||
if (a == NULL)
|
||||
return true; /* empty set is a subset of anything */
|
||||
if (b == NULL)
|
||||
return bms_is_empty(a);
|
||||
return false;
|
||||
/* Check common words */
|
||||
shortlen = Min(a->nwords, b->nwords);
|
||||
for (i = 0; i < shortlen; i++)
|
||||
@ -362,10 +378,10 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
|
||||
{
|
||||
if (b == NULL)
|
||||
return BMS_EQUAL;
|
||||
return bms_is_empty(b) ? BMS_EQUAL : BMS_SUBSET1;
|
||||
return BMS_SUBSET1;
|
||||
}
|
||||
if (b == NULL)
|
||||
return bms_is_empty(a) ? BMS_EQUAL : BMS_SUBSET2;
|
||||
return BMS_SUBSET2;
|
||||
/* Check common words */
|
||||
result = BMS_EQUAL; /* status so far */
|
||||
shortlen = Min(a->nwords, b->nwords);
|
||||
@ -554,7 +570,7 @@ bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
|
||||
if (a == NULL)
|
||||
return false;
|
||||
if (b == NULL)
|
||||
return !bms_is_empty(a);
|
||||
return true;
|
||||
/* Check words in common */
|
||||
shortlen = Min(a->nwords, b->nwords);
|
||||
for (i = 0; i < shortlen; i++)
|
||||
@ -696,18 +712,18 @@ bms_membership(const Bitmapset *a)
|
||||
}
|
||||
|
||||
/*
|
||||
* bms_is_empty - is a set empty?
|
||||
* bms_is_empty_internal - is a set empty?
|
||||
*
|
||||
* This is even faster than bms_membership().
|
||||
* This is now used only locally, to detect cases where a function has
|
||||
* computed an empty set that we must now get rid of. Hence, we can
|
||||
* assume the input isn't NULL.
|
||||
*/
|
||||
bool
|
||||
bms_is_empty(const Bitmapset *a)
|
||||
static bool
|
||||
bms_is_empty_internal(const Bitmapset *a)
|
||||
{
|
||||
int nwords;
|
||||
int wordnum;
|
||||
|
||||
if (a == NULL)
|
||||
return true;
|
||||
nwords = a->nwords;
|
||||
for (wordnum = 0; wordnum < nwords; wordnum++)
|
||||
{
|
||||
@ -786,6 +802,12 @@ bms_del_member(Bitmapset *a, int x)
|
||||
bitnum = BITNUM(x);
|
||||
if (wordnum < a->nwords)
|
||||
a->words[wordnum] &= ~((bitmapword) 1 << bitnum);
|
||||
/* If we computed an empty result, we must return NULL */
|
||||
if (bms_is_empty_internal(a))
|
||||
{
|
||||
pfree(a);
|
||||
return NULL;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
@ -922,6 +944,12 @@ bms_int_members(Bitmapset *a, const Bitmapset *b)
|
||||
a->words[i] &= b->words[i];
|
||||
for (; i < a->nwords; i++)
|
||||
a->words[i] = 0;
|
||||
/* If we computed an empty result, we must return NULL */
|
||||
if (bms_is_empty_internal(a))
|
||||
{
|
||||
pfree(a);
|
||||
return NULL;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
@ -943,6 +971,12 @@ bms_del_members(Bitmapset *a, const Bitmapset *b)
|
||||
shortlen = Min(a->nwords, b->nwords);
|
||||
for (i = 0; i < shortlen; i++)
|
||||
a->words[i] &= ~b->words[i];
|
||||
/* If we computed an empty result, we must return NULL */
|
||||
if (bms_is_empty_internal(a))
|
||||
{
|
||||
pfree(a);
|
||||
return NULL;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
|
@ -5,10 +5,8 @@
|
||||
*
|
||||
* A bitmap set can represent any set of nonnegative integers, although
|
||||
* it is mainly intended for sets where the maximum value is not large,
|
||||
* say at most a few hundred. By convention, a NULL pointer is always
|
||||
* accepted by all operations to represent the empty set. (But beware
|
||||
* that this is not the only representation of the empty set. Use
|
||||
* bms_is_empty() in preference to testing for NULL.)
|
||||
* say at most a few hundred. By convention, we always represent the
|
||||
* empty set by a NULL pointer.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2003-2023, PostgreSQL Global Development Group
|
||||
@ -102,7 +100,9 @@ extern int bms_num_members(const Bitmapset *a);
|
||||
|
||||
/* optimized tests when we don't need to know exact membership count: */
|
||||
extern BMS_Membership bms_membership(const Bitmapset *a);
|
||||
extern bool bms_is_empty(const Bitmapset *a);
|
||||
|
||||
/* NULL is now the only allowed representation of an empty bitmapset */
|
||||
#define bms_is_empty(a) ((a) == NULL)
|
||||
|
||||
/* these routines recycle (modify or free) their non-const inputs: */
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user